summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jens Arnold <amiconn@rockbox.org> 2006-08-24 06:23:03 +0000
committer Jens Arnold <amiconn@rockbox.org> 2006-08-24 06:23:03 +0000
commit 92fe88a3a7318903b1eb377f92bea750d8f66068 (patch)
tree 47a7c2c529acb229318cb84cdeb15dd1915b5e9e
parent 9b981429136799c0cc666b107242252ccc0c2bc4 (diff)
download rockbox-92fe88a3a7318903b1eb377f92bea750d8f66068.zip
rockbox-92fe88a3a7318903b1eb377f92bea750d8f66068.tar.gz
rockbox-92fe88a3a7318903b1eb377f92bea750d8f66068.tar.bz2
rockbox-92fe88a3a7318903b1eb377f92bea750d8f66068.tar.xz
X5: Applied tweaks from the H300 lcd_yuv_blit to the X5 version. Smaller code and ca. 1% speedup.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@10729 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r-- firmware/target/coldfire/iaudio/x5/lcd-as-x5.S 476
1 files changed, 207 insertions, 269 deletions
diff --git a/firmware/target/coldfire/iaudio/x5/lcd-as-x5.S b/firmware/target/coldfire/iaudio/x5/lcd-as-x5.S
index 1a527bb..7c89fb9 100644
--- a/firmware/target/coldfire/iaudio/x5/lcd-as-x5.S
+++ b/firmware/target/coldfire/iaudio/x5/lcd-as-x5.S
@@ -40,316 +40,254 @@
* |G| = |1.000000 -0.334136 -0.714136| |Pb|
* |B| |1.000000 1.772000 0.000000| |Pr|
* Scaled, normalized, rounded and tweaked to yield RGB 666:
- * |R| |74 0 102| |Y' - 16| / 256
- * |G| = |74 -25 -52| |Cb - 128| / 256
- * |B| |74 129 0| |Cr - 128| / 256
+ * |R| |74 0 101| |Y' - 16| / 256
+ * |G| = |74 -24 -51| |Cb - 128| / 256
+ * |B| |74 128 0| |Cr - 128| / 256
*/
.align 2
.global lcd_write_yuv420_lines
.type lcd_write_yuv420_lines,@function
lcd_write_yuv420_lines:
/* Converts two lines of YUV 4:2:0 data to RGB 666 and writes every pixel to
 * the LCD data port as two 16-bit words.
 *
 * Stack arguments, in order (as named by the load below): Y data, Cb data,
 * guv storage, Cr data, width.
 *
 * First pass (.yuv_line_loop1): for each pair of Y samples one Cb and one Cr
 * byte are read, turned into the scaled chroma terms bu, guv and rv, and
 * stored back as signed bytes over the Cb, guv and Cr buffers.
 * Second pass (.yuv_line_loop2): the stored terms are read back and sign
 * extended, so no multiplies are needed for chroma on the second line.
 *
 * Each iteration consumes two Y samples, so width is presumably even -
 * TODO confirm against callers.
 *
 * The new version uses %d0 as scratch and %d1-%d6, %a0-%a5 as working
 * registers; %d2-%d6/%a2-%a5 are saved on entry and restored before rts.
 */
- lea.l (-40,%sp),%sp /* free up some registers */
- movem.l %d2-%d7/%a2-%a5,(%sp)
+ lea.l (-36,%sp),%sp /* free up some registers */
+ movem.l %d2-%d6/%a2-%a5,(%sp) /* 9 registers * 4 bytes = 36 */
- lea.l 0xf0008002,%a0 /* LCD data port */
- move.l (40+4,%sp),%a1 /* Y data */
- move.l (40+8,%sp),%a2 /* Cb data */
- move.l (40+12,%sp),%a3 /* guv storage */
- move.l (40+16,%sp),%a4 /* Cr data */
- move.l (40+20,%sp),%d0 /* width */
- lea.l (%a1,%d0.l),%a5 /* end address */
+ lea.l 0xf0008002,%a0 /* LCD data port */
+ movem.l (36+4,%sp),%a1-%a5 /* Y data, Cb data, guv storage, Cr data, width */
+ lea.l (%a1,%a5),%a5 /* end address */
.yuv_line_loop1:
/** Write first pixel **/
- clr.l %d1 /* get y component */
- move.b (%a1)+,%d1
- subq.l #8,%d1
- subq.l #8,%d1
- moveq.l #74,%d6
- muls.w %d6,%d1
- asr.l #8,%d1
-
- clr %d2 /* get bu component */
- move.b (%a2),%d2
- moveq.l #-128,%d6
- add.l %d6,%d2
- move.l %d2,%d3 /* %d3 = cb component for guv */
- move.w #129,%d6
- muls.w %d6,%d2
+ clr.l %d1 /* get bu component */
+ move.b (%a2),%d1
+ clr.l %d3 /* get rv component */
+ move.b (%a4),%d3
+ moveq.l #-128,%d0
+ add.l %d0,%d1
+ add.l %d0,%d3
+
+ move.l %d1,%d2 /* %d2 = cb component for guv */
+ asr.l #1,%d1 /* %d1 = 128 * (Cb - 128) / 256 */
+ move.b %d1,(%a2)+ /* save bu for next line */
+ moveq.l #-24,%d0 /* multiply first term of guv */
+ muls.w %d0,%d2
+ moveq.l #-51,%d0 /* multiply second term of guv */
+ muls.w %d3,%d0 /* %d0 = -51 * (Cr - 128); %d3 stays intact for rv */
+ add.l %d0,%d2
asr.l #8,%d2
- move.b %d2,(%a2)+ /* save bu for next line */
-
- moveq.l #-25,%d6 /* multiply first term of guv */
- muls.w %d6,%d3
-
- clr %d4 /* get rv component */
- move.b (%a4),%d4
- moveq.l #-128,%d6
- add.l %d6,%d4
- move.l %d4,%d7 /* %d7 = cr component for guv */
- moveq.l #102,%d6
- muls.w %d6,%d4
+ move.b %d2,(%a3)+ /* save guv for next line */
+ moveq.l #101,%d0
+ muls.w %d0,%d3
+ asr.l #8,%d3
+ move.b %d3,(%a4)+ /* save rv for next line */
+
+ clr.l %d4 /* get y component */
+ move.b (%a1)+,%d4
+ moveq.l #74,%d0
+ muls.w %d0,%d4
asr.l #8,%d4
- move.b %d4,(%a4)+ /* save rv for next line */
+ subq.l #4,%d4 /* presumably stands in for the (Y' - 16) offset: 16 * 74 / 256 = 4.625 */
+ move.l %d4,%d5
+ move.l %d4,%d6
+ /* : %d4,%d5,%d6 = Y, %d1 = bu, %d2 = guv, %d3 = rv */
- moveq.l #-52,%d6 /* multiply second term of guv */
- muls.w %d6,%d7
- add.l %d7,%d3
- asr.l #8,%d3
- move.b %d3,(%a3)+ /* save guv for next line */
- /* : %d1 = Y, %d2 = bu, %d3 = guv, %d4 = rv */
-
- move.l %d1,%d5 /* get r */
- add.l %d4,%d5
- move.l %d1,%d6 /* get g */
- add.l %d3,%d6
- move.l %d1,%d7 /* get b */
- add.l %d2,%d7
-
- move.l %d7,%d1 /* is clamping needed? */
- or.l %d6,%d1
- or.l %d5,%d1
- asr.l #6,%d1
+ add.l %d3,%d4 /* get r */
+ add.l %d2,%d5 /* get g */
+ add.l %d1,%d6 /* get b */
+
+ move.l %d6,%d0 /* is clamping needed? */
+ or.l %d5,%d0
+ or.l %d4,%d0
+ asr.l #6,%d0
beq.b .yuv_no_clamp1 /* values in range: skip clamping */
- bpl.b .yuv_r63_test1 /* no negative values: skip to high bounds checks */
-.yuv_r0_test1:
- clr.l %d1 /* check for any values < 0 */
- cmp.l %d1,%d5
- bgt.b .yuv_g0_test1
- clr.l %d5
-.yuv_g0_test1:
- cmp.l %d1,%d6
- bgt.b .yuv_b0_test1
- clr.l %d6
-.yuv_b0_test1:
- cmp.l %d1,%d7
- bgt.b .yuv_r63_test1
- clr.l %d7
-.yuv_r63_test1: /* check for any values > 63 */
- moveq.l #63,%d1
- cmp.l %d1,%d5
- blt.b .yuv_g63_test1
- move.l %d1,%d5
-.yuv_g63_test1:
- cmp.l %d1,%d6
- blt.b .yuv_b63_test1
- move.l %d1,%d6
-.yuv_b63_test1:
- cmp.l %d1,%d7
- blt.b .yuv_no_clamp1
- move.l %d1,%d7
+ moveq.l #63, %d0 /* clamp limit and mask; same pattern for every component below */
+ cmp.l %d0, %d4
+ bls.s .yuv_red_ok1 /* unsigned test: 0..63 passes, negative values compare as huge */
+ spl.b %d4 /* low byte = 0xff if value - 63 is positive, 0x00 if negative */
+ and.l %d0, %d4 /* yields 63 or 0 and clears the upper bits */
+.yuv_red_ok1:
+ cmp.l %d0, %d5
+ bls.s .yuv_green_ok1
+ spl.b %d5
+ and.l %d0, %d5
+.yuv_green_ok1:
+ cmp.l %d0, %d6
+ bls.s .yuv_blue_ok1
+ spl.b %d6
+ and.l %d0, %d6
+.yuv_blue_ok1:
.yuv_no_clamp1:
- /* : %d5 = R, %d6 = G, %d7 = B */
+ /* : %d4 = R, %d5 = G, %d6 = B */
- move.l %d6,%d1 /* save g for lower 9 bits */
- lsl.l #3,%d5 /* R << 3 */
- lsr.l #3,%d1 /* G >> 3 */
- or.l %d5,%d1
- move.w %d1,(%a0) /* |00000000|000000000|0000000r|rrrrrggg| */
- lsl.l #6,%d6 /* B << 6 */
- or.l %d6,%d7 /* |00000000|000000000|0000gggg|ggbbbbbb| */
- move.w %d7,(%a0)
+ move.l %d5,%d0 /* save g for lower 9 bits */
+ lsl.l #3,%d4 /* R << 3 */
+ lsr.l #3,%d0 /* G >> 3 */
+ or.l %d4,%d0
+ move.w %d0,(%a0) /* |00000000|00000000|0000000r|rrrrrggg| */
+ lsl.l #6,%d5 /* G << 6 */
+ or.l %d5,%d6 /* |00000000|00000000|0000gggg|ggbbbbbb| */
+ move.w %d6,(%a0)
/** Write second pixel **/
- clr %d1
- move.b (%a1)+,%d1 /* get y component */
- subq.l #8,%d1
- subq.l #8,%d1
- moveq.l #74,%d6
- muls.w %d6,%d1
- asr.l #8,%d1
- /* : %d1 = Y, %d2 = bu, %d3 = guv, %d4 = rv */
-
- /* Add Y + each chroma component (can clobber %d2-%d4 values now) */
- add.l %d1,%d4 /* get r */
- add.l %d1,%d3 /* get g */
- add.l %d1,%d2 /* get b */
-
- move.l %d2,%d1 /* is clamping needed? */
- or.l %d3,%d1
- or.l %d4,%d1
- asr.l #6,%d1
- beq.b .yuv_no_clamp2 /* values in range: skip clamping */
- bpl.b .yuv_r63_test2 /* no negative values: skip to high bounds checks */
-.yuv_r0_test2:
- clr.l %d1 /* check for any values < 0 */
- cmp.l %d1,%d4
- bgt.b .yuv_g0_test2
clr.l %d4
-.yuv_g0_test2:
- cmp.l %d1,%d3
- bgt.b .yuv_b0_test2
- clr.l %d3
-.yuv_b0_test2:
- cmp.l %d1,%d2
- bgt.b .yuv_r63_test2
- clr.l %d2
-.yuv_r63_test2: /* check for any values > 63 */
- moveq.l #63,%d1
- cmp.l %d1,%d4
- blt.b .yuv_g63_test2
- move.l %d1,%d4
-.yuv_g63_test2:
- cmp.l %d1,%d3
- blt.b .yuv_b63_test2
- move.l %d1,%d3
-.yuv_b63_test2:
- cmp.l %d1,%d2
- blt.b .yuv_no_clamp2
- move.l %d1,%d2
+ move.b (%a1)+,%d4 /* get y component */
+ moveq.l #74,%d0
+ muls.w %d0,%d4
+ asr.l #8,%d4
+ subq.l #4,%d4
+ /* : %d4 = Y, %d1 = bu, %d2 = guv, %d3 = rv */
+
+ /* Add Y + each chroma component (can clobber %d1-%d3 values now) */
+ add.l %d4,%d3 /* get r */
+ add.l %d4,%d2 /* get g */
+ add.l %d4,%d1 /* get b */
+
+ move.l %d1,%d0 /* is clamping needed? */
+ or.l %d2,%d0
+ or.l %d3,%d0
+ asr.l #6,%d0
+ beq.b .yuv_no_clamp2 /* values in range: skip clamping */
+ moveq.l #63, %d0
+ cmp.l %d0, %d3
+ bls.s .yuv_red_ok2
+ spl.b %d3
+ and.l %d0, %d3
+.yuv_red_ok2:
+ cmp.l %d0, %d2
+ bls.s .yuv_green_ok2
+ spl.b %d2
+ and.l %d0, %d2
+.yuv_green_ok2:
+ cmp.l %d0, %d1
+ bls.s .yuv_blue_ok2
+ spl.b %d1
+ and.l %d0, %d1
+.yuv_blue_ok2:
.yuv_no_clamp2:
- /* : %d4 = R, %d3 = G, %d2 = B */
-
- move.l %d3,%d1 /* save g for lower 9 bits */
- lsl.l #3,%d4 /* R << 3 */
- lsr.l #3,%d1 /* G >> 3 */
- or.l %d4,%d1 /* |00000000|000000000|0000000r|rrrrrggg| */
+ /* : %d3 = R, %d2 = G, %d1 = B */
+
+ move.l %d2,%d0 /* save g for lower 9 bits */
+ lsl.l #3,%d3 /* R << 3 */
+ lsr.l #3,%d0 /* G >> 3 */
+ or.l %d3,%d0 /* |00000000|00000000|0000000r|rrrrrggg| */
+ move.w %d0,(%a0)
+ lsl.l #6,%d2 /* G << 6 */
+ or.l %d2,%d1 /* |00000000|00000000|0000gggg|ggbbbbbb| */
move.w %d1,(%a0)
- lsl.l #6,%d3 /* G << 6 */
- or.l %d3,%d2 /* |00000000|000000000|0000gggg|ggbbbbbb| */
- move.w %d2,(%a0)
cmp.l %a1,%a5 /* run %a1 up to end of line */
bhi.w .yuv_line_loop1
/* Rewind chroma pointers */
- move.l (40+8,%sp),%a2 /* bu data */
- move.l (40+12,%sp),%a3 /* guv data */
- move.l (40+16,%sp),%a4 /* rv data */
- lea.l (%a5,%d0),%a5 /* next end address */
+ movem.l (36+8, %sp), %a2-%a5 /* bu data, guv data, rv data, width */
+ lea.l (%a1, %a5), %a5 /* next end address */
.yuv_line_loop2:
- clr %d1
- move.b (%a1)+,%d1 /* get y component */
- subq.l #8,%d1
- subq.l #8,%d1
- moveq.l #74,%d6
- muls.w %d6,%d1
- asr.l #8,%d1
-
- move.b (%a2)+,%d2 /* read save chromas and sign extend */
+ move.b (%a2)+,%d1 /* read saved chromas and sign extend */
+ extb.l %d1
+ move.b (%a3)+,%d2
extb.l %d2
- move.b (%a3)+,%d3
+ move.b (%a4)+,%d3
extb.l %d3
- move.b (%a4)+,%d4
- extb.l %d4
- /* : %d1 = Y, %d2 = bu, %d3 = guv, %d4 = rv */
-
- move.l %d1,%d5 /* get r */
- add.l %d4,%d5
- move.l %d1,%d6 /* get g */
- add.l %d3,%d6
- move.l %d1,%d7 /* get b */
- add.l %d2,%d7
-
- move.l %d7,%d1 /* is clamping needed? */
- or.l %d6,%d1
- or.l %d5,%d1
- asr.l #6,%d1
+
+ clr.l %d4
+ move.b (%a1)+,%d4 /* get y component */
+ moveq.l #74,%d0
+ muls.w %d0,%d4
+ asr.l #8,%d4
+ subq.l #4,%d4
+ move.l %d4,%d5
+ move.l %d4,%d6
+ /* : %d4,%d5,%d6 = Y, %d1 = bu, %d2 = guv, %d3 = rv */
+
+ add.l %d3,%d4 /* get r */
+ add.l %d2,%d5 /* get g */
+ add.l %d1,%d6 /* get b */
+
+ move.l %d6,%d0 /* is clamping needed? */
+ or.l %d5,%d0
+ or.l %d4,%d0
+ asr.l #6,%d0
beq.b .yuv_no_clamp3 /* values in range: skip clamping */
- bpl.b .yuv_r63_test3 /* no negative values: skip to high bounds checks */
-.yuv_r0_test3:
- clr.l %d1 /* check for any values < 0 */
- cmp.l %d1,%d5
- bgt.b .yuv_g0_test3
- clr.l %d5
-.yuv_g0_test3:
- cmp.l %d1,%d6
- bgt.b .yuv_b0_test3
- clr.l %d6
-.yuv_b0_test3:
- cmp.l %d1,%d7
- bgt.b .yuv_r63_test3
- clr.l %d7
-.yuv_r63_test3: /* check for any values > 63 */
- moveq.l #63,%d1
- cmp.l %d1,%d5
- blt.b .yuv_g63_test3
- move.l %d1,%d5
-.yuv_g63_test3:
- cmp.l %d1,%d6
- blt.b .yuv_b63_test3
- move.l %d1,%d6
-.yuv_b63_test3:
- cmp.l %d1,%d7
- blt.b .yuv_no_clamp3
- move.l %d1,%d7
+ moveq.l #63, %d0
+ cmp.l %d0, %d4
+ bls.s .yuv_red_ok3
+ spl.b %d4
+ and.l %d0, %d4
+.yuv_red_ok3:
+ cmp.l %d0, %d5
+ bls.s .yuv_green_ok3
+ spl.b %d5
+ and.l %d0, %d5
+.yuv_green_ok3:
+ cmp.l %d0, %d6
+ bls.s .yuv_blue_ok3
+ spl.b %d6
+ and.l %d0, %d6
+.yuv_blue_ok3:
.yuv_no_clamp3:
- /* : %d5 = R, %d6 = G, %d7 = B */
+ /* : %d4 = R, %d5 = G, %d6 = B */
- move.l %d6,%d1 /* save g for lower 9 bits */
- lsl.l #3,%d5 /* R << 3 */
- lsr.l #3,%d1 /* G >> 3 */
- or.l %d5,%d1
- move.w %d1,(%a0) /* |00000000|000000000|0000000r|rrrrrggg| */
- lsl.l #6,%d6 /* B << 6 */
- or.l %d6,%d7 /* |00000000|000000000|0000gggg|ggbbbbbb| */
- move.w %d7,(%a0)
+ move.l %d5,%d0 /* save g for lower 9 bits */
+ lsl.l #3,%d4 /* R << 3 */
+ lsr.l #3,%d0 /* G >> 3 */
+ or.l %d4,%d0
+ move.w %d0,(%a0) /* |00000000|00000000|0000000r|rrrrrggg| */
+ lsl.l #6,%d5 /* G << 6 */
+ or.l %d5,%d6 /* |00000000|00000000|0000gggg|ggbbbbbb| */
+ move.w %d6,(%a0)
/** Write second pixel **/
- clr %d1
- move.b (%a1)+,%d1 /* get y component */
- subq.l #8,%d1
- subq.l #8,%d1
- moveq.l #74,%d6
- muls.w %d6,%d1
- asr.l #8,%d1
- /* : %d1 = Y, %d2 = bu, %d3 = guv, %d4 = rv */
-
- /* Add Y + each chroma component (can clobber %d2-%d4 values now) */
- add.l %d1,%d4 /* get r */
- add.l %d1,%d3 /* get g */
- add.l %d1,%d2 /* get b */
-
- move.l %d2,%d1 /* is clamping needed? */
- or.l %d3,%d1
- or.l %d4,%d1
- asr.l #6,%d1
- beq.b .yuv_no_clamp4 /* values in range: skip clamping */
- bpl.b .yuv_r63_test4 /* no negative values: skip to high bounds checks */
-.yuv_r0_test4:
- clr.l %d1 /* check for any values < 0 */
- cmp.l %d1,%d4
- bgt.b .yuv_g0_test4
clr.l %d4
-.yuv_g0_test4:
- cmp.l %d1,%d3
- bgt.b .yuv_b0_test4
- clr.l %d3
-.yuv_b0_test4:
- cmp.l %d1,%d2
- bgt.b .yuv_r63_test4
- clr.l %d2
-.yuv_r63_test4: /* check for any values > 63 */
- moveq.l #63,%d1
- cmp.l %d1,%d4
- blt.b .yuv_g63_test4
- move.l %d1,%d4
-.yuv_g63_test4:
- cmp.l %d1,%d3
- blt.b .yuv_b63_test4
- move.l %d1,%d3
-.yuv_b63_test4:
- cmp.l %d1,%d2
- blt.b .yuv_no_clamp4
- move.l %d1,%d2
+ move.b (%a1)+,%d4 /* get y component */
+ moveq.l #74,%d0
+ muls.w %d0,%d4
+ asr.l #8,%d4
+ subq.l #4,%d4
+ /* : %d4 = Y, %d1 = bu, %d2 = guv, %d3 = rv */
+
+ /* Add Y + each chroma component (can clobber %d1-%d3 values now) */
+ add.l %d4,%d3 /* get r */
+ add.l %d4,%d2 /* get g */
+ add.l %d4,%d1 /* get b */
+
+ move.l %d1,%d0 /* is clamping needed? */
+ or.l %d2,%d0
+ or.l %d3,%d0
+ asr.l #6,%d0
+ beq.b .yuv_no_clamp4 /* values in range: skip clamping */
+ moveq.l #63, %d0
+ cmp.l %d0, %d3
+ bls.s .yuv_red_ok4
+ spl.b %d3
+ and.l %d0, %d3
+.yuv_red_ok4:
+ cmp.l %d0, %d2
+ bls.s .yuv_green_ok4
+ spl.b %d2
+ and.l %d0, %d2
+.yuv_green_ok4:
+ cmp.l %d0, %d1
+ bls.s .yuv_blue_ok4
+ spl.b %d1
+ and.l %d0, %d1
+.yuv_blue_ok4:
.yuv_no_clamp4:
- /* : %d4 = R, %d3 = G, %d2 = B */
-
- move.l %d3,%d1 /* save g for lower 9 bits */
- lsl.l #3,%d4 /* R << 3 */
- lsr.l #3,%d1 /* G >> 3 */
- or.l %d4,%d1 /* |00000000|000000000|0000000r|rrrrrggg| */
+ /* : %d3 = R, %d2 = G, %d1 = B */
+
+ move.l %d2,%d0 /* save g for lower 9 bits */
+ lsl.l #3,%d3 /* R << 3 */
+ lsr.l #3,%d0 /* G >> 3 */
+ or.l %d3,%d0 /* |00000000|00000000|0000000r|rrrrrggg| */
+ move.w %d0,(%a0)
+ lsl.l #6,%d2 /* G << 6 */
+ or.l %d2,%d1 /* |00000000|00000000|0000gggg|ggbbbbbb| */
move.w %d1,(%a0)
- lsl.l #6,%d3 /* G << 6 */
- or.l %d3,%d2 /* |00000000|000000000|0000gggg|ggbbbbbb| */
- move.w %d2,(%a0)
cmp.l %a1,%a5 /* run %a1 up to end of line */
bhi.w .yuv_line_loop2
- movem.l (%sp),%d2-%d7/%a2-%a5
- lea.l (40,%sp),%sp /* restore registers */
+ movem.l (%sp),%d2-%d6/%a2-%a5
+ lea.l (36,%sp),%sp /* restore registers */
rts
/* end lcd_write_yuv420_lines */