summaryrefslogtreecommitdiff
path: root/apps/codecs/demac/libdemac/predictor-cf.S
diff options
context:
space:
mode:
author: Jens Arnold <amiconn@rockbox.org> 2008-11-24 18:40:49 +0000
committer: Jens Arnold <amiconn@rockbox.org> 2008-11-24 18:40:49 +0000
commit: 3761c0108cbfc6f88c4bf43fc13a38a2f7db0d6f (patch)
tree: a064992dc69635d60e7bc1cb106fc43c6a4e3a40 /apps/codecs/demac/libdemac/predictor-cf.S
parent: 66c0cf2eb17158eec9d0cd2553481a2caf86e611 (diff)
download: rockbox-3761c0108cbfc6f88c4bf43fc13a38a2f7db0d6f.zip
rockbox-3761c0108cbfc6f88c4bf43fc13a38a2f7db0d6f.tar.gz
rockbox-3761c0108cbfc6f88c4bf43fc13a38a2f7db0d6f.tar.bz2
rockbox-3761c0108cbfc6f88c4bf43fc13a38a2f7db0d6f.tar.xz
Branch optimisation in both the C files (giving branch hints to gcc, verified using -fprofile-arcs and gcov) and the asm files. The biggest effect is on ColdFire (-c1000: +8%, -c2000: +5%), but ARM also benefits slightly (less than 1% on ARM7TDMI, around 1% on ARM1136).
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@19199 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/codecs/demac/libdemac/predictor-cf.S')
-rw-r--r-- apps/codecs/demac/libdemac/predictor-cf.S 46
1 files changed, 20 insertions, 26 deletions
diff --git a/apps/codecs/demac/libdemac/predictor-cf.S b/apps/codecs/demac/libdemac/predictor-cf.S
index b12d093..0a1ffe9 100644
--- a/apps/codecs/demac/libdemac/predictor-cf.S
+++ b/apps/codecs/demac/libdemac/predictor-cf.S
@@ -486,10 +486,18 @@ predictor_decode_stereo:
| %a3 = &p->historybuffer[PREDICTOR_HISTORY_SIZE]
cmp.l %a3, %a5
- bne.s .endofloop
+ beq.s .move_hist | The history buffer is full, we need to do a memmove
- | The history buffer is full, we need to do a memmove:
+ subq.l #1, (8,%sp) | decrease loop count
+ bne.w .loop
+.done:
+ move.l %a5, (%a6) | Save value of p->buf
+ movem.l (3*4,%sp), %d2-%d7/%a2-%a6
+ lea.l (14*4,%sp), %sp
+ rts
+
+.move_hist:
lea.l (historybuffer,%a6), %a3
| dest = %a3 (p->historybuffer)
@@ -497,33 +505,19 @@ predictor_decode_stereo:
| n = 200
movem.l (%a5), %d0-%d7/%a0-%a1 | 40 bytes
- lea.l (40,%a5), %a5
movem.l %d0-%d7/%a0-%a1, (%a3)
- lea.l (40,%a3), %a3
- movem.l (%a5), %d0-%d7/%a0-%a1 | 40 bytes
- lea.l (40,%a5), %a5
- movem.l %d0-%d7/%a0-%a1, (%a3)
- lea.l (40,%a3), %a3
- movem.l (%a5), %d0-%d7/%a0-%a1 | 40 bytes
- lea.l (40,%a5), %a5
- movem.l %d0-%d7/%a0-%a1, (%a3)
- lea.l (40,%a3), %a3
- movem.l (%a5), %d0-%d7/%a0-%a1 | 40 bytes
- lea.l (40,%a5), %a5
- movem.l %d0-%d7/%a0-%a1, (%a3)
- lea.l (40,%a3), %a3
- movem.l (%a5), %d0-%d7/%a0-%a1 | 40 bytes
- lea.l (40,%a5), %a5
- movem.l %d0-%d7/%a0-%a1, (%a3)
- lea.l (40,%a3), %a3
+ movem.l (40,%a5), %d0-%d7/%a0-%a1 | 40 bytes
+ movem.l %d0-%d7/%a0-%a1, (40,%a3)
+ movem.l (80,%a5), %d0-%d7/%a0-%a1 | 40 bytes
+ movem.l %d0-%d7/%a0-%a1, (80,%a3)
+ movem.l (120,%a5), %d0-%d7/%a0-%a1 | 40 bytes
+ movem.l %d0-%d7/%a0-%a1, (120,%a3)
+ movem.l (160,%a5), %d0-%d7/%a0-%a1 | 40 bytes
+ movem.l %d0-%d7/%a0-%a1, (160,%a3)
- lea.l (historybuffer,%a6), %a5 | p->buf = &p->historybuffer[0]
+ move.l %a3, %a5 | p->buf = &p->historybuffer[0]
-.endofloop:
subq.l #1, (8,%sp) | decrease loop count
bne.w .loop
- move.l %a5, (%a6) | Save value of p->buf
- movem.l (3*4,%sp), %d2-%d7/%a2-%a6
- lea.l (14*4,%sp), %sp
- rts
+ bra.s .done