diff options
| author | Jens Arnold <amiconn@rockbox.org> | 2008-11-24 18:40:49 +0000 |
|---|---|---|
| committer | Jens Arnold <amiconn@rockbox.org> | 2008-11-24 18:40:49 +0000 |
| commit | 3761c0108cbfc6f88c4bf43fc13a38a2f7db0d6f (patch) | |
| tree | a064992dc69635d60e7bc1cb106fc43c6a4e3a40 /apps/codecs/demac/libdemac/predictor-cf.S | |
| parent | 66c0cf2eb17158eec9d0cd2553481a2caf86e611 (diff) | |
| download | rockbox-3761c0108cbfc6f88c4bf43fc13a38a2f7db0d6f.zip rockbox-3761c0108cbfc6f88c4bf43fc13a38a2f7db0d6f.tar.gz rockbox-3761c0108cbfc6f88c4bf43fc13a38a2f7db0d6f.tar.bz2 rockbox-3761c0108cbfc6f88c4bf43fc13a38a2f7db0d6f.tar.xz | |
Branch optimisation in both C (giving hints to gcc - verified using -fprofile-arcs and gcov) and asm files. Biggest effect on ColdFire (-c1000: +8%, -c2000: +5%), but ARM also benefits slightly (less than 1% on ARM7TDMI, around 1% on ARM1136).
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@19199 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/codecs/demac/libdemac/predictor-cf.S')
| -rw-r--r-- | apps/codecs/demac/libdemac/predictor-cf.S | 46 |
1 files changed, 20 insertions, 26 deletions
diff --git a/apps/codecs/demac/libdemac/predictor-cf.S b/apps/codecs/demac/libdemac/predictor-cf.S index b12d093..0a1ffe9 100644 --- a/apps/codecs/demac/libdemac/predictor-cf.S +++ b/apps/codecs/demac/libdemac/predictor-cf.S @@ -486,10 +486,18 @@ predictor_decode_stereo: | %a3 = &p->historybuffer[PREDICTOR_HISTORY_SIZE] cmp.l %a3, %a5 - bne.s .endofloop + beq.s .move_hist | The history buffer is full, we need to do a memmove - | The history buffer is full, we need to do a memmove: + subq.l #1, (8,%sp) | decrease loop count + bne.w .loop +.done: + move.l %a5, (%a6) | Save value of p->buf + movem.l (3*4,%sp), %d2-%d7/%a2-%a6 + lea.l (14*4,%sp), %sp + rts + +.move_hist: lea.l (historybuffer,%a6), %a3 | dest = %a3 (p->historybuffer) @@ -497,33 +505,19 @@ predictor_decode_stereo: | n = 200 movem.l (%a5), %d0-%d7/%a0-%a1 | 40 bytes - lea.l (40,%a5), %a5 movem.l %d0-%d7/%a0-%a1, (%a3) - lea.l (40,%a3), %a3 - movem.l (%a5), %d0-%d7/%a0-%a1 | 40 bytes - lea.l (40,%a5), %a5 - movem.l %d0-%d7/%a0-%a1, (%a3) - lea.l (40,%a3), %a3 - movem.l (%a5), %d0-%d7/%a0-%a1 | 40 bytes - lea.l (40,%a5), %a5 - movem.l %d0-%d7/%a0-%a1, (%a3) - lea.l (40,%a3), %a3 - movem.l (%a5), %d0-%d7/%a0-%a1 | 40 bytes - lea.l (40,%a5), %a5 - movem.l %d0-%d7/%a0-%a1, (%a3) - lea.l (40,%a3), %a3 - movem.l (%a5), %d0-%d7/%a0-%a1 | 40 bytes - lea.l (40,%a5), %a5 - movem.l %d0-%d7/%a0-%a1, (%a3) - lea.l (40,%a3), %a3 + movem.l (40,%a5), %d0-%d7/%a0-%a1 | 40 bytes + movem.l %d0-%d7/%a0-%a1, (40,%a3) + movem.l (80,%a5), %d0-%d7/%a0-%a1 | 40 bytes + movem.l %d0-%d7/%a0-%a1, (80,%a3) + movem.l (120,%a5), %d0-%d7/%a0-%a1 | 40 bytes + movem.l %d0-%d7/%a0-%a1, (120,%a3) + movem.l (160,%a5), %d0-%d7/%a0-%a1 | 40 bytes + movem.l %d0-%d7/%a0-%a1, (160,%a3) - lea.l (historybuffer,%a6), %a5 | p->buf = &p->historybuffer[0] + move.l %a3, %a5 | p->buf = &p->historybuffer[0] -.endofloop: subq.l #1, (8,%sp) | decrease loop count bne.w .loop - move.l %a5, (%a6) | Save value of p->buf - movem.l (3*4,%sp), %d2-%d7/%a2-%a6 - 
lea.l (14*4,%sp), %sp - rts + bra.s .done |