diff options
| author | Jörg Hohensohn <hohensoh@rockbox.org> | 2003-10-12 15:45:03 +0000 |
|---|---|---|
| committer | Jörg Hohensohn <hohensoh@rockbox.org> | 2003-10-12 15:45:03 +0000 |
| commit | 1f30fa7db01391c8beec85e6c24268744b8244a5 (patch) | |
| tree | 8bbb8ce79df785cb3dcbddb5d380a97de99db0c1 | |
| parent | b3d96833d023547c5868e292cb530b85489a71b2 (diff) | |
| download | rockbox-1f30fa7db01391c8beec85e6c24268744b8244a5.zip rockbox-1f30fa7db01391c8beec85e6c24268744b8244a5.tar.gz rockbox-1f30fa7db01391c8beec85e6c24268744b8244a5.tar.bz2 rockbox-1f30fa7db01391c8beec85e6c24268744b8244a5.tar.xz | |
patch #801964 by Magnus Holmgren: 10% faster bitswap
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@3970 a1c6a512-1295-4272-9138-f99709370657
| -rw-r--r-- | firmware/bitswap.S | 54 |
1 files changed, 32 insertions, 22 deletions
diff --git a/firmware/bitswap.S b/firmware/bitswap.S index 9a4f760..da628a3 100644 --- a/firmware/bitswap.S +++ b/firmware/bitswap.S @@ -18,8 +18,8 @@ ****************************************************************************/ .section .icode,"ax",@progbits - .global _bitswap .align 4 + .global _bitswap .type _bitswap,@function /* Registers used: @@ -36,46 +36,56 @@ _bitswap: mov.l .fliptable,r7 - mov #1,r6 - mov r4,r0 - and #1,r0 /* odd address? */ - cmp/eq #0,r0 - bt .init /* no, address is even */ + mov #1,r6 + mov r4,r0 + tst #1,r0 /* odd address? */ + bt .init /* no, address is even */ mov.b @r4,r0 /* swap first byte */ extu.b r0,r0 mov.b @(r0,r7),r0 mov.b r0,@r4 - add #1,r4 - add #-1,r5 - bra .init + add #1,r4 + add #-1,r5 + bra .init + + /* The instruction order below is a bit strange, because: + * 1) Keeping load/stores on longword boundaries means the instruction + * fetch won't compete with the memory access (because instructions + * are fetched in pairs). + * 2) Using the result of a fetch in the next instruction causes a + * stall (except in certain circumstances). + * See the SH-1 programming manual for details. 
+ */ + .loop: mov.w @r4,r1 /* data to flip */ - swap.b r1,r2 - extu.b r2,r0 /* high byte */ - mov.b @(r0,r7),r2 - extu.b r2,r0 /* Zero extend */ - swap.b r0,r3 /* put high byte in result */ - extu.b r1,r0 /* low byte */ - mov.b @(r0,r7),r1 - extu.b r1,r0 /* Zero extend */ - or r0,r3 /* put low byte in result */ + add #-2,r5 + swap.b r1,r2 /* get high byte */ + extu.b r2,r0 /* prepare high byte */ + mov.b @(r0,r7),r2 /* swap high byte */ + extu.b r1,r0 /* prepare low byte */ + mov.b @(r0,r7),r1 /* swap low byte */ + extu.b r2,r2 /* zero extend high byte */ + swap.b r2,r3 /* put high byte in result */ + extu.b r1,r0 /* zero extend low byte */ + or r0,r3 /* put low byte in result */ mov.w r3,@r4 /* store result */ - add #2,r4 - add #-2,r5 + add #2,r4 .init: cmp/gt r6,r5 /* while [bytes remaining] > 1 */ - bt .loop /* (at least 2 bytes left) */ + bt .loop /* (at least 2 bytes left) */ cmp/eq r6,r5 bf .exit /* if not 1 byte left, exit */ - mov.b @r4,r0 /* swap last byte */ + mov.b @r4,r0 /* swap last byte */ extu.b r0,r0 mov.b @(r0,r7),r0 mov.b r0,@r4 .exit: rts + nop .align 4 |