summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJörg Hohensohn <hohensoh@rockbox.org>2003-10-12 15:45:03 +0000
committerJörg Hohensohn <hohensoh@rockbox.org>2003-10-12 15:45:03 +0000
commit1f30fa7db01391c8beec85e6c24268744b8244a5 (patch)
tree8bbb8ce79df785cb3dcbddb5d380a97de99db0c1
parentb3d96833d023547c5868e292cb530b85489a71b2 (diff)
downloadrockbox-1f30fa7db01391c8beec85e6c24268744b8244a5.zip
rockbox-1f30fa7db01391c8beec85e6c24268744b8244a5.tar.gz
rockbox-1f30fa7db01391c8beec85e6c24268744b8244a5.tar.bz2
rockbox-1f30fa7db01391c8beec85e6c24268744b8244a5.tar.xz
patch #801964 by Magnus Holmgren: 10% faster bitswap
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@3970 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r--firmware/bitswap.S54
1 files changed, 32 insertions, 22 deletions
diff --git a/firmware/bitswap.S b/firmware/bitswap.S
index 9a4f760..da628a3 100644
--- a/firmware/bitswap.S
+++ b/firmware/bitswap.S
@@ -18,8 +18,8 @@
****************************************************************************/
.section .icode,"ax",@progbits
- .global _bitswap
.align 4
+ .global _bitswap
.type _bitswap,@function
/* Registers used:
@@ -36,46 +36,56 @@
_bitswap:
mov.l .fliptable,r7
- mov #1,r6
- mov r4,r0
- and #1,r0 /* odd address? */
- cmp/eq #0,r0
- bt .init /* no, address is even */
+ mov #1,r6
+ mov r4,r0
+ tst #1,r0 /* odd address? */
+ bt .init /* no, address is even */
mov.b @r4,r0 /* swap first byte */
extu.b r0,r0
mov.b @(r0,r7),r0
mov.b r0,@r4
- add #1,r4
- add #-1,r5
- bra .init
+ add #1,r4
+ add #-1,r5
+ bra .init
+
+ /* The instruction order below is a bit strange, because:
+ * 1) Keeping load/stores on longword boundaries means the instruction
+ * fetch won't compete with the memory access (because instructions
+ * are fetched in pairs).
+ * 2) Using the result of a fetch in the next instruction causes a
+ * stall (except in certain circumstances).
+ * See the SH-1 programming manual for details.
+ */
+
.loop:
mov.w @r4,r1 /* data to flip */
- swap.b r1,r2
- extu.b r2,r0 /* high byte */
- mov.b @(r0,r7),r2
- extu.b r2,r0 /* Zero extend */
- swap.b r0,r3 /* put high byte in result */
- extu.b r1,r0 /* low byte */
- mov.b @(r0,r7),r1
- extu.b r1,r0 /* Zero extend */
- or r0,r3 /* put low byte in result */
+ add #-2,r5
+ swap.b r1,r2 /* get high byte */
+ extu.b r2,r0 /* prepare high byte */
+ mov.b @(r0,r7),r2 /* swap high byte */
+ extu.b r1,r0 /* prepare low byte */
+ mov.b @(r0,r7),r1 /* swap low byte */
+ extu.b r2,r2 /* zero extend high byte */
+ swap.b r2,r3 /* put high byte in result */
+ extu.b r1,r0 /* zero extend low byte */
+ or r0,r3 /* put low byte in result */
mov.w r3,@r4 /* store result */
- add #2,r4
- add #-2,r5
+ add #2,r4
.init:
cmp/gt r6,r5 /* while [bytes remaining] > 1 */
- bt .loop /* (at least 2 bytes left) */
+ bt .loop /* (at least 2 bytes left) */
cmp/eq r6,r5
bf .exit /* if not 1 byte left, exit */
- mov.b @r4,r0 /* swap last byte */
+ mov.b @r4,r0 /* swap last byte */
extu.b r0,r0
mov.b @(r0,r7),r0
mov.b r0,@r4
.exit:
rts
+ nop
.align 4