summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNils Wallménius <nils@rockbox.org>2010-05-11 22:23:43 +0000
committerNils Wallménius <nils@rockbox.org>2010-05-11 22:23:43 +0000
commit418c9eeb141ac751a59572fde1fcbc1e4655f064 (patch)
tree7be639f0f7495bd9d8e78ffe14c5c5a0b9396c4a
parentcdcb4ba4401ce87cdbc9309612d6e7a649971398 (diff)
downloadrockbox-418c9eeb141ac751a59572fde1fcbc1e4655f064.zip
rockbox-418c9eeb141ac751a59572fde1fcbc1e4655f064.tar.gz
rockbox-418c9eeb141ac751a59572fde1fcbc1e4655f064.tar.bz2
rockbox-418c9eeb141ac751a59572fde1fcbc1e4655f064.tar.xz
Faster assembler strlen for coldfire using the load-a-whole-word-and-test-it-for-null-bytes-at-once trick, benched 28% faster than the old version.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@25959 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r--firmware/target/coldfire/strlen-coldfire.S64
1 files changed, 50 insertions, 14 deletions
diff --git a/firmware/target/coldfire/strlen-coldfire.S b/firmware/target/coldfire/strlen-coldfire.S
index a65b0c3..f1e5aca 100644
--- a/firmware/target/coldfire/strlen-coldfire.S
+++ b/firmware/target/coldfire/strlen-coldfire.S
@@ -5,9 +5,9 @@
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
- * $Id $
+ * $Id$
*
- * Copyright (C) 2007 Nils Wallménius
+ * Copyright (C) 2010 Nils Wallménius
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -21,22 +21,58 @@
/* size_t strlen(const char *str) */
- .section .text,"ax",@progbits
- .align 2
- .globl strlen
- .type strlen, @function
+ .section .text,"ax",@progbits
+ .align 2
+ .globl strlen
+ .type strlen, @function
strlen:
- move.l 4(%sp),%a0 /* %a0 = *str */
- move.l %a0,%d0 /* %d0 = start address */
+ move.l 4(%sp), %a0 /* %a0 = str, the pointer argument read from the stack */
+ move.l %a0, %a1 /* %a1 = start address, kept for the final subtraction */
+ move.l %a0, %d0 /* copy the pointer so its low bits can be masked */
+ andi.l #3, %d0 /* %d0 = %a0 & 3, the offset within a 4-byte word */
+ jmp.l (2,%pc,%d0.l*2) /* dispatch on %d0 into the table of 2-byte bra.b below */
+ bra.b .bytes0 /* %d0 == 0: already aligned, go straight to the word loop */
+ bra.b .bytes3 /* %d0 == 1: test 3 single bytes to reach alignment */
+ bra.b .bytes2 /* %d0 == 2: test 2 single bytes to reach alignment */
+ bra.b .bytes1 /* %d0 == 3: test 1 single byte to reach alignment */
+.bytes3:
+ tst.b (%a0)+
+ beq.b .done
+.bytes2:
+ tst.b (%a0)+
+ beq.b .done
+.bytes1:
+ tst.b (%a0)+
+ beq.b .done
+.bytes0:
1:
- tst.b (%a0)+ /* test if %a0 == 0 and increment */
- bne.b 1b /* if the test was false repeat */
+ move.l (%a0)+, %d0 /* load 4 bytes into %d0 and advance %a0 by 4 */
+ /* (x - 0x01010101) & ~x & 0x80808080 is nonzero iff x contains a 0 byte */
+ move.l %d0, %d1
+ subi.l #0x01010101, %d1
+ not.l %d0
+ and.l %d1, %d0
+ andi.l #0x80808080, %d0
+ beq.b 1b /* result is 0: no 0 byte in these 4 bytes, load the next 4 */
- sub.l %d0,%a0 /* how many times did we repeat? */
- move.l %a0,%d0
- subq.l #1,%d0 /* %d0 is 1 too large due to the last increment */
+ /* the last word loaded contains a 0 byte, step back and test its bytes one by one */
+ subq.l #4, %a0
+ tst.b (%a0)+
+ beq.b .done
+ tst.b (%a0)+
+ beq.b .done
+ tst.b (%a0)+
+ beq.b .done
+ /* the fourth byte must be the 0 byte, so it is not tested and %a0 is not
+ incremented past it; the subq at .done must therefore be skipped */
+ .word 0x51fa /* trapf.w, takes the following 2-byte subq as its operand word */
+
+.done:
+ subq.l #1, %a0 /* %a0 is 1 too large due to the last increment */
+ sub.l %a1, %a0 /* %a0 = address of the 0 byte - start address = length */
+ move.l %a0, %d0 /* return value in %d0 */
rts
- .size strlen, .-strlen
+ .size strlen, .-strlen