diff options
| author | Nils Wallménius <nils@rockbox.org> | 2010-05-11 22:23:43 +0000 |
|---|---|---|
| committer | Nils Wallménius <nils@rockbox.org> | 2010-05-11 22:23:43 +0000 |
| commit | 418c9eeb141ac751a59572fde1fcbc1e4655f064 (patch) | |
| tree | 7be639f0f7495bd9d8e78ffe14c5c5a0b9396c4a | |
| parent | cdcb4ba4401ce87cdbc9309612d6e7a649971398 (diff) | |
| download | rockbox-418c9eeb141ac751a59572fde1fcbc1e4655f064.zip rockbox-418c9eeb141ac751a59572fde1fcbc1e4655f064.tar.gz rockbox-418c9eeb141ac751a59572fde1fcbc1e4655f064.tar.bz2 rockbox-418c9eeb141ac751a59572fde1fcbc1e4655f064.tar.xz | |
Faster assembler strlen for coldfire using the load-a-whole-word-and-test-it-for-nullbytes-at-once trick, benched 28% faster than the old version
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@25959 a1c6a512-1295-4272-9138-f99709370657
| -rw-r--r-- | firmware/target/coldfire/strlen-coldfire.S | 64 |
1 files changed, 50 insertions, 14 deletions
diff --git a/firmware/target/coldfire/strlen-coldfire.S b/firmware/target/coldfire/strlen-coldfire.S index a65b0c3..f1e5aca 100644 --- a/firmware/target/coldfire/strlen-coldfire.S +++ b/firmware/target/coldfire/strlen-coldfire.S @@ -5,9 +5,9 @@ * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ * \/ \/ \/ \/ \/ - * $Id $ + * $Id$ * - * Copyright (C) 2007 Nils Wallménius + * Copyright (C) 2010 Nils Wallménius * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -21,22 +21,58 @@ /* size_t strlen(const char *str) */ - .section .text,"ax",@progbits - .align 2 - .globl strlen - .type strlen, @function + .section .text,"ax",@progbits + .align 2 + .globl strlen + .type strlen, @function strlen: - move.l 4(%sp),%a0 /* %a0 = *str */ - move.l %a0,%d0 /* %d0 = start address */ + move.l 4(%sp), %a0 /* %a0 = *str */ + move.l %a0, %a1 /* %a1 = start address */ + move.l %a0, %d0 + andi.l #3, %d0 /* %d0 = %a0 & 3 */ + jmp.l (2,%pc,%d0.l*2) + bra.b .bytes0 + bra.b .bytes3 + bra.b .bytes2 + bra.b .bytes1 +.bytes3: + tst.b (%a0)+ + beq.b .done +.bytes2: + tst.b (%a0)+ + beq.b .done +.bytes1: + tst.b (%a0)+ + beq.b .done +.bytes0: 1: - tst.b (%a0)+ /* test if %a0 == 0 and increment */ - bne.b 1b /* if the test was false repeat */ + move.l (%a0)+, %d0 /* load %d0 increment %a0 */ + /* use trick to test the whole word for null bytes */ + move.l %d0, %d1 + subi.l #0x01010101, %d1 + not.l %d0 + and.l %d1, %d0 + andi.l #0x80808080, %d0 + beq.b 1b /* if the test was false repeat */ - sub.l %d0,%a0 /* how many times did we repeat? 
*/ - move.l %a0,%d0 - subq.l #1,%d0 /* %d0 is 1 too large due to the last increment */ + /* ok, so the last word contained a 0 byte, test individual bytes */ + subq.l #4, %a0 + tst.b (%a0)+ + beq.b .done + tst.b (%a0)+ + beq.b .done + tst.b (%a0)+ + beq.b .done + /* last byte must be 0 so we don't need to load it, so we don't increment a0 + so we jump past the subq instr */ + .word 0x51fa /* trapf.w, shadow next instr */ + +.done: + subq.l #1, %a0 /* %a0 is 1 too large due to the last increment */ + sub.l %a1, %a0 /* how many times did we repeat? */ + move.l %a0, %d0 /* return value in %d0 */ rts - .size strlen, .-strlen + .size strlen, .-strlen |