diff options
| author | Jens Arnold <amiconn@rockbox.org> | 2006-09-07 00:16:04 +0000 |
|---|---|---|
| committer | Jens Arnold <amiconn@rockbox.org> | 2006-09-07 00:16:04 +0000 |
| commit | 9d2f7b5c6dd01e113abf5ff75fa24d389da1f244 (patch) | |
| tree | 6468332a4cc60b3a1e38adfdbd55d97f1ac32b01 /firmware | |
| parent | 825fb8a2649b187c27d93ec28a4bb87fa93797f2 (diff) | |
| download | rockbox-9d2f7b5c6dd01e113abf5ff75fa24d389da1f244.zip rockbox-9d2f7b5c6dd01e113abf5ff75fa24d389da1f244.tar.gz rockbox-9d2f7b5c6dd01e113abf5ff75fa24d389da1f244.tar.bz2 rockbox-9d2f7b5c6dd01e113abf5ff75fa24d389da1f244.tar.xz | |
Assembler optimised memset16() for ARM, by Thom Johansen. Should speed up LCD clearing and solid rectangle drawing on colour iPods somewhat.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@10900 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'firmware')
| -rw-r--r-- | firmware/SOURCES | 2 | ||||
| -rwxr-xr-x | firmware/common/memset16.c | 6 | ||||
| -rwxr-xr-x | firmware/include/memory.h | 2 | ||||
| -rwxr-xr-x | firmware/target/arm/memset16-arm.S | 80 |
4 files changed, 83 insertions, 7 deletions
diff --git a/firmware/SOURCES b/firmware/SOURCES index 9001535..545227f 100644 --- a/firmware/SOURCES +++ b/firmware/SOURCES @@ -53,7 +53,7 @@ target/sh/memset-sh.S common/memcpy.c common/memmove.c target/arm/memset-arm.S -common/memset16.c +target/arm/memset16-arm.S #else common/memcpy.c common/memmove.c diff --git a/firmware/common/memset16.c b/firmware/common/memset16.c index bc187a5..5f0fc3f 100755 --- a/firmware/common/memset16.c +++ b/firmware/common/memset16.c @@ -22,15 +22,13 @@ #define UNALIGNED(X) ((long)X & (sizeof(long) - 1)) #define TOO_SMALL(LEN) ((LEN) < LBLOCKSIZE) -void *memset16(void *dst, int val, size_t len) +void memset16(void *dst, int val, size_t len) { #if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__) unsigned short *p = (unsigned short *)dst; while (len--) *p++ = val; - - return dst; #else unsigned short *p = (unsigned short *)dst; unsigned int i; @@ -73,7 +71,5 @@ void *memset16(void *dst, int val, size_t len) while (len--) *p++ = val; - - return dst; #endif /* not PREFER_SIZE_OVER_SPEED */ } diff --git a/firmware/include/memory.h b/firmware/include/memory.h index 2b2a60c..559c6ed 100755 --- a/firmware/include/memory.h +++ b/firmware/include/memory.h @@ -22,6 +22,6 @@ #include <sys/types.h> -void *memset16(void *dst, int val, size_t len); +void memset16(void *dst, int val, size_t len); #endif /* _MEMORY_H_ */ diff --git a/firmware/target/arm/memset16-arm.S b/firmware/target/arm/memset16-arm.S new file mode 100755 index 0000000..13213c5 --- /dev/null +++ b/firmware/target/arm/memset16-arm.S @@ -0,0 +1,80 @@ +/*************************************************************************** + * __________ __ ___. 
+ * Open \______ \ ____ ____ | | _\_ |__ _______ ___
+ * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
+ * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
+ * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
+ * \/ \/ \/ \/ \/
+ * $Id$
+ *
+ * Copyright (C) 2006 by Thom Johansen
+ *
+ * All files in this archive are subject to the GNU General Public License.
+ * See the file COPYING in the source tree root for full license agreement.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+#include "config.h"
+
+    .section    .icode,"ax",%progbits
+
+    .align      2
+
+/* The following code is based on code from the Linux kernel version 2.6.15.3,
+ * linux/arch/arm/lib/memset.S
+ *
+ * Copyright (C) 1995-2000 Russell King
+ */
+
+/* void memset16(void *dst, int val, size_t len)
+ *
+ * Fill len halfwords starting at dst with the low 16 bits of val.
+ *
+ *   r0  dst - destination; only bit 1 is tested, so the pointer is assumed
+ *             to be at least halfword aligned
+ *   r1  val - fill value; bits 16-31 are ignored
+ *   r2  len - number of 16-bit elements (not bytes)
+ *
+ * r0-r3 and ip are modified. lr is saved on the stack and reused as a fourth
+ * data register while the bulk loop runs. r0 is left pointing just past the
+ * filled area, so no meaningful value is returned.
+ */
+    .global     memset16
+    .type       memset16,%function
+memset16:
+    tst     r0, #2                  @ unaligned?
+    cmpne   r2, #0
+    strneh  r1, [r0], #2            @ store one halfword to align
+    subne   r2, r2, #1
+
+/*
+ * we know that the pointer in r0 is aligned to a word boundary.
+ *
+ * Build the 32-bit fill pattern from the low halfword of val only. A plain
+ * "orr r1, r1, r1, lsl #16" would leak bits 16-31 of val (e.g. the sign
+ * extension of a negative short) into the upper halfword of every word
+ * stored below.
+ */
+    mov     r1, r1, lsl #16         @ discard bits 16-31 of val
+    orr     r1, r1, r1, lsr #16     @ r1 = val16 | (val16 << 16)
+    mov     r3, r1
+    cmp     r2, #8
+    blt     4f                      @ fewer than 8 halfwords: tail only
+/*
+ * We need an extra register for this loop - save the return address and
+ * use the LR
+ */
+    str     lr, [sp, #-4]!
+    mov     ip, r1
+    mov     lr, r1
+
+2:  subs    r2, r2, #32             @ 32 halfwords per pass
+    stmgeia r0!, {r1, r3, ip, lr}   @ 64 bytes at a time.
+    stmgeia r0!, {r1, r3, ip, lr}
+    stmgeia r0!, {r1, r3, ip, lr}
+    stmgeia r0!, {r1, r3, ip, lr}
+    bgt     2b
+    ldmeqfd sp!, {pc}               @ count hit exactly zero: return
+/*
+ * Now <64 bytes to go.
+ * No need to correct the count; we're only testing bits from now on
+ * (r2 is negative here, but subtracting 32 never changed bits 0-4).
+ */
+    tst     r2, #16                 @ 16 halfwords = 32 bytes
+    stmneia r0!, {r1, r3, ip, lr}
+    stmneia r0!, {r1, r3, ip, lr}
+    tst     r2, #8                  @ 8 halfwords = 16 bytes
+    stmneia r0!, {r1, r3, ip, lr}
+    ldr     lr, [sp], #4            @ restore the return address
+
+4:  tst     r2, #4                  @ 4 halfwords = 8 bytes
+    stmneia r0!, {r1, r3}
+    tst     r2, #2                  @ 2 halfwords = one word
+    strne   r1, [r0], #4
+
+    tst     r2, #1                  @ final odd halfword
+    strneh  r1, [r0], #2
+    bx      lr
+.end:
+    .size   memset16,.end-memset16 |