summaryrefslogtreecommitdiff
path: root/firmware
diff options
context:
space:
mode:
authorJens Arnold <amiconn@rockbox.org>2006-09-07 00:16:04 +0000
committerJens Arnold <amiconn@rockbox.org>2006-09-07 00:16:04 +0000
commit9d2f7b5c6dd01e113abf5ff75fa24d389da1f244 (patch)
tree6468332a4cc60b3a1e38adfdbd55d97f1ac32b01 /firmware
parent825fb8a2649b187c27d93ec28a4bb87fa93797f2 (diff)
downloadrockbox-9d2f7b5c6dd01e113abf5ff75fa24d389da1f244.zip
rockbox-9d2f7b5c6dd01e113abf5ff75fa24d389da1f244.tar.gz
rockbox-9d2f7b5c6dd01e113abf5ff75fa24d389da1f244.tar.bz2
rockbox-9d2f7b5c6dd01e113abf5ff75fa24d389da1f244.tar.xz
Assembler optimised memset16() for ARM, by Thom Johansen. Should speed up LCD clearing and solid rectangle drawing on colour iPods somewhat.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@10900 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'firmware')
-rw-r--r--firmware/SOURCES2
-rwxr-xr-xfirmware/common/memset16.c6
-rwxr-xr-xfirmware/include/memory.h2
-rwxr-xr-xfirmware/target/arm/memset16-arm.S80
4 files changed, 83 insertions, 7 deletions
diff --git a/firmware/SOURCES b/firmware/SOURCES
index 9001535..545227f 100644
--- a/firmware/SOURCES
+++ b/firmware/SOURCES
@@ -53,7 +53,7 @@ target/sh/memset-sh.S
common/memcpy.c
common/memmove.c
target/arm/memset-arm.S
-common/memset16.c
+target/arm/memset16-arm.S
#else
common/memcpy.c
common/memmove.c
diff --git a/firmware/common/memset16.c b/firmware/common/memset16.c
index bc187a5..5f0fc3f 100755
--- a/firmware/common/memset16.c
+++ b/firmware/common/memset16.c
@@ -22,15 +22,13 @@
#define UNALIGNED(X) ((long)X & (sizeof(long) - 1))
#define TOO_SMALL(LEN) ((LEN) < LBLOCKSIZE)
-void *memset16(void *dst, int val, size_t len)
+void memset16(void *dst, int val, size_t len)
{
#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__)
unsigned short *p = (unsigned short *)dst;
while (len--)
*p++ = val;
-
- return dst;
#else
unsigned short *p = (unsigned short *)dst;
unsigned int i;
@@ -73,7 +71,5 @@ void *memset16(void *dst, int val, size_t len)
while (len--)
*p++ = val;
-
- return dst;
#endif /* not PREFER_SIZE_OVER_SPEED */
}
diff --git a/firmware/include/memory.h b/firmware/include/memory.h
index 2b2a60c..559c6ed 100755
--- a/firmware/include/memory.h
+++ b/firmware/include/memory.h
@@ -22,6 +22,6 @@
#include <sys/types.h>
-void *memset16(void *dst, int val, size_t len);
+void memset16(void *dst, int val, size_t len);
#endif /* _MEMORY_H_ */
diff --git a/firmware/target/arm/memset16-arm.S b/firmware/target/arm/memset16-arm.S
new file mode 100755
index 0000000..13213c5
--- /dev/null
+++ b/firmware/target/arm/memset16-arm.S
@@ -0,0 +1,80 @@
+/***************************************************************************
+ * __________ __ ___.
+ * Open \______ \ ____ ____ | | _\_ |__ _______ ___
+ * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
+ * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
+ * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
+ * \/ \/ \/ \/ \/
+ * $Id$
+ *
+ * Copyright (C) 2006 by Thom Johansen
+ *
+ * All files in this archive are subject to the GNU General Public License.
+ * See the file COPYING in the source tree root for full license agreement.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+#include "config.h"
+
+ .section .icode,"ax",%progbits
+
+ .align 2
+
+/* The following code is based on code from the Linux kernel version 2.6.15.3,
+ * linux/arch/arm/lib/memset.S
+ *
+ * Copyright (C) 1995-2000 Russell King
+ */
+
+/*
+ * void memset16(void *dst, int val, size_t len)
+ *
+ * Fill len halfwords (16-bit units, not bytes) starting at dst with the
+ * low 16 bits of val.
+ *
+ * In:      r0 = dst; only bit 1 of the address is tested, so dst is
+ *               assumed to be at least halfword aligned
+ *          r1 = val; bits 16-31 are ignored
+ *          r2 = len in halfwords; all comparisons on it are signed
+ * Out:     no return value
+ * Clobber: r0-r3, ip, flags. lr doubles as a fourth data register in the
+ *          bulk path and is saved on the stack around that use.
+ */
+        .global memset16
+        .type   memset16,%function
+memset16:
+        tst     r0, #2                  @ unaligned?
+        cmpne   r2, #0                  @ ...and is there anything to store?
+        strneh  r1, [r0], #2            @ store one halfword to align
+        subne   r2, r2, #1
+
+/*
+ * r0 is now word aligned, unless len was 0, in which case nothing below
+ * stores anything. Replicate the low halfword of val into both halves of
+ * r1. Shifting left first drops bits 16-31 of val, so a sign-extended or
+ * otherwise wide value cannot corrupt the upper halfword of the word stores.
+ */
+        mov     r1, r1, lsl #16
+        orr     r1, r1, r1, lsr #16
+        mov     r3, r1
+        cmp     r2, #8
+        blt     4f                      @ fewer than 8 halfwords: tail only
+/*
+ * We need an extra register for this loop - save the return address and
+ * use the LR
+ */
+        str     lr, [sp, #-4]!
+        mov     ip, r1
+        mov     lr, r1
+
+2:      subs    r2, r2, #32             @ 32 halfwords per iteration
+        stmgeia r0!, {r1, r3, ip, lr}   @ 64 bytes at a time.
+        stmgeia r0!, {r1, r3, ip, lr}
+        stmgeia r0!, {r1, r3, ip, lr}
+        stmgeia r0!, {r1, r3, ip, lr}
+        bgt     2b
+        ldmeqfd sp!, {pc}               @ count hit exactly 0: return
+/*
+ * Fewer than 64 bytes remain. r2 is negative here, but subtracting
+ * multiples of 32 left its low five bits untouched, so there is no need to
+ * correct the count; we are only testing bits from now on.
+ */
+        tst     r2, #16                 @ 16 halfwords = 32 bytes
+        stmneia r0!, {r1, r3, ip, lr}
+        stmneia r0!, {r1, r3, ip, lr}
+        tst     r2, #8                  @ 8 halfwords = 16 bytes
+        stmneia r0!, {r1, r3, ip, lr}
+        ldr     lr, [sp], #4            @ restore the return address
+
+4:      tst     r2, #4                  @ 4 halfwords = 8 bytes
+        stmneia r0!, {r1, r3}
+        tst     r2, #2                  @ 2 halfwords = one word
+        strne   r1, [r0], #4
+
+        tst     r2, #1                  @ trailing single halfword
+        strneh  r1, [r0], #2
+        bx      lr
+        .size   memset16, .-memset16