summaryrefslogtreecommitdiff
path: root/firmware/common/memset.S
diff options
context:
space:
mode:
authorJörg Hohensohn <hohensoh@rockbox.org>2004-03-18 22:06:36 +0000
committerJörg Hohensohn <hohensoh@rockbox.org>2004-03-18 22:06:36 +0000
commitb61cf76aba768513ab2a1fa9e7cc80f59dbce02f (patch)
treedb81683f1743eab4a04f28b4b604aa1bdb297a4b /firmware/common/memset.S
parentc40c069a67a41a85eb2525561de65b11d240d85e (diff)
downloadrockbox-b61cf76aba768513ab2a1fa9e7cc80f59dbce02f.zip
rockbox-b61cf76aba768513ab2a1fa9e7cc80f59dbce02f.tar.gz
rockbox-b61cf76aba768513ab2a1fa9e7cc80f59dbce02f.tar.bz2
rockbox-b61cf76aba768513ab2a1fa9e7cc80f59dbce02f.tar.xz
patch #917153: faster memset()/memcpy()
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@4406 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'firmware/common/memset.S')
-rw-r--r--firmware/common/memset.S108
1 files changed, 108 insertions, 0 deletions
diff --git a/firmware/common/memset.S b/firmware/common/memset.S
new file mode 100644
index 0000000..038915c
--- /dev/null
+++ b/firmware/common/memset.S
@@ -0,0 +1,108 @@
+/***************************************************************************
+ * __________ __ ___.
+ * Open \______ \ ____ ____ | | _\_ |__ _______ ___
+ * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
+ * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
+ * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
+ * \/ \/ \/ \/ \/
+ * $Id$
+ *
+ * Copyright (C) 2004 by Jens Arnold
+ *
+ * All files in this archive are subject to the GNU General Public License.
+ * See the file COPYING in the source tree root for full license agreement.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+ .section .icode,"ax",@progbits
+
+ .align 2
+ .global _memset
+ .type _memset,@function
+
+/* Fills a memory region with specified byte value
+ * This version is optimized for speed
+ *
+ * arguments:
+ * r4 - start address
+ * r5 - data (fill value; only the low byte is used)
+ * r6 - length
+ *
+ * return value:
+ * r0 - start address (like ANSI version)
+ *
+ * register usage:
+ * r0 - temporary
+ * r1 - bit mask for rounding to long bounds
+ * r2 - last / first long bound (only if >= 12 bytes)
+ * r4 - start address
+ * r5 - data (spread to all 4 bytes if >= 12 bytes)
+ * r6 - current address (runs down from end to start)
+ *
+ * The instruction order below is devised in a way to utilize the pipelining
+ * of the SH1 to the max. The routine fills memory from end to start in
+ * order to utilize the auto-decrementing store instructions.
+ */
+
+_memset:
+ add r4,r6 /* r6 = start + length = end address (one past the last byte) */
+
+ mov r6,r0 /* r0 = copy of end address for the size check */
+ add #-12,r0 /* r0 = r6 - 12; don't go below 12 here: the main loop always stores 2 longs */
+ cmp/hs r4,r0 /* T = (end - 12 >= start), unsigned: >= 12 bytes to fill? */
+ bf .start_b2 /* no, fill everything with the trailing byte loop */
+
+ extu.b r5,r5 /* start: keep only the low byte, then spread it to all 4 bytes */
+ swap.b r5,r0 /* r0 = r5 with its two low bytes swapped (byte now in bits 15..8) */
+ or r0,r5 /* data now in 2 lower bytes of r5 */
+ swap.w r5,r0 /* r0 = r5 with its two words swapped (low word now in the high word) */
+ or r0,r5 /* data now in all 4 bytes of r5 */
+
+ mov #-4,r1 /* r1 = 0xFFFFFFFC, mask that rounds an address down to a long bound */
+
+ mov r6,r2
+ bra .start_b1 /* enter the byte loop at its test, so 0 bytes is possible */
+ and r1,r2 /* (delay slot) r2 = end rounded down = last long bound */
+
+ /* leading byte loop (executed first): sets the 0..3 bytes at the end of the region */
+.loop_b1:
+ mov.b r5,@-r6 /* pre-decrement r6, then store byte */
+.start_b1:
+ cmp/hi r2,r6 /* T = (r6 > r2), unsigned: runs r6 down to last long bound */
+ bt .loop_b1
+
+ mov r4,r2
+ add #11,r2 /* combined for rounding and offset: +3 rounds up, +8 is the loop offset */
+ and r1,r2 /* r2 = first long bound + 8 */
+
+ /* main loop: set 2 longs per pass; the first pass is unconditional */
+.loop2_l:
+ mov.l r5,@-r6 /* store first long */
+ cmp/hi r2,r6 /* tested between the stores: runs r6 down to first or second long bound */
+ mov.l r5,@-r6 /* store second long; the bt below still uses T from the cmp/hi */
+ bt .loop2_l
+
+ add #-8,r2 /* correct offset: r2 = first long bound */
+ cmp/hi r2,r6 /* 1 long left? (r6 still above the first long bound) */
+ bf .start_b2 /* no, jump to trailing byte loop */
+
+ bra .start_b2 /* jump to trailing byte loop */
+ mov.l r5,@-r6 /* (delay slot) store last long */
+
+ /* trailing byte loop (executed last): sets the bytes at the start, or all bytes if < 12 */
+ .align 2
+.loop_b2:
+ mov.b r5,@-r6 /* pre-decrement r6, then store byte */
+.start_b2:
+ cmp/hi r4,r6 /* T = (r6 > start), unsigned: runs r6 down to the start address */
+ bt .loop_b2
+
+ rts
+ mov r4,r0 /* (delay slot) return start address */
+
+.end:
+ .size _memset,.end-_memset
+