summary refs log tree commit diff
path: root/apps/plugins
diff options
context:
space:
mode:
author Jens Arnold <amiconn@rockbox.org> 2005-08-01 01:23:04 +0000
committer Jens Arnold <amiconn@rockbox.org> 2005-08-01 01:23:04 +0000
commit 6b749c5ab1b0d75ccc65132f627a1230ffcdc21f (patch)
tree fd9aff4f30daf07fe5f102f14cd2fcc2fae8213d /apps/plugins
parent e7bd73f5dc83703e97f32c564ffa1e1a52b01e8c (diff)
download rockbox-6b749c5ab1b0d75ccc65132f627a1230ffcdc21f.zip
rockbox-6b749c5ab1b0d75ccc65132f627a1230ffcdc21f.tar.gz
rockbox-6b749c5ab1b0d75ccc65132f627a1230ffcdc21f.tar.bz2
rockbox-6b749c5ab1b0d75ccc65132f627a1230ffcdc21f.tar.xz
Further optimised asm 64 bit multiplication for SH1, speedup around 20%.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@7274 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/plugins')
-rw-r--r-- apps/plugins/mandelbrot.c 90
1 files changed, 41 insertions, 49 deletions
diff --git a/apps/plugins/mandelbrot.c b/apps/plugins/mandelbrot.c
index 8474969..3639431 100644
--- a/apps/plugins/mandelbrot.c
+++ b/apps/plugins/mandelbrot.c
@@ -75,66 +75,58 @@ static unsigned max_iter;
static unsigned char *gbuf;
static unsigned int gbuf_size = 0;
-static unsigned char graybuffer[LCD_HEIGHT];
+static unsigned char graybuffer[LCD_HEIGHT];
#if CONFIG_CPU == SH7034
long long mul64(long long f1, long long f2);
asm (
/* 64bit * 64bit -> 64bit multiplication. Works for both signed and unsigned */
- ".global _mul64 \n"
- ".type _mul64,@function\n"
-"_mul64: \n" /* Notation: abcd * efgh, where each letter */
- "mov.l r8,@-r15 \n" /* represents 16 bits. Called with: */
- "mov.l r9,@-r15 \n" /* r4 = ab, r5 = cd, r6 = ef, r7 = gh */
-
- "swap.w r5,r2 \n" /* r2 = dc */
- "mulu r2,r7 \n" /* c * h */
- "swap.w r7,r3 \n" /* r3 = hg */
- "sts macl,r1 \n" /* r1 = c * h */
- "mulu r5,r3 \n" /* d * g */
- "clrt \n"
- "sts macl,r9 \n" /* r9 = d * g */
- "addc r9,r1 \n" /* r1 += r9 */
- "movt r0 \n" /* move carry to r0 */
- "mov r1,r9 \n" /* r0r1 <<= 16 */
- "xtrct r0,r9 \n"
- "mulu r5,r7 \n" /* d * h */
- "mov r9,r0 \n"
- "shll16 r1 \n"
- "sts macl,r9 \n" /* r9 = d * h */
- "mov #0,r8 \n" /* r8 = 0 */
- "clrt \n" /* r0r1 += r8r9 */
- "mulu r4,r7 \n" /* b * h */
- "addc r9,r1 \n"
- "addc r8,r0 \n"
- "sts macl,r8 \n" /* r8 = b * h */
- "mulu r2,r3 \n" /* c * g */
- "add r8,r0 \n" /* r0r1 += r8 << 32 */
- "sts macl,r8 \n" /* r8 = c * g */
- "mulu r5,r6 \n" /* d * f */
- "add r8,r0 \n" /* r0r1 += r8 << 32 */
- "sts macl,r8 \n" /* r8 = d * f */
- "mulu r4,r3 \n" /* b * g */
- "add r8,r0 \n" /* r0r1 += r8 << 32 */
- "sts macl,r8 \n" /* r8 = b * g */
- "mulu r2,r6 \n" /* c * f */
+ ".global _mul64 \n" /* Notation: abcd * efgh, where each letter */
+ ".type _mul64,@function\n" /* represents 16 bits. Called with: */
+"_mul64: \n" /* r4 = ab, r5 = cd, r6 = ef, r7 = gh */
"swap.w r4,r2 \n" /* r2 = ba */
- "sts macl,r9 \n" /* r9 = c * f */
"mulu r2,r7 \n" /* a * h */
- "add r9,r8 \n" /* r8 += r9 */
"swap.w r6,r3 \n" /* r3 = fe */
- "sts macl,r9 \n" /* r9 = a * h */
+ "sts macl,r0 \n" /* r0 = a * h */
"mulu r5,r3 \n" /* d * e */
- "add r9,r8 \n" /* r8 += r9 */
- "sts macl,r9 \n" /* r9 = d * e */
- "add r9,r8 \n" /* r8 += r9 */
- "shll16 r8 \n" /* r8 <<= 16 */
- "add r8,r0 \n" /* r0r1 += r8 << 32 */
-
- "mov.l @r15+,r9 \n"
+ "swap.w r7,r3 \n" /* r3 = hg */
+ "sts macl,r1 \n" /* r1 = d * e */
+ "mulu r4,r3 \n" /* b * g */
+ "add r1,r0 \n" /* r0 += r1 */
+ "swap.w r5,r2 \n" /* r2 = dc */
+ "sts macl,r1 \n" /* r1 = b * g */
+ "mulu r2,r6 \n" /* c * f */
+ "add r1,r0 \n" /* r0 += r1 */
+ "sts macl,r1 \n" /* r1 = c * f */
+ "mulu r4,r7 \n" /* b * h */
+ "add r1,r0 \n" /* r0 += r1 */
+ "shll16 r0 \n" /* r0 <<= 16 */
+ "sts macl,r1 \n" /* r1 = b * h */
+ "mulu r2,r3 \n" /* c * g */
+ "add r1,r0 \n" /* r0 += r1 */
+ "sts macl,r1 \n" /* r1 = c * g */
+ "mulu r5,r6 \n" /* d * f */
+ "add r1,r0 \n" /* r0 += r1 */
+ "sts macl,r1 \n" /* r1 = d * f */
+ "mulu r5,r7 \n" /* d * h */
+ "add r1,r0 \n" /* r0 += r1 */
+ "sts macl,r1 \n" /* r1 = d * h */
+ "mulu r2,r7 \n" /* c * h */
+ "sts macl,r2 \n" /* r2 = c * h */
+ "mulu r5,r3 \n" /* d * g */
+ "clrt \n" /* T = 0, so the addc below has no carry-in */
+ "sts macl,r3 \n" /* r3 = d * g */
+ "addc r2,r3 \n" /* r3 += r2, carry->r2 */
+ "movt r2 \n" /* r2 = T, the carry out of c*h + d*g */
+ "mov r3,r4 \n" /* r2r3 <<= 16 */
+ "xtrct r2,r4 \n" /* r4 = (r2 << 16) | (r4 >> 16) */
+ "mov r4,r2 \n" /* r2 = upper 32 bits of the shifted sum */
+ "shll16 r3 \n" /* r3 = lower 32 bits of the shifted sum */
+ "clrt \n" /* r0r1 += r2r3 */
+ "addc r3,r1 \n" /* add lower words, T = carry out */
"rts \n"
- "mov.l @r15+,r8 \n"
+ "addc r2,r0 \n" /* add upper words plus carry; follows rts, so it sits in the branch delay slot */
);
#define MUL64(a, b) mul64(a, b)
#else