summary | refs | log | tree | commit | diff
path: root/apps/codecs/demac/libdemac/vector_math16_cf.h
diff options
context:
space:
mode:
author: Jens Arnold <amiconn@rockbox.org> 2008-11-19 21:31:33 +0000
committer: Jens Arnold <amiconn@rockbox.org> 2008-11-19 21:31:33 +0000
commit: 2a5053f58c1a33334776cc90264c67dde815cef3 (patch)
tree: 7acc0727874ff6b307eff293a18172a3239cd895 /apps/codecs/demac/libdemac/vector_math16_cf.h
parent: 14d37cb4555703d216e954db15ccca2c34642dc3 (diff)
download: rockbox-2a5053f58c1a33334776cc90264c67dde815cef3.zip
rockbox-2a5053f58c1a33334776cc90264c67dde815cef3.tar.gz
rockbox-2a5053f58c1a33334776cc90264c67dde815cef3.tar.bz2
rockbox-2a5053f58c1a33334776cc90264c67dde815cef3.tar.xz
Several tweaks and cleanups:
* Use .rept instead of repeated macros for repeating blocks.
* Use MUL (variant) instead of MLA (variant) in the first step of the ARM scalarproduct() if there's no loop.
* Unroll ARM assembler functions to 32 where not already done, plus the generic scalarproduct().
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@19144 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/codecs/demac/libdemac/vector_math16_cf.h')
-rw-r--r-- apps/codecs/demac/libdemac/vector_math16_cf.h 55
1 files changed, 22 insertions, 33 deletions
diff --git a/apps/codecs/demac/libdemac/vector_math16_cf.h b/apps/codecs/demac/libdemac/vector_math16_cf.h
index 0c3aaca..11e7f07 100644
--- a/apps/codecs/demac/libdemac/vector_math16_cf.h
+++ b/apps/codecs/demac/libdemac/vector_math16_cf.h
@@ -67,7 +67,7 @@ static inline void vector_add(int16_t* v1, int16_t* v2)
"move.l %%d3, (%[v1])+ \n"
"lea.l (16, %[v2]), %[v2] \n"
"move.l %%d4, %%d0 \n"
-
+
"movem.l (%[v1]), %%a0-%%a3 \n"
"movem.l (%[v2]), %%d1-%%d4 \n"
ADDHALFXREGS(%%a0, %%d1, %%d0)
@@ -175,7 +175,7 @@ static inline void vector_sub(int16_t* v1, int16_t* v2)
"move.l %%d3, (%[v1])+ \n"
"lea.l (16, %[v2]), %[v2] \n"
"move.l %%d4, %%d0 \n"
-
+
"movem.l (%[v2]), %%d1-%%d4 \n"
"movem.l (%[v1]), %%a0-%%a3 \n"
SUBHALFXREGS(%%a0, %%d1, %%d0)
@@ -207,7 +207,6 @@ static inline void vector_sub(int16_t* v1, int16_t* v2)
"move.l %%d2, (%[v1])+ \n"
SUBHALFREGS(%%a3, %%d4, %%d3)
"move.l %%d3, (%[v1])+ \n"
-
"lea.l (16, %[v2]), %[v2] \n"
"movem.l (%[v2]), %%d1-%%d4 \n"
@@ -248,22 +247,16 @@ static inline void vector_sub(int16_t* v1, int16_t* v2)
* in signed integer mode - call above macro before use. */
static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
{
- int res = 0;
+ int res;
#if ORDER > 32
int cnt = ORDER>>5;
#endif
-#define MACBLOCK4 \
- "mac.w %%d0u, %%d1u, (%[v1])+, %%d2, %%acc0\n" \
- "mac.w %%d0l, %%d1l, (%[v2])+, %%d1, %%acc0\n" \
- "mac.w %%d2u, %%d1u, (%[v1])+, %%d0, %%acc0\n" \
- "mac.w %%d2l, %%d1l, (%[v2])+, %%d1, %%acc0\n"
-
-#define MACBLOCK4_U2 \
- "mac.w %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n" \
- "mac.w %%d0l, %%d1u, (%[v1])+, %%d0, %%acc0\n" \
- "mac.w %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n" \
- "mac.w %%d0l, %%d1u, (%[v1])+, %%d0, %%acc0\n"
+#if ORDER > 16
+#define MAC_BLOCKS "7"
+#else
+#define MAC_BLOCKS "3"
+#endif
asm volatile (
"move.l %[v2], %%d0 \n"
@@ -274,15 +267,13 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
"move.l (%[v1])+, %%d0 \n"
"move.w (%[v2])+, %%d1 \n"
"1: \n"
-#if ORDER > 16
- MACBLOCK4_U2
- MACBLOCK4_U2
- MACBLOCK4_U2
- MACBLOCK4_U2
-#endif
- MACBLOCK4_U2
- MACBLOCK4_U2
- MACBLOCK4_U2
+ ".rept " MAC_BLOCKS "\n"
+ "mac.w %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n"
+ "mac.w %%d0l, %%d1u, (%[v1])+, %%d0, %%acc0\n"
+ "mac.w %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n"
+ "mac.w %%d0l, %%d1u, (%[v1])+, %%d0, %%acc0\n"
+ ".endr \n"
+
"mac.w %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n"
"mac.w %%d0l, %%d1u, (%[v1])+, %%d0, %%acc0\n"
"mac.w %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n"
@@ -299,15 +290,13 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
"move.l (%[v1])+, %%d0 \n"
"move.l (%[v2])+, %%d1 \n"
"1: \n"
-#if ORDER > 16
- MACBLOCK4
- MACBLOCK4
- MACBLOCK4
- MACBLOCK4
-#endif
- MACBLOCK4
- MACBLOCK4
- MACBLOCK4
+ ".rept " MAC_BLOCKS "\n"
+ "mac.w %%d0u, %%d1u, (%[v1])+, %%d2, %%acc0\n"
+ "mac.w %%d0l, %%d1l, (%[v2])+, %%d1, %%acc0\n"
+ "mac.w %%d2u, %%d1u, (%[v1])+, %%d0, %%acc0\n"
+ "mac.w %%d2l, %%d1l, (%[v2])+, %%d1, %%acc0\n"
+ ".endr \n"
+
"mac.w %%d0u, %%d1u, (%[v1])+, %%d2, %%acc0\n"
"mac.w %%d0l, %%d1l, (%[v2])+, %%d1, %%acc0\n"
#if ORDER > 32