Several tweaks and cleanups:
* Use .rept instead of repeated macros for repeating blocks.
* Use MUL (variant) instead of MLA (variant) in the first step of the ARM scalarproduct() if there's no loop.
* Unroll ARM assembler functions to 32 where not already done, plus the generic scalarproduct().

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@19144 a1c6a512-1295-4272-9138-f99709370657
author: Jens Arnold <amiconn@rockbox.org> 2008-11-19 21:31:33 +0000
committer: Jens Arnold <amiconn@rockbox.org> 2008-11-19 21:31:33 +0000
commit: 2a5053f58c1a33334776cc90264c67dde815cef3 (patch)
tree: 7acc0727874ff6b307eff293a18172a3239cd895 /apps/codecs/demac/libdemac/vector_math16_cf.h
parent: 14d37cb4555703d216e954db15ccca2c34642dc3 (diff)
download: rockbox-2a5053f58c1a33334776cc90264c67dde815cef3.zip
rockbox-2a5053f58c1a33334776cc90264c67dde815cef3.tar.gz
rockbox-2a5053f58c1a33334776cc90264c67dde815cef3.tar.bz2
rockbox-2a5053f58c1a33334776cc90264c67dde815cef3.tar.xz
1 files changed, 22 insertions, 33 deletions
diff --git a/apps/codecs/demac/libdemac/vector_math16_cf.h b/apps/codecs/demac/libdemac/vector_math16_cf.h
index 0c3aaca..11e7f07 100644
--- a/apps/codecs/demac/libdemac/vector_math16_cf.h
+++ b/apps/codecs/demac/libdemac/vector_math16_cf.h
@@ -67,7 +67,7 @@ static inline void vector_add(int16_t* v1, int16_t* v2)
         "move.l  %%d3, (%[v1])+      \n"
         "lea.l   (16, %[v2]), %[v2]  \n"
         "move.l  %%d4, %%d0          \n"
-        
+
         "movem.l (%[v1]), %%a0-%%a3  \n"
         "movem.l (%[v2]), %%d1-%%d4  \n"
         ADDHALFXREGS(%%a0, %%d1, %%d0)
@@ -175,7 +175,7 @@ static inline void vector_sub(int16_t* v1, int16_t* v2)
         "move.l  %%d3, (%[v1])+      \n"
         "lea.l   (16, %[v2]), %[v2]  \n"
         "move.l  %%d4, %%d0          \n"
-        
+
         "movem.l (%[v2]), %%d1-%%d4  \n"
         "movem.l (%[v1]), %%a0-%%a3  \n"
         SUBHALFXREGS(%%a0, %%d1, %%d0)
@@ -207,7 +207,6 @@ static inline void vector_sub(int16_t* v1, int16_t* v2)
         "move.l  %%d2, (%[v1])+      \n"
         SUBHALFREGS(%%a3, %%d4, %%d3)
         "move.l  %%d3, (%[v1])+      \n"
-
         "lea.l   (16, %[v2]), %[v2]  \n"
 
         "movem.l (%[v2]), %%d1-%%d4  \n"
@@ -248,22 +247,16 @@ static inline void vector_sub(int16_t* v1, int16_t* v2)
  * in signed integer mode - call above macro before use. */
 static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
 {
-    int res = 0;
+    int res;
 #if ORDER > 32
     int cnt = ORDER>>5;
 #endif
 
-#define MACBLOCK4                                        \
-        "mac.w   %%d0u, %%d1u, (%[v1])+, %%d2, %%acc0\n" \
-        "mac.w   %%d0l, %%d1l, (%[v2])+, %%d1, %%acc0\n" \
-        "mac.w   %%d2u, %%d1u, (%[v1])+, %%d0, %%acc0\n" \
-        "mac.w   %%d2l, %%d1l, (%[v2])+, %%d1, %%acc0\n"
-
-#define MACBLOCK4_U2                                     \
-        "mac.w   %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n" \
-        "mac.w   %%d0l, %%d1u, (%[v1])+, %%d0, %%acc0\n" \
-        "mac.w   %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n" \
-        "mac.w   %%d0l, %%d1u, (%[v1])+, %%d0, %%acc0\n"
+#if ORDER > 16
+#define MAC_BLOCKS "7"
+#else
+#define MAC_BLOCKS "3"
+#endif
 
     asm volatile (
         "move.l  %[v2], %%d0                         \n"
@@ -274,15 +267,13 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
         "move.l  (%[v1])+, %%d0                      \n"
         "move.w  (%[v2])+, %%d1                      \n"
     "1:                                              \n"
-#if ORDER > 16
-        MACBLOCK4_U2
-        MACBLOCK4_U2
-        MACBLOCK4_U2
-        MACBLOCK4_U2
-#endif
-        MACBLOCK4_U2
-        MACBLOCK4_U2
-        MACBLOCK4_U2
+        ".rept " MAC_BLOCKS                         "\n"
+        "mac.w   %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n"
+        "mac.w   %%d0l, %%d1u, (%[v1])+, %%d0, %%acc0\n"
+        "mac.w   %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n"
+        "mac.w   %%d0l, %%d1u, (%[v1])+, %%d0, %%acc0\n"
+        ".endr                                       \n"
+
         "mac.w   %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n"
         "mac.w   %%d0l, %%d1u, (%[v1])+, %%d0, %%acc0\n"
         "mac.w   %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n"
@@ -299,15 +290,13 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
         "move.l  (%[v1])+, %%d0                      \n"
         "move.l  (%[v2])+, %%d1                      \n"
     "1:                                              \n"
-#if ORDER > 16
-        MACBLOCK4
-        MACBLOCK4
-        MACBLOCK4
-        MACBLOCK4
-#endif
-        MACBLOCK4
-        MACBLOCK4
-        MACBLOCK4
+        ".rept " MAC_BLOCKS                         "\n"
+        "mac.w   %%d0u, %%d1u, (%[v1])+, %%d2, %%acc0\n"
+        "mac.w   %%d0l, %%d1l, (%[v2])+, %%d1, %%acc0\n"
+        "mac.w   %%d2u, %%d1u, (%[v1])+, %%d0, %%acc0\n"
+        "mac.w   %%d2l, %%d1l, (%[v2])+, %%d1, %%acc0\n"
+        ".endr                                       \n"
+
         "mac.w   %%d0u, %%d1u, (%[v1])+, %%d2, %%acc0\n"
         "mac.w   %%d0l, %%d1l, (%[v2])+, %%d1, %%acc0\n"
 #if ORDER > 32
author	Jens Arnold <amiconn@rockbox.org>	2008-11-19 21:31:33 +0000
committer	Jens Arnold <amiconn@rockbox.org>	2008-11-19 21:31:33 +0000
commit	2a5053f58c1a33334776cc90264c67dde815cef3 (patch)
tree	7acc0727874ff6b307eff293a18172a3239cd895 /apps/codecs/demac/libdemac/vector_math16_cf.h
parent	14d37cb4555703d216e954db15ccca2c34642dc3 (diff)
download	rockbox-2a5053f58c1a33334776cc90264c67dde815cef3.zip rockbox-2a5053f58c1a33334776cc90264c67dde815cef3.tar.gz rockbox-2a5053f58c1a33334776cc90264c67dde815cef3.tar.bz2 rockbox-2a5053f58c1a33334776cc90264c67dde815cef3.tar.xz