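libwmapro: slightly shorter and faster inline asm fixed-point multiplication routines; the speedup is ~0.5%. Also stop misinforming gcc about which variables the asm changes: x is overwritten by the asm, so it is now declared as a read-write operand instead of an input-only one.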

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27584 a1c6a512-1295-4272-9138-f99709370657
author: Nils Wallménius <nils@rockbox.org> 2010-07-26 23:00:22 +0000
committer: Nils Wallménius <nils@rockbox.org> 2010-07-26 23:00:22 +0000
commit: 30d286d859aad049fb549f48080a196f6074a9fa (patch)
tree: 540fb23031e23da447a0d58e1cb041fc6a311faf /apps/codecs/libwmapro
parent: 5dd8c53b960d0b6680a0555a99e3232a5f890d07 (diff)
download: rockbox-30d286d859aad049fb549f48080a196f6074a9fa.zip
rockbox-30d286d859aad049fb549f48080a196f6074a9fa.tar.gz
rockbox-30d286d859aad049fb549f48080a196f6074a9fa.tar.bz2
rockbox-30d286d859aad049fb549f48080a196f6074a9fa.tar.xz
1 files changed, 22 insertions, 25 deletions
diff --git a/apps/codecs/libwmapro/wmapro_math.h b/apps/codecs/libwmapro/wmapro_math.h
index 30b9a98..5220560 100644
--- a/apps/codecs/libwmapro/wmapro_math.h
+++ b/apps/codecs/libwmapro/wmapro_math.h
@@ -95,37 +95,34 @@
     /* Calculates: result = (X*Y)>>16 */
     #define fixmul16(X,Y) \
     ({ \
-        int32_t t1, t2; \
+        int32_t t, x = (X); /* NOTE(review): locals t and x shadow any t/x named inside X or Y */ \
         asm volatile ( \
-            "mac.l   %[x],%[y],%%acc0\n\t" /* multiply */ \
-            "mulu.l  %[y],%[x]       \n\t" /* get lower half, avoid emac stall */ \
-            "movclr.l %%acc0,%[t1]   \n\t" /* get higher half */ \
-            "moveq.l #15,%[t2]       \n\t" \
-            "asl.l   %[t2],%[t1]     \n\t" /* hi <<= 15, plus one free */ \
-            "moveq.l #16,%[t2]       \n\t" \
-            "lsr.l   %[t2],%[x]      \n\t" /* (unsigned)lo >>= 16 */ \
-            "or.l    %[x],%[t1]      \n\t" /* combine result */ \
-            : [t1]"=&d"(t1), [t2]"=&d"(t2) \
-            : [x] "d" ((X)), [y] "d" ((Y))); \
-        t1; \
+            "mac.l    %[x],%[y],%%acc0\n\t" /* multiply into the emac accumulator */ \
+            "mulu.l   %[y],%[x]       \n\t" /* x = lower half of product; also avoids emac stall */ \
+            "movclr.l %%acc0,%[t]     \n\t" /* t = higher half (read and clear acc0) */ \
+            "lsr.l    #1,%[t]         \n\t" /* hi >>= 1 to compensate emac shift */ \
+            "move.w   %[t],%[x]       \n\t" /* low word of x = low word of hi */\
+            "swap     %[x]            \n\t" /* exchange words: x = (hi << 16) | (lo >> 16) */ \
+            : [t]"=&d"(t), [x] "+d" (x) /* x is both read and overwritten by the asm */ \
+            : [y] "d" ((Y))); \
+        x; /* value of the statement expression */ \
     })
     
     /* Calculates: result = (X*Y)>>24 */
     #define fixmul24(X,Y) \
     ({ \
-        int32_t t1, t2; \
+        int32_t t, x = (X); /* NOTE(review): locals t and x shadow any t/x named inside X or Y */ \
         asm volatile ( \
-            "mac.l   %[x],%[y],%%acc0\n\t" /* multiply */ \
-            "mulu.l  %[y],%[x]       \n\t" /* get lower half, avoid emac stall */ \
-            "movclr.l %%acc0,%[t1]   \n\t" /* get higher half */ \
-            "moveq.l #7,%[t2]        \n\t" \
-            "asl.l   %[t2],%[t1]     \n\t" /* hi <<= 7, plus one free */ \
-            "moveq.l #24,%[t2]       \n\t" \
-            "lsr.l   %[t2],%[x]      \n\t" /* (unsigned)lo >>= 24 */ \
-            "or.l    %[x],%[t1]      \n\t" /* combine result */ \
-            : [t1]"=&d"(t1), [t2]"=&d"(t2) \
-            : [x] "d" ((X)), [y] "d" ((Y))); \
-        t1; \
+            "mac.l    %[x],%[y],%%acc0\n\t" /* multiply into the emac accumulator */ \
+            "mulu.l   %[y],%[x]       \n\t" /* x = lower half of product; also avoids emac stall */ \
+            "moveq.l  #24,%[t]        \n\t" /* t briefly holds the shift count, before movclr reuses it */ \
+            "lsr.l    %[t],%[x]       \n\t" /* (unsigned)lo >>= 24 */ \
+            "movclr.l %%acc0,%[t]     \n\t" /* t = higher half (read and clear acc0) */ \
+            "asl.l    #7,%[t]         \n\t" /* hi <<= 7, plus one free from the emac shift */ \
+            "or.l     %[x],%[t]       \n\t" /* combine result: t = (hi << 8) | (lo >> 24) */ \
+            : [t]"=&d"(t), [x] "+d" (x) /* x is both read and overwritten by the asm */ \
+            : [y] "d" ((Y))); \
+        t; /* value of the statement expression */ \
     })
 
     /* Calculates: result = (X*Y)>>32 */
@@ -239,7 +236,7 @@ static inline void vector_fixmul_scalar(int32_t *dst, const int32_t *src,
 {
     int i;
     for(i=0; i<len; i++)
-        dst[i] = fixmul24(src[i], mul);   
+        dst[i] = fixmul24(src[i], mul);
 }
 
 static inline int av_clip(int a, int amin, int amax)
author	Nils Wallménius <nils@rockbox.org>	2010-07-26 23:00:22 +0000
committer	Nils Wallménius <nils@rockbox.org>	2010-07-26 23:00:22 +0000
commit	30d286d859aad049fb549f48080a196f6074a9fa (patch)
tree	540fb23031e23da447a0d58e1cb041fc6a311faf /apps/codecs/libwmapro
parent	5dd8c53b960d0b6680a0555a99e3232a5f890d07 (diff)
download	rockbox-30d286d859aad049fb549f48080a196f6074a9fa.zip rockbox-30d286d859aad049fb549f48080a196f6074a9fa.tar.gz rockbox-30d286d859aad049fb549f48080a196f6074a9fa.tar.bz2 rockbox-30d286d859aad049fb549f48080a196f6074a9fa.tar.xz