| author | Rafaël Carré <rafael.carre@gmail.com> | 2010-08-03 17:41:34 +0000 |
|---|---|---|
| committer | Rafaël Carré <rafael.carre@gmail.com> | 2010-08-03 17:41:34 +0000 |
| commit | 2c00cb5e835f99ad17f9bb3a8d7dbbac2b14c05a (patch) | |
| tree | 656ee2abe40fde6de1fc4224bc1fd4551506d564 /apps | |
| parent | 79d5c0702bb6d7294c306ca65c3f5bade1c2aa51 (diff) | |
| download | rockbox-2c00cb5e835f99ad17f9bb3a8d7dbbac2b14c05a.zip rockbox-2c00cb5e835f99ad17f9bb3a8d7dbbac2b14c05a.tar.gz rockbox-2c00cb5e835f99ad17f9bb3a8d7dbbac2b14c05a.tar.bz2 rockbox-2c00cb5e835f99ad17f9bb3a8d7dbbac2b14c05a.tar.xz | |
wma: move inline functions into .h file
use 'static inline' instead of the GCC extension 'inline';
some GCC builds do not support the latter (the Android NDK's GCC, for example)
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27679 a1c6a512-1295-4272-9138-f99709370657
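For context: under the GNU89 semantics that such compilers apply, a function defined with plain `inline` is only an inline definition, and no out-of-line symbol is guaranteed to be emitted, so callers in other translation units (or unoptimized builds that decline to inline) can hit unresolved references at link time. `static inline` in a header avoids this by giving each including file its own internal-linkage copy. A minimal sketch of the pattern this commit adopts (the header and function here are illustrative, not from the Rockbox tree):

```c
/* example.h -- illustrative only, not a Rockbox file */
#ifndef EXAMPLE_H
#define EXAMPLE_H

/* 'static inline' in a header is portable across GCC inline
 * dialects: every .c file that includes this gets its own
 * internal-linkage copy, so the link never depends on the
 * compiler having emitted an external definition. */
static inline int example_min(int a, int b)
{
    return a < b ? a : b;
}

#endif /* EXAMPLE_H */
```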
Diffstat (limited to 'apps')
| -rw-r--r-- | apps/codecs/libwma/wmadeci.c | 6 |
| -rw-r--r-- | apps/codecs/libwma/wmafixed.c | 129 |
| -rw-r--r-- | apps/codecs/libwma/wmafixed.h | 129 |
3 files changed, 130 insertions, 134 deletions
diff --git a/apps/codecs/libwma/wmadeci.c b/apps/codecs/libwma/wmadeci.c
index 9e448f4..a3edea0 100644
--- a/apps/codecs/libwma/wmadeci.c
+++ b/apps/codecs/libwma/wmadeci.c
@@ -30,11 +30,7 @@
 #include "wmadata.h"
 
 static void wma_lsp_to_curve_init(WMADecodeContext *s, int frame_len);
-inline void vector_fmul_add_add(fixed32 *dst, const fixed32 *data,
-                                const fixed32 *window, int n);
-inline void vector_fmul_reverse(fixed32 *dst, const fixed32 *src0,
-                                const fixed32 *src1, int len);
-
+
 /*declarations of statically allocated variables used to remove malloc calls*/
 fixed32 coefsarray[MAX_CHANNELS][BLOCK_MAX_SIZE] IBSS_ATTR;
 
diff --git a/apps/codecs/libwma/wmafixed.c b/apps/codecs/libwma/wmafixed.c
index 205bab4..bdf87a5 100644
--- a/apps/codecs/libwma/wmafixed.c
+++ b/apps/codecs/libwma/wmafixed.c
@@ -63,135 +63,6 @@ fixed64 Fixed32To64(fixed32 x)
     return (fixed64)x;
 }
 
-
-/*
- * Helper functions for wma_window.
- *
- *
- */
-
-#ifdef CPU_ARM
-inline void vector_fmul_add_add(fixed32 *dst, const fixed32 *data,
-                                const fixed32 *window, int n)
-{
-    /* Block sizes are always power of two */
-    asm volatile (
-        "0:"
-        "ldmia %[d]!, {r0, r1};"
-        "ldmia %[w]!, {r4, r5};"
-        /* consume the first data and window value so we can use those
-         * registers again */
-        "smull r8, r9, r0, r4;"
-        "ldmia %[dst], {r0, r4};"
-        "add r0, r0, r9, lsl #1;" /* *dst=*dst+(r9<<1)*/
-        "smull r8, r9, r1, r5;"
-        "add r1, r4, r9, lsl #1;"
-        "stmia %[dst]!, {r0, r1};"
-        "subs %[n], %[n], #2;"
-        "bne 0b;"
-        : [d] "+r" (data), [w] "+r" (window), [dst] "+r" (dst), [n] "+r" (n)
-        : : "r0", "r1", "r4", "r5", "r8", "r9", "memory", "cc");
-}
-
-inline void vector_fmul_reverse(fixed32 *dst, const fixed32 *src0, const fixed32 *src1,
-                                int len)
-{
-    /* Block sizes are always power of two */
-    asm volatile (
-        "add %[s1], %[s1], %[n], lsl #2;"
-        "0:"
-        "ldmia %[s0]!, {r0, r1};"
-        "ldmdb %[s1]!, {r4, r5};"
-        "smull r8, r9, r0, r5;"
-        "mov r0, r9, lsl #1;"
-        "smull r8, r9, r1, r4;"
-        "mov r1, r9, lsl #1;"
-        "stmia %[dst]!, {r0, r1};"
-        "subs %[n], %[n], #2;"
-        "bne 0b;"
-        : [s0] "+r" (src0), [s1] "+r" (src1), [dst] "+r" (dst), [n] "+r" (len)
-        : : "r0", "r1", "r4", "r5", "r8", "r9", "memory", "cc");
-}
-
-#elif defined(CPU_COLDFIRE)
-
-inline void vector_fmul_add_add(fixed32 *dst, const fixed32 *data,
-                                const fixed32 *window, int n)
-{
-    /* Block sizes are always power of two. Smallest block is always way bigger
-     * than four too.*/
-    asm volatile (
-        "0:"
-        "movem.l (%[d]), %%d0-%%d3;"
-        "movem.l (%[w]), %%d4-%%d5/%%a0-%%a1;"
-        "mac.l %%d0, %%d4, %%acc0;"
-        "mac.l %%d1, %%d5, %%acc1;"
-        "mac.l %%d2, %%a0, %%acc2;"
-        "mac.l %%d3, %%a1, %%acc3;"
-        "lea.l (16, %[d]), %[d];"
-        "lea.l (16, %[w]), %[w];"
-        "movclr.l %%acc0, %%d0;"
-        "movclr.l %%acc1, %%d1;"
-        "movclr.l %%acc2, %%d2;"
-        "movclr.l %%acc3, %%d3;"
-        "movem.l (%[dst]), %%d4-%%d5/%%a0-%%a1;"
-        "add.l %%d4, %%d0;"
-        "add.l %%d5, %%d1;"
-        "add.l %%a0, %%d2;"
-        "add.l %%a1, %%d3;"
-        "movem.l %%d0-%%d3, (%[dst]);"
-        "lea.l (16, %[dst]), %[dst];"
-        "subq.l #4, %[n];"
-        "jne 0b;"
-        : [d] "+a" (data), [w] "+a" (window), [dst] "+a" (dst), [n] "+d" (n)
-        : : "d0", "d1", "d2", "d3", "d4", "d5", "a0", "a1", "memory", "cc");
-}
-
-inline void vector_fmul_reverse(fixed32 *dst, const fixed32 *src0, const fixed32 *src1,
-                                int len)
-{
-    /* Block sizes are always power of two. Smallest block is always way bigger
-     * than four too.*/
-    asm volatile (
-        "lea.l (-16, %[s1], %[n]*4), %[s1];"
-        "0:"
-        "movem.l (%[s0]), %%d0-%%d3;"
-        "movem.l (%[s1]), %%d4-%%d5/%%a0-%%a1;"
-        "mac.l %%d0, %%a1, %%acc0;"
-        "mac.l %%d1, %%a0, %%acc1;"
-        "mac.l %%d2, %%d5, %%acc2;"
-        "mac.l %%d3, %%d4, %%acc3;"
-        "lea.l (16, %[s0]), %[s0];"
-        "lea.l (-16, %[s1]), %[s1];"
-        "movclr.l %%acc0, %%d0;"
-        "movclr.l %%acc1, %%d1;"
-        "movclr.l %%acc2, %%d2;"
-        "movclr.l %%acc3, %%d3;"
-        "movem.l %%d0-%%d3, (%[dst]);"
-        "lea.l (16, %[dst]), %[dst];"
-        "subq.l #4, %[n];"
-        "jne 0b;"
-        : [s0] "+a" (src0), [s1] "+a" (src1), [dst] "+a" (dst), [n] "+d" (len)
-        : : "d0", "d1", "d2", "d3", "d4", "d5", "a0", "a1", "memory", "cc");
-}
-
-#else
-
-inline void vector_fmul_add_add(fixed32 *dst, const fixed32 *src0, const fixed32 *src1, int len){
-    int i;
-    for(i=0; i<len; i++)
-        dst[i] = fixmul32b(src0[i], src1[i]) + dst[i];
-}
-
-inline void vector_fmul_reverse(fixed32 *dst, const fixed32 *src0, const fixed32 *src1, int len){
-    int i;
-    src1 += len-1;
-    for(i=0; i<len; i++)
-        dst[i] = fixmul32b(src0[i], src1[-i]);
-}
-
-#endif
-
 /*
  Not performance senstitive code here
diff --git a/apps/codecs/libwma/wmafixed.h b/apps/codecs/libwma/wmafixed.h
index 7f04a95..4225f16 100644
--- a/apps/codecs/libwma/wmafixed.h
+++ b/apps/codecs/libwma/wmafixed.h
@@ -106,3 +106,132 @@ static inline fixed32 fixmul32(fixed32 x, fixed32 y)
 }
 
 #endif
+
+
+/*
+ * Helper functions for wma_window.
+ *
+ *
+ */
+
+#ifdef CPU_ARM
+static inline void vector_fmul_add_add(fixed32 *dst, const fixed32 *data,
+                                       const fixed32 *window, int n)
+{
+    /* Block sizes are always power of two */
+    asm volatile (
+        "0:"
+        "ldmia %[d]!, {r0, r1};"
+        "ldmia %[w]!, {r4, r5};"
+        /* consume the first data and window value so we can use those
+         * registers again */
+        "smull r8, r9, r0, r4;"
+        "ldmia %[dst], {r0, r4};"
+        "add r0, r0, r9, lsl #1;" /* *dst=*dst+(r9<<1)*/
+        "smull r8, r9, r1, r5;"
+        "add r1, r4, r9, lsl #1;"
+        "stmia %[dst]!, {r0, r1};"
+        "subs %[n], %[n], #2;"
+        "bne 0b;"
+        : [d] "+r" (data), [w] "+r" (window), [dst] "+r" (dst), [n] "+r" (n)
+        : : "r0", "r1", "r4", "r5", "r8", "r9", "memory", "cc");
+}
+
+static inline void vector_fmul_reverse(fixed32 *dst, const fixed32 *src0, const fixed32 *src1,
+                                       int len)
+{
+    /* Block sizes are always power of two */
+    asm volatile (
+        "add %[s1], %[s1], %[n], lsl #2;"
+        "0:"
+        "ldmia %[s0]!, {r0, r1};"
+        "ldmdb %[s1]!, {r4, r5};"
+        "smull r8, r9, r0, r5;"
+        "mov r0, r9, lsl #1;"
+        "smull r8, r9, r1, r4;"
+        "mov r1, r9, lsl #1;"
+        "stmia %[dst]!, {r0, r1};"
+        "subs %[n], %[n], #2;"
+        "bne 0b;"
+        : [s0] "+r" (src0), [s1] "+r" (src1), [dst] "+r" (dst), [n] "+r" (len)
+        : : "r0", "r1", "r4", "r5", "r8", "r9", "memory", "cc");
+}
+
+#elif defined(CPU_COLDFIRE)
+
+static inline void vector_fmul_add_add(fixed32 *dst, const fixed32 *data,
+                                       const fixed32 *window, int n)
+{
+    /* Block sizes are always power of two. Smallest block is always way bigger
+     * than four too.*/
+    asm volatile (
+        "0:"
+        "movem.l (%[d]), %%d0-%%d3;"
+        "movem.l (%[w]), %%d4-%%d5/%%a0-%%a1;"
+        "mac.l %%d0, %%d4, %%acc0;"
+        "mac.l %%d1, %%d5, %%acc1;"
+        "mac.l %%d2, %%a0, %%acc2;"
+        "mac.l %%d3, %%a1, %%acc3;"
+        "lea.l (16, %[d]), %[d];"
+        "lea.l (16, %[w]), %[w];"
+        "movclr.l %%acc0, %%d0;"
+        "movclr.l %%acc1, %%d1;"
+        "movclr.l %%acc2, %%d2;"
+        "movclr.l %%acc3, %%d3;"
+        "movem.l (%[dst]), %%d4-%%d5/%%a0-%%a1;"
+        "add.l %%d4, %%d0;"
+        "add.l %%d5, %%d1;"
+        "add.l %%a0, %%d2;"
+        "add.l %%a1, %%d3;"
+        "movem.l %%d0-%%d3, (%[dst]);"
+        "lea.l (16, %[dst]), %[dst];"
+        "subq.l #4, %[n];"
+        "jne 0b;"
+        : [d] "+a" (data), [w] "+a" (window), [dst] "+a" (dst), [n] "+d" (n)
+        : : "d0", "d1", "d2", "d3", "d4", "d5", "a0", "a1", "memory", "cc");
+}
+
+static inline void vector_fmul_reverse(fixed32 *dst, const fixed32 *src0, const fixed32 *src1,
+                                       int len)
+{
+    /* Block sizes are always power of two. Smallest block is always way bigger
+     * than four too.*/
+    asm volatile (
+        "lea.l (-16, %[s1], %[n]*4), %[s1];"
+        "0:"
+        "movem.l (%[s0]), %%d0-%%d3;"
+        "movem.l (%[s1]), %%d4-%%d5/%%a0-%%a1;"
+        "mac.l %%d0, %%a1, %%acc0;"
+        "mac.l %%d1, %%a0, %%acc1;"
+        "mac.l %%d2, %%d5, %%acc2;"
+        "mac.l %%d3, %%d4, %%acc3;"
+        "lea.l (16, %[s0]), %[s0];"
+        "lea.l (-16, %[s1]), %[s1];"
+        "movclr.l %%acc0, %%d0;"
+        "movclr.l %%acc1, %%d1;"
+        "movclr.l %%acc2, %%d2;"
+        "movclr.l %%acc3, %%d3;"
+        "movem.l %%d0-%%d3, (%[dst]);"
+        "lea.l (16, %[dst]), %[dst];"
+        "subq.l #4, %[n];"
+        "jne 0b;"
+        : [s0] "+a" (src0), [s1] "+a" (src1), [dst] "+a" (dst), [n] "+d" (len)
+        : : "d0", "d1", "d2", "d3", "d4", "d5", "a0", "a1", "memory", "cc");
+}
+
+#else
+
+static inline void vector_fmul_add_add(fixed32 *dst, const fixed32 *src0, const fixed32 *src1, int len){
+    int i;
+    for(i=0; i<len; i++)
+        dst[i] = fixmul32b(src0[i], src1[i]) + dst[i];
+}
+
+static inline void vector_fmul_reverse(fixed32 *dst, const fixed32 *src0, const fixed32 *src1, int len){
+    int i;
+    src1 += len-1;
+    for(i=0; i<len; i++)
+        dst[i] = fixmul32b(src0[i], src1[-i]);
+}
+
+#endif
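The assembly variants in this diff are hand-scheduled forms of the generic C fallback at the bottom of each `#if` ladder. On ARM, `smull` leaves the full 64-bit product in a register pair, and shifting the high word left by one keeps bits 62..31 of the product, i.e. `(a * b) >> 31`; the ColdFire path accumulates the same product in the EMAC accumulators via `mac.l`/`movclr.l`. A portable sketch of that arithmetic, assuming `fixed32` is a Q31 value as the shift implies (`q31_mul` and `windowed_overlap_add` are illustrative names, not the Rockbox `fixmul32b`/`vector_fmul_add_add` themselves):

```c
#include <stdint.h>

typedef int32_t fixed32;   /* assumed Q31: 1 sign bit, 31 fraction bits */

/* What 'smull ... lsl #1' (ARM) and 'mac.l ... movclr.l' (ColdFire)
 * compute: a 32x32 -> 64-bit multiply that keeps the top 32 bits of
 * the product shifted up by one, i.e. (a * b) >> 31. */
static inline fixed32 q31_mul(fixed32 a, fixed32 b)
{
    return (fixed32)(((int64_t)a * (int64_t)b) >> 31);
}

/* The loop all three vector_fmul_add_add variants implement:
 * window the input and accumulate into the overlap buffer. */
static void windowed_overlap_add(fixed32 *dst, const fixed32 *data,
                                 const fixed32 *window, int n)
{
    int i;
    for (i = 0; i < n; i++)
        dst[i] += q31_mul(data[i], window[i]);
}
```

The asm versions unroll by two (ARM) or four (ColdFire) and rely on the block length being a power of two, which is why they can step the counter in fixed increments without a remainder loop.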