diff options
| author | Nils Wallménius <nils@rockbox.org> | 2010-07-26 11:15:25 +0000 |
|---|---|---|
| committer | Nils Wallménius <nils@rockbox.org> | 2010-07-26 11:15:25 +0000 |
| commit | 025eed5c745513ff181762a172d2eda7b886c40d (patch) | |
| tree | 79cf0e0eb23c33f25e84db1bd900a8a4b49bdd17 /apps/codecs/libwmapro/wmapro_math.h | |
| parent | 53b5abd93c424c6add261df52b07f4a413f1189b (diff) | |
| download | rockbox-025eed5c745513ff181762a172d2eda7b886c40d.zip rockbox-025eed5c745513ff181762a172d2eda7b886c40d.tar.gz rockbox-025eed5c745513ff181762a172d2eda7b886c40d.tar.bz2 rockbox-025eed5c745513ff181762a172d2eda7b886c40d.tar.xz | |
libwmapro: add Coldfire asm for vector_fixmul_window, which gives a speedup of ~13%. Drop the add_bias argument of the vector_fixmul_window function, since it was always 0.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27573 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/codecs/libwmapro/wmapro_math.h')
| -rw-r--r-- | apps/codecs/libwmapro/wmapro_math.h | 43 |
1 file changed, 36 insertions, 7 deletions
diff --git a/apps/codecs/libwmapro/wmapro_math.h b/apps/codecs/libwmapro/wmapro_math.h index 823c002..71cc3d3 100644 --- a/apps/codecs/libwmapro/wmapro_math.h +++ b/apps/codecs/libwmapro/wmapro_math.h @@ -19,10 +19,39 @@ static inline int32_t fixmulshift(int32_t x, int32_t y, int shamt) return (int32_t)temp; } +#ifdef CPU_COLDFIRE +static inline void vector_fixmul_window(int32_t *dst, const int32_t *src0, + const int32_t *src1, const int32_t *win, + int len) +{ + int i, j; + dst += len; + win += len; + src0+= len; + for(i=-len, j=len-1; i<0; i++, j--) { + int32_t s0 = src0[i]; + int32_t s1 = src1[j]; + int32_t wi = -win[i]; + int32_t wj = -win[j]; + asm volatile ("mac.l %[s0], %[wj], %%acc0\n\t" + "msac.l %[s1], %[wi], %%acc0\n\t" + "mac.l %[s0], %[wi], %%acc1\n\t" + "mac.l %[s1], %[wj], %%acc1\n\t" + "movclr.l %%acc0, %[s0]\n\t" + "move.l %[s0], (%[dst_i])\n\t" + "movclr.l %%acc1, %[s0]\n\t" + "move.l %[s0], (%[dst_j])\n\t" + : [s0] "+r" (s0) /* this register is clobbered so specify it as an input */ + : [dst_i] "a" (&dst[i]), [dst_j] "a" (&dst[j]), + [s1] "r" (s1), [wi] "r" (wi), [wj] "r" (wj) + : "cc", "memory"); + } +} +#else static inline void vector_fixmul_window(int32_t *dst, const int32_t *src0, const int32_t *src1, const int32_t *win, - int32_t add_bias, int len) + int len) { int i, j; dst += len; @@ -31,13 +60,13 @@ static inline void vector_fixmul_window(int32_t *dst, const int32_t *src0, for(i=-len, j=len-1; i<0; i++, j--) { int32_t s0 = src0[i]; int32_t s1 = src1[j]; - int32_t wi = win[i]; - int32_t wj = win[j]; - dst[i] = fixmulshift(s0,-1*wj,31) - fixmulshift(s1,-1*wi,31) + (add_bias<<16); - dst[j] = fixmulshift(s0,-1*wi,31) + fixmulshift(s1,-1*wj,31) + (add_bias<<16); - } - + int32_t wi = -win[i]; + int32_t wj = -win[j]; + dst[i] = fixmulshift(s0,wj,31) - fixmulshift(s1,wi,31); + dst[j] = fixmulshift(s0,wi,31) + fixmulshift(s1,wj,31); + } } +#endif static inline void vector_fixmul_scalar(int32_t *dst, const int32_t *src, int32_t mul, int len, int 
shift) |