diff options
| author | Tomasz Malesinski <tomal@rockbox.org> | 2007-03-24 13:45:54 +0000 |
|---|---|---|
| committer | Tomasz Malesinski <tomal@rockbox.org> | 2007-03-24 13:45:54 +0000 |
| commit | 25046aac17e09467aa1f4d240fb74db51f37e70c (patch) | |
| tree | 5d69b6f3faa49457d68879949e76da00329ca71f | |
| parent | 467651ae763107d478799586a1061693cafe6dab (diff) | |
| download | rockbox-25046aac17e09467aa1f4d240fb74db51f37e70c.zip rockbox-25046aac17e09467aa1f4d240fb74db51f37e70c.tar.gz rockbox-25046aac17e09467aa1f4d240fb74db51f37e70c.tar.bz2 rockbox-25046aac17e09467aa1f4d240fb74db51f37e70c.tar.xz | |
FS #6848 - fast vector operations for ARM in Tremor.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@12902 a1c6a512-1295-4272-9138-f99709370657
| -rw-r--r-- | apps/codecs/Tremor/asm_arm.h | 106 |
| -rw-r--r-- | apps/codecs/Tremor/asm_mcf5249.h | 13 |
| -rw-r--r-- | apps/codecs/Tremor/block.c | 43 |
| -rw-r--r-- | apps/codecs/Tremor/misc.h | 45 |
| -rw-r--r-- | apps/codecs/Tremor/window.c | 20 |
5 files changed, 168 insertions, 59 deletions
diff --git a/apps/codecs/Tremor/asm_arm.h b/apps/codecs/Tremor/asm_arm.h index e623ce9..bc09ac5 100644 --- a/apps/codecs/Tremor/asm_arm.h +++ b/apps/codecs/Tremor/asm_arm.h @@ -95,6 +95,112 @@ static inline void XNPROD31(ogg_int32_t a, ogg_int32_t b, *y = y1 << 1; } +#ifndef _V_VECT_OPS +#define _V_VECT_OPS + +/* asm versions of vector operations for block.c, window.c */ +static inline +void vect_add(ogg_int32_t *x, ogg_int32_t *y, int n) +{ + while (n>=4) { + asm volatile ("ldmia %[x], {r0, r1, r2, r3};" + "ldmia %[y]!, {r4, r5, r6, r7};" + "add r0, r0, r4;" + "add r1, r1, r5;" + "add r2, r2, r6;" + "add r3, r3, r7;" + "stmia %[x]!, {r0, r1, r2, r3};" + : [x] "+r" (x), [y] "+r" (y) + : : "r0", "r1", "r2", "r3", + "r4", "r5", "r6", "r7", + "memory"); + n -= 4; + } + /* add final elements */ + while (n>0) { + *x++ += *y++; + n--; + } +} + +static inline +void vect_copy(ogg_int32_t *x, ogg_int32_t *y, int n) +{ + while (n>=4) { + asm volatile ("ldmia %[y]!, {r0, r1, r2, r3};" + "stmia %[x]!, {r0, r1, r2, r3};" + : [x] "+r" (x), [y] "+r" (y) + : : "r0", "r1", "r2", "r3", + "memory"); + n -= 4; + } + /* copy final elements */ + while (n>0) { + *x++ = *y++; + n--; + } +} + +static inline +void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n) +{ + while (n>=4) { + asm volatile ("ldmia %[d], {r0, r1, r2, r3};" + "ldmia %[w]!, {r4, r5, r6, r7};" + "smull r8, r9, r0, r4;" + "mov r0, r9, lsl #1;" + "smull r8, r9, r1, r5;" + "mov r1, r9, lsl #1;" + "smull r8, r9, r2, r6;" + "mov r2, r9, lsl #1;" + "smull r8, r9, r3, r7;" + "mov r3, r9, lsl #1;" + "stmia %[d]!, {r0, r1, r2, r3};" + : [d] "+r" (data), [w] "+r" (window) + : : "r0", "r1", "r2", "r3", + "r4", "r5", "r6", "r7", "r8", "r9", + "memory", "cc"); + n -= 4; + } + while(n>0) { + *data = MULT31(*data, *window); + data++; + window++; + n--; + } +} + +static inline +void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n) +{ + while (n>=4) { + asm volatile ("ldmia %[d], {r0, r1, r2, r3};" + "ldmda %[w]!, {r4, 
r5, r6, r7};" + "smull r8, r9, r0, r7;" + "mov r0, r9, lsl #1;" + "smull r8, r9, r1, r6;" + "mov r1, r9, lsl #1;" + "smull r8, r9, r2, r5;" + "mov r2, r9, lsl #1;" + "smull r8, r9, r3, r4;" + "mov r3, r9, lsl #1;" + "stmia %[d]!, {r0, r1, r2, r3};" + : [d] "+r" (data), [w] "+r" (window) + : : "r0", "r1", "r2", "r3", + "r4", "r5", "r6", "r7", "r8", "r9", + "memory", "cc"); + n -= 4; + } + while(n>0) { + *data = MULT31(*data, *window); + data++; + window--; + n--; + } +} + +#endif + #endif #ifndef _V_CLIP_MATH diff --git a/apps/codecs/Tremor/asm_mcf5249.h b/apps/codecs/Tremor/asm_mcf5249.h index 16878d7..4d7f92c 100644 --- a/apps/codecs/Tremor/asm_mcf5249.h +++ b/apps/codecs/Tremor/asm_mcf5249.h @@ -132,10 +132,13 @@ void XNPROD31(ogg_int32_t a, ogg_int32_t b, [t] "r" (_t), [v] "r" (_v) \ : "cc"); +#ifndef _V_VECT_OPS +#define _V_VECT_OPS + /* asm versions of vector operations for block.c, window.c */ /* assumes MAC is initialized & accumulators cleared */ static inline -void mcf5249_vect_add(ogg_int32_t *x, ogg_int32_t *y, int n) +void vect_add(ogg_int32_t *x, ogg_int32_t *y, int n) { /* align to 16 bytes */ while(n>0 && (int)x&16) { @@ -169,7 +172,7 @@ void mcf5249_vect_add(ogg_int32_t *x, ogg_int32_t *y, int n) } static inline -void mcf5249_vect_copy(ogg_int32_t *x, ogg_int32_t *y, int n) +void vect_copy(ogg_int32_t *x, ogg_int32_t *y, int n) { /* align to 16 bytes */ while(n>0 && (int)x&16) { @@ -196,7 +199,7 @@ void mcf5249_vect_copy(ogg_int32_t *x, ogg_int32_t *y, int n) static inline -void mcf5249_vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n) +void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n) { /* ensure data is aligned to 16-bytes */ while(n>0 && (int)data%16) { @@ -250,7 +253,7 @@ void mcf5249_vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n) } static inline -void mcf5249_vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n) +void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n) { /* ensure at least data is aligned to 
16-bytes */ while(n>0 && (int)data%16) { @@ -338,6 +341,8 @@ void mcf5249_vect_zero(ogg_int32_t *ptr, int n) #endif +#endif + #ifndef _V_CLIP_MATH #define _V_CLIP_MATH diff --git a/apps/codecs/Tremor/block.c b/apps/codecs/Tremor/block.c index 9dce13c..80cbb78 100644 --- a/apps/codecs/Tremor/block.c +++ b/apps/codecs/Tremor/block.c @@ -262,11 +262,7 @@ int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){ vorbis_info *vi=v->vi; codec_setup_info *ci=(codec_setup_info *)vi->codec_setup; private_state *b=v->backend_state; -#ifdef CPU_COLDFIRE int j; -#else - int i,j; -#endif if(v->pcm_current>v->pcm_returned && v->pcm_returned!=-1)return(OV_EINVAL); @@ -312,47 +308,25 @@ int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){ /* large/large */ ogg_int32_t *pcm=v->pcm[j]+prevCenter; ogg_int32_t *p=vb->pcm[j]; -#ifdef CPU_COLDFIRE - mcf5249_vect_add(pcm, p, n1); -#else - for(i=0;i<n1;i++) - pcm[i]+=p[i]; -#endif + vect_add(pcm, p, n1); }else{ /* large/small */ ogg_int32_t *pcm=v->pcm[j]+prevCenter+n1/2-n0/2; ogg_int32_t *p=vb->pcm[j]; -#ifdef CPU_COLDFIRE - mcf5249_vect_add(pcm, p, n0); -#else - for(i=0;i<n0;i++) - pcm[i]+=p[i]; -#endif + vect_add(pcm, p, n0); } }else{ if(v->W){ /* small/large */ ogg_int32_t *pcm=v->pcm[j]+prevCenter; ogg_int32_t *p=vb->pcm[j]+n1/2-n0/2; -#ifdef CPU_COLDFIRE - mcf5249_vect_add(pcm, p, n0); - mcf5249_vect_copy(&pcm[n0], &p[n0], n1/2-n0/2); -#else - for(i=0;i<n0;i++) - pcm[i]+=p[i]; - for(;i<n1/2+n0/2;i++) - pcm[i]=p[i]; -#endif + vect_add(pcm, p, n0); + vect_copy(&pcm[n0], &p[n0], n1/2-n0/2); }else{ /* small/small */ ogg_int32_t *pcm=v->pcm[j]+prevCenter; ogg_int32_t *p=vb->pcm[j]; -#ifdef CPU_COLDFIRE - mcf5249_vect_add(pcm, p, n0); -#else - for(i=0;i<n0;i++) - pcm[i]+=p[i]; -#endif + vect_add(pcm, p, n0); } } @@ -360,12 +334,7 @@ int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){ { ogg_int32_t *pcm=v->pcm[j]+thisCenter; ogg_int32_t *p=vb->pcm[j]+n; -#ifdef CPU_COLDFIRE - mcf5249_vect_copy(pcm, 
p, n); -#else - for(i=0;i<n;i++) - pcm[i]=p[i]; -#endif + vect_copy(pcm, p, n); } } diff --git a/apps/codecs/Tremor/misc.h b/apps/codecs/Tremor/misc.h index 81903e1..a6eb0fa 100644 --- a/apps/codecs/Tremor/misc.h +++ b/apps/codecs/Tremor/misc.h @@ -151,6 +151,51 @@ static inline void XNPROD31(ogg_int32_t a, ogg_int32_t b, *y = MULT31(b, t) + MULT31(a, v); } #endif + +#ifndef _V_VECT_OPS +#define _V_VECT_OPS + +static inline +void vect_add(ogg_int32_t *x, ogg_int32_t *y, int n) +{ + while (n>0) { + *x++ += *y++; + n--; + } +} + +static inline +void vect_copy(ogg_int32_t *x, ogg_int32_t *y, int n) +{ + while (n>0) { + *x++ = *y++; + n--; + } +} + +static inline +void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n) +{ + while(n>0) { + *data = MULT31(*data, *window); + data++; + window++; + n--; + } +} + +static inline +void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n) +{ + while(n>0) { + *data = MULT31(*data, *window); + data++; + window--; + n--; + } +} +#endif + #endif #ifndef _V_CLIP_MATH diff --git a/apps/codecs/Tremor/window.c b/apps/codecs/Tremor/window.c index 5c7b83f..14d97cf 100644 --- a/apps/codecs/Tremor/window.c +++ b/apps/codecs/Tremor/window.c @@ -68,27 +68,11 @@ void _vorbis_apply_window(ogg_int32_t *d,const void *window_p[2], long rightbegin=n/2+n/4-rn/4; long rightend=rightbegin+rn/2; -#ifdef CPU_COLDFIRE memset((void *)&d[0], 0, sizeof(ogg_int32_t)*leftbegin); /* mcf5249_vect_zero(&d[0], leftbegin); */ - mcf5249_vect_mult_fw(&d[leftbegin], &window[lW][0], leftend-leftbegin); - mcf5249_vect_mult_bw(&d[rightbegin], &window[nW][rn/2-1], rightend-rightbegin); + vect_mult_fw(&d[leftbegin], &window[lW][0], leftend-leftbegin); + vect_mult_bw(&d[rightbegin], &window[nW][rn/2-1], rightend-rightbegin); memset((void *)&d[rightend], 0, sizeof(ogg_int32_t)*(n-rightend)); /* mcf5249_vect_zero(&d[rightend], n-rightend); */ -#else - int i,p; - - for(i=0;i<leftbegin;i++) - d[i]=0; - - for(p=0;i<leftend;i++,p++) - d[i]=MULT31(d[i],window[lW][p]); 
- - for(i=rightbegin,p=rn/2-1;i<rightend;i++,p--) - d[i]=MULT31(d[i],window[nW][p]); - - for(;i<n;i++) - d[i]=0; -#endif } |