summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Tomasz Malesinski <tomal@rockbox.org> 2007-03-24 13:45:54 +0000
committer Tomasz Malesinski <tomal@rockbox.org> 2007-03-24 13:45:54 +0000
commit 25046aac17e09467aa1f4d240fb74db51f37e70c (patch)
tree 5d69b6f3faa49457d68879949e76da00329ca71f
parent 467651ae763107d478799586a1061693cafe6dab (diff)
download rockbox-25046aac17e09467aa1f4d240fb74db51f37e70c.zip
rockbox-25046aac17e09467aa1f4d240fb74db51f37e70c.tar.gz
rockbox-25046aac17e09467aa1f4d240fb74db51f37e70c.tar.bz2
rockbox-25046aac17e09467aa1f4d240fb74db51f37e70c.tar.xz
FS #6848 - fast vector operations for ARM in Tremor.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@12902 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r-- apps/codecs/Tremor/asm_arm.h 106
-rw-r--r-- apps/codecs/Tremor/asm_mcf5249.h 13
-rw-r--r-- apps/codecs/Tremor/block.c 43
-rw-r--r-- apps/codecs/Tremor/misc.h 45
-rw-r--r-- apps/codecs/Tremor/window.c 20
5 files changed, 168 insertions, 59 deletions
diff --git a/apps/codecs/Tremor/asm_arm.h b/apps/codecs/Tremor/asm_arm.h
index e623ce9..bc09ac5 100644
--- a/apps/codecs/Tremor/asm_arm.h
+++ b/apps/codecs/Tremor/asm_arm.h
@@ -95,6 +95,112 @@ static inline void XNPROD31(ogg_int32_t a, ogg_int32_t b,
*y = y1 << 1;
}
+#ifndef _V_VECT_OPS
+#define _V_VECT_OPS
+
+/* asm versions of vector operations for block.c, window.c */
+static inline /* vect_add: x[i] += y[i] for 0 <= i < n (ARM asm version) */
+void vect_add(ogg_int32_t *x, ogg_int32_t *y, int n)
+{ /* x is read and written, y is only read, n is the element count */
+ while (n>=4) { /* bulk path: four words per pass */
+ asm volatile ("ldmia %[x], {r0, r1, r2, r3};" /* load x[0..3]; x not advanced yet */
+ "ldmia %[y]!, {r4, r5, r6, r7};" /* load y[0..3] and advance y by four words */
+ "add r0, r0, r4;"
+ "add r1, r1, r5;"
+ "add r2, r2, r6;"
+ "add r3, r3, r7;"
+ "stmia %[x]!, {r0, r1, r2, r3};" /* store the sums and advance x by four words */
+ : [x] "+r" (x), [y] "+r" (y) /* both pointers are updated by the asm */
+ : : "r0", "r1", "r2", "r3", /* scratch registers are named explicitly, so they */
+ "r4", "r5", "r6", "r7", /* must be listed as clobbers */
+ "memory"); /* the asm reads and writes memory behind the compiler's back */
+ n -= 4;
+ }
+ /* add final elements (the 0..3 left over after the four-word passes) */
+ while (n>0) {
+ *x++ += *y++;
+ n--;
+ }
+}
+
+static inline /* vect_copy: x[i] = y[i] for 0 <= i < n (ARM asm version) */
+void vect_copy(ogg_int32_t *x, ogg_int32_t *y, int n)
+{ /* x is the destination, y is the source, n is the element count */
+ while (n>=4) { /* bulk path: four words per pass */
+ asm volatile ("ldmia %[y]!, {r0, r1, r2, r3};" /* load y[0..3] and advance y */
+ "stmia %[x]!, {r0, r1, r2, r3};" /* store to x[0..3] and advance x */
+ : [x] "+r" (x), [y] "+r" (y) /* both pointers are updated by the asm */
+ : : "r0", "r1", "r2", "r3", /* explicitly named scratch registers */
+ "memory"); /* the asm reads and writes memory behind the compiler's back */
+ n -= 4;
+ }
+ /* copy final elements (the 0..3 left over after the four-word passes) */
+ while (n>0) {
+ *x++ = *y++;
+ n--;
+ }
+}
+
+static inline /* vect_mult_fw: scale data[i] by window[i], both pointers ascending */
+void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
+{ /* data is updated in place, window is only read, n is the element count */
+ while (n>=4) { /* bulk path: four words per pass */
+ asm volatile ("ldmia %[d], {r0, r1, r2, r3};" /* load data[0..3]; data not advanced yet */
+ "ldmia %[w]!, {r4, r5, r6, r7};" /* load window[0..3] and advance window */
+ "smull r8, r9, r0, r4;" /* signed 64-bit product: r8 = low word, r9 = high word */
+ "mov r0, r9, lsl #1;" /* keep only the high word, shifted left by one */
+ "smull r8, r9, r1, r5;"
+ "mov r1, r9, lsl #1;"
+ "smull r8, r9, r2, r6;"
+ "mov r2, r9, lsl #1;"
+ "smull r8, r9, r3, r7;"
+ "mov r3, r9, lsl #1;"
+ "stmia %[d]!, {r0, r1, r2, r3};" /* store the results and advance data */
+ : [d] "+r" (data), [w] "+r" (window) /* both pointers are updated by the asm */
+ : : "r0", "r1", "r2", "r3", /* explicitly named scratch registers */
+ "r4", "r5", "r6", "r7", "r8", "r9",
+ "memory", "cc"); /* memory is modified; condition flags are declared clobbered */
+ n -= 4;
+ }
+ while(n>0) { /* remaining 0..3 elements */
+ *data = MULT31(*data, *window); /* NOTE(review): MULT31 is defined outside this view; presumably it yields the same value as the asm path - confirm */
+ data++;
+ window++;
+ n--;
+ }
+}
+
+static inline /* vect_mult_bw: scale data[i] by window[-i]; data ascends, window descends */
+void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
+{ /* data is updated in place; window points at the first (highest-address) coefficient used */
+ while (n>=4) { /* bulk path: four words per pass */
+ asm volatile ("ldmia %[d], {r0, r1, r2, r3};" /* load data[0..3]; data not advanced yet */
+ "ldmda %[w]!, {r4, r5, r6, r7};" /* load the four words ending at window (r7 = *window), then step window back four words */
+ "smull r8, r9, r0, r7;" /* data[0] pairs with the highest-address window word */
+ "mov r0, r9, lsl #1;" /* keep only the high word of the product, shifted left by one */
+ "smull r8, r9, r1, r6;"
+ "mov r1, r9, lsl #1;"
+ "smull r8, r9, r2, r5;"
+ "mov r2, r9, lsl #1;"
+ "smull r8, r9, r3, r4;" /* data[3] pairs with the lowest-address window word */
+ "mov r3, r9, lsl #1;"
+ "stmia %[d]!, {r0, r1, r2, r3};" /* store the results and advance data */
+ : [d] "+r" (data), [w] "+r" (window) /* both pointers are updated by the asm */
+ : : "r0", "r1", "r2", "r3", /* explicitly named scratch registers */
+ "r4", "r5", "r6", "r7", "r8", "r9",
+ "memory", "cc"); /* memory is modified; condition flags are declared clobbered */
+ n -= 4;
+ }
+ while(n>0) { /* remaining 0..3 elements */
+ *data = MULT31(*data, *window); /* NOTE(review): MULT31 is defined outside this view; presumably it yields the same value as the asm path - confirm */
+ data++;
+ window--; /* window is walked backwards, matching the ldmda above */
+ n--;
+ }
+}
+
+#endif
+
#endif
#ifndef _V_CLIP_MATH
diff --git a/apps/codecs/Tremor/asm_mcf5249.h b/apps/codecs/Tremor/asm_mcf5249.h
index 16878d7..4d7f92c 100644
--- a/apps/codecs/Tremor/asm_mcf5249.h
+++ b/apps/codecs/Tremor/asm_mcf5249.h
@@ -132,10 +132,13 @@ void XNPROD31(ogg_int32_t a, ogg_int32_t b,
[t] "r" (_t), [v] "r" (_v) \
: "cc");
+#ifndef _V_VECT_OPS
+#define _V_VECT_OPS
+
/* asm versions of vector operations for block.c, window.c */
/* assumes MAC is initialized & accumulators cleared */
static inline
-void mcf5249_vect_add(ogg_int32_t *x, ogg_int32_t *y, int n)
+void vect_add(ogg_int32_t *x, ogg_int32_t *y, int n)
{
/* align to 16 bytes */
while(n>0 && (int)x&16) {
@@ -169,7 +172,7 @@ void mcf5249_vect_add(ogg_int32_t *x, ogg_int32_t *y, int n)
}
static inline
-void mcf5249_vect_copy(ogg_int32_t *x, ogg_int32_t *y, int n)
+void vect_copy(ogg_int32_t *x, ogg_int32_t *y, int n)
{
/* align to 16 bytes */
while(n>0 && (int)x&16) {
@@ -196,7 +199,7 @@ void mcf5249_vect_copy(ogg_int32_t *x, ogg_int32_t *y, int n)
static inline
-void mcf5249_vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
+void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
{
/* ensure data is aligned to 16-bytes */
while(n>0 && (int)data%16) {
@@ -250,7 +253,7 @@ void mcf5249_vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
}
static inline
-void mcf5249_vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
+void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
{
/* ensure at least data is aligned to 16-bytes */
while(n>0 && (int)data%16) {
@@ -338,6 +341,8 @@ void mcf5249_vect_zero(ogg_int32_t *ptr, int n)
#endif
+#endif
+
#ifndef _V_CLIP_MATH
#define _V_CLIP_MATH
diff --git a/apps/codecs/Tremor/block.c b/apps/codecs/Tremor/block.c
index 9dce13c..80cbb78 100644
--- a/apps/codecs/Tremor/block.c
+++ b/apps/codecs/Tremor/block.c
@@ -262,11 +262,7 @@ int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){
vorbis_info *vi=v->vi;
codec_setup_info *ci=(codec_setup_info *)vi->codec_setup;
private_state *b=v->backend_state;
-#ifdef CPU_COLDFIRE
int j;
-#else
- int i,j;
-#endif
if(v->pcm_current>v->pcm_returned && v->pcm_returned!=-1)return(OV_EINVAL);
@@ -312,47 +308,25 @@ int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){
/* large/large */
ogg_int32_t *pcm=v->pcm[j]+prevCenter;
ogg_int32_t *p=vb->pcm[j];
-#ifdef CPU_COLDFIRE
- mcf5249_vect_add(pcm, p, n1);
-#else
- for(i=0;i<n1;i++)
- pcm[i]+=p[i];
-#endif
+ vect_add(pcm, p, n1);
}else{
/* large/small */
ogg_int32_t *pcm=v->pcm[j]+prevCenter+n1/2-n0/2;
ogg_int32_t *p=vb->pcm[j];
-#ifdef CPU_COLDFIRE
- mcf5249_vect_add(pcm, p, n0);
-#else
- for(i=0;i<n0;i++)
- pcm[i]+=p[i];
-#endif
+ vect_add(pcm, p, n0);
}
}else{
if(v->W){
/* small/large */
ogg_int32_t *pcm=v->pcm[j]+prevCenter;
ogg_int32_t *p=vb->pcm[j]+n1/2-n0/2;
-#ifdef CPU_COLDFIRE
- mcf5249_vect_add(pcm, p, n0);
- mcf5249_vect_copy(&pcm[n0], &p[n0], n1/2-n0/2);
-#else
- for(i=0;i<n0;i++)
- pcm[i]+=p[i];
- for(;i<n1/2+n0/2;i++)
- pcm[i]=p[i];
-#endif
+ vect_add(pcm, p, n0);
+ vect_copy(&pcm[n0], &p[n0], n1/2-n0/2);
}else{
/* small/small */
ogg_int32_t *pcm=v->pcm[j]+prevCenter;
ogg_int32_t *p=vb->pcm[j];
-#ifdef CPU_COLDFIRE
- mcf5249_vect_add(pcm, p, n0);
-#else
- for(i=0;i<n0;i++)
- pcm[i]+=p[i];
-#endif
+ vect_add(pcm, p, n0);
}
}
@@ -360,12 +334,7 @@ int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){
{
ogg_int32_t *pcm=v->pcm[j]+thisCenter;
ogg_int32_t *p=vb->pcm[j]+n;
-#ifdef CPU_COLDFIRE
- mcf5249_vect_copy(pcm, p, n);
-#else
- for(i=0;i<n;i++)
- pcm[i]=p[i];
-#endif
+ vect_copy(pcm, p, n);
}
}
diff --git a/apps/codecs/Tremor/misc.h b/apps/codecs/Tremor/misc.h
index 81903e1..a6eb0fa 100644
--- a/apps/codecs/Tremor/misc.h
+++ b/apps/codecs/Tremor/misc.h
@@ -151,6 +151,51 @@ static inline void XNPROD31(ogg_int32_t a, ogg_int32_t b,
*y = MULT31(b, t) + MULT31(a, v);
}
#endif
+
+#ifndef _V_VECT_OPS
+#define _V_VECT_OPS
+
+static inline /* vect_add: x[i] += y[i] for 0 <= i < n; plain C version, used when _V_VECT_OPS is not already defined */
+void vect_add(ogg_int32_t *x, ogg_int32_t *y, int n)
+{ /* x is read and written, y is only read, n is the element count */
+ while (n>0) { /* does nothing when n <= 0 */
+ *x++ += *y++;
+ n--;
+ }
+}
+
+static inline /* vect_copy: x[i] = y[i] for 0 <= i < n; plain C version, used when _V_VECT_OPS is not already defined */
+void vect_copy(ogg_int32_t *x, ogg_int32_t *y, int n)
+{ /* x is the destination, y is the source, n is the element count */
+ while (n>0) { /* does nothing when n <= 0; elements are copied in ascending order */
+ *x++ = *y++;
+ n--;
+ }
+}
+
+static inline /* vect_mult_fw: data[i] = MULT31(data[i], window[i]); plain C version, used when _V_VECT_OPS is not already defined */
+void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
+{ /* data is updated in place, window is only read, n is the element count */
+ while(n>0) { /* does nothing when n <= 0 */
+ *data = MULT31(*data, *window);
+ data++;
+ window++; /* window is walked forwards */
+ n--;
+ }
+}
+
+static inline /* vect_mult_bw: data[i] = MULT31(data[i], window[-i]); plain C version, used when _V_VECT_OPS is not already defined */
+void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
+{ /* data is updated in place; window points at the first coefficient and is read downwards */
+ while(n>0) { /* does nothing when n <= 0 */
+ *data = MULT31(*data, *window);
+ data++;
+ window--; /* window is walked backwards */
+ n--;
+ }
+}
+#endif
+
#endif
#ifndef _V_CLIP_MATH
diff --git a/apps/codecs/Tremor/window.c b/apps/codecs/Tremor/window.c
index 5c7b83f..14d97cf 100644
--- a/apps/codecs/Tremor/window.c
+++ b/apps/codecs/Tremor/window.c
@@ -68,27 +68,11 @@ void _vorbis_apply_window(ogg_int32_t *d,const void *window_p[2],
long rightbegin=n/2+n/4-rn/4;
long rightend=rightbegin+rn/2;
-#ifdef CPU_COLDFIRE
memset((void *)&d[0], 0, sizeof(ogg_int32_t)*leftbegin);
/* mcf5249_vect_zero(&d[0], leftbegin); */
- mcf5249_vect_mult_fw(&d[leftbegin], &window[lW][0], leftend-leftbegin);
- mcf5249_vect_mult_bw(&d[rightbegin], &window[nW][rn/2-1], rightend-rightbegin);
+ vect_mult_fw(&d[leftbegin], &window[lW][0], leftend-leftbegin);
+ vect_mult_bw(&d[rightbegin], &window[nW][rn/2-1], rightend-rightbegin);
memset((void *)&d[rightend], 0, sizeof(ogg_int32_t)*(n-rightend));
/* mcf5249_vect_zero(&d[rightend], n-rightend); */
-#else
- int i,p;
-
- for(i=0;i<leftbegin;i++)
- d[i]=0;
-
- for(p=0;i<leftend;i++,p++)
- d[i]=MULT31(d[i],window[lW][p]);
-
- for(i=rightbegin,p=rn/2-1;i<rightend;i++,p--)
- d[i]=MULT31(d[i],window[nW][p]);
-
- for(;i<n;i++)
- d[i]=0;
-#endif
}