summaryrefslogtreecommitdiff
path: root/apps/codecs
diff options
context:
space:
mode:
authorThom Johansen <thomj@rockbox.org>2007-11-07 00:50:37 +0000
committerThom Johansen <thomj@rockbox.org>2007-11-07 00:50:37 +0000
commitc7a8663c7bd6ccb9568fc31b6bad4f5a38aebb8e (patch)
treedcc4008ad362fa1f9d517e7ff543f5456bf6d8cf /apps/codecs
parent71b40994e0c58cb3cb97ac91a0f771dce182662f (diff)
downloadrockbox-c7a8663c7bd6ccb9568fc31b6bad4f5a38aebb8e.zip
rockbox-c7a8663c7bd6ccb9568fc31b6bad4f5a38aebb8e.tar.gz
rockbox-c7a8663c7bd6ccb9568fc31b6bad4f5a38aebb8e.tar.bz2
rockbox-c7a8663c7bd6ccb9568fc31b6bad4f5a38aebb8e.tar.xz
Assembler versions of signal_mul. Decent speedup for Coldfire and small speedup for ARM.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@15502 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/codecs')
-rw-r--r--apps/codecs/libspeex/filters.c4
-rw-r--r--apps/codecs/libspeex/filters_arm4.S27
-rw-r--r--apps/codecs/libspeex/filters_cf.S42
3 files changed, 72 insertions, 1 deletions
diff --git a/apps/codecs/libspeex/filters.c b/apps/codecs/libspeex/filters.c
index 13adc9c..a8a2e0b 100644
--- a/apps/codecs/libspeex/filters.c
+++ b/apps/codecs/libspeex/filters.c
@@ -47,9 +47,11 @@
#include "filters_arm4.h"
#define OVERRIDE_IIR_MEM16
#define OVERRIDE_QMF_SYNTH
+#define OVERRIDE_SIGNAL_MUL
#elif defined (COLDFIRE_ASM)
#define OVERRIDE_IIR_MEM16
#define OVERRIDE_QMF_SYNTH
+#define OVERRIDE_SIGNAL_MUL
#elif defined (BFIN_ASM)
#include "filters_bfin.h"
#endif
@@ -114,6 +116,7 @@ void highpass(const spx_word16_t *x, spx_word16_t *y, int len, int filtID, spx_m
#ifdef FIXED_POINT
+#ifndef OVERRIDE_SIGNAL_MUL
/* FIXME: These functions are ugly and probably introduce too much error */
void signal_mul(const spx_sig_t *x, spx_sig_t *y, spx_word32_t scale, int len)
{
@@ -123,6 +126,7 @@ void signal_mul(const spx_sig_t *x, spx_sig_t *y, spx_word32_t scale, int len)
y[i] = SHL32(MULT16_32_Q14(EXTRACT16(SHR32(x[i],7)),scale),7);
}
}
+#endif
#ifndef SPEEX_DISABLE_ENCODER
void signal_div(const spx_word16_t *x, spx_word16_t *y, spx_word32_t scale, int len)
diff --git a/apps/codecs/libspeex/filters_arm4.S b/apps/codecs/libspeex/filters_arm4.S
index 7924e70..e0b33c8 100644
--- a/apps/codecs/libspeex/filters_arm4.S
+++ b/apps/codecs/libspeex/filters_arm4.S
@@ -199,7 +199,7 @@ qmf_synth:
bne 0b
sub r0, r8, r5 @ r0 = &xx1[N2]
- sub r1, r9, r5 @ r1 = %xx2[N2]
+ sub r1, r9, r5 @ r1 = &xx2[N2]
str r4, [sp, #-4] @ Stack N
mov r4, r5
str r4, [sp, #-8] @ Stack M
@@ -300,3 +300,28 @@ qmf_synth:
bne 0b
ldmia sp!, { r4-r11, pc } @ Exit
+
+/* void signal_mul(const spx_sig_t *x, spx_sig_t *y, spx_word32_t scale, int len) -- writes y[i] = bits 14-45 of the signed 64-bit product x[i] * scale (i.e. the product shifted right by 14), four samples per loop pass; the loop only ends when len counts down to exactly 0, so len must be a positive multiple of 4 */
+ .global signal_mul
+signal_mul:
+ stmdb sp!, { r4-r8, lr } @ Save work registers and return address (r4 is saved but not otherwise used here)
+0:
+ ldmia r0!, { r5-r8 } @ Load four input samples, advancing r0 (x)
+ smull r5, r12, r2, r5 @ r12:r5 = r2 (scale) * sample 0, signed 64-bit product
+ mov r12, r12, lsl #18 @ Recombine upper and lower parts: high word supplies result bits 18-31
+ orr r5, r12, r5, lsr #14 @ r5 = product >> 14, truncated to 32 bits
+ smull r6, r12, r2, r6 @ Same sequence for sample 1
+ mov r12, r12, lsl #18
+ orr r6, r12, r6, lsr #14
+ smull r7, r12, r2, r7 @ Same sequence for sample 2
+ mov r12, r12, lsl #18
+ orr r7, r12, r7, lsr #14
+ smull r8, r12, r2, r8 @ Same sequence for sample 3
+ mov r12, r12, lsl #18
+ orr r8, r12, r8, lsr #14
+ stmia r1!, { r5-r8 } @ Store four output samples, advancing r1 (y)
+ subs r3, r3, #4 @ Are we done? r3 (len) -= 4 and set flags
+ bne 0b @ Loop again unless the count is exactly zero
+
+ ldmia sp!, { r4-r8, pc } @ Exit: restore registers and return by loading pc
+
diff --git a/apps/codecs/libspeex/filters_cf.S b/apps/codecs/libspeex/filters_cf.S
index 861d6c1..a48af85 100644
--- a/apps/codecs/libspeex/filters_cf.S
+++ b/apps/codecs/libspeex/filters_cf.S
@@ -312,3 +312,45 @@ qmf_synth:
lea.l (44, %sp), %sp
rts
+
+/* void signal_mul(const spx_sig_t *x, spx_sig_t *y, spx_word32_t scale, int len) -- multiplies each x[i] by scale using the MAC unit in fractional mode and stores the result to y[i], four samples per loop pass; the loop only ends when len counts down to exactly 0, so len must be a positive multiple of 4 */
+ .global signal_mul
+signal_mul:
+ lea.l (-20, %sp), %sp | Reserve 20 bytes of stack for five saved registers
+ movem.l %d2-%d6, (%sp) | Save d2-d6, which are used as scratch below
+ movem.l (20+4, %sp), %a0-%a1 | a0 = x, a1 = y
+ movem.l (20+12, %sp), %d0-%d1 | d0 = scale, d1 = len
+ moveq.l #0x20, %d6
+ move.l %d6, %macsr | Set MAC unit to fractional mode
+ asl.l #3, %d0 | Pre-scale 'scale'
+ moveq.l #9, %d6 | d6 = shift count for the input samples
+0:
+ movem.l (%a0), %d2-%d5 | Fetch input
+ asl.l %d6, %d2 | Shift each value 9 to the left
+ asl.l %d6, %d3
+ asl.l %d6, %d4
+ asl.l %d6, %d5
+ mac.l %d2, %d0, %acc0 | Do multiplies; accumulators are not cleared before the first pass, so this relies on them being zero on entry
+ mac.l %d3, %d0, %acc1
+ mac.l %d4, %d0, %acc2
+ mac.l %d5, %d0, %acc3
+ lea.l (16, %a0), %a0 | Advance input pointer past the four samples just read
+ movclr.l %acc0, %d2 | Read each result and clear its accumulator for the next pass
+ movclr.l %acc1, %d3
+ movclr.l %acc2, %d4
+ movclr.l %acc3, %d5
+ asl.l #5, %d2 | Adjust to proper format; NOTE(review): the 3- and 9-bit pre-shifts plus this 5-bit shift presumably reproduce the C version's Q14 scaling - confirm against the EMAC documentation
+ asl.l #5, %d3
+ asl.l #5, %d4
+ asl.l #5, %d5
+ movem.l %d2-%d5, (%a1) | Save output
+ lea.l (16, %a1), %a1 | Advance output pointer past the four samples just written
+ subq.l #4, %d1 | Four samples done; len -= 4
+ jne 0b | Loop again unless the count is exactly zero
+
+ clr.l %d0
+ move.l %d0, %macsr | Set MAC unit back to integer mode (writes 0; the MACSR value on entry is not saved or restored)
+ movem.l (%sp), %d2-%d6 | Restore saved registers
+ lea.l (20, %sp), %sp | Release the stack space
+ rts
+