summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndree Buschmann <AndreeBuschmann@t-online.de>2008-03-19 13:55:53 +0000
committerAndree Buschmann <AndreeBuschmann@t-online.de>2008-03-19 13:55:53 +0000
commitfd052ec753cade16675e211ced0a2be19c0d545f (patch)
tree094375afe1644abe2a312bb7feee885dcbdb64c0
parent178df1cfcfa529c58ad37922d6d934e1e0328fc5 (diff)
downloadrockbox-fd052ec753cade16675e211ced0a2be19c0d545f.zip
rockbox-fd052ec753cade16675e211ced0a2be19c0d545f.tar.gz
rockbox-fd052ec753cade16675e211ced0a2be19c0d545f.tar.bz2
rockbox-fd052ec753cade16675e211ced0a2be19c0d545f.tar.xz
Commit FS#8750. Add ARM assembler for the DSP functions channels_process_sound_chan_mono(), channels_process_sound_chan_karaoke(), sample_output_mono() and sample_output_stereo(). By measurement, the speedup is ~75% for the first three functions and ~40% for sample_output_stereo(). Additionally, avoid calling yield() too often in dsp.c -- it is now limited to once per tick.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@16717 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r--apps/dsp.c9
-rw-r--r--apps/dsp_arm.S177
-rw-r--r--apps/dsp_asm.h4
3 files changed, 189 insertions, 1 deletions
diff --git a/apps/dsp.c b/apps/dsp.c
index 3c2d7f6..5bbbe08 100644
--- a/apps/dsp.c
+++ b/apps/dsp.c
@@ -1112,6 +1112,7 @@ int dsp_callback(int msg, intptr_t param)
int dsp_process(struct dsp_config *dsp, char *dst, const char *src[], int count)
{
int32_t *tmp[2];
+ long last_yield = current_tick;
int written = 0;
int samples;
@@ -1159,7 +1160,13 @@ int dsp_process(struct dsp_config *dsp, char *dst, const char *src[], int count)
written += samples;
dst += samples * sizeof (int16_t) * 2;
- yield();
+
+ /* yield at least once each tick */
+ if (current_tick > last_yield)
+ {
+ yield();
+ last_yield = current_tick;
+ }
}
#if defined(CPU_COLDFIRE)
diff --git a/apps/dsp_arm.S b/apps/dsp_arm.S
index c3e5c7c..751e0f5 100644
--- a/apps/dsp_arm.S
+++ b/apps/dsp_arm.S
@@ -18,6 +18,183 @@
****************************************************************************/
/****************************************************************************
+ * void channels_process_sound_chan_mono(int count, int32_t *buf[])
+ * Replaces each sample of both channels with buf[0][i]/2 + buf[1][i]/2.
+ * NOTE: The following code processes two samples at once. When count is odd,
+ * there is one additional, superfluous sample processed, which will not
+ * be used by the calling functions.
+ */
+ .section .icode, "ax", %progbits
+ .align 2
+ .global channels_process_sound_chan_mono
+ .type channels_process_sound_chan_mono, %function
+channels_process_sound_chan_mono:
+ @ input: r0 = count, r1 = buf
+ stmfd sp!, {r4-r6, lr} @ save r4-r6 and lr (lr is used as scratch below)
+ ldmia r1, {r2-r3} @ r2 = buf[0], r3 = buf[1]
+
+.monoloop:
+ ldmia r2, {r4-r5} @ r4, r5 = next two samples of buf[0]
+ ldmia r3, {r6,lr} @ r6, lr = next two samples of buf[1]
+ mov r4, r4, asr #1 @ r4 = r4/2 (halve first so the sum cannot overflow)
+ add r4, r4, r6, asr #1 @ r4 = r4 + r6/2 = (buf[0]+buf[1])/2, first sample
+ mov r5, r5, asr #1 @ r5 = r5/2
+ add r5, r5, lr, asr #1 @ r5 = r5 + lr/2 = (buf[0]+buf[1])/2, second sample
+ stmia r2!, {r4-r5} @ write both results to buf[0], advance r2 by two words
+ stmia r3!, {r4-r5} @ write the same results to buf[1], advance r3
+ subs r0, r0, #2 @ count -= 2, sets flags for the branch
+ bgt .monoloop @ repeat while count > 0 (the body always runs once)
+
+ ldmfd sp!, {r4-r6, pc} @ restore r4-r6 and return (saved lr -> pc)
+.monoend:
+ .size channels_process_sound_chan_mono,.monoend-channels_process_sound_chan_mono
+
+/****************************************************************************
+ * void channels_process_sound_chan_karaoke(int count, int32_t *buf[])
+ * Stores d = buf[0][i]/2 - buf[1][i]/2 to buf[0][i] and -d to buf[1][i].
+ * NOTE: Two samples are processed per pass. When count is odd, one superfluous
+ * sample is also processed, which will not be used by the calling functions.
+ */
+ .section .icode, "ax", %progbits
+ .align 2
+ .global channels_process_sound_chan_karaoke
+ .type channels_process_sound_chan_karaoke, %function
+channels_process_sound_chan_karaoke:
+ @ input: r0 = count, r1 = buf
+ stmfd sp!, {r4-r6, lr} @ save r4-r6 and lr (lr is used as scratch below)
+ ldmia r1, {r2-r3} @ r2 = buf[0], r3 = buf[1]
+
+.karaokeloop:
+ ldmia r2, {r4-r5} @ r4, r5 = next two samples of buf[0]
+ ldmia r3, {r6,lr} @ r6, lr = next two samples of buf[1]
+ mov r6, r6, asr #1 @ r6 = r6/2
+ rsb r4, r6, r4, asr #1 @ r4 = -r6 + r4/2 = (buf[0]-buf[1])/2
+ rsb r6, r4, #0 @ r6 = -r4
+ mov lr, lr, asr #1 @ lr = lr/2
+ rsb r5, lr, r5, asr #1 @ r5 = -lr + r5/2 = (buf[0]-buf[1])/2
+ rsb lr, r5, #0 @ lr = -r5
+ stmia r2!, {r4-r5} @ write differences to buf[0], advance r2 by two words
+ stmia r3!, {r6,lr} @ write negated differences to buf[1], advance r3
+ subs r0, r0, #2 @ count -= 2, sets flags for the branch
+ bgt .karaokeloop @ repeat while count > 0 (the body always runs once)
+
+ ldmfd sp!, {r4-r6, pc} @ restore r4-r6 and return (saved lr -> pc)
+.karaokeend:
+ .size channels_process_sound_chan_karaoke,.karaokeend-channels_process_sound_chan_karaoke
+
+/****************************************************************************
+ * void sample_output_mono(int count, struct dsp_data *data,
+ int32_t *src[], int16_t *dst)
+ * Rounds, scales and clips src[0] to 16 bit; stores each sample to dst twice.
+ * NOTE: Two samples are processed per pass. When count is odd, one superfluous
+ * sample is also processed, which will not be used by the calling functions.
+ */
+ .section .icode, "ax", %progbits
+ .align 2
+ .global sample_output_mono
+ .type sample_output_mono, %function
+sample_output_mono:
+ @ input: r0 = count, r1 = data, r2 = src, r3 = dst
+ stmfd sp!, {r4-r9, lr} @ save r4-r9 and lr (lr is used as scratch below)
+
+ ldr r4, [r2] @ r4 = src[0]
+ ldr r5, [r1] @ r5 = data->output_scale (first word of *data)
+ sub r1, r5, #1 @ r1 = r5-1
+ mov r2, #1
+ mov r2, r2, asl r1 @ r2 = 1<<r1 = 1 << (scale-1), the rounding offset
+ mvn r1, #0x8000 @ r1 = ~0x8000, needed for clipping
+ mov r8, #0xff00
+ orr r8, r8, #0xff @ r8 = 0xffff, needed for masking
+
+.somloop:
+ ldmia r4!, {r6-r7} @ r6, r7 = next two samples of src[0], advance r4
+ add r6, r6, r2
+ mov r6, r6, asr r5 @ r6 = (r6 + 1<<(scale-1)) >> scale
+ mov lr, r6, asr #15 @ lr = r6 >> 15; 0 or -1 iff r6 fits in 16 bit
+ teq lr, lr, asr #31 @ Z set iff lr equals its own sign fill
+ eorne r6, r1, lr, asr #31 @ Clip (-32768...+32767), low halfword only
+ add r7, r7, r2
+ mov r7, r7, asr r5 @ r7 = (r7 + 1<<(scale-1)) >> scale
+ mov lr, r7, asr #15 @ lr = r7 >> 15; 0 or -1 iff r7 fits in 16 bit
+ teq lr, lr, asr #31 @ Z set iff lr equals its own sign fill
+ eorne r7, r1, lr, asr #31 @ Clip (-32768...+32767), low halfword only
+
+ and r6, r6, r8 @ keep the low 16 bits of the first sample
+ orr r6, r6, r6, asl #16 @ duplicate the first sample into both halfwords
+ and r7, r7, r8 @ keep the low 16 bits of the second sample
+ orr r7, r7, r7, asl #16 @ duplicate the second sample into both halfwords
+ stmia r3!, {r6-r7} @ store two words (four int16) to dst, advance r3
+
+ subs r0, r0, #2 @ count -= 2, sets flags for the branch
+ bgt .somloop @ repeat while count > 0 (the body always runs once)
+
+ ldmfd sp!, {r4-r9, pc} @ restore r4-r9 and return (saved lr -> pc)
+.somend:
+ .size sample_output_mono,.somend-sample_output_mono
+
+/****************************************************************************
+ * void sample_output_stereo(int count, struct dsp_data *data,
+ int32_t *src[], int16_t *dst)
+ * Rounds, scales and clips src[0]/src[1] to 16 bit, interleaved into dst.
+ * NOTE: Two samples are processed per pass. When count is odd, one superfluous
+ * sample is also processed, which will not be used by the calling functions.
+ */
+ .section .icode, "ax", %progbits
+ .align 2
+ .global sample_output_stereo
+ .type sample_output_stereo, %function
+sample_output_stereo:
+ @ input: r0 = count, r1 = data, r2 = src, r3 = dst
+ stmfd sp!, {r4-r11, lr} @ save r4-r11 and lr (lr is used as scratch below)
+
+ ldmia r2, {r4-r5} @ r4 = src[0], r5 = src[1]
+ ldr r6, [r1] @ r6 = data->output_scale (first word of *data)
+ sub r1, r6, #1 @ r1 = r6-1
+ mov r2, #1
+ mov r2, r2, asl r1 @ r2 = 1<<r1 = 1 << (scale-1), the rounding offset
+ mvn r1, #0x8000 @ r1 = ~0x8000, needed for clipping
+ mov r11, #0xff00
+ orr r11, r11, #0xff @ r11 = 0xffff, needed for masking
+
+.sosloop:
+ ldmia r4!, {r7-r8} @ r7, r8 = next two samples of src[0], advance r4
+ add r7, r7, r2
+ mov r7, r7, asr r6 @ r7 = (r7 + 1<<(scale-1)) >> scale
+ mov lr, r7, asr #15 @ lr = r7 >> 15; 0 or -1 iff r7 fits in 16 bit
+ teq lr, lr, asr #31 @ Z set iff lr equals its own sign fill
+ eorne r7, r1, lr, asr #31 @ Clip (-32768...+32767), low halfword only
+ add r8, r8, r2
+ mov r8, r8, asr r6 @ r8 = (r8 + 1<<(scale-1)) >> scale
+ mov lr, r8, asr #15 @ lr = r8 >> 15; 0 or -1 iff r8 fits in 16 bit
+ teq lr, lr, asr #31 @ Z set iff lr equals its own sign fill
+ eorne r8, r1, lr, asr #31 @ Clip (-32768...+32767), low halfword only
+
+ ldmia r5!, {r9-r10} @ r9, r10 = next two samples of src[1], advance r5
+ add r9, r9, r2
+ mov r9, r9, asr r6 @ r9 = (r9 + 1<<(scale-1)) >> scale
+ mov lr, r9, asr #15 @ lr = r9 >> 15; 0 or -1 iff r9 fits in 16 bit
+ teq lr, lr, asr #31 @ Z set iff lr equals its own sign fill
+ eorne r9, r1, lr, asr #31 @ Clip (-32768...+32767), low halfword only
+ add r10, r10, r2
+ mov r10, r10, asr r6 @ r10 = (r10 + 1<<(scale-1)) >> scale
+ mov lr, r10, asr #15 @ lr = r10 >> 15; 0 or -1 iff r10 fits in 16 bit
+ teq lr, lr, asr #31 @ Z set iff lr equals its own sign fill
+ eorne r10, r1, lr, asr #31 @ Clip (-32768...+32767), low halfword only
+
+ and r7, r7, r11 @ keep the low 16 bits of the first src[0] sample
+ orr r9, r7, r9, asl #16 @ r9 = src[0] sample (low) | src[1] sample (high)
+ and r8, r8, r11 @ keep the low 16 bits of the second src[0] sample
+ orr r10, r8, r10, asl #16 @ r10 = same packing for the second sample pair
+ stmia r3!, {r9-r10} @ store two words (four int16) to dst, advance r3
+
+ subs r0, r0, #2 @ count -= 2, sets flags for the branch
+ bgt .sosloop @ repeat while count > 0 (the body always runs once)
+
+ ldmfd sp!, {r4-r11, pc} @ restore r4-r11 and return (saved lr -> pc)
+.sosend:
+ .size sample_output_stereo,.sosend-sample_output_stereo
+
+/****************************************************************************
* void apply_crossfeed(int count, int32_t* src[])
*/
.section .text
diff --git a/apps/dsp_asm.h b/apps/dsp_asm.h
index 02307db..9c40dee 100644
--- a/apps/dsp_asm.h
+++ b/apps/dsp_asm.h
@@ -26,6 +26,10 @@
#if defined(CPU_ARM)
#define DSP_HAVE_ASM_RESAMPLING
#define DSP_HAVE_ASM_CROSSFEED
+#define DSP_HAVE_ASM_SOUND_CHAN_MONO
+#define DSP_HAVE_ASM_SOUND_CHAN_KARAOKE
+#define DSP_HAVE_ASM_SAMPLE_OUTPUT_MONO
+#define DSP_HAVE_ASM_SAMPLE_OUTPUT_STEREO
#elif defined (CPU_COLDFIRE)
#define DSP_HAVE_ASM_APPLY_GAIN
#define DSP_HAVE_ASM_RESAMPLING