diff options
| author | Nils Wallménius <nils@rockbox.org> | 2010-11-05 11:49:30 +0000 |
|---|---|---|
| committer | Nils Wallménius <nils@rockbox.org> | 2010-11-05 11:49:30 +0000 |
| commit | 9d98583a181bdab20263262d121130111cf3be86 (patch) | |
| tree | 3c9f306a4f045a55e3bc31df3626f978f7c7c69e /apps/codecs | |
| parent | dbdc0a8a8cbfa4e6b72e5f6fb643f5b0ef4afc27 (diff) | |
| download | rockbox-9d98583a181bdab20263262d121130111cf3be86.zip rockbox-9d98583a181bdab20263262d121130111cf3be86.tar.gz rockbox-9d98583a181bdab20263262d121130111cf3be86.tar.bz2 rockbox-9d98583a181bdab20263262d121130111cf3be86.tar.xz | |
libmusepack: tiny optimization of the ARMv4 mpc_decoder_windowing_D implementations, using register-indexed addressing to store data, which saves one instruction in the loop, and deleting one unneeded instruction after the loop.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@28488 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/codecs')
| -rw-r--r-- | apps/codecs/libmusepack/synth_filter_arm.S | 22 |
1 files changed, 9 insertions, 13 deletions
diff --git a/apps/codecs/libmusepack/synth_filter_arm.S b/apps/codecs/libmusepack/synth_filter_arm.S index 598f218..521d690 100644 --- a/apps/codecs/libmusepack/synth_filter_arm.S +++ b/apps/codecs/libmusepack/synth_filter_arm.S @@ -164,7 +164,7 @@ mpc_decoder_windowing_D: * r10 = lo, r11 = hi of 31..17 * r12 = V[31..16] *****************************************/ - mov lr, #15 + mov lr, #15*8 add r12, r1, #30*4 /* r12 = V[31] */ .loop15: ldmia r2!, { r3-r6 } /* load D[00..03] */ @@ -240,19 +240,18 @@ mpc_decoder_windowing_D: orr r8, r8, r9, lsl #16 /* (lo>>16) || (hi<<16) */ str r8, [r0] /* store Data */ /* store Data[31..17] */ - add r0, r0, lr, asl #3 /* r0 = r0 + 2*lr [words] */ mov r10, r10, lsr #16 orr r10, r10, r11, lsl #16 /* (lo>>16) || (hi<<16) */ rsb r10, r10, #0 /* r10 = -r10 */ - str r10, [r0], #4 /* store Data */ - sub r0, r0, lr, asl #3 /* r0 = r0 - 2*lr [words] */ + str r10, [r0, lr] /* store Data */ + add r0, r0, #4 /* r0++ */ /* correct adresses for next loop */ sub r12, r12, #4 /* r12 = V-- */ add r1, r1, #4 /* r1 = V++ */ /* next loop */ - subs lr, lr, #1 + subs lr, lr, #8 bgt .loop15 - + /****************************************** * V[16] with internal symmetry *****************************************/ @@ -293,7 +292,6 @@ mpc_decoder_windowing_D: mov r8, r8, lsr #16 orr r8, r8, r9, lsl #16 /* (lo>>16) || (hi<<16) */ str r8, [r0], #4 /* store Data */ - add r1, r1, #4 /* V++ */ ldmpc regs=r4-r11 #elif ARM_ARCH < 6 /* arm9 and above */ @@ -365,7 +363,7 @@ mpc_decoder_windowing_D: * r10 = lo, r11 = hi of 31..17 * r12 = V[31..16] *****************************************/ - mov lr, #15 + mov lr, #15*8 add r12, r1, #30*4 /* r12 = V[31] */ .loop15: ldmia r2!, { r3-r4 } /* load D[00..01] */ @@ -445,17 +443,16 @@ mpc_decoder_windowing_D: orr r8, r8, r9, lsl #16 /* (lo>>16) || (hi<<16) */ str r8, [r0] /* store Data */ /* store Data[31..17] */ - add r0, r0, lr, asl #3 /* r0 = r0 + 2*lr [words] */ mov r10, r10, lsr #16 orr r10, r10, r11, lsl #16 /* 
(lo>>16) || (hi<<16) */ rsb r10, r10, #0 /* r10 = -r10 */ - str r10, [r0], #4 /* store Data */ - sub r0, r0, lr, asl #3 /* r0 = r0 - 2*lr [words] */ + str r10, [r0, lr] /* store Data */ + add r0, r0, #4 /* r0++ */ /* correct adresses for next loop */ sub r12, r12, #4 /* r12 = V-- */ add r1, r1, #4 /* r1 = V++ */ /* next loop */ - subs lr, lr, #1 + subs lr, lr, #8 bgt .loop15 /****************************************** @@ -498,7 +495,6 @@ mpc_decoder_windowing_D: mov r8, r8, lsr #16 orr r8, r8, r9, lsl #16 /* (lo>>16) || (hi<<16) */ str r8, [r0], #4 /* store Data */ - add r1, r1, #4 /* V++ */ ldmpc regs=r4-r11 #else |