diff options
| author | Dave Chapman <dave@dchapman.com> | 2005-02-19 22:11:29 +0000 |
|---|---|---|
| committer | Dave Chapman <dave@dchapman.com> | 2005-02-19 22:11:29 +0000 |
| commit | e9edc8f82df2c182c2453720a79ad37c55e6ef4b (patch) | |
| tree | 4f3eea003ae0124feb2afb51d3a32bce56108f6f /apps/codecs/libFLAC/coldfire.c | |
| parent | a3ed6e9c7a90cd5dcffed397c894e480672d667c (diff) | |
| download | rockbox-e9edc8f82df2c182c2453720a79ad37c55e6ef4b.zip rockbox-e9edc8f82df2c182c2453720a79ad37c55e6ef4b.tar.gz rockbox-e9edc8f82df2c182c2453720a79ad37c55e6ef4b.tar.bz2 rockbox-e9edc8f82df2c182c2453720a79ad37c55e6ef4b.tar.xz | |
Thom Johansen's first EMAC optimisation for the Coldfire - about a 3%-4% speedup
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@6024 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/codecs/libFLAC/coldfire.c')
| -rw-r--r-- | apps/codecs/libFLAC/coldfire.c | 165 |
1 files changed, 165 insertions, 0 deletions
diff --git a/apps/codecs/libFLAC/coldfire.c b/apps/codecs/libFLAC/coldfire.c new file mode 100644 index 0000000..c763c20 --- /dev/null +++ b/apps/codecs/libFLAC/coldfire.c @@ -0,0 +1,165 @@ +#ifndef SIMULATOR +#include <private/coldfire.h> + +void FLAC__lpc_restore_signal_order8_mac(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]) +{ + register const FLAC__int32 *qlp0 = &qlp_coeff[(order-1)]; + register FLAC__int32 sum; + register const FLAC__int32 *history; + + SET_MACSR(0); + history = &data[(-order)]; + SET_ACC(0, acc0); + + switch (order) { + case 8: + for( ; data_len != 0; --data_len) { + asm volatile( + "mov.l (%1), %%d0\n\t" + "mov.l (%2), %%d1\n\t" + "mac.l %%d0, %%d1, 4(%2), %%d1, %%acc0\n\t" + "mov.l -4(%1), %%d0\n\t" + "mac.l %%d0, %%d1, 8(%2), %%d1, %%acc0\n\t" + "mov.l -8(%1), %%d0\n\t" + "mac.l %%d0, %%d1, 12(%2), %%d1, %%acc0\n\t" + "mov.l -12(%1), %%d0\n\t" + "mac.l %%d0, %%d1, 16(%2), %%d1, %%acc0\n\t" + "mov.l -16(%1), %%d0\n\t" + "mac.l %%d0, %%d1, 20(%2), %%d1, %%acc0\n\t" + "mov.l -20(%1), %%d0\n\t" + "mac.l %%d0, %%d1, 24(%2), %%d1, %%acc0\n\t" + "mov.l -24(%1), %%d0\n\t" + "mac.l %%d0, %%d1, 28(%2), %%d1, %%acc0\n\t" + "mov.l -28(%1), %%d0\n\t" + "mac.l %%d0, %%d1, %%acc0\n\t" + "movclr.l %%acc0, %0" + : "=ad" (sum) : "a" (qlp0), "a" (history) : "d0", "d1"); + ++history; + *(data++) = *(residual++) + (sum >> lp_quantization); + } + return; + case 7: + for( ; data_len != 0; --data_len) { + asm volatile( + "mov.l (%1), %%d0\n\t" + "mov.l (%2), %%d1\n\t" + "mac.l %%d0, %%d1, 4(%2), %%d1, %%acc0\n\t" + "mov.l -4(%1), %%d0\n\t" + "mac.l %%d0, %%d1, 8(%2), %%d1, %%acc0\n\t" + "mov.l -8(%1), %%d0\n\t" + "mac.l %%d0, %%d1, 12(%2), %%d1, %%acc0\n\t" + "mov.l -12(%1), %%d0\n\t" + "mac.l %%d0, %%d1, 16(%2), %%d1, %%acc0\n\t" + "mov.l -16(%1), %%d0\n\t" + "mac.l %%d0, %%d1, 20(%2), %%d1, %%acc0\n\t" + "mov.l -20(%1), %%d0\n\t" + "mac.l %%d0, %%d1, 24(%2), %%d1, %%acc0\n\t" + "mov.l -24(%1), %%d0\n\t" + "mac.l %%d0, %%d1, %%acc0\n\t" + "movclr.l %%acc0, %0" + : "=ad" (sum) : "a" (qlp0), "a" (history) : "d0", "d1"); + ++history; + *(data++) = *(residual++) + (sum >> lp_quantization); + } + return; + case 6: + for( ; data_len != 0; --data_len) { + asm volatile( + "mov.l (%1), %%d0\n\t" + "mov.l (%2), %%d1\n\t" + "mac.l %%d0, %%d1, 4(%2), %%d1, %%acc0\n\t" + "mov.l -4(%1), %%d0\n\t" + "mac.l %%d0, %%d1, 8(%2), %%d1, %%acc0\n\t" + "mov.l -8(%1), %%d0\n\t" + "mac.l %%d0, %%d1, 12(%2), %%d1, %%acc0\n\t" + "mov.l -12(%1), %%d0\n\t" + "mac.l %%d0, %%d1, 16(%2), %%d1, %%acc0\n\t" + "mov.l -16(%1), %%d0\n\t" + "mac.l %%d0, %%d1, 20(%2), %%d1, %%acc0\n\t" + "mov.l -20(%1), %%d0\n\t" + "mac.l %%d0, %%d1, %%acc0\n\t" + "movclr.l %%acc0, %0" + : "=ad" (sum) : "a" (qlp0), "a" (history) : "d0", "d1"); + ++history; + *(data++) = *(residual++) + (sum >> lp_quantization); + } + return; + case 5: + for( ; data_len != 0; --data_len) { + asm volatile( + "mov.l (%1), %%d0\n\t" + "mov.l (%2), %%d1\n\t" + "mac.l %%d0, %%d1, 4(%2), %%d1, %%acc0\n\t" + "mov.l -4(%1), %%d0\n\t" + "mac.l %%d0, %%d1, 8(%2), %%d1, %%acc0\n\t" + "mov.l -8(%1), %%d0\n\t" + "mac.l %%d0, %%d1, 12(%2), %%d1, %%acc0\n\t" + "mov.l -12(%1), %%d0\n\t" + "mac.l %%d0, %%d1, 16(%2), %%d1, %%acc0\n\t" + "mov.l -16(%1), %%d0\n\t" + "mac.l %%d0, %%d1, %%acc0\n\t" + "movclr.l %%acc0, %0" + : "=ad" (sum) : "a" (qlp0), "a" (history) : "d0", "d1"); + ++history; + *(data++) = *(residual++) + (sum >> lp_quantization); + } + return; + case 4: + for( ; data_len != 0; --data_len) { + asm volatile( + "mov.l (%1), %%d0\n\t" + "mov.l (%2), %%d1\n\t" + "mac.l %%d0, %%d1, 4(%2), %%d1, %%acc0\n\t" + "mov.l -4(%1), %%d0\n\t" + "mac.l %%d0, %%d1, 8(%2), %%d1, %%acc0\n\t" + "mov.l -8(%1), %%d0\n\t" + "mac.l %%d0, %%d1, 12(%2), %%d1, %%acc0\n\t" + "mov.l -12(%1), %%d0\n\t" + "mac.l %%d0, %%d1, %%acc0\n\t" + "movclr.l %%acc0, %0" + : "=ad" (sum) : "a" (qlp0), "a" (history) : "d0", "d1"); + ++history; + *(data++) = *(residual++) + (sum >> lp_quantization); + } + return; + case 3: + for( ; data_len != 0; --data_len) { + asm volatile( + "mov.l (%1), %%d0\n\t" + "mov.l (%2), %%d1\n\t" + "mac.l %%d0, %%d1, 4(%2), %%d1, %%acc0\n\t" + "mov.l -4(%1), %%d0\n\t" + "mac.l %%d0, %%d1, 8(%2), %%d1, %%acc0\n\t" + "mov.l -8(%1), %%d0\n\t" + "mac.l %%d0, %%d1, %%acc0\n\t" + "movclr.l %%acc0, %0" + : "=ad" (sum) : "a" (qlp0), "a" (history) : "d0", "d1"); + ++history; + *(data++) = *(residual++) + (sum >> lp_quantization); + } + return; + case 2: + for( ; data_len != 0; --data_len) { + asm volatile( + "mov.l (%1), %%d0\n\t" + "mov.l (%2), %%d1\n\t" + "mac.l %%d0, %%d1, 4(%2), %%d1, %%acc0\n\t" + "mov.l -4(%1), %%d0\n\t" + "mac.l %%d0, %%d1, %%acc0\n\t" + "movclr.l %%acc0, %0" + : "=ad" (sum) : "a" (qlp0), "a" (history) : "d0", "d1"); + ++history; + *(data++) = *(residual++) + (sum >> lp_quantization); + } + return; + case 1: + // won't gain anything by using mac here. + for( ; data_len != 0; --data_len) { + sum = (qlp0[0] * (*(history++))); + *(data++) = *(residual++) + (sum >> lp_quantization); + } + return; + } +} + +#endif |