diff options
| author | Dave Chapman <dave@dchapman.com> | 2005-02-19 22:11:29 +0000 |
|---|---|---|
| committer | Dave Chapman <dave@dchapman.com> | 2005-02-19 22:11:29 +0000 |
| commit | e9edc8f82df2c182c2453720a79ad37c55e6ef4b (patch) | |
| tree | 4f3eea003ae0124feb2afb51d3a32bce56108f6f | |
| parent | a3ed6e9c7a90cd5dcffed397c894e480672d667c (diff) | |
| download | rockbox-e9edc8f82df2c182c2453720a79ad37c55e6ef4b.zip rockbox-e9edc8f82df2c182c2453720a79ad37c55e6ef4b.tar.gz rockbox-e9edc8f82df2c182c2453720a79ad37c55e6ef4b.tar.bz2 rockbox-e9edc8f82df2c182c2453720a79ad37c55e6ef4b.tar.xz | |
Thom Johansen's first EMAC optimisation for the Coldfire - about a 3%-4% speedup
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@6024 a1c6a512-1295-4272-9138-f99709370657
| -rw-r--r-- | apps/codecs/libFLAC/SOURCES | 3 |
| -rw-r--r-- | apps/codecs/libFLAC/coldfire.c | 165 |
| -rw-r--r-- | apps/codecs/libFLAC/include/private/coldfire.h | 46 |
| -rw-r--r-- | apps/codecs/libFLAC/stream_decoder.c | 8 |
4 files changed, 222 insertions, 0 deletions
diff --git a/apps/codecs/libFLAC/SOURCES b/apps/codecs/libFLAC/SOURCES
index fc793f5..7f5abc2 100644
--- a/apps/codecs/libFLAC/SOURCES
+++ b/apps/codecs/libFLAC/SOURCES
@@ -10,3 +10,6 @@ md5.c
 memory.c
 seekable_stream_decoder.c
 stream_decoder.c
+#if CONFIG_CPU==MCF5249
+coldfire.c
+#endif
diff --git a/apps/codecs/libFLAC/coldfire.c b/apps/codecs/libFLAC/coldfire.c
new file mode 100644
index 0000000..c763c20
--- /dev/null
+++ b/apps/codecs/libFLAC/coldfire.c
@@ -0,0 +1,165 @@
+#ifndef SIMULATOR
+#include <private/coldfire.h>
+/* Restores data[] from residual[] using an LPC predictor of order 1..8; orders 2..8 accumulate in EMAC acc0, order 1 is plain C. */
+void FLAC__lpc_restore_signal_order8_mac(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[])
+{
+    register const FLAC__int32 *qlp0 = &qlp_coeff[(order-1)];
+    register FLAC__int32 sum;
+    register const FLAC__int32 *history;
+    /* history starts at data[-order], so 'order' samples before data[0] are read; MACSR=0 sets none of the EMAC_* mode flags. */
+    SET_MACSR(0);
+    history = &data[(-order)];
+    SET_ACC(0, acc0);
+    /* Any other order matches no case and leaves data[] untouched. The asm reads via qlp0/history (history aliases data[]), hence the "memory" clobber. */
+    switch (order) {
+    case 8:
+        for( ; data_len != 0; --data_len) {
+            asm volatile(
+                "mov.l (%1), %%d0\n\t"
+                "mov.l (%2), %%d1\n\t"
+                "mac.l %%d0, %%d1, 4(%2), %%d1, %%acc0\n\t"
+                "mov.l -4(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 8(%2), %%d1, %%acc0\n\t"
+                "mov.l -8(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 12(%2), %%d1, %%acc0\n\t"
+                "mov.l -12(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 16(%2), %%d1, %%acc0\n\t"
+                "mov.l -16(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 20(%2), %%d1, %%acc0\n\t"
+                "mov.l -20(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 24(%2), %%d1, %%acc0\n\t"
+                "mov.l -24(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 28(%2), %%d1, %%acc0\n\t"
+                "mov.l -28(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, %%acc0\n\t"
+                "movclr.l %%acc0, %0"
+                : "=ad" (sum) : "a" (qlp0), "a" (history) : "d0", "d1", "memory");
+            ++history;
+            *(data++) = *(residual++) + (sum >> lp_quantization);
+        }
+        return;
+    case 7:
+        for( ; data_len != 0; --data_len) {
+            asm volatile(
+                "mov.l (%1), %%d0\n\t"
+                "mov.l (%2), %%d1\n\t"
+                "mac.l %%d0, %%d1, 4(%2), %%d1, %%acc0\n\t"
+                "mov.l -4(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 8(%2), %%d1, %%acc0\n\t"
+                "mov.l -8(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 12(%2), %%d1, %%acc0\n\t"
+                "mov.l -12(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 16(%2), %%d1, %%acc0\n\t"
+                "mov.l -16(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 20(%2), %%d1, %%acc0\n\t"
+                "mov.l -20(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 24(%2), %%d1, %%acc0\n\t"
+                "mov.l -24(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, %%acc0\n\t"
+                "movclr.l %%acc0, %0"
+                : "=ad" (sum) : "a" (qlp0), "a" (history) : "d0", "d1", "memory");
+            ++history;
+            *(data++) = *(residual++) + (sum >> lp_quantization);
+        }
+        return;
+    case 6:
+        for( ; data_len != 0; --data_len) {
+            asm volatile(
+                "mov.l (%1), %%d0\n\t"
+                "mov.l (%2), %%d1\n\t"
+                "mac.l %%d0, %%d1, 4(%2), %%d1, %%acc0\n\t"
+                "mov.l -4(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 8(%2), %%d1, %%acc0\n\t"
+                "mov.l -8(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 12(%2), %%d1, %%acc0\n\t"
+                "mov.l -12(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 16(%2), %%d1, %%acc0\n\t"
+                "mov.l -16(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 20(%2), %%d1, %%acc0\n\t"
+                "mov.l -20(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, %%acc0\n\t"
+                "movclr.l %%acc0, %0"
+                : "=ad" (sum) : "a" (qlp0), "a" (history) : "d0", "d1", "memory");
+            ++history;
+            *(data++) = *(residual++) + (sum >> lp_quantization);
+        }
+        return;
+    case 5:
+        for( ; data_len != 0; --data_len) {
+            asm volatile(
+                "mov.l (%1), %%d0\n\t"
+                "mov.l (%2), %%d1\n\t"
+                "mac.l %%d0, %%d1, 4(%2), %%d1, %%acc0\n\t"
+                "mov.l -4(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 8(%2), %%d1, %%acc0\n\t"
+                "mov.l -8(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 12(%2), %%d1, %%acc0\n\t"
+                "mov.l -12(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 16(%2), %%d1, %%acc0\n\t"
+                "mov.l -16(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, %%acc0\n\t"
+                "movclr.l %%acc0, %0"
+                : "=ad" (sum) : "a" (qlp0), "a" (history) : "d0", "d1", "memory");
+            ++history;
+            *(data++) = *(residual++) + (sum >> lp_quantization);
+        }
+        return;
+    case 4:
+        for( ; data_len != 0; --data_len) {
+            asm volatile(
+                "mov.l (%1), %%d0\n\t"
+                "mov.l (%2), %%d1\n\t"
+                "mac.l %%d0, %%d1, 4(%2), %%d1, %%acc0\n\t"
+                "mov.l -4(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 8(%2), %%d1, %%acc0\n\t"
+                "mov.l -8(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 12(%2), %%d1, %%acc0\n\t"
+                "mov.l -12(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, %%acc0\n\t"
+                "movclr.l %%acc0, %0"
+                : "=ad" (sum) : "a" (qlp0), "a" (history) : "d0", "d1", "memory");
+            ++history;
+            *(data++) = *(residual++) + (sum >> lp_quantization);
+        }
+        return;
+    case 3:
+        for( ; data_len != 0; --data_len) {
+            asm volatile(
+                "mov.l (%1), %%d0\n\t"
+                "mov.l (%2), %%d1\n\t"
+                "mac.l %%d0, %%d1, 4(%2), %%d1, %%acc0\n\t"
+                "mov.l -4(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 8(%2), %%d1, %%acc0\n\t"
+                "mov.l -8(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, %%acc0\n\t"
+                "movclr.l %%acc0, %0"
+                : "=ad" (sum) : "a" (qlp0), "a" (history) : "d0", "d1", "memory");
+            ++history;
+            *(data++) = *(residual++) + (sum >> lp_quantization);
+        }
+        return;
+    case 2:
+        for( ; data_len != 0; --data_len) {
+            asm volatile(
+                "mov.l (%1), %%d0\n\t"
+                "mov.l (%2), %%d1\n\t"
+                "mac.l %%d0, %%d1, 4(%2), %%d1, %%acc0\n\t"
+                "mov.l -4(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, %%acc0\n\t"
+                "movclr.l %%acc0, %0"
+                : "=ad" (sum) : "a" (qlp0), "a" (history) : "d0", "d1", "memory");
+            ++history;
+            *(data++) = *(residual++) + (sum >> lp_quantization);
+        }
+        return;
+    case 1:
+        // won't gain anything by using mac here.
+        for( ; data_len != 0; --data_len) {
+            sum = (qlp0[0] * (*(history++)));
+            *(data++) = *(residual++) + (sum >> lp_quantization);
+        }
+        return;
+    }
+}
+
+#endif
diff --git a/apps/codecs/libFLAC/include/private/coldfire.h b/apps/codecs/libFLAC/include/private/coldfire.h
new file mode 100644
index 0000000..22f1711
--- /dev/null
+++ b/apps/codecs/libFLAC/include/private/coldfire.h
@@ -0,0 +1,46 @@
+#ifndef SIMULATOR
+#ifndef _FLAC_COLDFIRE_H
+#define _FLAC_COLDFIRE_H
+
+#include <FLAC/ordinals.h>
+/* Inline-asm wrappers around EMAC instructions; 'acc' is a bare accumulator name (e.g. acc0). Each macro already ends in ';'. */
+#define MACL(x, y, acc) \
+    asm volatile ("mac.l %0, %1, %%" #acc \
+                  : : "ad" ((x)), "ad" ((y)));
+
+#define MACL_SHIFT(x, y, shift, acc) \
+    asm volatile ("mac.l %0, %1, #" #shift ", %%" #acc \
+                  : : "ad" ((x)), "ad" ((y)));
+
+#define MSACL(x, y, acc) \
+    asm volatile ("msac.l %0, %1, %%" #acc \
+                  : : "ad" ((x)), "ad" ((y)));
+
+#define MSACL_SHIFT(x, y, shift, acc) \
+    asm volatile ("msac.l %0, %1, #" #shift ", %%" #acc \
+                  : : "ad" ((x)), "ad" ((y)));
+
+#define SET_MACSR(x) \
+    asm volatile ("mov.l %0, %%macsr" : : "adi" ((x)));
+
+#define TRANSFER_ACC(acca, accb) \
+    asm volatile ("mov.l %" #acca ", %" #accb);
+
+#define SET_ACC(x, acc) \
+    asm volatile ("mov.l %0, %%" #acc : : "adi" ((x)));
+
+#define GET_ACC(x, acc) \
+    asm volatile ("mov.l %%" #acc ", %0\n\t" : "=ad" ((x)));
+
+#define GET_ACC_CLR(x, acc) \
+    asm volatile ("movclr.l %%" #acc ", %0\n\t" : "=ad" ((x)));
+
+#define EMAC_SATURATE   0x00000080
+#define EMAC_FRACTIONAL 0x00000020
+#define EMAC_ROUND      0x00000010
+
+/* LPC restore for order 1..8 only (other orders write nothing); reads data[-order..-1], sets MACSR to 0 and uses acc0. */
+void FLAC__lpc_restore_signal_order8_mac(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]);
+
+#endif
+#endif
diff --git a/apps/codecs/libFLAC/stream_decoder.c b/apps/codecs/libFLAC/stream_decoder.c
index ec43314..eb78d18 100644
--- a/apps/codecs/libFLAC/stream_decoder.c
+++ b/apps/codecs/libFLAC/stream_decoder.c
@@ -43,6 +43,10 @@
 #include "private/lpc.h"
 #include "private/memory.h"
 
+#if CONFIG_CPU==MCF5249
+#include <private/coldfire.h>
+#endif
+
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -298,7 +302,11 @@ FLAC_API FLAC__StreamDecoderState FLAC__stream_decoder_init(FLAC__StreamDecoder
 	decoder->private_->local_lpc_restore_signal = FLAC__lpc_restore_signal;
 	decoder->private_->local_lpc_restore_signal_64bit = FLAC__lpc_restore_signal_wide;
 	decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal;
+#if CONFIG_CPU==MCF5249 && !defined(SIMULATOR)
+	decoder->private_->local_lpc_restore_signal_16bit_order8 = FLAC__lpc_restore_signal_order8_mac;
+#else
 	decoder->private_->local_lpc_restore_signal_16bit_order8 = FLAC__lpc_restore_signal;
+#endif
 	/* now override with asm where appropriate */
 #ifndef FLAC__NO_ASM
 	if(decoder->private_->cpuinfo.use_asm) {