summaryrefslogtreecommitdiff
path: root/apps/codecs/libFLAC/coldfire.c
diff options
context:
space:
mode:
author Dave Chapman <dave@dchapman.com> 2005-02-19 22:11:29 +0000
committer Dave Chapman <dave@dchapman.com> 2005-02-19 22:11:29 +0000
commit e9edc8f82df2c182c2453720a79ad37c55e6ef4b (patch)
tree 4f3eea003ae0124feb2afb51d3a32bce56108f6f /apps/codecs/libFLAC/coldfire.c
parent a3ed6e9c7a90cd5dcffed397c894e480672d667c (diff)
downloadrockbox-e9edc8f82df2c182c2453720a79ad37c55e6ef4b.zip
rockbox-e9edc8f82df2c182c2453720a79ad37c55e6ef4b.tar.gz
rockbox-e9edc8f82df2c182c2453720a79ad37c55e6ef4b.tar.bz2
rockbox-e9edc8f82df2c182c2453720a79ad37c55e6ef4b.tar.xz
Thom Johansen's first EMAC optimisation for the Coldfire - about a 3%-4% speedup
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@6024 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/codecs/libFLAC/coldfire.c')
-rw-r--r-- apps/codecs/libFLAC/coldfire.c 165
1 files changed, 165 insertions, 0 deletions
diff --git a/apps/codecs/libFLAC/coldfire.c b/apps/codecs/libFLAC/coldfire.c
new file mode 100644
index 0000000..c763c20
--- /dev/null
+++ b/apps/codecs/libFLAC/coldfire.c
@@ -0,0 +1,165 @@
+#ifndef SIMULATOR
+#include <private/coldfire.h>
+
+/*
+ * Restore a signal from its LPC residual for predictor orders 1..8, using the
+ * ColdFire EMAC unit for the multiply-accumulate.
+ *
+ * For each of the data_len output samples this computes
+ *     sum     = qlp_coeff[0]*data[i-1] + ... + qlp_coeff[order-1]*data[i-order]
+ *     data[i] = residual[i] + (sum >> lp_quantization)
+ *
+ * residual        - data_len prediction residuals.
+ * data_len        - number of samples to reconstruct.
+ * qlp_coeff       - `order` quantised predictor coefficients.
+ * order           - predictor order; only 1..8 are handled, any other value
+ *                   returns without writing to data[].
+ * lp_quantization - right shift applied to each accumulated sum.
+ * data            - output samples; the `order` samples immediately before
+ *                   data[0] are read as the initial history, so they must
+ *                   already be valid.
+ *
+ * Inline asm operands (identical in every case below):
+ *     %0 = sum (output), %1 = qlp0, %2 = history; d0/d1 are scratch.
+ * d0 holds the current coefficient and d1 the current history sample. Each
+ * "mac.l d0, d1, N(%2), d1, acc0" adds d0*d1 to acc0 and, in the same
+ * instruction, loads the next history sample into d1. The final movclr.l
+ * copies acc0 into sum and clears it, leaving the accumulator at zero for the
+ * next sample.
+ *
+ * Every asm statement carries a "memory" clobber: it reads qlp_coeff[] and
+ * data[] through pointers the compiler only sees as address values, and the
+ * sample stored by one iteration is read by the asm of the next, so the store
+ * must not be reordered or delayed across the asm. "cc" is listed because the
+ * moves alter the condition codes.
+ */
+void FLAC__lpc_restore_signal_order8_mac(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[])
+{
+    /* Points at the last coefficient; the asm walks it downwards. */
+    register const FLAC__int32 *qlp0 = &qlp_coeff[(order-1)];
+    register FLAC__int32 sum;
+    /* Oldest sample of the history window (data[-order]). Plain pointer
+     * subtraction is used instead of indexing with the negated unsigned
+     * `order`, which only worked through 32-bit address wraparound. */
+    register const FLAC__int32 *history = data - order;
+
+    /* NOTE(review): presumably selects plain signed-integer EMAC mode and
+     * zeroes acc0 - confirm against the macros in private/coldfire.h. */
+    SET_MACSR(0);
+    SET_ACC(0, acc0);
+
+    switch (order) {
+    case 8:
+        for( ; data_len != 0; --data_len) {
+            asm volatile(
+                "mov.l (%1), %%d0\n\t"
+                "mov.l (%2), %%d1\n\t"
+                "mac.l %%d0, %%d1, 4(%2), %%d1, %%acc0\n\t"
+                "mov.l -4(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 8(%2), %%d1, %%acc0\n\t"
+                "mov.l -8(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 12(%2), %%d1, %%acc0\n\t"
+                "mov.l -12(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 16(%2), %%d1, %%acc0\n\t"
+                "mov.l -16(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 20(%2), %%d1, %%acc0\n\t"
+                "mov.l -20(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 24(%2), %%d1, %%acc0\n\t"
+                "mov.l -24(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 28(%2), %%d1, %%acc0\n\t"
+                "mov.l -28(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, %%acc0\n\t"
+                "movclr.l %%acc0, %0"
+                : "=ad" (sum) : "a" (qlp0), "a" (history) : "d0", "d1", "cc", "memory");
+            ++history; /* slide the history window by one sample */
+            *(data++) = *(residual++) + (sum >> lp_quantization);
+        }
+        return;
+    case 7:
+        for( ; data_len != 0; --data_len) {
+            asm volatile(
+                "mov.l (%1), %%d0\n\t"
+                "mov.l (%2), %%d1\n\t"
+                "mac.l %%d0, %%d1, 4(%2), %%d1, %%acc0\n\t"
+                "mov.l -4(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 8(%2), %%d1, %%acc0\n\t"
+                "mov.l -8(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 12(%2), %%d1, %%acc0\n\t"
+                "mov.l -12(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 16(%2), %%d1, %%acc0\n\t"
+                "mov.l -16(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 20(%2), %%d1, %%acc0\n\t"
+                "mov.l -20(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 24(%2), %%d1, %%acc0\n\t"
+                "mov.l -24(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, %%acc0\n\t"
+                "movclr.l %%acc0, %0"
+                : "=ad" (sum) : "a" (qlp0), "a" (history) : "d0", "d1", "cc", "memory");
+            ++history;
+            *(data++) = *(residual++) + (sum >> lp_quantization);
+        }
+        return;
+    case 6:
+        for( ; data_len != 0; --data_len) {
+            asm volatile(
+                "mov.l (%1), %%d0\n\t"
+                "mov.l (%2), %%d1\n\t"
+                "mac.l %%d0, %%d1, 4(%2), %%d1, %%acc0\n\t"
+                "mov.l -4(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 8(%2), %%d1, %%acc0\n\t"
+                "mov.l -8(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 12(%2), %%d1, %%acc0\n\t"
+                "mov.l -12(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 16(%2), %%d1, %%acc0\n\t"
+                "mov.l -16(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 20(%2), %%d1, %%acc0\n\t"
+                "mov.l -20(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, %%acc0\n\t"
+                "movclr.l %%acc0, %0"
+                : "=ad" (sum) : "a" (qlp0), "a" (history) : "d0", "d1", "cc", "memory");
+            ++history;
+            *(data++) = *(residual++) + (sum >> lp_quantization);
+        }
+        return;
+    case 5:
+        for( ; data_len != 0; --data_len) {
+            asm volatile(
+                "mov.l (%1), %%d0\n\t"
+                "mov.l (%2), %%d1\n\t"
+                "mac.l %%d0, %%d1, 4(%2), %%d1, %%acc0\n\t"
+                "mov.l -4(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 8(%2), %%d1, %%acc0\n\t"
+                "mov.l -8(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 12(%2), %%d1, %%acc0\n\t"
+                "mov.l -12(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 16(%2), %%d1, %%acc0\n\t"
+                "mov.l -16(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, %%acc0\n\t"
+                "movclr.l %%acc0, %0"
+                : "=ad" (sum) : "a" (qlp0), "a" (history) : "d0", "d1", "cc", "memory");
+            ++history;
+            *(data++) = *(residual++) + (sum >> lp_quantization);
+        }
+        return;
+    case 4:
+        for( ; data_len != 0; --data_len) {
+            asm volatile(
+                "mov.l (%1), %%d0\n\t"
+                "mov.l (%2), %%d1\n\t"
+                "mac.l %%d0, %%d1, 4(%2), %%d1, %%acc0\n\t"
+                "mov.l -4(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 8(%2), %%d1, %%acc0\n\t"
+                "mov.l -8(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 12(%2), %%d1, %%acc0\n\t"
+                "mov.l -12(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, %%acc0\n\t"
+                "movclr.l %%acc0, %0"
+                : "=ad" (sum) : "a" (qlp0), "a" (history) : "d0", "d1", "cc", "memory");
+            ++history;
+            *(data++) = *(residual++) + (sum >> lp_quantization);
+        }
+        return;
+    case 3:
+        for( ; data_len != 0; --data_len) {
+            asm volatile(
+                "mov.l (%1), %%d0\n\t"
+                "mov.l (%2), %%d1\n\t"
+                "mac.l %%d0, %%d1, 4(%2), %%d1, %%acc0\n\t"
+                "mov.l -4(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 8(%2), %%d1, %%acc0\n\t"
+                "mov.l -8(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, %%acc0\n\t"
+                "movclr.l %%acc0, %0"
+                : "=ad" (sum) : "a" (qlp0), "a" (history) : "d0", "d1", "cc", "memory");
+            ++history;
+            *(data++) = *(residual++) + (sum >> lp_quantization);
+        }
+        return;
+    case 2:
+        for( ; data_len != 0; --data_len) {
+            asm volatile(
+                "mov.l (%1), %%d0\n\t"
+                "mov.l (%2), %%d1\n\t"
+                "mac.l %%d0, %%d1, 4(%2), %%d1, %%acc0\n\t"
+                "mov.l -4(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, %%acc0\n\t"
+                "movclr.l %%acc0, %0"
+                : "=ad" (sum) : "a" (qlp0), "a" (history) : "d0", "d1", "cc", "memory");
+            ++history;
+            *(data++) = *(residual++) + (sum >> lp_quantization);
+        }
+        return;
+    case 1:
+        /* A single product: plain C, won't gain anything by using mac here. */
+        for( ; data_len != 0; --data_len) {
+            sum = (qlp0[0] * (*(history++)));
+            *(data++) = *(residual++) + (sum >> lp_quantization);
+        }
+        return;
+    default:
+        /* Orders outside 1..8 are not handled by this routine; data[] is left
+         * untouched, so the caller must use another routine for them. */
+        return;
+    }
+}
+
+#endif