summaryrefslogtreecommitdiff
path: root/apps/codecs
diff options
context:
space:
mode:
Diffstat (limited to 'apps/codecs')
-rw-r--r-- apps/codecs/demac/libdemac/decoder.c 13
-rw-r--r-- apps/codecs/demac/libdemac/demac_config.h 27
-rw-r--r-- apps/codecs/demac/libdemac/filter.c 41
-rw-r--r-- apps/codecs/demac/libdemac/filter.h 12
-rw-r--r-- apps/codecs/demac/libdemac/vector_math16_arm7.h 293
-rw-r--r-- apps/codecs/demac/libdemac/vector_math32_armv4.h 210
-rw-r--r-- apps/codecs/demac/libdemac/vector_math_generic.h (renamed from apps/codecs/demac/libdemac/vector_math16.h) 10
7 files changed, 280 insertions, 326 deletions
diff --git a/apps/codecs/demac/libdemac/decoder.c b/apps/codecs/demac/libdemac/decoder.c
index 540db47..31bcb28 100644
--- a/apps/codecs/demac/libdemac/decoder.c
+++ b/apps/codecs/demac/libdemac/decoder.c
@@ -33,15 +33,16 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
/* Statically allocate the filter buffers */
-static int16_t filterbuf32[(32*3 + FILTER_HISTORY_SIZE) * 2] /* 2432 bytes */
- IBSS_ATTR __attribute__((aligned(16)));
-static int16_t filterbuf256[(256*3 + FILTER_HISTORY_SIZE) * 2] /* 5120 bytes */
- IBSS_ATTR __attribute__((aligned(16)));
+static filter_int filterbuf32[(32*3 + FILTER_HISTORY_SIZE) * 2]
+ IBSS_ATTR __attribute__((aligned(16))); /* 2432/4864 bytes */
+static filter_int filterbuf256[(256*3 + FILTER_HISTORY_SIZE) * 2]
+ IBSS_ATTR __attribute__((aligned(16))); /* 5120/10240 bytes */
/* This is only needed for "insane" files, and no current Rockbox targets
can hope to decode them in realtime, although the Gigabeat S comes close. */
-static int16_t filterbuf1280[(1280*3 + FILTER_HISTORY_SIZE) * 2] /* 17408 bytes */
- IBSS_ATTR_DEMAC_INSANEBUF __attribute__((aligned(16)));
+static filter_int filterbuf1280[(1280*3 + FILTER_HISTORY_SIZE) * 2]
+ IBSS_ATTR_DEMAC_INSANEBUF __attribute__((aligned(16)));
+ /* 17408 or 34816 bytes */
void init_frame_decoder(struct ape_ctx_t* ape_ctx,
unsigned char* inbuffer, int* firstbyte,
diff --git a/apps/codecs/demac/libdemac/demac_config.h b/apps/codecs/demac/libdemac/demac_config.h
index 93fda76..86c2d24 100644
--- a/apps/codecs/demac/libdemac/demac_config.h
+++ b/apps/codecs/demac/libdemac/demac_config.h
@@ -39,12 +39,21 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
#define APE_OUTPUT_DEPTH 29
-/* On PP5002 code should go into IRAM. Otherwise put the insane
- * filter buffer into IRAM as long as there is no better use. */
+/* On ARMv4, using 32 bit ints for the filters is faster. */
+#if defined(CPU_ARM) && (ARM_ARCH == 4)
+#define FILTER_BITS 32
+#endif
+
#if CONFIG_CPU == PP5002
+/* Code in IRAM for speed, not enough IRAM for the insane filter buffer. */
#define ICODE_SECTION_DEMAC_ARM .icode
#define ICODE_ATTR_DEMAC ICODE_ATTR
#define IBSS_ATTR_DEMAC_INSANEBUF
+#elif CONFIG_CPU == PP5020
+/* Not enough IRAM for the insane filter buffer. */
+#define ICODE_SECTION_DEMAC_ARM .text
+#define ICODE_ATTR_DEMAC
+#define IBSS_ATTR_DEMAC_INSANEBUF
#else
#define ICODE_SECTION_DEMAC_ARM .text
#define ICODE_ATTR_DEMAC
@@ -75,6 +84,20 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
#ifndef PREDICTOR_HISTORY_SIZE
#define PREDICTOR_HISTORY_SIZE 512
+#endif
+
+#ifndef FILTER_BITS
+#define FILTER_BITS 16
+#endif
+
+
+#ifndef __ASSEMBLER__
+#include <inttypes.h>
+/* filter_int is the element type of the filter buffers; its width follows
+ * FILTER_BITS. Only 16 and 32 are supported, so reject anything else here
+ * instead of leaving filter_int undefined. */
+#if FILTER_BITS == 32
+typedef int32_t filter_int;
+#elif FILTER_BITS == 16
+typedef int16_t filter_int;
+#else
+#error "FILTER_BITS must be 16 or 32"
+#endif
#endif
#endif /* _DEMAC_CONFIG_H */
diff --git a/apps/codecs/demac/libdemac/filter.c b/apps/codecs/demac/libdemac/filter.c
index b47a37a..5601fff 100644
--- a/apps/codecs/demac/libdemac/filter.c
+++ b/apps/codecs/demac/libdemac/filter.c
@@ -28,27 +28,38 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
#include "demac.h"
#include "filter.h"
#include "demac_config.h"
+
+#if FILTER_BITS == 32
+
+#if defined(CPU_ARM) && (ARM_ARCH == 4)
+#include "vector_math32_armv4.h"
+#else
+#include "vector_math_generic.h"
+#endif
+
+#else /* FILTER_BITS == 16 */
#ifdef CPU_COLDFIRE
#include "vector_math16_cf.h"
-#elif ARM_ARCH >= 6
+#elif defined(CPU_ARM) && (ARM_ARCH >= 6)
#include "vector_math16_armv6.h"
-#elif ARM_ARCH >= 5 /* Assume all our ARMv5 targets are ARMv5te(j) */
+#elif defined(CPU_ARM) && (ARM_ARCH >= 5)
+/* Assume all our ARMv5 targets are ARMv5te(j) */
#include "vector_math16_armv5te.h"
-#elif defined CPU_ARM7TDMI
-#include "vector_math16_arm7.h"
#else
-#include "vector_math16.h"
+#include "vector_math_generic.h"
#endif
+#endif /* FILTER_BITS */
+
struct filter_t {
- int16_t* coeffs; /* ORDER entries */
+ filter_int* coeffs; /* ORDER entries */
/* We store all the filter delays in a single buffer */
- int16_t* history_end;
+ filter_int* history_end;
- int16_t* delay;
- int16_t* adaptcoeffs;
+ filter_int* delay;
+ filter_int* adaptcoeffs;
int avg;
};
@@ -89,7 +100,7 @@ struct filter_t {
#if defined(CPU_ARM) && (ARM_ARCH >= 6)
#define SATURATE(x) ({int __res; asm("ssat %0, #16, %1" : "=r"(__res) : "r"(x)); __res; })
#else
-#define SATURATE(x) (int16_t)(((x) == (int16_t)(x)) ? (x) : ((x) >> 31) ^ 0x7FFF);
+#define SATURATE(x) (((x) == (int16_t)(x)) ? (x) : ((x) >> 31) ^ 0x7FFF);
#endif
/* Apply the filter with state f to count entries in data[] */
@@ -145,7 +156,7 @@ static void ICODE_ATTR_DEMAC do_apply_filter_3980(struct filter_t* f,
/* Have we filled the history buffer? */
if (f->delay == f->history_end) {
memmove(f->coeffs + ORDER, f->delay - (ORDER*2),
- (ORDER*2) * sizeof(int16_t));
+ (ORDER*2) * sizeof(filter_int));
f->adaptcoeffs = f->coeffs + ORDER*2;
f->delay = f->coeffs + ORDER*3;
}
@@ -190,7 +201,7 @@ static void ICODE_ATTR_DEMAC do_apply_filter_3970(struct filter_t* f,
/* Have we filled the history buffer? */
if (f->delay == f->history_end) {
memmove(f->coeffs + ORDER, f->delay - (ORDER*2),
- (ORDER*2) * sizeof(int16_t));
+ (ORDER*2) * sizeof(filter_int));
f->adaptcoeffs = f->coeffs + ORDER*2;
f->delay = f->coeffs + ORDER*3;
}
@@ -200,7 +211,7 @@ static void ICODE_ATTR_DEMAC do_apply_filter_3970(struct filter_t* f,
static struct filter_t filter0 IBSS_ATTR;
static struct filter_t filter1 IBSS_ATTR;
-static void do_init_filter(struct filter_t* f, int16_t* buf)
+static void do_init_filter(struct filter_t* f, filter_int* buf)
{
f->coeffs = buf;
f->history_end = buf + ORDER*3 + FILTER_HISTORY_SIZE;
@@ -210,13 +221,13 @@ static void do_init_filter(struct filter_t* f, int16_t* buf)
f->delay = f->coeffs + ORDER*3;
/* Zero coefficients and history buffer */
- memset(f->coeffs, 0, ORDER*3 * sizeof(int16_t));
+ memset(f->coeffs, 0, ORDER*3 * sizeof(filter_int));
/* Zero the running average */
f->avg = 0;
}
-void INIT_FILTER(int16_t* buf)
+void INIT_FILTER(filter_int* buf)
{
do_init_filter(&filter0, buf);
do_init_filter(&filter1, buf + ORDER*3 + FILTER_HISTORY_SIZE);
diff --git a/apps/codecs/demac/libdemac/filter.h b/apps/codecs/demac/libdemac/filter.h
index acbb155..bbe51d4 100644
--- a/apps/codecs/demac/libdemac/filter.h
+++ b/apps/codecs/demac/libdemac/filter.h
@@ -25,21 +25,21 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
#ifndef _APE_FILTER_H
#define _APE_FILTER_H
-#include <inttypes.h>
+#include "demac_config.h"
-void init_filter_16_11(int16_t* buf);
+void init_filter_16_11(filter_int* buf);
int apply_filter_16_11(int fileversion, int32_t* decoded0, int32_t* decoded1, int count);
-void init_filter_64_11(int16_t* buf);
+void init_filter_64_11(filter_int* buf);
int apply_filter_64_11(int fileversion, int32_t* decoded0, int32_t* decoded1, int count);
-void init_filter_32_10(int16_t* buf);
+void init_filter_32_10(filter_int* buf);
int apply_filter_32_10(int fileversion, int32_t* decoded0, int32_t* decoded1, int count);
-void init_filter_256_13(int16_t* buf);
+void init_filter_256_13(filter_int* buf);
int apply_filter_256_13(int fileversion, int32_t* decoded0, int32_t* decoded1, int count);
-void init_filter_1280_15(int16_t* buf);
+void init_filter_1280_15(filter_int* buf);
int apply_filter_1280_15(int fileversion, int32_t* decoded0, int32_t* decoded1, int count);
#endif
diff --git a/apps/codecs/demac/libdemac/vector_math16_arm7.h b/apps/codecs/demac/libdemac/vector_math16_arm7.h
deleted file mode 100644
index 653bb1f..0000000
--- a/apps/codecs/demac/libdemac/vector_math16_arm7.h
+++ /dev/null
@@ -1,293 +0,0 @@
-/*
-
-libdemac - A Monkey's Audio decoder
-
-$Id$
-
-Copyright (C) Dave Chapman 2007
-
-ARM7 vector math copyright (C) 2007 Jens Arnold
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
-
-*/
-
-/* This version fetches data as 32 bit words, and *requires* v1 to be
- * 32 bit aligned, otherwise it will result either in a data abort, or
- * incorrect results (if ARM aligncheck is disabled). */
-static inline void vector_add(int16_t* v1, int16_t* v2)
-{
-#if ORDER > 16
- int cnt = ORDER>>4;
-#endif
-
-#define ADDHALFREGS(sum, s1) /* Adds register */ \
- "mov " #s1 ", " #s1 ", ror #16 \n" /* halves straight. */ \
- "add r8 , " #s1 ", " #sum ", lsl #16 \n" /* Clobbers 's1' */ \
- "add " #sum ", " #s1 ", " #sum ", lsr #16 \n" /* and r8. */ \
- "mov " #sum ", " #sum ", lsl #16 \n" \
- "orr " #sum ", " #sum ", r8 , lsr #16 \n"
-
-#define ADDHALFXREGS(sum, s1, s2) /* Adds register */ \
- "add " #s1 ", " #s1 ", " #sum ", lsl #16 \n" /* halves across. */ \
- "add " #sum ", " #s2 ", " #sum ", lsr #16 \n" /* Clobbers 's1'. */ \
- "mov " #sum ", " #sum ", lsl #16 \n" \
- "orr " #sum ", " #sum ", " #s1 ", lsr #16 \n"
-
- asm volatile (
- "tst %[v2], #2 \n"
- "beq 20f \n"
-
- "10: \n"
- "ldrh r4, [%[v2]], #2 \n"
- "mov r4, r4, lsl #16 \n"
- "1: \n"
- "ldmia %[v1], {r0-r3} \n"
- "ldmia %[v2]!, {r5-r8} \n"
- ADDHALFXREGS(r0, r4, r5)
- ADDHALFXREGS(r1, r5, r6)
- ADDHALFXREGS(r2, r6, r7)
- ADDHALFXREGS(r3, r7, r8)
- "stmia %[v1]!, {r0-r3} \n"
- "mov r4, r8 \n"
- "ldmia %[v1], {r0-r3} \n"
- "ldmia %[v2]!, {r5-r8} \n"
- ADDHALFXREGS(r0, r4, r5)
- ADDHALFXREGS(r1, r5, r6)
- ADDHALFXREGS(r2, r6, r7)
- ADDHALFXREGS(r3, r7, r8)
- "stmia %[v1]!, {r0-r3} \n"
-#if ORDER > 16
- "mov r4, r8 \n"
- "subs %[cnt], %[cnt], #1 \n"
- "bne 1b \n"
-#endif
- "b 99f \n"
-
- "20: \n"
- "1: \n"
- "ldmia %[v1], {r0-r3} \n"
- "ldmia %[v2]!, {r4-r7} \n"
- ADDHALFREGS(r0, r4)
- ADDHALFREGS(r1, r5)
- ADDHALFREGS(r2, r6)
- ADDHALFREGS(r3, r7)
- "stmia %[v1]!, {r0-r3} \n"
- "ldmia %[v1], {r0-r3} \n"
- "ldmia %[v2]!, {r4-r7} \n"
- ADDHALFREGS(r0, r4)
- ADDHALFREGS(r1, r5)
- ADDHALFREGS(r2, r6)
- ADDHALFREGS(r3, r7)
- "stmia %[v1]!, {r0-r3} \n"
-#if ORDER > 16
- "subs %[cnt], %[cnt], #1 \n"
- "bne 1b \n"
-#endif
-
- "99: \n"
- : /* outputs */
-#if ORDER > 16
- [cnt]"+r"(cnt),
-#endif
- [v1] "+r"(v1),
- [v2] "+r"(v2)
- : /* inputs */
- : /* clobbers */
- "r0", "r1", "r2", "r3", "r4",
- "r5", "r6", "r7", "r8", "memory"
- );
-}
-
-/* This version fetches data as 32 bit words, and *requires* v1 to be
- * 32 bit aligned, otherwise it will result either in a data abort, or
- * incorrect results (if ARM aligncheck is disabled). */
-static inline void vector_sub(int16_t* v1, int16_t* v2)
-{
-#if ORDER > 16
- int cnt = ORDER>>4;
-#endif
-
-#define SUBHALFREGS(dif, s1) /* Subtracts register */ \
- "sub r8 , " #dif ", " #s1 "\n" /* halves straight. */ \
- "and r8 , r8 , r9 \n" /* Needs r9 = 0x0000ffff, */ \
- "mov " #dif ", " #dif ", lsr #16 \n" /* clobbers r8. */ \
- "sub " #dif ", " #dif ", " #s1 ", lsr #16 \n" \
- "orr " #dif ", r8 , " #dif ", lsl #16 \n"
-
-#define SUBHALFXREGS(dif, s1, s2) /* Subtracts register */ \
- "sub " #s1 ", " #dif ", " #s1 ", lsr #16 \n" /* halves across. */ \
- "and " #s1 ", " #s1 ", r9 \n" /* Needs r9 = 0x0000ffff, */ \
- "rsb " #dif ", " #s2 ", " #dif ", lsr #16 \n" /* clobbers 's1'. */ \
- "orr " #dif ", " #s1 ", " #dif ", lsl #16 \n"
-
- asm volatile (
- "mov r9, #0xff \n"
- "orr r9, r9, #0xff00 \n"
- "tst %[v2], #2 \n"
- "beq 20f \n"
-
- "10: \n"
- "ldrh r4, [%[v2]], #2 \n"
- "mov r4, r4, lsl #16 \n"
- "1: \n"
- "ldmia %[v1], {r0-r3} \n"
- "ldmia %[v2]!, {r5-r8} \n"
- SUBHALFXREGS(r0, r4, r5)
- SUBHALFXREGS(r1, r5, r6)
- SUBHALFXREGS(r2, r6, r7)
- SUBHALFXREGS(r3, r7, r8)
- "stmia %[v1]!, {r0-r3} \n"
- "mov r4, r8 \n"
- "ldmia %[v1], {r0-r3} \n"
- "ldmia %[v2]!, {r5-r8} \n"
- SUBHALFXREGS(r0, r4, r5)
- SUBHALFXREGS(r1, r5, r6)
- SUBHALFXREGS(r2, r6, r7)
- SUBHALFXREGS(r3, r7, r8)
- "stmia %[v1]!, {r0-r3} \n"
-#if ORDER > 16
- "mov r4, r8 \n"
- "subs %[cnt], %[cnt], #1 \n"
- "bne 1b \n"
-#endif
- "b 99f \n"
-
- "20: \n"
- "1: \n"
- "ldmia %[v1], {r0-r3} \n"
- "ldmia %[v2]!, {r4-r7} \n"
- SUBHALFREGS(r0, r4)
- SUBHALFREGS(r1, r5)
- SUBHALFREGS(r2, r6)
- SUBHALFREGS(r3, r7)
- "stmia %[v1]!, {r0-r3} \n"
- "ldmia %[v1], {r0-r3} \n"
- "ldmia %[v2]!, {r4-r7} \n"
- SUBHALFREGS(r0, r4)
- SUBHALFREGS(r1, r5)
- SUBHALFREGS(r2, r6)
- SUBHALFREGS(r3, r7)
- "stmia %[v1]!, {r0-r3} \n"
-#if ORDER > 16
- "subs %[cnt], %[cnt], #1 \n"
- "bne 1b \n"
-#endif
-
- "99: \n"
- : /* outputs */
-#if ORDER > 16
- [cnt]"+r"(cnt),
-#endif
- [v1] "+r"(v1),
- [v2] "+r"(v2)
- : /* inputs */
- : /* clobbers */
- "r0", "r1", "r2", "r3", "r4", "r5",
- "r6", "r7", "r8", "r9", "memory"
- );
-}
-
-/* This version fetches data as 32 bit words, and *requires* v1 to be
- * 32 bit aligned, otherwise it will result either in a data abort, or
- * incorrect results (if ARM aligncheck is disabled). It is optimised
- * for ARM7TDMI. Using it for ARM9 or higher results in worse performance
- * than the C version. */
-static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
-{
- int res = 0;
-#if ORDER > 16
- int cnt = ORDER>>4;
-#endif
-
-#define MLABLOCK2(f1, f2) \
- "mov r8, " #f1 ", lsl #16 \n" \
- "mov r8, r8 , asr #16 \n" \
- "mov r9, " #f2 ", lsl #16 \n" \
- "mov r9, r9 , asr #16 \n" \
- "mla %[res], r9, r8, %[res] \n" \
- "mov r8, " #f1 ", asr #16 \n" \
- "mov r9, " #f2 ", asr #16 \n" \
- "mla %[res], r9, r8, %[res] \n"
-
-#define MLABLOCK2_U2(f1, f2) \
- "mov r8, " #f1 ", lsl #16 \n" \
- "mov r8, r8 , asr #16 \n" \
- "mla %[res], r9, r8, %[res] \n" \
- "mov r8, " #f1 ", asr #16 \n" \
- "mov r9, " #f2 ", lsl #16 \n" \
- "mov r9, r9 , asr #16 \n" \
- "mla %[res], r9, r8, %[res] \n" \
- "mov r9, " #f2 ", asr #16 \n"
-
- asm volatile (
- "tst %[v2], #2 \n"
- "beq 20f \n"
-
- "10: \n"
- "ldrsh r9, [%[v2]], #2 \n"
- "1: \n"
- "ldmia %[v1]!, {r0-r3} \n"
- "ldmia %[v2]!, {r4-r7} \n"
- MLABLOCK2_U2(r0, r4)
- MLABLOCK2_U2(r1, r5)
- MLABLOCK2_U2(r2, r6)
- MLABLOCK2_U2(r3, r7)
- "ldmia %[v1]!, {r0-r3} \n"
- "ldmia %[v2]!, {r4-r7} \n"
- MLABLOCK2_U2(r0, r4)
- MLABLOCK2_U2(r1, r5)
- MLABLOCK2_U2(r2, r6)
- MLABLOCK2_U2(r3, r7)
-#if ORDER > 16
- "subs %[cnt], %[cnt], #1 \n"
- "bne 1b \n"
-#endif
- "b 99f \n"
-
- "20: \n"
- "1: \n"
- "ldmia %[v1]!, {r0-r3} \n"
- "ldmia %[v2]!, {r4-r7} \n"
- MLABLOCK2(r0, r4)
- MLABLOCK2(r1, r5)
- MLABLOCK2(r2, r6)
- MLABLOCK2(r3, r7)
- "ldmia %[v1]!, {r0-r3} \n"
- "ldmia %[v2]!, {r4-r7} \n"
- MLABLOCK2(r0, r4)
- MLABLOCK2(r1, r5)
- MLABLOCK2(r2, r6)
- MLABLOCK2(r3, r7)
-#if ORDER > 16
- "subs %[cnt], %[cnt], #1 \n"
- "bne 1b \n"
-#endif
-
- "99: \n"
- : /* outputs */
-#if ORDER > 16
- [cnt]"+r"(cnt),
-#endif
- [v1] "+r"(v1),
- [v2] "+r"(v2),
- [res]"+r"(res)
- : /* inputs */
- : /* clobbers */
- "r0", "r1", "r2", "r3", "r4",
- "r5", "r6", "r7", "r8", "r9"
- );
- return res;
-}
diff --git a/apps/codecs/demac/libdemac/vector_math32_armv4.h b/apps/codecs/demac/libdemac/vector_math32_armv4.h
new file mode 100644
index 0000000..b729bd3
--- /dev/null
+++ b/apps/codecs/demac/libdemac/vector_math32_armv4.h
@@ -0,0 +1,210 @@
+/*
+
+libdemac - A Monkey's Audio decoder
+
+$Id$
+
+Copyright (C) Dave Chapman 2007
+
+ARMv4 vector math copyright (C) 2008 Jens Arnold
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+
+/* Element-wise, in-place addition of two vectors of 32 bit values:
+ * v1[i] += v2[i].
+ *
+ * The asm handles 16 elements when ORDER <= 16, 32 elements when ORDER is
+ * 17..32, and (ORDER >> 5) * 32 elements otherwise, so ORDER is expected to
+ * be 16 or a multiple of 32. */
+static inline void vector_add(int32_t* v1, int32_t* v2)
+{
+#if ORDER > 32
+ int cnt = ORDER>>5;
+#endif
+
+/* Adds 4 elements: loads 4 words from v1 (no writeback) and 4 from v2
+ * (advancing v2), adds them pairwise, then stores the sums to v1 and
+ * advances v1. */
+#define ADDBLOCK4 \
+ "ldmia %[v1], {r0-r3} \n" \
+ "ldmia %[v2]!, {r4-r7} \n" \
+ "add r0, r0, r4 \n" \
+ "add r1, r1, r5 \n" \
+ "add r2, r2, r6 \n" \
+ "add r3, r3, r7 \n" \
+ "stmia %[v1]!, {r0-r3} \n"
+
+ asm volatile (
+ "1: \n"
+ ADDBLOCK4
+ ADDBLOCK4
+ ADDBLOCK4
+ ADDBLOCK4
+#if ORDER > 16
+ ADDBLOCK4
+ ADDBLOCK4
+ ADDBLOCK4
+ ADDBLOCK4
+#endif
+#if ORDER > 32
+ "subs %[cnt], %[cnt], #1 \n"
+ "bne 1b \n"
+#endif
+ : /* outputs */
+#if ORDER > 32
+ [cnt]"+r"(cnt),
+#endif
+ [v1] "+r"(v1),
+ [v2] "+r"(v2)
+ : /* inputs */
+ : /* clobbers */
+ "r0", "r1", "r2", "r3", "r4",
+ "r5", "r6", "r7", "memory", /* "memory": the asm stores through v1 */
+ "cc" /* "subs" in the ORDER > 32 loop updates the condition flags */
+ );
+}
+
+/* Element-wise, in-place subtraction of two vectors of 32 bit values:
+ * v1[i] -= v2[i].
+ *
+ * The asm handles 16 elements when ORDER <= 16, 32 elements when ORDER is
+ * 17..32, and (ORDER >> 5) * 32 elements otherwise, so ORDER is expected to
+ * be 16 or a multiple of 32. */
+static inline void vector_sub(int32_t* v1, int32_t* v2)
+{
+#if ORDER > 32
+ int cnt = ORDER>>5;
+#endif
+
+/* Subtracts 4 elements: loads 4 words from v1 (no writeback) and 4 from v2
+ * (advancing v2), subtracts the v2 words from the v1 words, then stores the
+ * differences to v1 and advances v1. */
+#define SUBBLOCK4 \
+ "ldmia %[v1], {r0-r3} \n" \
+ "ldmia %[v2]!, {r4-r7} \n" \
+ "sub r0, r0, r4 \n" \
+ "sub r1, r1, r5 \n" \
+ "sub r2, r2, r6 \n" \
+ "sub r3, r3, r7 \n" \
+ "stmia %[v1]!, {r0-r3} \n"
+
+ asm volatile (
+ "1: \n"
+ SUBBLOCK4
+ SUBBLOCK4
+ SUBBLOCK4
+ SUBBLOCK4
+#if ORDER > 16
+ SUBBLOCK4
+ SUBBLOCK4
+ SUBBLOCK4
+ SUBBLOCK4
+#endif
+#if ORDER > 32
+ "subs %[cnt], %[cnt], #1 \n"
+ "bne 1b \n"
+#endif
+ : /* outputs */
+#if ORDER > 32
+ [cnt]"+r"(cnt),
+#endif
+ [v1] "+r"(v1),
+ [v2] "+r"(v2)
+ : /* inputs */
+ : /* clobbers */
+ "r0", "r1", "r2", "r3", "r4",
+ "r5", "r6", "r7", "memory", /* "memory": the asm stores through v1 */
+ "cc" /* "subs" in the ORDER > 32 loop updates the condition flags */
+ );
+}
+
+/* Returns the scalar (dot) product of two vectors of 32 bit values: the sum
+ * of v1[i] * v2[i], accumulated with "mla" in a single 32 bit register.
+ *
+ * ORDER <= 16: four MLABLOCK4 blocks, i.e. 16 elements.
+ * ORDER > 16: the loads from v1 and v2 are interleaved in uneven groups so
+ * that operands for the following multiplies are already in registers;
+ * one pass covers 32 elements, and for ORDER > 32 the pass is repeated
+ * ORDER >> 5 times.
+ *
+ * NOTE: for ORDER > 32 the last "ldmia" of each pass already fetches the
+ * first two v2 words of the next pass, so the final pass reads two words
+ * past v2[ORDER-1]. The values are not used, but the memory must be
+ * readable. */
+static inline int32_t scalarproduct(int32_t* v1, int32_t* v2)
+{
+ int res = 0;
+#if ORDER > 32
+ int cnt = ORDER>>5;
+#endif
+
+ asm volatile (
+#if ORDER > 16
+ "ldmia %[v2]!, {r6-r7} \n" /* pre-load v2[0], v2[1] */
+ "1: \n"
+ "ldmia %[v1]!, {r0,r1,r3-r5} \n"
+ "mla %[res], r6, r0, %[res] \n"
+ "mla %[res], r7, r1, %[res] \n"
+ "ldmia %[v2]!, {r0-r2,r6-r8} \n"
+ "mla %[res], r0, r3, %[res] \n"
+ "mla %[res], r1, r4, %[res] \n"
+ "mla %[res], r2, r5, %[res] \n"
+ "ldmia %[v1]!, {r0-r4} \n"
+ "mla %[res], r6, r0, %[res] \n"
+ "mla %[res], r7, r1, %[res] \n"
+ "mla %[res], r8, r2, %[res] \n"
+ "ldmia %[v2]!, {r0,r1,r6-r8} \n"
+ "mla %[res], r0, r3, %[res] \n"
+ "mla %[res], r1, r4, %[res] \n"
+ "ldmia %[v1]!, {r0-r5} \n"
+ "mla %[res], r6, r0, %[res] \n"
+ "mla %[res], r7, r1, %[res] \n"
+ "mla %[res], r8, r2, %[res] \n"
+ "ldmia %[v2]!, {r0-r2,r6,r7} \n"
+ "mla %[res], r0, r3, %[res] \n"
+ "mla %[res], r1, r4, %[res] \n"
+ "mla %[res], r2, r5, %[res] \n"
+ "ldmia %[v1]!, {r0,r1,r3-r5} \n"
+ "mla %[res], r6, r0, %[res] \n"
+ "mla %[res], r7, r1, %[res] \n"
+ "ldmia %[v2]!, {r0-r2,r6-r8} \n"
+ "mla %[res], r0, r3, %[res] \n"
+ "mla %[res], r1, r4, %[res] \n"
+ "mla %[res], r2, r5, %[res] \n"
+ "ldmia %[v1]!, {r0-r4} \n"
+ "mla %[res], r6, r0, %[res] \n"
+ "mla %[res], r7, r1, %[res] \n"
+ "mla %[res], r8, r2, %[res] \n"
+ "ldmia %[v2]!, {r0,r1,r6-r8} \n"
+ "mla %[res], r0, r3, %[res] \n"
+ "mla %[res], r1, r4, %[res] \n"
+ "ldmia %[v1]!, {r0-r5} \n"
+ "mla %[res], r6, r0, %[res] \n"
+ "mla %[res], r7, r1, %[res] \n"
+ "mla %[res], r8, r2, %[res] \n"
+#if ORDER > 32
+ "ldmia %[v2]!, {r0-r2,r6,r7} \n" /* r6, r7: first 2 words of next pass */
+#else
+ "ldmia %[v2]!, {r0-r2} \n"
+#endif
+ "mla %[res], r0, r3, %[res] \n"
+ "mla %[res], r1, r4, %[res] \n"
+ "mla %[res], r2, r5, %[res] \n"
+#if ORDER > 32
+ "subs %[cnt], %[cnt], #1 \n"
+ "bne 1b \n"
+#endif
+
+#else /* ORDER <= 16 */
+
+/* Multiply-accumulates 4 elements: loads 4 words from each vector
+ * (advancing both pointers) and adds the 4 products to res. */
+#define MLABLOCK4 \
+ "ldmia %[v1]!, {r0-r3} \n" \
+ "ldmia %[v2]!, {r4-r7} \n" \
+ "mla %[res], r4, r0, %[res] \n" \
+ "mla %[res], r5, r1, %[res] \n" \
+ "mla %[res], r6, r2, %[res] \n" \
+ "mla %[res], r7, r3, %[res] \n"
+
+ MLABLOCK4
+ MLABLOCK4
+ MLABLOCK4
+ MLABLOCK4
+#endif /* ORDER <= 16 */
+ : /* outputs */
+#if ORDER > 32
+ [cnt]"+r"(cnt),
+#endif
+ [v1] "+r"(v1),
+ [v2] "+r"(v2),
+ [res]"+r"(res)
+ : /* inputs */
+ : /* clobbers */
+ "r0", "r1", "r2", "r3",
+ "r4", "r5", "r6", "r7"
+#if ORDER > 16
+ ,"r8"
+#endif
+ /* "cc": "subs" in the ORDER > 32 loop updates the condition flags.
+ * "memory": the asm reads *v1 and *v2, which are not listed as
+ * operands, so pending stores must be completed before it runs. */
+ ,"cc", "memory"
+ );
+ return res;
+}
diff --git a/apps/codecs/demac/libdemac/vector_math16.h b/apps/codecs/demac/libdemac/vector_math_generic.h
index 5d82abe..7b61db7 100644
--- a/apps/codecs/demac/libdemac/vector_math16.h
+++ b/apps/codecs/demac/libdemac/vector_math_generic.h
@@ -2,7 +2,7 @@
libdemac - A Monkey's Audio decoder
-$Id:$
+$Id$
Copyright (C) Dave Chapman 2007
@@ -22,7 +22,9 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
*/
-static inline void vector_add(int16_t* v1, int16_t* v2)
+#include "demac_config.h"
+
+static inline void vector_add(filter_int* v1, filter_int* v2)
{
#if ORDER > 32
int order = (ORDER >> 5);
@@ -66,7 +68,7 @@ static inline void vector_add(int16_t* v1, int16_t* v2)
}
}
-static inline void vector_sub(int16_t* v1, int16_t* v2)
+static inline void vector_sub(filter_int* v1, filter_int* v2)
{
#if ORDER > 32
int order = (ORDER >> 5);
@@ -110,7 +112,7 @@ static inline void vector_sub(int16_t* v1, int16_t* v2)
}
}
-static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
+static inline int32_t scalarproduct(filter_int* v1, filter_int* v2)
{
int res = 0;