summaryrefslogtreecommitdiff
path: root/apps/codecs
diff options
context:
space:
mode:
authorAndree Buschmann <AndreeBuschmann@t-online.de>2008-05-17 21:26:34 +0000
committerAndree Buschmann <AndreeBuschmann@t-online.de>2008-05-17 21:26:34 +0000
commit18f13b149a4ce6d3b16c0b91de4d571d1860b66f (patch)
tree313f09dcd46b1e068d0abd0fb49de615d27495d5 /apps/codecs
parentc769cf586fee44c55382d2ef98ce9dbca51b6f39 (diff)
downloadrockbox-18f13b149a4ce6d3b16c0b91de4d571d1860b66f.zip
rockbox-18f13b149a4ce6d3b16c0b91de4d571d1860b66f.tar.gz
rockbox-18f13b149a4ce6d3b16c0b91de4d571d1860b66f.tar.bz2
rockbox-18f13b149a4ce6d3b16c0b91de4d571d1860b66f.tar.xz
Move ARM assembler of musepack synthesis filter to own file. Additionally add ICONST_ATTR to noise generator data.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@17562 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/codecs')
-rw-r--r--apps/codecs/libmusepack/SOURCES4
-rw-r--r--apps/codecs/libmusepack/math.h2
-rwxr-xr-xapps/codecs/libmusepack/mpc_config.h48
-rw-r--r--apps/codecs/libmusepack/synth_filter.c217
-rwxr-xr-xapps/codecs/libmusepack/synth_filter_arm.S155
5 files changed, 265 insertions, 161 deletions
diff --git a/apps/codecs/libmusepack/SOURCES b/apps/codecs/libmusepack/SOURCES
index 9c588e3..0de1143 100644
--- a/apps/codecs/libmusepack/SOURCES
+++ b/apps/codecs/libmusepack/SOURCES
@@ -5,4 +5,6 @@ mpc_decoder.c
requant.c
streaminfo.c
synth_filter.c
-
+#if defined(CPU_ARM)
+synth_filter_arm.S
+#endif
diff --git a/apps/codecs/libmusepack/math.h b/apps/codecs/libmusepack/math.h
index a015d45..e4c2ffc 100644
--- a/apps/codecs/libmusepack/math.h
+++ b/apps/codecs/libmusepack/math.h
@@ -38,7 +38,7 @@
#ifndef _mpcdec_math_h_
#define _mpcdec_math_h_
-#define MPC_FIXED_POINT
+#include "mpc_config.h"
#define MPC_FIXED_POINT_SHIFT 16
diff --git a/apps/codecs/libmusepack/mpc_config.h b/apps/codecs/libmusepack/mpc_config.h
new file mode 100755
index 0000000..6993775
--- /dev/null
+++ b/apps/codecs/libmusepack/mpc_config.h
@@ -0,0 +1,48 @@
+/***************************************************************************
+ * __________ __ ___.
+ * Open \______ \ ____ ____ | | _\_ |__ _______ ___
+ * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
+ * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
+ * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
+ * \/ \/ \/ \/ \/
+ * $Id$
+ *
+ * Copyright (C) 2008 by Andree Buschmann
+ *
+ * All files in this archive are subject to the GNU General Public License.
+ * See the file COPYING in the source tree root for full license agreement.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+#ifndef _mpc_config_h_
+#define _mpc_config_h_
+
+#include "config.h"
+
+/* choose fixed point or floating point */
+#define MPC_FIXED_POINT
+
+#ifndef MPC_FIXED_POINT
+#error FIXME, mpc will not with floating point now
+#endif
+
+/* choose speed vs. accuracy for MPC_FIXED_POINT
+ * speed-setting will increase decoding speed on ARM only (+20%), loss of accuracy
+ * equals about 5 dB SNR (15bit output precision) to not use the speed-optimization
+ * -> comment OPTIMIZE_FOR_SPEED here for desired target */
+#if defined(MPC_FIXED_POINT)
+ #if defined(CPU_COLDFIRE)
+ // do nothing
+ #elif defined(CPU_ARM)
+ #define OPTIMIZE_FOR_SPEED
+ #else
+ #define OPTIMIZE_FOR_SPEED
+ #endif
+#else
+ // do nothing
+#endif
+
+#endif
diff --git a/apps/codecs/libmusepack/synth_filter.c b/apps/codecs/libmusepack/synth_filter.c
index faf014c..d48b563 100644
--- a/apps/codecs/libmusepack/synth_filter.c
+++ b/apps/codecs/libmusepack/synth_filter.c
@@ -39,22 +39,6 @@
#include "musepack.h"
#include "internal.h"
-/* S E T T I N G S */
-// choose speed vs. accuracy for MPC_FIXED_POINT
-// speed-setting will increase decoding speed on ARM only (+20%), loss of accuracy equals about 5 dB SNR (15bit output precision)
-// to not use the speed-optimization -> comment OPTIMIZE_FOR_SPEED
-#if defined(MPC_FIXED_POINT)
- #if defined(CPU_COLDFIRE)
- // do nothing
- #elif defined(CPU_ARM)
- #define OPTIMIZE_FOR_SPEED
- #else
- #define OPTIMIZE_FOR_SPEED
- #endif
-#else
- // do nothing
-#endif
-
/* C O N S T A N T S */
#undef _
@@ -82,40 +66,40 @@
#endif
// Di_opt coefficients are +/- 2^17
-static const MPC_SAMPLE_FORMAT Di_opt [32] [16] ICONST_ATTR = {
- /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 */
- { _( 0), _( -29), _( 213), _( -459), _( 2037), _(-5153), _( 6574), _(-37489), _(75038), _(37489), _(6574), _( 5153), _(2037), _( 459), _(213), _(29) },
- { _( -1), _( -31), _( 218), _( -519), _( 2000), _(-5517), _( 5959), _(-39336), _(74992), _(35640), _(7134), _( 4788), _(2063), _( 401), _(208), _(26) },
- { _( -1), _( -35), _( 222), _( -581), _( 1952), _(-5879), _( 5288), _(-41176), _(74856), _(33791), _(7640), _( 4425), _(2080), _( 347), _(202), _(24) },
- { _( -1), _( -38), _( 225), _( -645), _( 1893), _(-6237), _( 4561), _(-43006), _(74630), _(31947), _(8092), _( 4063), _(2087), _( 294), _(196), _(21) },
- { _( -1), _( -41), _( 227), _( -711), _( 1822), _(-6589), _( 3776), _(-44821), _(74313), _(30112), _(8492), _( 3705), _(2085), _( 244), _(190), _(19) },
- { _( -1), _( -45), _( 228), _( -779), _( 1739), _(-6935), _( 2935), _(-46617), _(73908), _(28289), _(8840), _( 3351), _(2075), _( 197), _(183), _(17) },
- { _( -1), _( -49), _( 228), _( -848), _( 1644), _(-7271), _( 2037), _(-48390), _(73415), _(26482), _(9139), _( 3004), _(2057), _( 153), _(176), _(16) },
- { _( -2), _( -53), _( 227), _( -919), _( 1535), _(-7597), _( 1082), _(-50137), _(72835), _(24694), _(9389), _( 2663), _(2032), _( 111), _(169), _(14) },
- { _( -2), _( -58), _( 224), _( -991), _( 1414), _(-7910), _( 70), _(-51853), _(72169), _(22929), _(9592), _( 2330), _(2001), _( 72), _(161), _(13) },
- { _( -2), _( -63), _( 221), _(-1064), _( 1280), _(-8209), _( -998), _(-53534), _(71420), _(21189), _(9750), _( 2006), _(1962), _( 36), _(154), _(11) },
- { _( -2), _( -68), _( 215), _(-1137), _( 1131), _(-8491), _( -2122), _(-55178), _(70590), _(19478), _(9863), _( 1692), _(1919), _( 2), _(147), _(10) },
- { _( -3), _( -73), _( 208), _(-1210), _( 970), _(-8755), _( -3300), _(-56778), _(69679), _(17799), _(9935), _( 1388), _(1870), _( -29), _(139), _( 9) },
- { _( -3), _( -79), _( 200), _(-1283), _( 794), _(-8998), _( -4533), _(-58333), _(68692), _(16155), _(9966), _( 1095), _(1817), _( -57), _(132), _( 8) },
- { _( -4), _( -85), _( 189), _(-1356), _( 605), _(-9219), _( -5818), _(-59838), _(67629), _(14548), _(9959), _( 814), _(1759), _( -83), _(125), _( 7) },
- { _( -4), _( -91), _( 177), _(-1428), _( 402), _(-9416), _( -7154), _(-61289), _(66494), _(12980), _(9916), _( 545), _(1698), _(-106), _(117), _( 7) },
- { _( -5), _( -97), _( 163), _(-1498), _( 185), _(-9585), _( -8540), _(-62684), _(65290), _(11455), _(9838), _( 288), _(1634), _(-127), _(111), _( 6) },
- { _( -5), _(-104), _( 146), _(-1567), _( -45), _(-9727), _( -9975), _(-64019), _(64019), _( 9975), _(9727), _( 45), _(1567), _(-146), _(104), _( 5) },
- { _( -6), _(-111), _( 127), _(-1634), _( -288), _(-9838), _(-11455), _(-65290), _(62684), _( 8540), _(9585), _( -185), _(1498), _(-163), _( 97), _( 5) },
- { _( -7), _(-117), _( 106), _(-1698), _( -545), _(-9916), _(-12980), _(-66494), _(61289), _( 7154), _(9416), _( -402), _(1428), _(-177), _( 91), _( 4) },
- { _( -7), _(-125), _( 83), _(-1759), _( -814), _(-9959), _(-14548), _(-67629), _(59838), _( 5818), _(9219), _( -605), _(1356), _(-189), _( 85), _( 4) },
- { _( -8), _(-132), _( 57), _(-1817), _(-1095), _(-9966), _(-16155), _(-68692), _(58333), _( 4533), _(8998), _( -794), _(1283), _(-200), _( 79), _( 3) },
- { _( -9), _(-139), _( 29), _(-1870), _(-1388), _(-9935), _(-17799), _(-69679), _(56778), _( 3300), _(8755), _( -970), _(1210), _(-208), _( 73), _( 3) },
- { _(-10), _(-147), _( -2), _(-1919), _(-1692), _(-9863), _(-19478), _(-70590), _(55178), _( 2122), _(8491), _(-1131), _(1137), _(-215), _( 68), _( 2) },
- { _(-11), _(-154), _( -36), _(-1962), _(-2006), _(-9750), _(-21189), _(-71420), _(53534), _( 998), _(8209), _(-1280), _(1064), _(-221), _( 63), _( 2) },
- { _(-13), _(-161), _( -72), _(-2001), _(-2330), _(-9592), _(-22929), _(-72169), _(51853), _( -70), _(7910), _(-1414), _( 991), _(-224), _( 58), _( 2) },
- { _(-14), _(-169), _(-111), _(-2032), _(-2663), _(-9389), _(-24694), _(-72835), _(50137), _(-1082), _(7597), _(-1535), _( 919), _(-227), _( 53), _( 2) },
- { _(-16), _(-176), _(-153), _(-2057), _(-3004), _(-9139), _(-26482), _(-73415), _(48390), _(-2037), _(7271), _(-1644), _( 848), _(-228), _( 49), _( 1) },
- { _(-17), _(-183), _(-197), _(-2075), _(-3351), _(-8840), _(-28289), _(-73908), _(46617), _(-2935), _(6935), _(-1739), _( 779), _(-228), _( 45), _( 1) },
- { _(-19), _(-190), _(-244), _(-2085), _(-3705), _(-8492), _(-30112), _(-74313), _(44821), _(-3776), _(6589), _(-1822), _( 711), _(-227), _( 41), _( 1) },
- { _(-21), _(-196), _(-294), _(-2087), _(-4063), _(-8092), _(-31947), _(-74630), _(43006), _(-4561), _(6237), _(-1893), _( 645), _(-225), _( 38), _( 1) },
- { _(-24), _(-202), _(-347), _(-2080), _(-4425), _(-7640), _(-33791), _(-74856), _(41176), _(-5288), _(5879), _(-1952), _( 581), _(-222), _( 35), _( 1) },
- { _(-26), _(-208), _(-401), _(-2063), _(-4788), _(-7134), _(-35640), _(-74992), _(39336), _(-5959), _(5517), _(-2000), _( 519), _(-218), _( 31), _( 1) }
+static const MPC_SAMPLE_FORMAT Di_opt [512] ICONST_ATTR = { /* flattened window table: 32 rows x 16 coefficients, stored row after row */
+/* column index within a row (a row is the 16 coefficients consumed for one output sample by mpc_decoder_windowing_D): 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 */
+/* 0 */ _( 0), _( -29), _( 213), _( -459), _( 2037), _(-5153), _( 6574), _(-37489), _(75038), _(37489), _(6574), _( 5153), _(2037), _( 459), _(213), _(29),
+/* 1 */ _( -1), _( -31), _( 218), _( -519), _( 2000), _(-5517), _( 5959), _(-39336), _(74992), _(35640), _(7134), _( 4788), _(2063), _( 401), _(208), _(26),
+/* 2 */ _( -1), _( -35), _( 222), _( -581), _( 1952), _(-5879), _( 5288), _(-41176), _(74856), _(33791), _(7640), _( 4425), _(2080), _( 347), _(202), _(24),
+/* 3 */ _( -1), _( -38), _( 225), _( -645), _( 1893), _(-6237), _( 4561), _(-43006), _(74630), _(31947), _(8092), _( 4063), _(2087), _( 294), _(196), _(21),
+/* 4 */ _( -1), _( -41), _( 227), _( -711), _( 1822), _(-6589), _( 3776), _(-44821), _(74313), _(30112), _(8492), _( 3705), _(2085), _( 244), _(190), _(19),
+/* 5 */ _( -1), _( -45), _( 228), _( -779), _( 1739), _(-6935), _( 2935), _(-46617), _(73908), _(28289), _(8840), _( 3351), _(2075), _( 197), _(183), _(17),
+/* 6 */ _( -1), _( -49), _( 228), _( -848), _( 1644), _(-7271), _( 2037), _(-48390), _(73415), _(26482), _(9139), _( 3004), _(2057), _( 153), _(176), _(16),
+/* 7 */ _( -2), _( -53), _( 227), _( -919), _( 1535), _(-7597), _( 1082), _(-50137), _(72835), _(24694), _(9389), _( 2663), _(2032), _( 111), _(169), _(14),
+/* 8 */ _( -2), _( -58), _( 224), _( -991), _( 1414), _(-7910), _( 70), _(-51853), _(72169), _(22929), _(9592), _( 2330), _(2001), _( 72), _(161), _(13),
+/* 9 */ _( -2), _( -63), _( 221), _(-1064), _( 1280), _(-8209), _( -998), _(-53534), _(71420), _(21189), _(9750), _( 2006), _(1962), _( 36), _(154), _(11),
+/* 10 */ _( -2), _( -68), _( 215), _(-1137), _( 1131), _(-8491), _( -2122), _(-55178), _(70590), _(19478), _(9863), _( 1692), _(1919), _( 2), _(147), _(10),
+/* 11 */ _( -3), _( -73), _( 208), _(-1210), _( 970), _(-8755), _( -3300), _(-56778), _(69679), _(17799), _(9935), _( 1388), _(1870), _( -29), _(139), _( 9),
+/* 12 */ _( -3), _( -79), _( 200), _(-1283), _( 794), _(-8998), _( -4533), _(-58333), _(68692), _(16155), _(9966), _( 1095), _(1817), _( -57), _(132), _( 8),
+/* 13 */ _( -4), _( -85), _( 189), _(-1356), _( 605), _(-9219), _( -5818), _(-59838), _(67629), _(14548), _(9959), _( 814), _(1759), _( -83), _(125), _( 7),
+/* 14 */ _( -4), _( -91), _( 177), _(-1428), _( 402), _(-9416), _( -7154), _(-61289), _(66494), _(12980), _(9916), _( 545), _(1698), _(-106), _(117), _( 7),
+/* 15 */ _( -5), _( -97), _( 163), _(-1498), _( 185), _(-9585), _( -8540), _(-62684), _(65290), _(11455), _(9838), _( 288), _(1634), _(-127), _(111), _( 6),
+/* 16 */ _( -5), _(-104), _( 146), _(-1567), _( -45), _(-9727), _( -9975), _(-64019), _(64019), _( 9975), _(9727), _( 45), _(1567), _(-146), _(104), _( 5),
+/* 17 */ _( -6), _(-111), _( 127), _(-1634), _( -288), _(-9838), _(-11455), _(-65290), _(62684), _( 8540), _(9585), _( -185), _(1498), _(-163), _( 97), _( 5),
+/* 18 */ _( -7), _(-117), _( 106), _(-1698), _( -545), _(-9916), _(-12980), _(-66494), _(61289), _( 7154), _(9416), _( -402), _(1428), _(-177), _( 91), _( 4),
+/* 19 */ _( -7), _(-125), _( 83), _(-1759), _( -814), _(-9959), _(-14548), _(-67629), _(59838), _( 5818), _(9219), _( -605), _(1356), _(-189), _( 85), _( 4),
+/* 20 */ _( -8), _(-132), _( 57), _(-1817), _(-1095), _(-9966), _(-16155), _(-68692), _(58333), _( 4533), _(8998), _( -794), _(1283), _(-200), _( 79), _( 3),
+/* 21 */ _( -9), _(-139), _( 29), _(-1870), _(-1388), _(-9935), _(-17799), _(-69679), _(56778), _( 3300), _(8755), _( -970), _(1210), _(-208), _( 73), _( 3),
+/* 22 */ _(-10), _(-147), _( -2), _(-1919), _(-1692), _(-9863), _(-19478), _(-70590), _(55178), _( 2122), _(8491), _(-1131), _(1137), _(-215), _( 68), _( 2),
+/* 23 */ _(-11), _(-154), _( -36), _(-1962), _(-2006), _(-9750), _(-21189), _(-71420), _(53534), _( 998), _(8209), _(-1280), _(1064), _(-221), _( 63), _( 2),
+/* 24 */ _(-13), _(-161), _( -72), _(-2001), _(-2330), _(-9592), _(-22929), _(-72169), _(51853), _( -70), _(7910), _(-1414), _( 991), _(-224), _( 58), _( 2),
+/* 25 */ _(-14), _(-169), _(-111), _(-2032), _(-2663), _(-9389), _(-24694), _(-72835), _(50137), _(-1082), _(7597), _(-1535), _( 919), _(-227), _( 53), _( 2),
+/* 26 */ _(-16), _(-176), _(-153), _(-2057), _(-3004), _(-9139), _(-26482), _(-73415), _(48390), _(-2037), _(7271), _(-1644), _( 848), _(-228), _( 49), _( 1),
+/* 27 */ _(-17), _(-183), _(-197), _(-2075), _(-3351), _(-8840), _(-28289), _(-73908), _(46617), _(-2935), _(6935), _(-1739), _( 779), _(-228), _( 45), _( 1),
+/* 28 */ _(-19), _(-190), _(-244), _(-2085), _(-3705), _(-8492), _(-30112), _(-74313), _(44821), _(-3776), _(6589), _(-1822), _( 711), _(-227), _( 41), _( 1),
+/* 29 */ _(-21), _(-196), _(-294), _(-2087), _(-4063), _(-8092), _(-31947), _(-74630), _(43006), _(-4561), _(6237), _(-1893), _( 645), _(-225), _( 38), _( 1),
+/* 30 */ _(-24), _(-202), _(-347), _(-2080), _(-4425), _(-7640), _(-33791), _(-74856), _(41176), _(-5288), _(5879), _(-1952), _( 581), _(-222), _( 35), _( 1),
+/* 31 */ _(-26), _(-208), _(-401), _(-2063), _(-4788), _(-7134), _(-35640), _(-74992), _(39336), _(-5959), _(5517), _(-2000), _( 519), _(-218), _( 31), _( 1)
};
#undef _
@@ -457,69 +441,30 @@ mpc_calculate_new_V ( const MPC_SAMPLE_FORMAT * Sample, MPC_SAMPLE_FORMAT * V )
// total: 111 adds, 107 subs, 80 muls, 80 shifts
}
+#if defined(CPU_ARM)
+extern void
+mpc_decoder_windowing_D(MPC_SAMPLE_FORMAT * Data,
+ const MPC_SAMPLE_FORMAT * V,
+ const MPC_SAMPLE_FORMAT * D);
+#else
static void
-mpc_decoder_windowing_D(MPC_SAMPLE_FORMAT * Data, const MPC_SAMPLE_FORMAT * V)
+mpc_decoder_windowing_D(MPC_SAMPLE_FORMAT * Data,
+ const MPC_SAMPLE_FORMAT * V,
+ const MPC_SAMPLE_FORMAT * D)
{
- const MPC_SAMPLE_FORMAT *D = (const MPC_SAMPLE_FORMAT *) &Di_opt;
mpc_int32_t k;
#if defined(OPTIMIZE_FOR_SPEED)
- #if defined(CPU_ARM)
- // 32=32x32-multiply assembler for ARM
- for ( k = 0; k < 32; k++, V++ )
- {
- asm volatile (
- "ldmia %[D]!, { r0-r7 } \n\t"
- "ldr r8, [%[V]] \n\t"
- "mul r9, r0, r8 \n\t"
- "ldr r8, [%[V], #96*4] \n\t"
- "mla r9, r1, r8, r9 \n\t"
- "ldr r8, [%[V], #128*4] \n\t"
- "mla r9, r2, r8, r9 \n\t"
- "ldr r8, [%[V], #224*4] \n\t"
- "mla r9, r3, r8, r9 \n\t"
- "ldr r8, [%[V], #256*4] \n\t"
- "mla r9, r4, r8, r9 \n\t"
- "ldr r8, [%[V], #352*4] \n\t"
- "mla r9, r5, r8, r9 \n\t"
- "ldr r8, [%[V], #384*4] \n\t"
- "mla r9, r6, r8, r9 \n\t"
- "ldr r8, [%[V], #480*4] \n\t"
- "mla r9, r7, r8, r9 \n\t"
- "ldmia %[D]!, { r0-r7 } \n\t"
- "ldr r8, [%[V], #512*4] \n\t"
- "mla r9, r0, r8, r9 \n\t"
- "ldr r8, [%[V], #608*4] \n\t"
- "mla r9, r1, r8, r9 \n\t"
- "ldr r8, [%[V], #640*4] \n\t"
- "mla r9, r2, r8, r9 \n\t"
- "ldr r8, [%[V], #736*4] \n\t"
- "mla r9, r3, r8, r9 \n\t"
- "ldr r8, [%[V], #768*4] \n\t"
- "mla r9, r4, r8, r9 \n\t"
- "ldr r8, [%[V], #864*4] \n\t"
- "mla r9, r5, r8, r9 \n\t"
- "ldr r8, [%[V], #896*4] \n\t"
- "mla r9, r6, r8, r9 \n\t"
- "ldr r8, [%[V], #992*4] \n\t"
- "mla r9, r7, r8, r9 \n\t"
- "str r9, [%[Data]], #4 \n"
- : [Data] "+r" (Data), [D] "+r" (D)
- : [V] "r" (V)
- : "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9");
- }
- #else
// 32=32x32-multiply (FIXED_POINT)
for ( k = 0; k < 32; k++, D += 16, V++ )
{
*Data = V[ 0]*D[ 0] + V[ 96]*D[ 1] + V[128]*D[ 2] + V[224]*D[ 3]
- + V[256]*D[ 4] + V[352]*D[ 5] + V[384]*D[ 6] + V[480]*D[ 7]
- + V[512]*D[ 8] + V[608]*D[ 9] + V[640]*D[10] + V[736]*D[11]
- + V[768]*D[12] + V[864]*D[13] + V[896]*D[14] + V[992]*D[15];
+ + V[256]*D[ 4] + V[352]*D[ 5] + V[384]*D[ 6] + V[480]*D[ 7]
+ + V[512]*D[ 8] + V[608]*D[ 9] + V[640]*D[10] + V[736]*D[11]
+ + V[768]*D[12] + V[864]*D[13] + V[896]*D[14] + V[992]*D[15];
Data += 1;
- // total: 16 muls, 15 adds
+ // total: 32 * (16 muls, 15 adds)
}
- #endif
#else
#if defined(CPU_COLDFIRE)
// 64=32x32-multiply assembler for Coldfire
@@ -553,71 +498,25 @@ mpc_decoder_windowing_D(MPC_SAMPLE_FORMAT * Data, const MPC_SAMPLE_FORMAT * V)
: [V] "a" (V), [D] "a" (D)
: "d0", "d1", "d2", "d3", "a5");
}
- #elif defined(CPU_ARM)
- // 64=32x32-multiply assembler for ARM
- for ( k = 0; k < 32; k++, V++ )
- {
- asm volatile (
- "ldmia %[D]!, { r0-r3 } \n\t"
- "ldr r4, [%[V]] \n\t"
- "smull r5, r6, r0, r4 \n\t"
- "ldr r4, [%[V], #96*4] \n\t"
- "smlal r5, r6, r1, r4 \n\t"
- "ldr r4, [%[V], #128*4] \n\t"
- "smlal r5, r6, r2, r4 \n\t"
- "ldr r4, [%[V], #224*4] \n\t"
- "smlal r5, r6, r3, r4 \n\t"
-
- "ldmia %[D]!, { r0-r3 } \n\t"
- "ldr r4, [%[V], #256*4] \n\t"
- "smlal r5, r6, r0, r4 \n\t"
- "ldr r4, [%[V], #352*4] \n\t"
- "smlal r5, r6, r1, r4 \n\t"
- "ldr r4, [%[V], #384*4] \n\t"
- "smlal r5, r6, r2, r4 \n\t"
- "ldr r4, [%[V], #480*4] \n\t"
- "smlal r5, r6, r3, r4 \n\t"
-
- "ldmia %[D]!, { r0-r3 } \n\t"
- "ldr r4, [%[V], #512*4] \n\t"
- "smlal r5, r6, r0, r4 \n\t"
- "ldr r4, [%[V], #608*4] \n\t"
- "smlal r5, r6, r1, r4 \n\t"
- "ldr r4, [%[V], #640*4] \n\t"
- "smlal r5, r6, r2, r4 \n\t"
- "ldr r4, [%[V], #736*4] \n\t"
- "smlal r5, r6, r3, r4 \n\t"
-
- "ldmia %[D]!, { r0-r3 } \n\t"
- "ldr r4, [%[V], #768*4] \n\t"
- "smlal r5, r6, r0, r4 \n\t"
- "ldr r4, [%[V], #864*4] \n\t"
- "smlal r5, r6, r1, r4 \n\t"
- "ldr r4, [%[V], #896*4] \n\t"
- "smlal r5, r6, r2, r4 \n\t"
- "ldr r4, [%[V], #992*4] \n\t"
- "smlal r5, r6, r3, r4 \n\t"
- "mov r4, r6, lsl #1 \n\t"
- "orr r4, r4, r5, lsr #31\n\t"
- "str r4, [%[Data]], #4 \n"
- : [Data] "+r" (Data), [D] "+r" (D)
- : [V] "r" (V)
- : "r0", "r1", "r2", "r3", "r4", "r5", "r6");
- }
#else
// 64=64x64-multiply (FIXED_POINT) or float=float*float (!FIXED_POINT) in C
for ( k = 0; k < 32; k++, D += 16, V++ )
{
- *Data = MPC_MULTIPLY_EX(V[ 0],D[ 0],31) + MPC_MULTIPLY_EX(V[ 96],D[ 1],31) + MPC_MULTIPLY_EX(V[128],D[ 2],31) + MPC_MULTIPLY_EX(V[224],D[ 3],31)
- + MPC_MULTIPLY_EX(V[256],D[ 4],31) + MPC_MULTIPLY_EX(V[352],D[ 5],31) + MPC_MULTIPLY_EX(V[384],D[ 6],31) + MPC_MULTIPLY_EX(V[480],D[ 7],31)
- + MPC_MULTIPLY_EX(V[512],D[ 8],31) + MPC_MULTIPLY_EX(V[608],D[ 9],31) + MPC_MULTIPLY_EX(V[640],D[10],31) + MPC_MULTIPLY_EX(V[736],D[11],31)
- + MPC_MULTIPLY_EX(V[768],D[12],31) + MPC_MULTIPLY_EX(V[864],D[13],31) + MPC_MULTIPLY_EX(V[896],D[14],31) + MPC_MULTIPLY_EX(V[992],D[15],31);
+ *Data = MPC_MULTIPLY_EX(V[ 0],D[ 0],31) + MPC_MULTIPLY_EX(V[ 96],D[ 1],31)
+ + MPC_MULTIPLY_EX(V[128],D[ 2],31) + MPC_MULTIPLY_EX(V[224],D[ 3],31)
+ + MPC_MULTIPLY_EX(V[256],D[ 4],31) + MPC_MULTIPLY_EX(V[352],D[ 5],31)
+ + MPC_MULTIPLY_EX(V[384],D[ 6],31) + MPC_MULTIPLY_EX(V[480],D[ 7],31)
+ + MPC_MULTIPLY_EX(V[512],D[ 8],31) + MPC_MULTIPLY_EX(V[608],D[ 9],31)
+ + MPC_MULTIPLY_EX(V[640],D[10],31) + MPC_MULTIPLY_EX(V[736],D[11],31)
+ + MPC_MULTIPLY_EX(V[768],D[12],31) + MPC_MULTIPLY_EX(V[864],D[13],31)
+ + MPC_MULTIPLY_EX(V[896],D[14],31) + MPC_MULTIPLY_EX(V[992],D[15],31);
Data += 1;
// total: 16 muls, 15 adds, 16 shifts
}
#endif
#endif
}
+#endif /* CPU_ARM */
static void
mpc_full_synthesis_filter(MPC_SAMPLE_FORMAT *OutData, MPC_SAMPLE_FORMAT *V, const MPC_SAMPLE_FORMAT *Y)
@@ -630,7 +529,7 @@ mpc_full_synthesis_filter(MPC_SAMPLE_FORMAT *OutData, MPC_SAMPLE_FORMAT *V, cons
{
V -= 64;
mpc_calculate_new_V ( Y, V );
- mpc_decoder_windowing_D( OutData, V);
+ mpc_decoder_windowing_D( OutData, V, Di_opt );
}
}
}
@@ -661,7 +560,7 @@ mpc_decoder_synthese_filter_float(mpc_decoder *d, MPC_SAMPLE_FORMAT *OutData)
/* */
/*******************************************/
-static const unsigned char Parity [256] = { // parity
+static const unsigned char Parity [256] ICONST_ATTR = { // parity
0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,
1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,
1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,
diff --git a/apps/codecs/libmusepack/synth_filter_arm.S b/apps/codecs/libmusepack/synth_filter_arm.S
new file mode 100755
index 0000000..ce668e8
--- /dev/null
+++ b/apps/codecs/libmusepack/synth_filter_arm.S
@@ -0,0 +1,155 @@
+/***************************************************************************
+ * __________ __ ___.
+ * Open \______ \ ____ ____ | | _\_ |__ _______ ___
+ * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
+ * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
+ * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
+ * \/ \/ \/ \/ \/
+ * $Id$
+ *
+ * Copyright (C) 2008 by Andree Buschmann
+ *
+ * All files in this archive are subject to the GNU General Public License.
+ * See the file COPYING in the source tree root for full license agreement.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+#include "mpc_config.h"
+
+ .section .text, "ax", %progbits
+
+/****************************************************************************
+ * void mpc_decoder_windowing_D(...)
+ *
+ * 2nd step within synthesis filter. Does the dewindowing.
+ * 32=32x32 multiplies (OPTIMIZE_FOR_SPEED)
+ * Uses pre-shifted V[] and D[] values.
+ ****************************************************************************/
+#if defined(OPTIMIZE_FOR_SPEED)
+ .align 2
+ .global mpc_decoder_windowing_D
+ .type mpc_decoder_windowing_D, %function
+mpc_decoder_windowing_D:
+ /* r0 = Data[] (output pointer, post-incremented by one word per iteration) */
+ /* r1 = V[] (input pointer, advanced by one word per iteration) */
+ /* r2 = D[] (window coefficients, 16 words consumed per iteration) */
+ /* lr = counter (32 iterations) */
+
+ stmfd sp!, {r4-r12, lr} /* save the registers used below and the return address */
+
+ mov lr, #32 /* loop counter: one pass per output word */
+.loop32:
+ ldmia r2!, { r3-r10 } /* load first 8 window coefficients */
+ ldr r11, [r1] /* 0 */
+ mul r12, r3, r11 /* start the 32-bit accumulator in r12 */
+ ldr r11, [r1, #96*4] /* 1 */
+ mla r12, r4, r11, r12 /* r12 += coefficient * V value (same for all following mla) */
+ ldr r11, [r1, #128*4] /* 2 */
+ mla r12, r5, r11, r12
+ ldr r11, [r1, #224*4] /* 3 */
+ mla r12, r6, r11, r12
+ ldr r11, [r1, #256*4] /* 4 */
+ mla r12, r7, r11, r12
+ ldr r11, [r1, #352*4] /* 5 */
+ mla r12, r8, r11, r12
+ ldr r11, [r1, #384*4] /* 6 */
+ mla r12, r9, r11, r12
+ ldr r11, [r1, #480*4] /* 7 */
+ mla r12, r10, r11, r12
+ ldmia r2!, { r3-r10 } /* load last 8 window coefficients */
+ ldr r11, [r1, #512*4] /* 8 */
+ mla r12, r3, r11, r12
+ ldr r11, [r1, #608*4] /* 9 */
+ mla r12, r4, r11, r12
+ ldr r11, [r1, #640*4] /* 10 */
+ mla r12, r5, r11, r12
+ ldr r11, [r1, #736*4] /* 11 */
+ mla r12, r6, r11, r12
+ ldr r11, [r1, #768*4] /* 12 */
+ mla r12, r7, r11, r12
+ ldr r11, [r1, #864*4] /* 13 */
+ mla r12, r8, r11, r12
+ ldr r11, [r1, #896*4] /* 14 */
+ mla r12, r9, r11, r12
+ ldr r11, [r1, #992*4] /* 15 */
+ mla r12, r10, r11, r12
+ str r12, [r0], #4 /* store Data */
+ add r1, r1, #4 /* V++ */
+
+ subs lr, lr, #1 /* one output word done; sets flags for the branch */
+ bgt .loop32 /* repeat while the counter is still > 0 */
+
+ ldmfd sp!, {r4-r12, pc} /* restore registers; loading pc returns to the caller */
+.mpc_dewindowing_end:
+ .size mpc_decoder_windowing_D,.mpc_dewindowing_end-mpc_decoder_windowing_D
+#else
+/****************************************************************************
+ * void mpc_decoder_windowing_D(...)
+ *
+ * 2nd step within synthesis filter. Does the dewindowing.
+ * 64=32x32 multiplies
+ * Drops lo-part of 64bit multiply results and will therefore lose 1 bit
+ * accuracy. The decoder output is binary identical as this imprecision is
+ * far below the output's 16bit resolution.
+ ****************************************************************************/
+ .align 2
+ .global mpc_decoder_windowing_D
+ .type mpc_decoder_windowing_D, %function
+mpc_decoder_windowing_D:
+ /* r0 = Data[] (output pointer, post-incremented by one word per iteration) */
+ /* r1 = V[] (input pointer, advanced by one word per iteration) */
+ /* r2 = D[] (window coefficients, 16 words consumed per iteration) */
+ /* lr = counter (32 iterations) */
+
+ stmfd sp!, {r4-r12, lr} /* save the registers used below and the return address */
+
+ mov lr, #32 /* loop counter: one pass per output word */
+.loop32:
+ ldmia r2!, { r3-r10 } /* load first 8 window coefficients */
+ ldr r11, [r1] /* 0 */
+ smull r11, r12, r3, r11 /* r12:r11 = coefficient * V value (64-bit product) */
+ ldr r11, [r1, #96*4] /* 1 */ /* note: this load replaces the low result word in r11 */
+ smlal r11, r12, r4, r11 /* r12:r11 += coefficient * V value; r11 is both the V operand and the low word */
+ ldr r11, [r1, #128*4] /* 2 */
+ smlal r11, r12, r5, r11
+ ldr r11, [r1, #224*4] /* 3 */
+ smlal r11, r12, r6, r11
+ ldr r11, [r1, #256*4] /* 4 */
+ smlal r11, r12, r7, r11
+ ldr r11, [r1, #352*4] /* 5 */
+ smlal r11, r12, r8, r11
+ ldr r11, [r1, #384*4] /* 6 */
+ smlal r11, r12, r9, r11
+ ldr r11, [r1, #480*4] /* 7 */
+ smlal r11, r12, r10, r11
+ ldmia r2!, { r3-r10 } /* load last 8 window coefficients */
+ ldr r11, [r1, #512*4] /* 8 */
+ smlal r11, r12, r3, r11
+ ldr r11, [r1, #608*4] /* 9 */
+ smlal r11, r12, r4, r11
+ ldr r11, [r1, #640*4] /* 10 */
+ smlal r11, r12, r5, r11
+ ldr r11, [r1, #736*4] /* 11 */
+ smlal r11, r12, r6, r11
+ ldr r11, [r1, #768*4] /* 12 */
+ smlal r11, r12, r7, r11
+ ldr r11, [r1, #864*4] /* 13 */
+ smlal r11, r12, r8, r11
+ ldr r11, [r1, #896*4] /* 14 */
+ smlal r11, r12, r9, r11
+ ldr r11, [r1, #992*4] /* 15 */
+ smlal r11, r12, r10, r11
+ mov r4, r12, lsl #1 /* get result from hi-part; r4 is free here, it is reloaded by the ldmia at the loop top */
+ str r4, [r0], #4 /* store Data */
+ add r1, r1, #4 /* V++ */
+
+ subs lr, lr, #1 /* one output word done; sets flags for the branch */
+ bgt .loop32 /* repeat while the counter is still > 0 */
+
+ ldmfd sp!, {r4-r12, pc} /* restore registers; loading pc returns to the caller */
+.mpc_dewindowing_end:
+ .size mpc_decoder_windowing_D,.mpc_dewindowing_end-mpc_decoder_windowing_D
+#endif