diff options
| author | Andree Buschmann <AndreeBuschmann@t-online.de> | 2008-05-17 21:26:34 +0000 |
|---|---|---|
| committer | Andree Buschmann <AndreeBuschmann@t-online.de> | 2008-05-17 21:26:34 +0000 |
| commit | 18f13b149a4ce6d3b16c0b91de4d571d1860b66f (patch) | |
| tree | 313f09dcd46b1e068d0abd0fb49de615d27495d5 /apps/codecs | |
| parent | c769cf586fee44c55382d2ef98ce9dbca51b6f39 (diff) | |
| download | rockbox-18f13b149a4ce6d3b16c0b91de4d571d1860b66f.zip rockbox-18f13b149a4ce6d3b16c0b91de4d571d1860b66f.tar.gz rockbox-18f13b149a4ce6d3b16c0b91de4d571d1860b66f.tar.bz2 rockbox-18f13b149a4ce6d3b16c0b91de4d571d1860b66f.tar.xz | |
Move ARM assembler of musepack synthesis filter to own file. Additionally add ICONST_ATTR to noise generator data.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@17562 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/codecs')
| -rw-r--r-- | apps/codecs/libmusepack/SOURCES | 4 | ||||
| -rw-r--r-- | apps/codecs/libmusepack/math.h | 2 | ||||
| -rwxr-xr-x | apps/codecs/libmusepack/mpc_config.h | 48 | ||||
| -rw-r--r-- | apps/codecs/libmusepack/synth_filter.c | 217 | ||||
| -rwxr-xr-x | apps/codecs/libmusepack/synth_filter_arm.S | 155 |
5 files changed, 265 insertions, 161 deletions
diff --git a/apps/codecs/libmusepack/SOURCES b/apps/codecs/libmusepack/SOURCES index 9c588e3..0de1143 100644 --- a/apps/codecs/libmusepack/SOURCES +++ b/apps/codecs/libmusepack/SOURCES @@ -5,4 +5,6 @@ mpc_decoder.c requant.c streaminfo.c synth_filter.c - +#if defined(CPU_ARM) +synth_filter_arm.S +#endif diff --git a/apps/codecs/libmusepack/math.h b/apps/codecs/libmusepack/math.h index a015d45..e4c2ffc 100644 --- a/apps/codecs/libmusepack/math.h +++ b/apps/codecs/libmusepack/math.h @@ -38,7 +38,7 @@ #ifndef _mpcdec_math_h_ #define _mpcdec_math_h_ -#define MPC_FIXED_POINT +#include "mpc_config.h" #define MPC_FIXED_POINT_SHIFT 16 diff --git a/apps/codecs/libmusepack/mpc_config.h b/apps/codecs/libmusepack/mpc_config.h new file mode 100755 index 0000000..6993775 --- /dev/null +++ b/apps/codecs/libmusepack/mpc_config.h @@ -0,0 +1,48 @@ +/***************************************************************************
+ * __________ __ ___.
+ * Open \______ \ ____ ____ | | _\_ |__ _______ ___
+ * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
+ * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
+ * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
+ * \/ \/ \/ \/ \/
+ * $Id$
+ *
+ * Copyright (C) 2008 by Andree Buschmann
+ *
+ * All files in this archive are subject to the GNU General Public License.
+ * See the file COPYING in the source tree root for full license agreement.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+#ifndef _mpc_config_h_
+#define _mpc_config_h_
+
+#include "config.h"
+
+/* choose fixed point or floating point */
+#define MPC_FIXED_POINT
+
+#ifndef MPC_FIXED_POINT
+#error FIXME, mpc will not with floating point now
+#endif
+
+/* choose speed vs. accuracy for MPC_FIXED_POINT
+ * speed-setting will increase decoding speed on ARM only (+20%), loss of accuracy
+ * equals about 5 dB SNR (15bit output precision) to not use the speed-optimization
+ * -> comment OPTIMIZE_FOR_SPEED here for desired target */
+#if defined(MPC_FIXED_POINT)
+ #if defined(CPU_COLDFIRE)
+ // do nothing
+ #elif defined(CPU_ARM)
+ #define OPTIMIZE_FOR_SPEED
+ #else
+ #define OPTIMIZE_FOR_SPEED
+ #endif
+#else
+ // do nothing
+#endif
+
+#endif
diff --git a/apps/codecs/libmusepack/synth_filter.c b/apps/codecs/libmusepack/synth_filter.c index faf014c..d48b563 100644 --- a/apps/codecs/libmusepack/synth_filter.c +++ b/apps/codecs/libmusepack/synth_filter.c @@ -39,22 +39,6 @@ #include "musepack.h" #include "internal.h" -/* S E T T I N G S */ -// choose speed vs. accuracy for MPC_FIXED_POINT -// speed-setting will increase decoding speed on ARM only (+20%), loss of accuracy equals about 5 dB SNR (15bit output precision) -// to not use the speed-optimization -> comment OPTIMIZE_FOR_SPEED -#if defined(MPC_FIXED_POINT) - #if defined(CPU_COLDFIRE) - // do nothing - #elif defined(CPU_ARM) - #define OPTIMIZE_FOR_SPEED - #else - #define OPTIMIZE_FOR_SPEED - #endif -#else - // do nothing -#endif - /* C O N S T A N T S */ #undef _ @@ -82,40 +66,40 @@ #endif // Di_opt coefficients are +/- 2^17 -static const MPC_SAMPLE_FORMAT Di_opt [32] [16] ICONST_ATTR = { - /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 */ - { _( 0), _( -29), _( 213), _( -459), _( 2037), _(-5153), _( 6574), _(-37489), _(75038), _(37489), _(6574), _( 5153), _(2037), _( 459), _(213), _(29) }, - { _( -1), _( -31), _( 218), _( -519), _( 2000), _(-5517), _( 5959), _(-39336), _(74992), _(35640), _(7134), _( 4788), _(2063), _( 401), _(208), _(26) }, - { _( -1), _( -35), _( 222), _( -581), _( 1952), _(-5879), _( 5288), _(-41176), _(74856), _(33791), _(7640), _( 4425), _(2080), _( 347), _(202), _(24) }, - { _( -1), _( -38), _( 225), _( -645), _( 1893), _(-6237), _( 4561), _(-43006), _(74630), _(31947), _(8092), _( 4063), _(2087), _( 294), _(196), _(21) }, - { _( -1), _( -41), _( 227), _( -711), _( 1822), _(-6589), _( 3776), _(-44821), _(74313), _(30112), _(8492), _( 3705), _(2085), _( 244), _(190), _(19) }, - { _( -1), _( -45), _( 228), _( -779), _( 1739), _(-6935), _( 2935), _(-46617), _(73908), _(28289), _(8840), _( 3351), _(2075), _( 197), _(183), _(17) }, - { _( -1), _( -49), _( 228), _( -848), _( 1644), _(-7271), _( 2037), _(-48390), _(73415), _(26482), 
_(9139), _( 3004), _(2057), _( 153), _(176), _(16) }, - { _( -2), _( -53), _( 227), _( -919), _( 1535), _(-7597), _( 1082), _(-50137), _(72835), _(24694), _(9389), _( 2663), _(2032), _( 111), _(169), _(14) }, - { _( -2), _( -58), _( 224), _( -991), _( 1414), _(-7910), _( 70), _(-51853), _(72169), _(22929), _(9592), _( 2330), _(2001), _( 72), _(161), _(13) }, - { _( -2), _( -63), _( 221), _(-1064), _( 1280), _(-8209), _( -998), _(-53534), _(71420), _(21189), _(9750), _( 2006), _(1962), _( 36), _(154), _(11) }, - { _( -2), _( -68), _( 215), _(-1137), _( 1131), _(-8491), _( -2122), _(-55178), _(70590), _(19478), _(9863), _( 1692), _(1919), _( 2), _(147), _(10) }, - { _( -3), _( -73), _( 208), _(-1210), _( 970), _(-8755), _( -3300), _(-56778), _(69679), _(17799), _(9935), _( 1388), _(1870), _( -29), _(139), _( 9) }, - { _( -3), _( -79), _( 200), _(-1283), _( 794), _(-8998), _( -4533), _(-58333), _(68692), _(16155), _(9966), _( 1095), _(1817), _( -57), _(132), _( 8) }, - { _( -4), _( -85), _( 189), _(-1356), _( 605), _(-9219), _( -5818), _(-59838), _(67629), _(14548), _(9959), _( 814), _(1759), _( -83), _(125), _( 7) }, - { _( -4), _( -91), _( 177), _(-1428), _( 402), _(-9416), _( -7154), _(-61289), _(66494), _(12980), _(9916), _( 545), _(1698), _(-106), _(117), _( 7) }, - { _( -5), _( -97), _( 163), _(-1498), _( 185), _(-9585), _( -8540), _(-62684), _(65290), _(11455), _(9838), _( 288), _(1634), _(-127), _(111), _( 6) }, - { _( -5), _(-104), _( 146), _(-1567), _( -45), _(-9727), _( -9975), _(-64019), _(64019), _( 9975), _(9727), _( 45), _(1567), _(-146), _(104), _( 5) }, - { _( -6), _(-111), _( 127), _(-1634), _( -288), _(-9838), _(-11455), _(-65290), _(62684), _( 8540), _(9585), _( -185), _(1498), _(-163), _( 97), _( 5) }, - { _( -7), _(-117), _( 106), _(-1698), _( -545), _(-9916), _(-12980), _(-66494), _(61289), _( 7154), _(9416), _( -402), _(1428), _(-177), _( 91), _( 4) }, - { _( -7), _(-125), _( 83), _(-1759), _( -814), _(-9959), _(-14548), _(-67629), _(59838), _( 
5818), _(9219), _( -605), _(1356), _(-189), _( 85), _( 4) }, - { _( -8), _(-132), _( 57), _(-1817), _(-1095), _(-9966), _(-16155), _(-68692), _(58333), _( 4533), _(8998), _( -794), _(1283), _(-200), _( 79), _( 3) }, - { _( -9), _(-139), _( 29), _(-1870), _(-1388), _(-9935), _(-17799), _(-69679), _(56778), _( 3300), _(8755), _( -970), _(1210), _(-208), _( 73), _( 3) }, - { _(-10), _(-147), _( -2), _(-1919), _(-1692), _(-9863), _(-19478), _(-70590), _(55178), _( 2122), _(8491), _(-1131), _(1137), _(-215), _( 68), _( 2) }, - { _(-11), _(-154), _( -36), _(-1962), _(-2006), _(-9750), _(-21189), _(-71420), _(53534), _( 998), _(8209), _(-1280), _(1064), _(-221), _( 63), _( 2) }, - { _(-13), _(-161), _( -72), _(-2001), _(-2330), _(-9592), _(-22929), _(-72169), _(51853), _( -70), _(7910), _(-1414), _( 991), _(-224), _( 58), _( 2) }, - { _(-14), _(-169), _(-111), _(-2032), _(-2663), _(-9389), _(-24694), _(-72835), _(50137), _(-1082), _(7597), _(-1535), _( 919), _(-227), _( 53), _( 2) }, - { _(-16), _(-176), _(-153), _(-2057), _(-3004), _(-9139), _(-26482), _(-73415), _(48390), _(-2037), _(7271), _(-1644), _( 848), _(-228), _( 49), _( 1) }, - { _(-17), _(-183), _(-197), _(-2075), _(-3351), _(-8840), _(-28289), _(-73908), _(46617), _(-2935), _(6935), _(-1739), _( 779), _(-228), _( 45), _( 1) }, - { _(-19), _(-190), _(-244), _(-2085), _(-3705), _(-8492), _(-30112), _(-74313), _(44821), _(-3776), _(6589), _(-1822), _( 711), _(-227), _( 41), _( 1) }, - { _(-21), _(-196), _(-294), _(-2087), _(-4063), _(-8092), _(-31947), _(-74630), _(43006), _(-4561), _(6237), _(-1893), _( 645), _(-225), _( 38), _( 1) }, - { _(-24), _(-202), _(-347), _(-2080), _(-4425), _(-7640), _(-33791), _(-74856), _(41176), _(-5288), _(5879), _(-1952), _( 581), _(-222), _( 35), _( 1) }, - { _(-26), _(-208), _(-401), _(-2063), _(-4788), _(-7134), _(-35640), _(-74992), _(39336), _(-5959), _(5517), _(-2000), _( 519), _(-218), _( 31), _( 1) } +static const MPC_SAMPLE_FORMAT Di_opt [512] ICONST_ATTR = { +/* 0 1 2 3 
4 5 6 7 8 9 10 11 12 13 14 15 */ +/* 0 */ _( 0), _( -29), _( 213), _( -459), _( 2037), _(-5153), _( 6574), _(-37489), _(75038), _(37489), _(6574), _( 5153), _(2037), _( 459), _(213), _(29), +/* 1 */ _( -1), _( -31), _( 218), _( -519), _( 2000), _(-5517), _( 5959), _(-39336), _(74992), _(35640), _(7134), _( 4788), _(2063), _( 401), _(208), _(26), +/* 2 */ _( -1), _( -35), _( 222), _( -581), _( 1952), _(-5879), _( 5288), _(-41176), _(74856), _(33791), _(7640), _( 4425), _(2080), _( 347), _(202), _(24), +/* 3 */ _( -1), _( -38), _( 225), _( -645), _( 1893), _(-6237), _( 4561), _(-43006), _(74630), _(31947), _(8092), _( 4063), _(2087), _( 294), _(196), _(21), +/* 4 */ _( -1), _( -41), _( 227), _( -711), _( 1822), _(-6589), _( 3776), _(-44821), _(74313), _(30112), _(8492), _( 3705), _(2085), _( 244), _(190), _(19), +/* 5 */ _( -1), _( -45), _( 228), _( -779), _( 1739), _(-6935), _( 2935), _(-46617), _(73908), _(28289), _(8840), _( 3351), _(2075), _( 197), _(183), _(17), +/* 6 */ _( -1), _( -49), _( 228), _( -848), _( 1644), _(-7271), _( 2037), _(-48390), _(73415), _(26482), _(9139), _( 3004), _(2057), _( 153), _(176), _(16), +/* 7 */ _( -2), _( -53), _( 227), _( -919), _( 1535), _(-7597), _( 1082), _(-50137), _(72835), _(24694), _(9389), _( 2663), _(2032), _( 111), _(169), _(14), +/* 8 */ _( -2), _( -58), _( 224), _( -991), _( 1414), _(-7910), _( 70), _(-51853), _(72169), _(22929), _(9592), _( 2330), _(2001), _( 72), _(161), _(13), +/* 9 */ _( -2), _( -63), _( 221), _(-1064), _( 1280), _(-8209), _( -998), _(-53534), _(71420), _(21189), _(9750), _( 2006), _(1962), _( 36), _(154), _(11), +/* 10 */ _( -2), _( -68), _( 215), _(-1137), _( 1131), _(-8491), _( -2122), _(-55178), _(70590), _(19478), _(9863), _( 1692), _(1919), _( 2), _(147), _(10), +/* 11 */ _( -3), _( -73), _( 208), _(-1210), _( 970), _(-8755), _( -3300), _(-56778), _(69679), _(17799), _(9935), _( 1388), _(1870), _( -29), _(139), _( 9), +/* 12 */ _( -3), _( -79), _( 200), _(-1283), _( 794), _(-8998), _( 
-4533), _(-58333), _(68692), _(16155), _(9966), _( 1095), _(1817), _( -57), _(132), _( 8), +/* 13 */ _( -4), _( -85), _( 189), _(-1356), _( 605), _(-9219), _( -5818), _(-59838), _(67629), _(14548), _(9959), _( 814), _(1759), _( -83), _(125), _( 7), +/* 14 */ _( -4), _( -91), _( 177), _(-1428), _( 402), _(-9416), _( -7154), _(-61289), _(66494), _(12980), _(9916), _( 545), _(1698), _(-106), _(117), _( 7), +/* 15 */ _( -5), _( -97), _( 163), _(-1498), _( 185), _(-9585), _( -8540), _(-62684), _(65290), _(11455), _(9838), _( 288), _(1634), _(-127), _(111), _( 6), +/* 16 */ _( -5), _(-104), _( 146), _(-1567), _( -45), _(-9727), _( -9975), _(-64019), _(64019), _( 9975), _(9727), _( 45), _(1567), _(-146), _(104), _( 5), +/* 17 */ _( -6), _(-111), _( 127), _(-1634), _( -288), _(-9838), _(-11455), _(-65290), _(62684), _( 8540), _(9585), _( -185), _(1498), _(-163), _( 97), _( 5), +/* 18 */ _( -7), _(-117), _( 106), _(-1698), _( -545), _(-9916), _(-12980), _(-66494), _(61289), _( 7154), _(9416), _( -402), _(1428), _(-177), _( 91), _( 4), +/* 19 */ _( -7), _(-125), _( 83), _(-1759), _( -814), _(-9959), _(-14548), _(-67629), _(59838), _( 5818), _(9219), _( -605), _(1356), _(-189), _( 85), _( 4), +/* 20 */ _( -8), _(-132), _( 57), _(-1817), _(-1095), _(-9966), _(-16155), _(-68692), _(58333), _( 4533), _(8998), _( -794), _(1283), _(-200), _( 79), _( 3), +/* 21 */ _( -9), _(-139), _( 29), _(-1870), _(-1388), _(-9935), _(-17799), _(-69679), _(56778), _( 3300), _(8755), _( -970), _(1210), _(-208), _( 73), _( 3), +/* 22 */ _(-10), _(-147), _( -2), _(-1919), _(-1692), _(-9863), _(-19478), _(-70590), _(55178), _( 2122), _(8491), _(-1131), _(1137), _(-215), _( 68), _( 2), +/* 23 */ _(-11), _(-154), _( -36), _(-1962), _(-2006), _(-9750), _(-21189), _(-71420), _(53534), _( 998), _(8209), _(-1280), _(1064), _(-221), _( 63), _( 2), +/* 24 */ _(-13), _(-161), _( -72), _(-2001), _(-2330), _(-9592), _(-22929), _(-72169), _(51853), _( -70), _(7910), _(-1414), _( 991), _(-224), _( 58), _( 2), +/* 
25 */ _(-14), _(-169), _(-111), _(-2032), _(-2663), _(-9389), _(-24694), _(-72835), _(50137), _(-1082), _(7597), _(-1535), _( 919), _(-227), _( 53), _( 2), +/* 26 */ _(-16), _(-176), _(-153), _(-2057), _(-3004), _(-9139), _(-26482), _(-73415), _(48390), _(-2037), _(7271), _(-1644), _( 848), _(-228), _( 49), _( 1), +/* 27 */ _(-17), _(-183), _(-197), _(-2075), _(-3351), _(-8840), _(-28289), _(-73908), _(46617), _(-2935), _(6935), _(-1739), _( 779), _(-228), _( 45), _( 1), +/* 28 */ _(-19), _(-190), _(-244), _(-2085), _(-3705), _(-8492), _(-30112), _(-74313), _(44821), _(-3776), _(6589), _(-1822), _( 711), _(-227), _( 41), _( 1), +/* 29 */ _(-21), _(-196), _(-294), _(-2087), _(-4063), _(-8092), _(-31947), _(-74630), _(43006), _(-4561), _(6237), _(-1893), _( 645), _(-225), _( 38), _( 1), +/* 30 */ _(-24), _(-202), _(-347), _(-2080), _(-4425), _(-7640), _(-33791), _(-74856), _(41176), _(-5288), _(5879), _(-1952), _( 581), _(-222), _( 35), _( 1), +/* 31 */ _(-26), _(-208), _(-401), _(-2063), _(-4788), _(-7134), _(-35640), _(-74992), _(39336), _(-5959), _(5517), _(-2000), _( 519), _(-218), _( 31), _( 1) }; #undef _ @@ -457,69 +441,30 @@ mpc_calculate_new_V ( const MPC_SAMPLE_FORMAT * Sample, MPC_SAMPLE_FORMAT * V ) // total: 111 adds, 107 subs, 80 muls, 80 shifts } +#if defined(CPU_ARM) +extern void +mpc_decoder_windowing_D(MPC_SAMPLE_FORMAT * Data, + const MPC_SAMPLE_FORMAT * V, + const MPC_SAMPLE_FORMAT * D); +#else static void -mpc_decoder_windowing_D(MPC_SAMPLE_FORMAT * Data, const MPC_SAMPLE_FORMAT * V) +mpc_decoder_windowing_D(MPC_SAMPLE_FORMAT * Data, + const MPC_SAMPLE_FORMAT * V, + const MPC_SAMPLE_FORMAT * D) { - const MPC_SAMPLE_FORMAT *D = (const MPC_SAMPLE_FORMAT *) &Di_opt; mpc_int32_t k; #if defined(OPTIMIZE_FOR_SPEED) - #if defined(CPU_ARM) - // 32=32x32-multiply assembler for ARM - for ( k = 0; k < 32; k++, V++ ) - { - asm volatile ( - "ldmia %[D]!, { r0-r7 } \n\t" - "ldr r8, [%[V]] \n\t" - "mul r9, r0, r8 \n\t" - "ldr r8, [%[V], #96*4] \n\t" - "mla r9, 
r1, r8, r9 \n\t" - "ldr r8, [%[V], #128*4] \n\t" - "mla r9, r2, r8, r9 \n\t" - "ldr r8, [%[V], #224*4] \n\t" - "mla r9, r3, r8, r9 \n\t" - "ldr r8, [%[V], #256*4] \n\t" - "mla r9, r4, r8, r9 \n\t" - "ldr r8, [%[V], #352*4] \n\t" - "mla r9, r5, r8, r9 \n\t" - "ldr r8, [%[V], #384*4] \n\t" - "mla r9, r6, r8, r9 \n\t" - "ldr r8, [%[V], #480*4] \n\t" - "mla r9, r7, r8, r9 \n\t" - "ldmia %[D]!, { r0-r7 } \n\t" - "ldr r8, [%[V], #512*4] \n\t" - "mla r9, r0, r8, r9 \n\t" - "ldr r8, [%[V], #608*4] \n\t" - "mla r9, r1, r8, r9 \n\t" - "ldr r8, [%[V], #640*4] \n\t" - "mla r9, r2, r8, r9 \n\t" - "ldr r8, [%[V], #736*4] \n\t" - "mla r9, r3, r8, r9 \n\t" - "ldr r8, [%[V], #768*4] \n\t" - "mla r9, r4, r8, r9 \n\t" - "ldr r8, [%[V], #864*4] \n\t" - "mla r9, r5, r8, r9 \n\t" - "ldr r8, [%[V], #896*4] \n\t" - "mla r9, r6, r8, r9 \n\t" - "ldr r8, [%[V], #992*4] \n\t" - "mla r9, r7, r8, r9 \n\t" - "str r9, [%[Data]], #4 \n" - : [Data] "+r" (Data), [D] "+r" (D) - : [V] "r" (V) - : "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9"); - } - #else // 32=32x32-multiply (FIXED_POINT) for ( k = 0; k < 32; k++, D += 16, V++ ) { *Data = V[ 0]*D[ 0] + V[ 96]*D[ 1] + V[128]*D[ 2] + V[224]*D[ 3] - + V[256]*D[ 4] + V[352]*D[ 5] + V[384]*D[ 6] + V[480]*D[ 7] - + V[512]*D[ 8] + V[608]*D[ 9] + V[640]*D[10] + V[736]*D[11] - + V[768]*D[12] + V[864]*D[13] + V[896]*D[14] + V[992]*D[15]; + + V[256]*D[ 4] + V[352]*D[ 5] + V[384]*D[ 6] + V[480]*D[ 7] + + V[512]*D[ 8] + V[608]*D[ 9] + V[640]*D[10] + V[736]*D[11] + + V[768]*D[12] + V[864]*D[13] + V[896]*D[14] + V[992]*D[15]; Data += 1; - // total: 16 muls, 15 adds + // total: 32 * (16 muls, 15 adds) } - #endif #else #if defined(CPU_COLDFIRE) // 64=32x32-multiply assembler for Coldfire @@ -553,71 +498,25 @@ mpc_decoder_windowing_D(MPC_SAMPLE_FORMAT * Data, const MPC_SAMPLE_FORMAT * V) : [V] "a" (V), [D] "a" (D) : "d0", "d1", "d2", "d3", "a5"); } - #elif defined(CPU_ARM) - // 64=32x32-multiply assembler for ARM - for ( k = 0; k < 32; k++, V++ ) - { - 
asm volatile ( - "ldmia %[D]!, { r0-r3 } \n\t" - "ldr r4, [%[V]] \n\t" - "smull r5, r6, r0, r4 \n\t" - "ldr r4, [%[V], #96*4] \n\t" - "smlal r5, r6, r1, r4 \n\t" - "ldr r4, [%[V], #128*4] \n\t" - "smlal r5, r6, r2, r4 \n\t" - "ldr r4, [%[V], #224*4] \n\t" - "smlal r5, r6, r3, r4 \n\t" - - "ldmia %[D]!, { r0-r3 } \n\t" - "ldr r4, [%[V], #256*4] \n\t" - "smlal r5, r6, r0, r4 \n\t" - "ldr r4, [%[V], #352*4] \n\t" - "smlal r5, r6, r1, r4 \n\t" - "ldr r4, [%[V], #384*4] \n\t" - "smlal r5, r6, r2, r4 \n\t" - "ldr r4, [%[V], #480*4] \n\t" - "smlal r5, r6, r3, r4 \n\t" - - "ldmia %[D]!, { r0-r3 } \n\t" - "ldr r4, [%[V], #512*4] \n\t" - "smlal r5, r6, r0, r4 \n\t" - "ldr r4, [%[V], #608*4] \n\t" - "smlal r5, r6, r1, r4 \n\t" - "ldr r4, [%[V], #640*4] \n\t" - "smlal r5, r6, r2, r4 \n\t" - "ldr r4, [%[V], #736*4] \n\t" - "smlal r5, r6, r3, r4 \n\t" - - "ldmia %[D]!, { r0-r3 } \n\t" - "ldr r4, [%[V], #768*4] \n\t" - "smlal r5, r6, r0, r4 \n\t" - "ldr r4, [%[V], #864*4] \n\t" - "smlal r5, r6, r1, r4 \n\t" - "ldr r4, [%[V], #896*4] \n\t" - "smlal r5, r6, r2, r4 \n\t" - "ldr r4, [%[V], #992*4] \n\t" - "smlal r5, r6, r3, r4 \n\t" - "mov r4, r6, lsl #1 \n\t" - "orr r4, r4, r5, lsr #31\n\t" - "str r4, [%[Data]], #4 \n" - : [Data] "+r" (Data), [D] "+r" (D) - : [V] "r" (V) - : "r0", "r1", "r2", "r3", "r4", "r5", "r6"); - } #else // 64=64x64-multiply (FIXED_POINT) or float=float*float (!FIXED_POINT) in C for ( k = 0; k < 32; k++, D += 16, V++ ) { - *Data = MPC_MULTIPLY_EX(V[ 0],D[ 0],31) + MPC_MULTIPLY_EX(V[ 96],D[ 1],31) + MPC_MULTIPLY_EX(V[128],D[ 2],31) + MPC_MULTIPLY_EX(V[224],D[ 3],31) - + MPC_MULTIPLY_EX(V[256],D[ 4],31) + MPC_MULTIPLY_EX(V[352],D[ 5],31) + MPC_MULTIPLY_EX(V[384],D[ 6],31) + MPC_MULTIPLY_EX(V[480],D[ 7],31) - + MPC_MULTIPLY_EX(V[512],D[ 8],31) + MPC_MULTIPLY_EX(V[608],D[ 9],31) + MPC_MULTIPLY_EX(V[640],D[10],31) + MPC_MULTIPLY_EX(V[736],D[11],31) - + MPC_MULTIPLY_EX(V[768],D[12],31) + MPC_MULTIPLY_EX(V[864],D[13],31) + MPC_MULTIPLY_EX(V[896],D[14],31) + 
MPC_MULTIPLY_EX(V[992],D[15],31); + *Data = MPC_MULTIPLY_EX(V[ 0],D[ 0],31) + MPC_MULTIPLY_EX(V[ 96],D[ 1],31) + + MPC_MULTIPLY_EX(V[128],D[ 2],31) + MPC_MULTIPLY_EX(V[224],D[ 3],31) + + MPC_MULTIPLY_EX(V[256],D[ 4],31) + MPC_MULTIPLY_EX(V[352],D[ 5],31) + + MPC_MULTIPLY_EX(V[384],D[ 6],31) + MPC_MULTIPLY_EX(V[480],D[ 7],31) + + MPC_MULTIPLY_EX(V[512],D[ 8],31) + MPC_MULTIPLY_EX(V[608],D[ 9],31) + + MPC_MULTIPLY_EX(V[640],D[10],31) + MPC_MULTIPLY_EX(V[736],D[11],31) + + MPC_MULTIPLY_EX(V[768],D[12],31) + MPC_MULTIPLY_EX(V[864],D[13],31) + + MPC_MULTIPLY_EX(V[896],D[14],31) + MPC_MULTIPLY_EX(V[992],D[15],31); Data += 1; // total: 16 muls, 15 adds, 16 shifts } #endif #endif } +#endif /* CPU_ARM */ static void mpc_full_synthesis_filter(MPC_SAMPLE_FORMAT *OutData, MPC_SAMPLE_FORMAT *V, const MPC_SAMPLE_FORMAT *Y) @@ -630,7 +529,7 @@ mpc_full_synthesis_filter(MPC_SAMPLE_FORMAT *OutData, MPC_SAMPLE_FORMAT *V, cons { V -= 64; mpc_calculate_new_V ( Y, V ); - mpc_decoder_windowing_D( OutData, V); + mpc_decoder_windowing_D( OutData, V, Di_opt ); } } } @@ -661,7 +560,7 @@ mpc_decoder_synthese_filter_float(mpc_decoder *d, MPC_SAMPLE_FORMAT *OutData) /* */ /*******************************************/ -static const unsigned char Parity [256] = { // parity +static const unsigned char Parity [256] ICONST_ATTR = { // parity 0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1, 1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0, 1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0, diff --git a/apps/codecs/libmusepack/synth_filter_arm.S b/apps/codecs/libmusepack/synth_filter_arm.S new file mode 100755 index 0000000..ce668e8 --- /dev/null +++ b/apps/codecs/libmusepack/synth_filter_arm.S @@ -0,0 +1,155 @@ +/***************************************************************************
+ * __________ __ ___.
+ * Open \______ \ ____ ____ | | _\_ |__ _______ ___
+ * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
+ * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
+ * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
+ * \/ \/ \/ \/ \/
+ * $Id$
+ *
+ * Copyright (C) 2008 by Andree Buschmann
+ *
+ * All files in this archive are subject to the GNU General Public License.
+ * See the file COPYING in the source tree root for full license agreement.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+#include "mpc_config.h"
+
+ .section .text, "ax", %progbits
+
+/****************************************************************************
+ * void mpc_decoder_windowing_D(MPC_SAMPLE_FORMAT *Data,
+ *          const MPC_SAMPLE_FORMAT *V, const MPC_SAMPLE_FORMAT *D)
+ *
+ * 2nd step within synthesis filter. Does the dewindowing with 32=32x32
+ * multiplies (OPTIMIZE_FOR_SPEED), using pre-shifted V[] and D[] values.
+ ****************************************************************************/
+#if defined(OPTIMIZE_FOR_SPEED)
+    .align  2
+    .global mpc_decoder_windowing_D
+    .type   mpc_decoder_windowing_D, %function
+mpc_decoder_windowing_D:
+    /* r0 = Data[], one word stored and pointer advanced per loop pass */
+    /* r1 = V[], advanced by one word per loop pass */
+    /* r2 = D[], 16 window coefficients consumed per loop pass (r3-r10) */
+    /* lr = counter (32 passes), r11 = current V value, r12 = accumulator */
+
+    stmfd   sp!, {r4-r12, lr}       /* save work registers and return address */
+
+    mov     lr, #32
+.Lloop32:
+    ldmia   r2!, {r3-r10}           /* load first 8 window coefficients */
+    ldr     r11, [r1]               /* V[  0] * D[ 0] */
+    mul     r12, r3, r11            /* first tap initialises the accumulator */
+    ldr     r11, [r1, #96*4]        /* V[ 96] * D[ 1] */
+    mla     r12, r4, r11, r12
+    ldr     r11, [r1, #128*4]       /* V[128] * D[ 2] */
+    mla     r12, r5, r11, r12
+    ldr     r11, [r1, #224*4]       /* V[224] * D[ 3] */
+    mla     r12, r6, r11, r12
+    ldr     r11, [r1, #256*4]       /* V[256] * D[ 4] */
+    mla     r12, r7, r11, r12
+    ldr     r11, [r1, #352*4]       /* V[352] * D[ 5] */
+    mla     r12, r8, r11, r12
+    ldr     r11, [r1, #384*4]       /* V[384] * D[ 6] */
+    mla     r12, r9, r11, r12
+    ldr     r11, [r1, #480*4]       /* V[480] * D[ 7] */
+    mla     r12, r10, r11, r12
+    ldmia   r2!, {r3-r10}           /* load last 8 window coefficients */
+    ldr     r11, [r1, #512*4]       /* V[512] * D[ 8] */
+    mla     r12, r3, r11, r12
+    ldr     r11, [r1, #608*4]       /* V[608] * D[ 9] */
+    mla     r12, r4, r11, r12
+    ldr     r11, [r1, #640*4]       /* V[640] * D[10] */
+    mla     r12, r5, r11, r12
+    ldr     r11, [r1, #736*4]       /* V[736] * D[11] */
+    mla     r12, r6, r11, r12
+    ldr     r11, [r1, #768*4]       /* V[768] * D[12] */
+    mla     r12, r7, r11, r12
+    ldr     r11, [r1, #864*4]       /* V[864] * D[13] */
+    mla     r12, r8, r11, r12
+    ldr     r11, [r1, #896*4]       /* V[896] * D[14] */
+    mla     r12, r9, r11, r12
+    ldr     r11, [r1, #992*4]       /* V[992] * D[15] */
+    mla     r12, r10, r11, r12
+    str     r12, [r0], #4           /* store Data, Data++ */
+    add     r1, r1, #4              /* V++ */
+
+    subs    lr, lr, #1
+    bgt     .Lloop32
+
+    ldmfd   sp!, {r4-r12, pc}       /* restore registers, return via saved lr */
+.Lmpc_dewindowing_end:
+    .size   mpc_decoder_windowing_D,.Lmpc_dewindowing_end-mpc_decoder_windowing_D
+#else
+/****************************************************************************
+ * void mpc_decoder_windowing_D(MPC_SAMPLE_FORMAT *Data,
+ *          const MPC_SAMPLE_FORMAT *V, const MPC_SAMPLE_FORMAT *D)
+ *
+ * 2nd step within synthesis filter. Does the dewindowing with 64=32x32
+ * multiplies. Drops the lo-part of the 64bit multiply results and will
+ * therefore lose 1 bit accuracy. The decoder output is binary identical as
+ * this imprecision is far below the output's 16bit resolution.
+ ****************************************************************************/
+    .align  2
+    .global mpc_decoder_windowing_D
+    .type   mpc_decoder_windowing_D, %function
+mpc_decoder_windowing_D:
+    /* r0 = Data[], one word stored and pointer advanced per loop pass */
+    /* r1 = V[], advanced by one word per loop pass */
+    /* r2 = D[], 16 window coefficients consumed per loop pass (r3-r10) */
+    /* lr = counter (32 passes), r11 = V value and lo-part, r12 = hi-part */
+
+    stmfd   sp!, {r4-r12, lr}       /* save work registers and return address */
+
+    mov     lr, #32
+.Lloop32:
+    ldmia   r2!, {r3-r10}           /* load first 8 window coefficients */
+    ldr     r11, [r1]               /* V[  0] * D[ 0] */
+    smull   r11, r12, r3, r11       /* lo-part in r11 is overwritten by next ldr */
+    ldr     r11, [r1, #96*4]        /* V[ 96] * D[ 1] */
+    smlal   r11, r12, r4, r11
+    ldr     r11, [r1, #128*4]       /* V[128] * D[ 2] */
+    smlal   r11, r12, r5, r11
+    ldr     r11, [r1, #224*4]       /* V[224] * D[ 3] */
+    smlal   r11, r12, r6, r11
+    ldr     r11, [r1, #256*4]       /* V[256] * D[ 4] */
+    smlal   r11, r12, r7, r11
+    ldr     r11, [r1, #352*4]       /* V[352] * D[ 5] */
+    smlal   r11, r12, r8, r11
+    ldr     r11, [r1, #384*4]       /* V[384] * D[ 6] */
+    smlal   r11, r12, r9, r11
+    ldr     r11, [r1, #480*4]       /* V[480] * D[ 7] */
+    smlal   r11, r12, r10, r11
+    ldmia   r2!, {r3-r10}           /* load last 8 window coefficients */
+    ldr     r11, [r1, #512*4]       /* V[512] * D[ 8] */
+    smlal   r11, r12, r3, r11
+    ldr     r11, [r1, #608*4]       /* V[608] * D[ 9] */
+    smlal   r11, r12, r4, r11
+    ldr     r11, [r1, #640*4]       /* V[640] * D[10] */
+    smlal   r11, r12, r5, r11
+    ldr     r11, [r1, #736*4]       /* V[736] * D[11] */
+    smlal   r11, r12, r6, r11
+    ldr     r11, [r1, #768*4]       /* V[768] * D[12] */
+    smlal   r11, r12, r7, r11
+    ldr     r11, [r1, #864*4]       /* V[864] * D[13] */
+    smlal   r11, r12, r8, r11
+    ldr     r11, [r1, #896*4]       /* V[896] * D[14] */
+    smlal   r11, r12, r9, r11
+    ldr     r11, [r1, #992*4]       /* V[992] * D[15] */
+    smlal   r11, r12, r10, r11
+    mov     r4, r12, lsl #1         /* get result from hi-part */
+    str     r4, [r0], #4            /* store Data, Data++ */
+    add     r1, r1, #4              /* V++ */
+
+    subs    lr, lr, #1
+    bgt     .Lloop32
+
+    ldmfd   sp!, {r4-r12, pc}       /* restore registers, return via saved lr */
+.Lmpc_dewindowing_end:
+    .size   mpc_decoder_windowing_D,.Lmpc_dewindowing_end-mpc_decoder_windowing_D
+#endif
|