Move the ARM assembler of the musepack synthesis filter to its own file. Additionally add ICONST_ATTR to the noise generator data.

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@17562 a1c6a512-1295-4272-9138-f99709370657
author: Andree Buschmann <AndreeBuschmann@t-online.de> 2008-05-17 21:26:34 +0000
committer: Andree Buschmann <AndreeBuschmann@t-online.de> 2008-05-17 21:26:34 +0000
commit: 18f13b149a4ce6d3b16c0b91de4d571d1860b66f (patch)
tree: 313f09dcd46b1e068d0abd0fb49de615d27495d5 /apps/codecs
parent: c769cf586fee44c55382d2ef98ce9dbca51b6f39 (diff)
download: rockbox-18f13b149a4ce6d3b16c0b91de4d571d1860b66f.zip
rockbox-18f13b149a4ce6d3b16c0b91de4d571d1860b66f.tar.gz
rockbox-18f13b149a4ce6d3b16c0b91de4d571d1860b66f.tar.bz2
rockbox-18f13b149a4ce6d3b16c0b91de4d571d1860b66f.tar.xz
5 files changed, 265 insertions, 161 deletions
diff --git a/apps/codecs/libmusepack/SOURCES b/apps/codecs/libmusepack/SOURCES
index 9c588e3..0de1143 100644
--- a/apps/codecs/libmusepack/SOURCES
+++ b/apps/codecs/libmusepack/SOURCES
@@ -5,4 +5,6 @@ mpc_decoder.c
 requant.c
 streaminfo.c
 synth_filter.c
-
+#if defined(CPU_ARM)
+synth_filter_arm.S
+#endif
diff --git a/apps/codecs/libmusepack/math.h b/apps/codecs/libmusepack/math.h
index a015d45..e4c2ffc 100644
--- a/apps/codecs/libmusepack/math.h
+++ b/apps/codecs/libmusepack/math.h
@@ -38,7 +38,7 @@
 #ifndef _mpcdec_math_h_
 #define _mpcdec_math_h_
 
-#define MPC_FIXED_POINT
+#include "mpc_config.h"
 
 #define MPC_FIXED_POINT_SHIFT 16
 
diff --git a/apps/codecs/libmusepack/mpc_config.h b/apps/codecs/libmusepack/mpc_config.h
new file mode 100755
index 0000000..6993775
--- /dev/null
+++ b/apps/codecs/libmusepack/mpc_config.h
@@ -0,0 +1,48 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2008 by Andree Buschmann
+ *
+ * All files in this archive are subject to the GNU General Public License.
+ * See the file COPYING in the source tree root for full license agreement.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+#ifndef _mpc_config_h_
+#define _mpc_config_h_
+
+#include "config.h"
+
+/* choose fixed point or floating point */
+#define MPC_FIXED_POINT
+
+#ifndef MPC_FIXED_POINT
+#error FIXME, mpc will not work with floating point now
+#endif
+
+/* choose speed vs. accuracy for MPC_FIXED_POINT
+ * The speed setting increases decoding speed on ARM only (+20%); the loss of
+ * accuracy equals about 5 dB SNR (15bit output precision). To not use the
+ * speed optimization, comment out OPTIMIZE_FOR_SPEED for the desired target. */
+#if defined(MPC_FIXED_POINT)
+   #if defined(CPU_COLDFIRE)
+      /* do nothing */
+   #elif defined(CPU_ARM)
+      #define OPTIMIZE_FOR_SPEED
+   #else
+      #define OPTIMIZE_FOR_SPEED
+   #endif
+#else
+    /* do nothing */
+#endif
+
+#endif /* _mpc_config_h_ */
diff --git a/apps/codecs/libmusepack/synth_filter.c b/apps/codecs/libmusepack/synth_filter.c
index faf014c..d48b563 100644
--- a/apps/codecs/libmusepack/synth_filter.c
+++ b/apps/codecs/libmusepack/synth_filter.c
@@ -39,22 +39,6 @@
 #include "musepack.h"
 #include "internal.h"
 
-/* S E T T I N G S */
-// choose speed vs. accuracy for MPC_FIXED_POINT
-// speed-setting will increase decoding speed on ARM only (+20%), loss of accuracy equals about 5 dB SNR (15bit output precision)
-// to not use the speed-optimization -> comment OPTIMIZE_FOR_SPEED
-#if defined(MPC_FIXED_POINT)
-   #if defined(CPU_COLDFIRE)
-      // do nothing
-   #elif defined(CPU_ARM)
-      #define OPTIMIZE_FOR_SPEED
-   #else
-      #define OPTIMIZE_FOR_SPEED
-   #endif
-#else
-    // do nothing
-#endif
-
 /* C O N S T A N T S */
 #undef _
 
@@ -82,40 +66,40 @@
 #endif
     
 // Di_opt coefficients are +/- 2^17
-static const MPC_SAMPLE_FORMAT  Di_opt [32] [16] ICONST_ATTR = {
-    /*    0        1        2         3         4         5          6          7         8         9       10        11       12       13      14     15  */
-    { _(  0), _( -29), _( 213), _( -459), _( 2037), _(-5153), _(  6574), _(-37489), _(75038), _(37489), _(6574), _( 5153), _(2037), _( 459), _(213), _(29) },
-    { _( -1), _( -31), _( 218), _( -519), _( 2000), _(-5517), _(  5959), _(-39336), _(74992), _(35640), _(7134), _( 4788), _(2063), _( 401), _(208), _(26) },
-    { _( -1), _( -35), _( 222), _( -581), _( 1952), _(-5879), _(  5288), _(-41176), _(74856), _(33791), _(7640), _( 4425), _(2080), _( 347), _(202), _(24) },
-    { _( -1), _( -38), _( 225), _( -645), _( 1893), _(-6237), _(  4561), _(-43006), _(74630), _(31947), _(8092), _( 4063), _(2087), _( 294), _(196), _(21) },
-    { _( -1), _( -41), _( 227), _( -711), _( 1822), _(-6589), _(  3776), _(-44821), _(74313), _(30112), _(8492), _( 3705), _(2085), _( 244), _(190), _(19) },
-    { _( -1), _( -45), _( 228), _( -779), _( 1739), _(-6935), _(  2935), _(-46617), _(73908), _(28289), _(8840), _( 3351), _(2075), _( 197), _(183), _(17) },
-    { _( -1), _( -49), _( 228), _( -848), _( 1644), _(-7271), _(  2037), _(-48390), _(73415), _(26482), _(9139), _( 3004), _(2057), _( 153), _(176), _(16) },
-    { _( -2), _( -53), _( 227), _( -919), _( 1535), _(-7597), _(  1082), _(-50137), _(72835), _(24694), _(9389), _( 2663), _(2032), _( 111), _(169), _(14) },
-    { _( -2), _( -58), _( 224), _( -991), _( 1414), _(-7910), _(    70), _(-51853), _(72169), _(22929), _(9592), _( 2330), _(2001), _(  72), _(161), _(13) },
-    { _( -2), _( -63), _( 221), _(-1064), _( 1280), _(-8209), _(  -998), _(-53534), _(71420), _(21189), _(9750), _( 2006), _(1962), _(  36), _(154), _(11) },
-    { _( -2), _( -68), _( 215), _(-1137), _( 1131), _(-8491), _( -2122), _(-55178), _(70590), _(19478), _(9863), _( 1692), _(1919), _(   2), _(147), _(10) },
-    { _( -3), _( -73), _( 208), _(-1210), _(  970), _(-8755), _( -3300), _(-56778), _(69679), _(17799), _(9935), _( 1388), _(1870), _( -29), _(139), _( 9) },
-    { _( -3), _( -79), _( 200), _(-1283), _(  794), _(-8998), _( -4533), _(-58333), _(68692), _(16155), _(9966), _( 1095), _(1817), _( -57), _(132), _( 8) },
-    { _( -4), _( -85), _( 189), _(-1356), _(  605), _(-9219), _( -5818), _(-59838), _(67629), _(14548), _(9959), _(  814), _(1759), _( -83), _(125), _( 7) },
-    { _( -4), _( -91), _( 177), _(-1428), _(  402), _(-9416), _( -7154), _(-61289), _(66494), _(12980), _(9916), _(  545), _(1698), _(-106), _(117), _( 7) },
-    { _( -5), _( -97), _( 163), _(-1498), _(  185), _(-9585), _( -8540), _(-62684), _(65290), _(11455), _(9838), _(  288), _(1634), _(-127), _(111), _( 6) },
-    { _( -5), _(-104), _( 146), _(-1567), _(  -45), _(-9727), _( -9975), _(-64019), _(64019), _( 9975), _(9727), _(   45), _(1567), _(-146), _(104), _( 5) },
-    { _( -6), _(-111), _( 127), _(-1634), _( -288), _(-9838), _(-11455), _(-65290), _(62684), _( 8540), _(9585), _( -185), _(1498), _(-163), _( 97), _( 5) },
-    { _( -7), _(-117), _( 106), _(-1698), _( -545), _(-9916), _(-12980), _(-66494), _(61289), _( 7154), _(9416), _( -402), _(1428), _(-177), _( 91), _( 4) },
-    { _( -7), _(-125), _(  83), _(-1759), _( -814), _(-9959), _(-14548), _(-67629), _(59838), _( 5818), _(9219), _( -605), _(1356), _(-189), _( 85), _( 4) },
-    { _( -8), _(-132), _(  57), _(-1817), _(-1095), _(-9966), _(-16155), _(-68692), _(58333), _( 4533), _(8998), _( -794), _(1283), _(-200), _( 79), _( 3) },
-    { _( -9), _(-139), _(  29), _(-1870), _(-1388), _(-9935), _(-17799), _(-69679), _(56778), _( 3300), _(8755), _( -970), _(1210), _(-208), _( 73), _( 3) },
-    { _(-10), _(-147), _(  -2), _(-1919), _(-1692), _(-9863), _(-19478), _(-70590), _(55178), _( 2122), _(8491), _(-1131), _(1137), _(-215), _( 68), _( 2) },
-    { _(-11), _(-154), _( -36), _(-1962), _(-2006), _(-9750), _(-21189), _(-71420), _(53534), _(  998), _(8209), _(-1280), _(1064), _(-221), _( 63), _( 2) },
-    { _(-13), _(-161), _( -72), _(-2001), _(-2330), _(-9592), _(-22929), _(-72169), _(51853), _(  -70), _(7910), _(-1414), _( 991), _(-224), _( 58), _( 2) },
-    { _(-14), _(-169), _(-111), _(-2032), _(-2663), _(-9389), _(-24694), _(-72835), _(50137), _(-1082), _(7597), _(-1535), _( 919), _(-227), _( 53), _( 2) },
-    { _(-16), _(-176), _(-153), _(-2057), _(-3004), _(-9139), _(-26482), _(-73415), _(48390), _(-2037), _(7271), _(-1644), _( 848), _(-228), _( 49), _( 1) },
-    { _(-17), _(-183), _(-197), _(-2075), _(-3351), _(-8840), _(-28289), _(-73908), _(46617), _(-2935), _(6935), _(-1739), _( 779), _(-228), _( 45), _( 1) },
-    { _(-19), _(-190), _(-244), _(-2085), _(-3705), _(-8492), _(-30112), _(-74313), _(44821), _(-3776), _(6589), _(-1822), _( 711), _(-227), _( 41), _( 1) },
-    { _(-21), _(-196), _(-294), _(-2087), _(-4063), _(-8092), _(-31947), _(-74630), _(43006), _(-4561), _(6237), _(-1893), _( 645), _(-225), _( 38), _( 1) },
-    { _(-24), _(-202), _(-347), _(-2080), _(-4425), _(-7640), _(-33791), _(-74856), _(41176), _(-5288), _(5879), _(-1952), _( 581), _(-222), _( 35), _( 1) },
-    { _(-26), _(-208), _(-401), _(-2063), _(-4788), _(-7134), _(-35640), _(-74992), _(39336), _(-5959), _(5517), _(-2000), _( 519), _(-218), _( 31), _( 1) }
+static const MPC_SAMPLE_FORMAT  Di_opt [512] ICONST_ATTR = {
+/*           0        1        2         3         4         5          6          7         8         9       10        11       12       13      14     15  */
+/*  0 */ _(  0), _( -29), _( 213), _( -459), _( 2037), _(-5153), _(  6574), _(-37489), _(75038), _(37489), _(6574), _( 5153), _(2037), _( 459), _(213), _(29),
+/*  1 */ _( -1), _( -31), _( 218), _( -519), _( 2000), _(-5517), _(  5959), _(-39336), _(74992), _(35640), _(7134), _( 4788), _(2063), _( 401), _(208), _(26),
+/*  2 */ _( -1), _( -35), _( 222), _( -581), _( 1952), _(-5879), _(  5288), _(-41176), _(74856), _(33791), _(7640), _( 4425), _(2080), _( 347), _(202), _(24),
+/*  3 */ _( -1), _( -38), _( 225), _( -645), _( 1893), _(-6237), _(  4561), _(-43006), _(74630), _(31947), _(8092), _( 4063), _(2087), _( 294), _(196), _(21),
+/*  4 */ _( -1), _( -41), _( 227), _( -711), _( 1822), _(-6589), _(  3776), _(-44821), _(74313), _(30112), _(8492), _( 3705), _(2085), _( 244), _(190), _(19),
+/*  5 */ _( -1), _( -45), _( 228), _( -779), _( 1739), _(-6935), _(  2935), _(-46617), _(73908), _(28289), _(8840), _( 3351), _(2075), _( 197), _(183), _(17),
+/*  6 */ _( -1), _( -49), _( 228), _( -848), _( 1644), _(-7271), _(  2037), _(-48390), _(73415), _(26482), _(9139), _( 3004), _(2057), _( 153), _(176), _(16),
+/*  7 */ _( -2), _( -53), _( 227), _( -919), _( 1535), _(-7597), _(  1082), _(-50137), _(72835), _(24694), _(9389), _( 2663), _(2032), _( 111), _(169), _(14),
+/*  8 */ _( -2), _( -58), _( 224), _( -991), _( 1414), _(-7910), _(    70), _(-51853), _(72169), _(22929), _(9592), _( 2330), _(2001), _(  72), _(161), _(13),
+/*  9 */ _( -2), _( -63), _( 221), _(-1064), _( 1280), _(-8209), _(  -998), _(-53534), _(71420), _(21189), _(9750), _( 2006), _(1962), _(  36), _(154), _(11),
+/* 10 */ _( -2), _( -68), _( 215), _(-1137), _( 1131), _(-8491), _( -2122), _(-55178), _(70590), _(19478), _(9863), _( 1692), _(1919), _(   2), _(147), _(10),
+/* 11 */ _( -3), _( -73), _( 208), _(-1210), _(  970), _(-8755), _( -3300), _(-56778), _(69679), _(17799), _(9935), _( 1388), _(1870), _( -29), _(139), _( 9),
+/* 12 */ _( -3), _( -79), _( 200), _(-1283), _(  794), _(-8998), _( -4533), _(-58333), _(68692), _(16155), _(9966), _( 1095), _(1817), _( -57), _(132), _( 8),
+/* 13 */ _( -4), _( -85), _( 189), _(-1356), _(  605), _(-9219), _( -5818), _(-59838), _(67629), _(14548), _(9959), _(  814), _(1759), _( -83), _(125), _( 7),
+/* 14 */ _( -4), _( -91), _( 177), _(-1428), _(  402), _(-9416), _( -7154), _(-61289), _(66494), _(12980), _(9916), _(  545), _(1698), _(-106), _(117), _( 7),
+/* 15 */ _( -5), _( -97), _( 163), _(-1498), _(  185), _(-9585), _( -8540), _(-62684), _(65290), _(11455), _(9838), _(  288), _(1634), _(-127), _(111), _( 6),
+/* 16 */ _( -5), _(-104), _( 146), _(-1567), _(  -45), _(-9727), _( -9975), _(-64019), _(64019), _( 9975), _(9727), _(   45), _(1567), _(-146), _(104), _( 5),
+/* 17 */ _( -6), _(-111), _( 127), _(-1634), _( -288), _(-9838), _(-11455), _(-65290), _(62684), _( 8540), _(9585), _( -185), _(1498), _(-163), _( 97), _( 5),
+/* 18 */ _( -7), _(-117), _( 106), _(-1698), _( -545), _(-9916), _(-12980), _(-66494), _(61289), _( 7154), _(9416), _( -402), _(1428), _(-177), _( 91), _( 4),
+/* 19 */ _( -7), _(-125), _(  83), _(-1759), _( -814), _(-9959), _(-14548), _(-67629), _(59838), _( 5818), _(9219), _( -605), _(1356), _(-189), _( 85), _( 4),
+/* 20 */ _( -8), _(-132), _(  57), _(-1817), _(-1095), _(-9966), _(-16155), _(-68692), _(58333), _( 4533), _(8998), _( -794), _(1283), _(-200), _( 79), _( 3),
+/* 21 */ _( -9), _(-139), _(  29), _(-1870), _(-1388), _(-9935), _(-17799), _(-69679), _(56778), _( 3300), _(8755), _( -970), _(1210), _(-208), _( 73), _( 3),
+/* 22 */ _(-10), _(-147), _(  -2), _(-1919), _(-1692), _(-9863), _(-19478), _(-70590), _(55178), _( 2122), _(8491), _(-1131), _(1137), _(-215), _( 68), _( 2),
+/* 23 */ _(-11), _(-154), _( -36), _(-1962), _(-2006), _(-9750), _(-21189), _(-71420), _(53534), _(  998), _(8209), _(-1280), _(1064), _(-221), _( 63), _( 2),
+/* 24 */ _(-13), _(-161), _( -72), _(-2001), _(-2330), _(-9592), _(-22929), _(-72169), _(51853), _(  -70), _(7910), _(-1414), _( 991), _(-224), _( 58), _( 2),
+/* 25 */ _(-14), _(-169), _(-111), _(-2032), _(-2663), _(-9389), _(-24694), _(-72835), _(50137), _(-1082), _(7597), _(-1535), _( 919), _(-227), _( 53), _( 2),
+/* 26 */ _(-16), _(-176), _(-153), _(-2057), _(-3004), _(-9139), _(-26482), _(-73415), _(48390), _(-2037), _(7271), _(-1644), _( 848), _(-228), _( 49), _( 1),
+/* 27 */ _(-17), _(-183), _(-197), _(-2075), _(-3351), _(-8840), _(-28289), _(-73908), _(46617), _(-2935), _(6935), _(-1739), _( 779), _(-228), _( 45), _( 1),
+/* 28 */ _(-19), _(-190), _(-244), _(-2085), _(-3705), _(-8492), _(-30112), _(-74313), _(44821), _(-3776), _(6589), _(-1822), _( 711), _(-227), _( 41), _( 1),
+/* 29 */ _(-21), _(-196), _(-294), _(-2087), _(-4063), _(-8092), _(-31947), _(-74630), _(43006), _(-4561), _(6237), _(-1893), _( 645), _(-225), _( 38), _( 1),
+/* 30 */ _(-24), _(-202), _(-347), _(-2080), _(-4425), _(-7640), _(-33791), _(-74856), _(41176), _(-5288), _(5879), _(-1952), _( 581), _(-222), _( 35), _( 1),
+/* 31 */ _(-26), _(-208), _(-401), _(-2063), _(-4788), _(-7134), _(-35640), _(-74992), _(39336), _(-5959), _(5517), _(-2000), _( 519), _(-218), _( 31), _( 1)
 };
 
 #undef  _
@@ -457,69 +441,30 @@ mpc_calculate_new_V ( const MPC_SAMPLE_FORMAT * Sample, MPC_SAMPLE_FORMAT * V )
     //                    total: 111 adds, 107 subs, 80 muls,  80 shifts
 }
 
+#if defined(CPU_ARM)
+extern void
+mpc_decoder_windowing_D(MPC_SAMPLE_FORMAT * Data, 
+                        const MPC_SAMPLE_FORMAT * V,
+                        const MPC_SAMPLE_FORMAT * D);
+#else
 static void 
-mpc_decoder_windowing_D(MPC_SAMPLE_FORMAT * Data, const MPC_SAMPLE_FORMAT * V)
+mpc_decoder_windowing_D(MPC_SAMPLE_FORMAT * Data, 
+                        const MPC_SAMPLE_FORMAT * V,
+                        const MPC_SAMPLE_FORMAT * D)
 {
-   const MPC_SAMPLE_FORMAT *D = (const MPC_SAMPLE_FORMAT *) &Di_opt;
    mpc_int32_t k;
     
    #if defined(OPTIMIZE_FOR_SPEED)
-      #if defined(CPU_ARM)
-         // 32=32x32-multiply assembler for ARM
-         for ( k = 0; k < 32; k++, V++ ) 
-         {
-             asm volatile (
-               "ldmia %[D]!, { r0-r7 } \n\t"
-               "ldr r8, [%[V]]         \n\t"
-               "mul r9, r0, r8         \n\t"
-               "ldr r8, [%[V], #96*4]  \n\t"
-               "mla r9, r1, r8, r9     \n\t"
-               "ldr r8, [%[V], #128*4] \n\t"
-               "mla r9, r2, r8, r9     \n\t"
-               "ldr r8, [%[V], #224*4] \n\t"
-               "mla r9, r3, r8, r9     \n\t"
-               "ldr r8, [%[V], #256*4] \n\t"
-               "mla r9, r4, r8, r9     \n\t"
-               "ldr r8, [%[V], #352*4] \n\t"
-               "mla r9, r5, r8, r9     \n\t"
-               "ldr r8, [%[V], #384*4] \n\t"
-               "mla r9, r6, r8, r9     \n\t"
-               "ldr r8, [%[V], #480*4] \n\t"
-               "mla r9, r7, r8, r9     \n\t"
-               "ldmia %[D]!, { r0-r7 } \n\t"
-               "ldr r8, [%[V], #512*4] \n\t"
-               "mla r9, r0, r8, r9     \n\t"
-               "ldr r8, [%[V], #608*4] \n\t"
-               "mla r9, r1, r8, r9     \n\t"
-               "ldr r8, [%[V], #640*4] \n\t"
-               "mla r9, r2, r8, r9     \n\t"
-               "ldr r8, [%[V], #736*4] \n\t"
-               "mla r9, r3, r8, r9     \n\t"
-               "ldr r8, [%[V], #768*4] \n\t"
-               "mla r9, r4, r8, r9     \n\t"
-               "ldr r8, [%[V], #864*4] \n\t"
-               "mla r9, r5, r8, r9     \n\t"
-               "ldr r8, [%[V], #896*4] \n\t"
-               "mla r9, r6, r8, r9     \n\t"
-               "ldr r8, [%[V], #992*4] \n\t"
-               "mla r9, r7, r8, r9     \n\t"
-               "str r9, [%[Data]], #4  \n"  
-               : [Data] "+r" (Data), [D] "+r" (D)
-               : [V] "r" (V)
-               : "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9");
-         }
-      #else
       // 32=32x32-multiply (FIXED_POINT)
       for ( k = 0; k < 32; k++, D += 16, V++ ) 
       {
          *Data = V[  0]*D[ 0] + V[ 96]*D[ 1] + V[128]*D[ 2] + V[224]*D[ 3]
-              + V[256]*D[ 4] + V[352]*D[ 5] + V[384]*D[ 6] + V[480]*D[ 7]
-              + V[512]*D[ 8] + V[608]*D[ 9] + V[640]*D[10] + V[736]*D[11]
-              + V[768]*D[12] + V[864]*D[13] + V[896]*D[14] + V[992]*D[15];
+               + V[256]*D[ 4] + V[352]*D[ 5] + V[384]*D[ 6] + V[480]*D[ 7]
+               + V[512]*D[ 8] + V[608]*D[ 9] + V[640]*D[10] + V[736]*D[11]
+               + V[768]*D[12] + V[864]*D[13] + V[896]*D[14] + V[992]*D[15];
          Data += 1;
-         // total: 16 muls, 15 adds
+         // total: 32 * (16 muls, 15 adds)
       }
-        #endif
    #else
       #if defined(CPU_COLDFIRE)
          // 64=32x32-multiply assembler for Coldfire
@@ -553,71 +498,25 @@ mpc_decoder_windowing_D(MPC_SAMPLE_FORMAT * Data, const MPC_SAMPLE_FORMAT * V)
                : [V] "a" (V), [D] "a" (D)
                : "d0", "d1", "d2", "d3", "a5");
          }
-      #elif defined(CPU_ARM)
-         // 64=32x32-multiply assembler for ARM
-         for ( k = 0; k < 32; k++, V++ ) 
-         {
-            asm volatile (
-               "ldmia %[D]!, { r0-r3 } \n\t"
-               "ldr r4, [%[V]]         \n\t"
-               "smull r5, r6, r0, r4   \n\t"
-               "ldr r4, [%[V], #96*4]  \n\t"
-               "smlal r5, r6, r1, r4   \n\t"
-               "ldr r4, [%[V], #128*4] \n\t"
-               "smlal r5, r6, r2, r4   \n\t"
-               "ldr r4, [%[V], #224*4] \n\t"
-               "smlal r5, r6, r3, r4   \n\t"
-               
-               "ldmia %[D]!, { r0-r3 } \n\t"
-               "ldr r4, [%[V], #256*4] \n\t"
-               "smlal r5, r6, r0, r4   \n\t"
-               "ldr r4, [%[V], #352*4] \n\t"
-               "smlal r5, r6, r1, r4   \n\t"
-               "ldr r4, [%[V], #384*4] \n\t"
-               "smlal r5, r6, r2, r4   \n\t"
-               "ldr r4, [%[V], #480*4] \n\t"
-               "smlal r5, r6, r3, r4   \n\t"
-               
-               "ldmia %[D]!, { r0-r3 } \n\t"
-               "ldr r4, [%[V], #512*4] \n\t"
-               "smlal r5, r6, r0, r4   \n\t"
-               "ldr r4, [%[V], #608*4] \n\t"
-               "smlal r5, r6, r1, r4   \n\t"
-               "ldr r4, [%[V], #640*4] \n\t"
-               "smlal r5, r6, r2, r4   \n\t"
-               "ldr r4, [%[V], #736*4] \n\t"
-               "smlal r5, r6, r3, r4   \n\t"
-               
-               "ldmia %[D]!, { r0-r3 } \n\t"
-               "ldr r4, [%[V], #768*4] \n\t"
-               "smlal r5, r6, r0, r4   \n\t"
-               "ldr r4, [%[V], #864*4] \n\t"
-               "smlal r5, r6, r1, r4   \n\t"
-               "ldr r4, [%[V], #896*4] \n\t"
-               "smlal r5, r6, r2, r4   \n\t"
-               "ldr r4, [%[V], #992*4] \n\t"
-               "smlal r5, r6, r3, r4   \n\t"
-               "mov r4, r6, lsl #1     \n\t"
-               "orr r4, r4, r5, lsr #31\n\t"
-               "str r4, [%[Data]], #4  \n"  
-               : [Data] "+r" (Data), [D] "+r" (D)
-               : [V] "r" (V)
-               : "r0", "r1", "r2", "r3", "r4", "r5", "r6");
-         }
       #else
          // 64=64x64-multiply (FIXED_POINT) or float=float*float (!FIXED_POINT) in C
          for ( k = 0; k < 32; k++, D += 16, V++ ) 
          {
-            *Data = MPC_MULTIPLY_EX(V[  0],D[ 0],31) + MPC_MULTIPLY_EX(V[ 96],D[ 1],31) + MPC_MULTIPLY_EX(V[128],D[ 2],31) + MPC_MULTIPLY_EX(V[224],D[ 3],31)
-                  + MPC_MULTIPLY_EX(V[256],D[ 4],31) + MPC_MULTIPLY_EX(V[352],D[ 5],31) + MPC_MULTIPLY_EX(V[384],D[ 6],31) + MPC_MULTIPLY_EX(V[480],D[ 7],31)
-                  + MPC_MULTIPLY_EX(V[512],D[ 8],31) + MPC_MULTIPLY_EX(V[608],D[ 9],31) + MPC_MULTIPLY_EX(V[640],D[10],31) + MPC_MULTIPLY_EX(V[736],D[11],31)
-                  + MPC_MULTIPLY_EX(V[768],D[12],31) + MPC_MULTIPLY_EX(V[864],D[13],31) + MPC_MULTIPLY_EX(V[896],D[14],31) + MPC_MULTIPLY_EX(V[992],D[15],31);
+            *Data = MPC_MULTIPLY_EX(V[  0],D[ 0],31) + MPC_MULTIPLY_EX(V[ 96],D[ 1],31)
+                  + MPC_MULTIPLY_EX(V[128],D[ 2],31) + MPC_MULTIPLY_EX(V[224],D[ 3],31)
+                  + MPC_MULTIPLY_EX(V[256],D[ 4],31) + MPC_MULTIPLY_EX(V[352],D[ 5],31)
+                  + MPC_MULTIPLY_EX(V[384],D[ 6],31) + MPC_MULTIPLY_EX(V[480],D[ 7],31)
+                  + MPC_MULTIPLY_EX(V[512],D[ 8],31) + MPC_MULTIPLY_EX(V[608],D[ 9],31)
+                  + MPC_MULTIPLY_EX(V[640],D[10],31) + MPC_MULTIPLY_EX(V[736],D[11],31)
+                  + MPC_MULTIPLY_EX(V[768],D[12],31) + MPC_MULTIPLY_EX(V[864],D[13],31)
+                  + MPC_MULTIPLY_EX(V[896],D[14],31) + MPC_MULTIPLY_EX(V[992],D[15],31);
             Data += 1;
             // total: 16 muls, 15 adds, 16 shifts
          }
       #endif
    #endif
 }
+#endif /* CPU_ARM */
 
 static void 
 mpc_full_synthesis_filter(MPC_SAMPLE_FORMAT *OutData, MPC_SAMPLE_FORMAT *V, const MPC_SAMPLE_FORMAT *Y)
@@ -630,7 +529,7 @@ mpc_full_synthesis_filter(MPC_SAMPLE_FORMAT *OutData, MPC_SAMPLE_FORMAT *V, cons
         {
             V -= 64;
             mpc_calculate_new_V ( Y, V );
-            mpc_decoder_windowing_D( OutData, V);
+            mpc_decoder_windowing_D( OutData, V, Di_opt );
         }
      }
 }
@@ -661,7 +560,7 @@ mpc_decoder_synthese_filter_float(mpc_decoder *d, MPC_SAMPLE_FORMAT *OutData)
 /*                                         */
 /*******************************************/
 
-static const unsigned char    Parity [256] = {  // parity
+static const unsigned char Parity [256] ICONST_ATTR = {  // parity
     0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,
     1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,
     1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,
diff --git a/apps/codecs/libmusepack/synth_filter_arm.S b/apps/codecs/libmusepack/synth_filter_arm.S
new file mode 100755
index 0000000..ce668e8
--- /dev/null
+++ b/apps/codecs/libmusepack/synth_filter_arm.S
@@ -0,0 +1,155 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2008 by Andree Buschmann
+ *
+ * All files in this archive are subject to the GNU General Public License.
+ * See the file COPYING in the source tree root for full license agreement.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+ 
+#include "mpc_config.h"
+
+    .section .text, "ax", %progbits
+    
+/****************************************************************************
+ * void mpc_decoder_windowing_D(Data, V, D)
+ *
+ * 2nd step within synthesis filter. Does the dewindowing (OPTIMIZE_FOR_SPEED):
+ * 32 outputs, each the sum of 16 products V[]*D[] via 32=32x32 mul/mla.
+ * Uses pre-shifted V[] and D[] values.
+ ****************************************************************************/
+#if defined(OPTIMIZE_FOR_SPEED)
+    .align  2
+    .global mpc_decoder_windowing_D
+    .type   mpc_decoder_windowing_D, %function
+mpc_decoder_windowing_D:
+    /* r0 = Data[], output pointer, post-incremented by each store */
+    /* r1 = V[], advanced by one word per output sample */
+    /* r2 = D[], 16 window coefficients consumed per output sample */
+    /* lr = counter; r3-r10 = coefficients, r11 = V value, r12 = running sum */
+
+    stmfd   sp!, {r4-r12, lr}
+
+    mov lr, #32
+.Lloop32:
+    ldmia r2!, { r3-r10 }   /* load first 8 window coefficients */
+    ldr r11, [r1]           /*  0 */
+    mul r12, r3, r11
+    ldr r11, [r1, #96*4]    /*  1 */
+    mla r12, r4, r11, r12
+    ldr r11, [r1, #128*4]   /*  2 */
+    mla r12, r5, r11, r12
+    ldr r11, [r1, #224*4]   /*  3 */
+    mla r12, r6, r11, r12
+    ldr r11, [r1, #256*4]   /*  4 */
+    mla r12, r7, r11, r12
+    ldr r11, [r1, #352*4]   /*  5 */
+    mla r12, r8, r11, r12
+    ldr r11, [r1, #384*4]   /*  6 */
+    mla r12, r9, r11, r12
+    ldr r11, [r1, #480*4]   /*  7 */
+    mla r12, r10, r11, r12
+    ldmia r2!, { r3-r10 }   /* load last 8 window coefficients */
+    ldr r11, [r1, #512*4]   /*  8 */
+    mla r12, r3, r11, r12
+    ldr r11, [r1, #608*4]   /*  9 */
+    mla r12, r4, r11, r12
+    ldr r11, [r1, #640*4]   /* 10 */
+    mla r12, r5, r11, r12
+    ldr r11, [r1, #736*4]   /* 11 */
+    mla r12, r6, r11, r12
+    ldr r11, [r1, #768*4]   /* 12 */
+    mla r12, r7, r11, r12
+    ldr r11, [r1, #864*4]   /* 13 */
+    mla r12, r8, r11, r12
+    ldr r11, [r1, #896*4]   /* 14 */
+    mla r12, r9, r11, r12
+    ldr r11, [r1, #992*4]   /* 15 */
+    mla r12, r10, r11, r12
+    str r12, [r0], #4       /* store Data, Data++ */
+    add r1, r1, #4          /* V++ */
+
+    subs lr, lr, #1         /* one output sample done */
+    bgt .Lloop32
+
+    ldmfd   sp!, {r4-r12, pc}
+.Lmpc_dewindowing_end:
+    .size   mpc_decoder_windowing_D,.Lmpc_dewindowing_end-mpc_decoder_windowing_D
+#else
+/****************************************************************************
+ * void mpc_decoder_windowing_D(Data, V, D)
+ *
+ * 2nd step within synthesis filter. Does the dewindowing.
+ * 64=32x32 multiplies
+ * Drops the lo-part of the 64bit multiply results and will therefore lose
+ * 1 bit accuracy. The decoder output is binary identical as this imprecision
+ * is far below the output's 16bit resolution.
+ ****************************************************************************/
+    .align  2
+    .global mpc_decoder_windowing_D
+    .type   mpc_decoder_windowing_D, %function
+mpc_decoder_windowing_D:
+    /* r0 = Data[], output pointer, post-incremented by each store */
+    /* r1 = V[], advanced by one word per output sample */
+    /* r2 = D[], 16 window coefficients consumed per output sample */
+    /* lr = counter; r3-r10 = coefficients, r11 = V value and lo word, r12 = hi */
+
+    stmfd   sp!, {r4-r12, lr}
+
+    mov lr, #32
+.Lloop32:
+    ldmia r2!, { r3-r10 }   /* load first 8 window coefficients */
+    ldr r11, [r1]           /*  0 */
+    smull r11, r12, r3, r11 /* r11 is reloaded below: only r12 (hi) carries over */
+    ldr r11, [r1, #96*4]    /*  1 */
+    smlal r11, r12, r4, r11
+    ldr r11, [r1, #128*4]   /*  2 */
+    smlal r11, r12, r5, r11
+    ldr r11, [r1, #224*4]   /*  3 */
+    smlal r11, r12, r6, r11
+    ldr r11, [r1, #256*4]   /*  4 */
+    smlal r11, r12, r7, r11
+    ldr r11, [r1, #352*4]   /*  5 */
+    smlal r11, r12, r8, r11
+    ldr r11, [r1, #384*4]   /*  6 */
+    smlal r11, r12, r9, r11
+    ldr r11, [r1, #480*4]   /*  7 */
+    smlal r11, r12, r10, r11
+    ldmia r2!, { r3-r10 }   /* load last 8 window coefficients */
+    ldr r11, [r1, #512*4]   /*  8 */
+    smlal r11, r12, r3, r11
+    ldr r11, [r1, #608*4]   /*  9 */
+    smlal r11, r12, r4, r11
+    ldr r11, [r1, #640*4]   /* 10 */
+    smlal r11, r12, r5, r11
+    ldr r11, [r1, #736*4]   /* 11 */
+    smlal r11, r12, r6, r11
+    ldr r11, [r1, #768*4]   /* 12 */
+    smlal r11, r12, r7, r11
+    ldr r11, [r1, #864*4]   /* 13 */
+    smlal r11, r12, r8, r11
+    ldr r11, [r1, #896*4]   /* 14 */
+    smlal r11, r12, r9, r11
+    ldr r11, [r1, #992*4]   /* 15 */
+    smlal r11, r12, r10, r11
+    mov r4, r12, lsl #1     /* get result from hi-part (r4 is free until next ldmia) */
+    str r4, [r0], #4        /* store Data, Data++ */
+    add r1, r1, #4          /* V++ */
+
+    subs lr, lr, #1         /* one output sample done */
+    bgt .Lloop32
+
+    ldmfd   sp!, {r4-r12, pc}
+.Lmpc_dewindowing_end:
+    .size   mpc_decoder_windowing_D,.Lmpc_dewindowing_end-mpc_decoder_windowing_D
+#endif
author	Andree Buschmann <AndreeBuschmann@t-online.de>	2008-05-17 21:26:34 +0000
committer	Andree Buschmann <AndreeBuschmann@t-online.de>	2008-05-17 21:26:34 +0000
commit	18f13b149a4ce6d3b16c0b91de4d571d1860b66f (patch)
tree	313f09dcd46b1e068d0abd0fb49de615d27495d5 /apps/codecs
parent	c769cf586fee44c55382d2ef98ce9dbca51b6f39 (diff)
download	rockbox-18f13b149a4ce6d3b16c0b91de4d571d1860b66f.zip rockbox-18f13b149a4ce6d3b16c0b91de4d571d1860b66f.tar.gz rockbox-18f13b149a4ce6d3b16c0b91de4d571d1860b66f.tar.bz2 rockbox-18f13b149a4ce6d3b16c0b91de4d571d1860b66f.tar.xz