summaryrefslogtreecommitdiff
path: root/apps/codecs/libFLAC/coldfire.S
diff options
context:
space:
mode:
authorThom Johansen <thomj@rockbox.org>2005-03-03 12:17:45 +0000
committerThom Johansen <thomj@rockbox.org>2005-03-03 12:17:45 +0000
commit340d8245429db340d4e0dafb8633c4ac2bab2061 (patch)
tree8a623d45c57609513039a0ce2e004da71ea8ffba /apps/codecs/libFLAC/coldfire.S
parentd061b371d6f53e0c2f310f4eb5776430772a8ac5 (diff)
downloadrockbox-340d8245429db340d4e0dafb8633c4ac2bab2061.zip
rockbox-340d8245429db340d4e0dafb8633c4ac2bab2061.tar.gz
rockbox-340d8245429db340d4e0dafb8633c4ac2bab2061.tar.bz2
rockbox-340d8245429db340d4e0dafb8633c4ac2bab2061.tar.xz
Added new coldfire assembly LPC decoder routine to libFLAC.
Added clear accumulator policy. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@6108 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/codecs/libFLAC/coldfire.S')
-rw-r--r--apps/codecs/libFLAC/coldfire.S245
1 files changed, 245 insertions, 0 deletions
diff --git a/apps/codecs/libFLAC/coldfire.S b/apps/codecs/libFLAC/coldfire.S
new file mode 100644
index 0000000..b36f00e
--- /dev/null
+++ b/apps/codecs/libFLAC/coldfire.S
@@ -0,0 +1,245 @@
+/***************************************************************************
+ * __________ __ ___.
+ * Open \______ \ ____ ____ | | _\_ |__ _______ ___
+ * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
+ * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
+ * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
+ * \/ \/ \/ \/ \/
+ * $Id$
+ *
+ * Copyright (C) 2005 by Thom Johansen
+ *
+ * All files in this archive are subject to the GNU General Public License.
+ * See the file COPYING in the source tree root for full license agreement.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+/* The following is a first attempt at an assembler optimized version of
+ FLAC__lpc_restore_signal programmed for MCF5249 or any other similar
+ ColdFire V2 core with the EMAC unit.
+*/
+ .section .icode,"ax",@progbits
+ .global FLAC__lpc_restore_signal_mcf5249
+ .align 2
+FLAC__lpc_restore_signal_mcf5249: /* stack args in order: residual, data_len, qlp_coef, order, lp_quantization, data */
+ lea.l (-44, %sp), %sp /* reserve 44 bytes: 11 registers * 4 bytes */
+ movem.l %d2-%d7/%a2-%a6, (%sp) /* save d2-d7/a2-a6, restored again at .Lexit */
+ move.l (44+4, %sp), %a0 /* residual */
+ move.l (44+8, %sp), %d0 /* data_len */
+ move.l (44+12, %sp), %a1 /* qlp_coef */
+ move.l (44+16, %sp), %d2 /* order */
+ move.l (44+20, %sp), %d1 /* lp_quantization */
+ move.l (44+24, %sp), %a2 /* data */
+ /* the history pointer always trails the data pointer by 'order' samples.
+ since we have one loop for each order, we can hard code this offset and
+ free a register by not keeping a separate data pointer.
+ */
+ move.l %d2, %d3
+ neg.l %d3 /* d3 = -order */
+ lea.l (%a2, %d3.l*4), %a2 /* history = data - order*4 bytes */
+ clr.l %d3
+ move.l %d3, %macsr /* MACSR = 0: we'll need integer mode for this */
+ tst.l %d0
+ jeq .Lexit /* zero samples to process */
+ movq.l #8, %d3
+ cmp.l %d3, %d2 /* coldfire v2 only has long cmp version */
+ jgt .Ldefault /* order is over 8, jump to default case. NOTE(review): signed compare, a negative order would reach the table lookup below - confirm callers never pass one */
+ lea.l .Ljumptable, %a4
+ move.l (%a4, %d2.l*4), %a4 /* fetch the handler address for this order */
+ jmp (%a4)
+ .align 4 /* avoid unaligned fetch */
+.Ljumptable: /* one entry per order, 0 through 8 */
+ .long .Lexit /* order 0 goes straight to the exit */
+ .long .Lorder1
+ .long .Lorder2
+ .long .Lorder3
+ .long .Lorder4
+ .long .Lorder5
+ .long .Lorder6
+ .long .Lorder7
+ .long .Lorder8
+
+.Lorder8: /* order 8: coefs stay in registers, each pass of .Lloop8 produces one sample */
+ movem.l (%a1), %d3-%d7/%a3-%a5 /* load lpc coefs: d3 = coef[0] ... d7 = coef[4], a3 = coef[5] ... a5 = coef[7] */
+ movea.l (%a2), %a6 /* load first history sample */
+.Lloop8:
+ mac.l %a6, %a5, (1*4, %a2), %a6, %acc0 /* acc0 += history[0]*coef[7], fetch history[1] */
+ mac.l %a6, %a4, (2*4, %a2), %a6, %acc0 /* acc0 += history[1]*coef[6], fetch history[2] */
+ mac.l %a6, %a3, (3*4, %a2), %a6, %acc0 /* acc0 += history[2]*coef[5], fetch history[3] */
+ mac.l %a6, %d7, (4*4, %a2), %a6, %acc0 /* acc0 += history[3]*coef[4], fetch history[4] */
+ mac.l %a6, %d6, (5*4, %a2), %a6, %acc0 /* acc0 += history[4]*coef[3], fetch history[5] */
+ mac.l %a6, %d5, (6*4, %a2), %a6, %acc0 /* acc0 += history[5]*coef[2], fetch history[6] */
+ mac.l %a6, %d4, (7*4, %a2), %a6, %acc0 /* acc0 += history[6]*coef[1], fetch history[7] */
+ mac.l %a6, %d3, (1*4, %a2), %a6, %acc0 /* acc0 += history[7]*coef[0]; load for the next iteration */
+ addq.l #4, %a2 /* increment history pointer */
+ movclr.l %acc0, %d2 /* get sum and clear acc0 for the next sample */
+ asr.l %d1, %d2 /* shift sum by lp_quantization bits */
+ add.l (%a0)+, %d2 /* add residual and increment residual pointer */
+ move.l %d2, (28, %a2) /* save result to data: a2 is already advanced, so 28 = (8-1)*4 */
+ subq.l #1, %d0 /* decrement counter */
+ jne .Lloop8 /* are we done? */
+ jra .Lexit
+
+.Lorder7: /* order 7: coefs stay in registers, each pass of .Lloop7 produces one sample */
+ movem.l (%a1), %d3-%d7/%a3-%a4 /* load lpc coefs: d3 = coef[0] ... d7 = coef[4], a3 = coef[5], a4 = coef[6] */
+ movea.l (%a2), %a6 /* load first history sample */
+.Lloop7:
+ mac.l %a6, %a4, (1*4, %a2), %a6, %acc0 /* acc0 += history[0]*coef[6], fetch history[1] */
+ mac.l %a6, %a3, (2*4, %a2), %a6, %acc0 /* acc0 += history[1]*coef[5], fetch history[2] */
+ mac.l %a6, %d7, (3*4, %a2), %a6, %acc0 /* acc0 += history[2]*coef[4], fetch history[3] */
+ mac.l %a6, %d6, (4*4, %a2), %a6, %acc0 /* acc0 += history[3]*coef[3], fetch history[4] */
+ mac.l %a6, %d5, (5*4, %a2), %a6, %acc0 /* acc0 += history[4]*coef[2], fetch history[5] */
+ mac.l %a6, %d4, (6*4, %a2), %a6, %acc0 /* acc0 += history[5]*coef[1], fetch history[6] */
+ mac.l %a6, %d3, (1*4, %a2), %a6, %acc0 /* acc0 += history[6]*coef[0]; load for the next iteration */
+ addq.l #4, %a2 /* increment history pointer */
+ movclr.l %acc0, %d2 /* get sum and clear acc0 for the next sample */
+ asr.l %d1, %d2 /* shift sum by lp_quantization bits */
+ add.l (%a0)+, %d2 /* add residual and increment residual pointer */
+ move.l %d2, (24, %a2) /* save result to data: a2 is already advanced, so 24 = (7-1)*4 */
+ subq.l #1, %d0 /* decrement counter */
+ jne .Lloop7 /* loop until all samples are done */
+ jra .Lexit
+
+.Lorder6: /* order 6: coefs stay in registers, each pass of .Lloop6 produces one sample */
+ movem.l (%a1), %d3-%d7/%a3 /* load lpc coefs: d3 = coef[0] ... d7 = coef[4], a3 = coef[5] */
+ movea.l (%a2), %a6 /* load first history sample */
+.Lloop6:
+ mac.l %a6, %a3, (1*4, %a2), %a6, %acc0 /* acc0 += history[0]*coef[5], fetch history[1] */
+ mac.l %a6, %d7, (2*4, %a2), %a6, %acc0 /* acc0 += history[1]*coef[4], fetch history[2] */
+ mac.l %a6, %d6, (3*4, %a2), %a6, %acc0 /* acc0 += history[2]*coef[3], fetch history[3] */
+ mac.l %a6, %d5, (4*4, %a2), %a6, %acc0 /* acc0 += history[3]*coef[2], fetch history[4] */
+ mac.l %a6, %d4, (5*4, %a2), %a6, %acc0 /* acc0 += history[4]*coef[1], fetch history[5] */
+ mac.l %a6, %d3, (1*4, %a2), %a6, %acc0 /* acc0 += history[5]*coef[0]; load for the next iteration */
+ addq.l #4, %a2 /* increment history pointer */
+ movclr.l %acc0, %d2 /* get sum and clear acc0 for the next sample */
+ asr.l %d1, %d2 /* shift sum by lp_quantization bits */
+ add.l (%a0)+, %d2 /* add residual and increment residual pointer */
+ move.l %d2, (20, %a2) /* save result to data: a2 is already advanced, so 20 = (6-1)*4 */
+ subq.l #1, %d0 /* decrement counter */
+ jne .Lloop6 /* loop until all samples are done */
+ jra .Lexit
+
+.Lorder5: /* order 5: coefs stay in registers, each pass of .Lloop5 produces one sample */
+ movem.l (%a1), %d3-%d7 /* load lpc coefs: d3 = coef[0] ... d7 = coef[4] */
+ movea.l (%a2), %a6 /* load first history sample */
+.Lloop5:
+ mac.l %a6, %d7, (1*4, %a2), %a6, %acc0 /* acc0 += history[0]*coef[4], fetch history[1] */
+ mac.l %a6, %d6, (2*4, %a2), %a6, %acc0 /* acc0 += history[1]*coef[3], fetch history[2] */
+ mac.l %a6, %d5, (3*4, %a2), %a6, %acc0 /* acc0 += history[2]*coef[2], fetch history[3] */
+ mac.l %a6, %d4, (4*4, %a2), %a6, %acc0 /* acc0 += history[3]*coef[1], fetch history[4] */
+ mac.l %a6, %d3, (1*4, %a2), %a6, %acc0 /* acc0 += history[4]*coef[0]; load for the next iteration */
+ addq.l #4, %a2 /* increment history pointer */
+ movclr.l %acc0, %d2 /* get sum and clear acc0 for the next sample */
+ asr.l %d1, %d2 /* shift sum by lp_quantization bits */
+ add.l (%a0)+, %d2 /* add residual and increment residual pointer */
+ move.l %d2, (16, %a2) /* save result to data: a2 is already advanced, so 16 = (5-1)*4 */
+ subq.l #1, %d0 /* decrement counter */
+ jne .Lloop5 /* loop until all samples are done */
+ jra .Lexit
+
+.Lorder4: /* order 4: coefs stay in registers, each pass of .Lloop4 produces one sample */
+ movem.l (%a1), %d3-%d6 /* load lpc coefs: d3 = coef[0] ... d6 = coef[3] */
+ movea.l (%a2), %a6 /* load first history sample */
+.Lloop4:
+ mac.l %a6, %d6, (1*4, %a2), %a6, %acc0 /* acc0 += history[0]*coef[3], fetch history[1] */
+ mac.l %a6, %d5, (2*4, %a2), %a6, %acc0 /* acc0 += history[1]*coef[2], fetch history[2] */
+ mac.l %a6, %d4, (3*4, %a2), %a6, %acc0 /* acc0 += history[2]*coef[1], fetch history[3] */
+ mac.l %a6, %d3, (1*4, %a2), %a6, %acc0 /* acc0 += history[3]*coef[0]; load for the next iteration */
+ addq.l #4, %a2 /* increment history pointer */
+ movclr.l %acc0, %d2 /* get sum and clear acc0 for the next sample */
+ asr.l %d1, %d2 /* shift sum by lp_quantization bits */
+ add.l (%a0)+, %d2 /* add residual and increment residual pointer */
+ move.l %d2, (12, %a2) /* save result to data: a2 is already advanced, so 12 = (4-1)*4 */
+ subq.l #1, %d0 /* decrement counter */
+ jne .Lloop4 /* loop until all samples are done */
+ jra .Lexit
+
+.Lorder3: /* order 3: coefs stay in registers, each pass of .Lloop3 produces one sample */
+ movem.l (%a1), %d3-%d5 /* load lpc coefs: d3 = coef[0], d4 = coef[1], d5 = coef[2] */
+ movea.l (%a2), %a6 /* load first history sample */
+.Lloop3:
+ mac.l %a6, %d5, (1*4, %a2), %a6, %acc0 /* acc0 += history[0]*coef[2], fetch history[1] */
+ mac.l %a6, %d4, (2*4, %a2), %a6, %acc0 /* acc0 += history[1]*coef[1], fetch history[2] */
+ mac.l %a6, %d3, (1*4, %a2), %a6, %acc0 /* acc0 += history[2]*coef[0]; load for the next iteration */
+ addq.l #4, %a2 /* increment history pointer */
+ movclr.l %acc0, %d2 /* get sum and clear acc0 for the next sample */
+ asr.l %d1, %d2 /* shift sum by lp_quantization bits */
+ add.l (%a0)+, %d2 /* add residual and increment residual pointer */
+ move.l %d2, (8, %a2) /* save result to data: a2 is already advanced, so 8 = (3-1)*4 */
+ subq.l #1, %d0 /* decrement counter */
+ jne .Lloop3 /* loop until all samples are done */
+ jra .Lexit
+
+.Lorder2: /* order 2: coefs stay in registers, each pass of .Lloop2 produces one sample */
+ movem.l (%a1), %d3-%d4 /* load lpc coefs: d3 = coef[0], d4 = coef[1] */
+ movea.l (%a2), %a6 /* load first history sample */
+.Lloop2:
+ mac.l %a6, %d4, (1*4, %a2), %a6, %acc0 /* acc0 += history[0]*coef[1], fetch history[1] */
+ mac.l %a6, %d3, %acc0 /* acc0 += history[1]*coef[0]; data for next iteration is already loaded (a6 keeps history[1]) */
+ addq.l #4, %a2 /* increment history pointer */
+ movclr.l %acc0, %d2 /* get sum and clear acc0 for the next sample */
+ asr.l %d1, %d2 /* shift sum by lp_quantization bits */
+ add.l (%a0)+, %d2 /* add residual and increment residual pointer */
+ move.l %d2, (4, %a2) /* save result to data: a2 is already advanced, so 4 = (2-1)*4 */
+ subq.l #1, %d0 /* decrement counter */
+ jne .Lloop2 /* loop until all samples are done */
+ jra .Lexit
+
+.Lorder1: /* order 1: a single multiply per sample */
+ /* no point in using mac here */
+ move.l (%a1), %d3 /* d3 = coef[0], kept for the whole loop */
+.Lloop1:
+ move.l %d3, %d2 /* fresh copy of the coef, since muls overwrites its destination */
+ muls.l (%a2)+, %d2 /* d2 = coef[0] * history sample; a2 now points at the data slot */
+ asr.l %d1, %d2 /* shift product by lp_quantization bits */
+ add.l (%a0)+, %d2 /* add residual and increment residual pointer */
+ move.l %d2, (%a2) /* save result to data; it is the history sample of the next pass */
+ subq.l #1, %d0 /* decrement counter */
+ jne .Lloop1 /* loop until all samples are done */
+ jra .Lexit
+
+.Ldefault: /* order > 8; also the top of the per-sample loop (see jne at the bottom) */
+ /* we do the filtering in an unrolled by 4 loop as far as we can, and then
+ do the rest in an ordinary loop handling one coef at a time.
+ */
+ lea.l (%a1, %d2.l*4), %a3 /* need to start in the other end of coefs */
+ movea.l %a2, %a4 /* working copy of history pointer */
+ move.l %d2, %d3
+ lsr.l #2, %d3 /* coefs/4, number of iterations needed in next loop */
+ movea.l (%a4)+, %a6 /* preload first history sample for loop */
+.Ldloop1:
+ lea.l (-16, %a3), %a3 /* move lpc coef pointer four samples backwards */
+ movem.l (%a3), %d4-%d7 /* load four coefs, d7 holds the one at the highest address */
+ mac.l %a6, %d7, (%a4)+, %a6, %acc0 /* acc0 += sample*coef, fetch next history sample */
+ mac.l %a6, %d6, (%a4)+, %a6, %acc0
+ mac.l %a6, %d5, (%a4)+, %a6, %acc0
+ mac.l %a6, %d4, (%a4)+, %a6, %acc0
+ subq.l #1, %d3 /* any more unrolled loop operations left? */
+ jne .Ldloop1
+
+ move.l %d2, %d3
+ movq.l #3, %d4 /* mask 0x00000003 */
+ and.l %d4, %d3 /* order & 3: get the remaining coefs to be applied */
+ jeq .Ldsave /* nothing remaining */
+.Ldloop2:
+ move.l -(%a3), %d4 /* get lpc coef */
+ mac.l %a6, %d4, (%a4)+, %a6, %acc0 /* acc0 += sample*coef, fetch next history sample */
+ subq.l #1, %d3 /* any more iterations left? */
+ jne .Ldloop2
+.Ldsave:
+ movclr.l %acc0, %d3 /* get result and clear acc0 for the next sample */
+ asr.l %d1, %d3 /* shift lp_quantization bits right */
+ add.l (%a0)+, %d3 /* add residual */
+ move.l %d3, (-4, %a4) /* a4 was post-incremented order+1 times, so it is one sample past the data slot */
+ addq.l #4, %a2 /* increment history pointer */
+ subq.l #1, %d0 /* decrement data_len */
+ jne .Ldefault /* are we done? */
+ /* if so, fall through to exit */
+
+.Lexit: /* common exit for every path */
+ movem.l (%sp), %d2-%d7/%a2-%a6 /* restore the registers saved on entry */
+ lea.l (44, %sp), %sp /* release the 44 byte save area */
+ rts