diff options
| author | Thom Johansen <thomj@rockbox.org> | 2005-03-03 12:17:45 +0000 |
|---|---|---|
| committer | Thom Johansen <thomj@rockbox.org> | 2005-03-03 12:17:45 +0000 |
| commit | 340d8245429db340d4e0dafb8633c4ac2bab2061 (patch) | |
| tree | 8a623d45c57609513039a0ce2e004da71ea8ffba /apps/codecs/libFLAC/coldfire.S | |
| parent | d061b371d6f53e0c2f310f4eb5776430772a8ac5 (diff) | |
| download | rockbox-340d8245429db340d4e0dafb8633c4ac2bab2061.zip rockbox-340d8245429db340d4e0dafb8633c4ac2bab2061.tar.gz rockbox-340d8245429db340d4e0dafb8633c4ac2bab2061.tar.bz2 rockbox-340d8245429db340d4e0dafb8633c4ac2bab2061.tar.xz | |
Added new coldfire assembly LPC decoder routine to libFLAC.
Added clear accumulator policy.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@6108 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/codecs/libFLAC/coldfire.S')
| -rw-r--r-- | apps/codecs/libFLAC/coldfire.S | 245 |
1 files changed, 245 insertions, 0 deletions
/***************************************************************************
 *             __________               __   ___.
 *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
 *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
 *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
 *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
 *                     \/            \/     \/    \/            \/
 * $Id$
 *
 * Copyright (C) 2005 by Thom Johansen
 *
 * All files in this archive are subject to the GNU General Public License.
 * See the file COPYING in the source tree root for full license agreement.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 ****************************************************************************/

/* Assembler optimized FLAC__lpc_restore_signal for the MCF5249, or any other
   similar ColdFire V2 core with the EMAC unit.

   Stack arguments, in order (each a 32-bit word):
       residual, data_len, qlp_coef, order, lp_quantization, data

   For each of the data_len samples the routine stores

       data[i] = residual[i] + (sum >> lp_quantization)

   where sum = qlp_coef[0]*data[i-1] + ... + qlp_coef[order-1]*data[i-order]
   and the shift is arithmetic. The 'order' words in front of 'data' must
   therefore already hold valid history.

   Register roles once the arguments are loaded:
       a0      residual pointer, post-incremented once per sample
       a1      qlp_coef base
       a2      history pointer (data - order), advanced once per sample
       d0      samples left
       d1      lp_quantization (shift count)
       d2      order; reused as the sum in the unrolled loops
       a6      history sample currently feeding the multiplier

   Things a caller should know:
     - d2-d7/a2-a6 are saved on entry and restored on exit.
     - MACSR is written with zero (integer mode) and is not restored.
     - acc0 is never cleared before the first mac.l; every sample ends with
       movclr.l, so the accumulator is left at zero. Whatever acc0 holds on
       entry is added to the first sample of the orders 2..8 and generic
       paths, i.e. the routine relies on acc0 being zero when called.
     - order == 0 dispatches straight to the exit and writes nothing.
*/
    .section .icode,"ax",@progbits
    .global FLAC__lpc_restore_signal_mcf5249
    .align 2
FLAC__lpc_restore_signal_mcf5249:
    lea.l       (-44, %sp), %sp             /* room for 11 saved registers */
    movem.l     %d2-%d7/%a2-%a6, (%sp)
    /* arguments sit above the 44 byte save area and the return address */
    move.l      (48, %sp), %a0              /* residual */
    move.l      (52, %sp), %d0              /* data_len */
    move.l      (56, %sp), %a1              /* qlp_coef */
    move.l      (60, %sp), %d2              /* order */
    move.l      (64, %sp), %d1              /* lp_quantization */
    move.l      (68, %sp), %a2              /* data */
    /* The output position is always exactly 'order' samples ahead of the
       history pointer. Every order has its own loop, so that distance can be
       hard coded as a store offset and no register is spent on a separate
       data pointer.
    */
    move.l      %d2, %d3
    neg.l       %d3
    lea.l       (%a2, %d3.l*4), %a2         /* history = data - order */
    clr.l       %d3
    move.l      %d3, %macsr                 /* integer mode for the EMAC */
    tst.l       %d0
    jeq         .Ldone                      /* nothing to do for zero samples */
    moveq.l     #8, %d3
    cmp.l       %d3, %d2                    /* coldfire v2 only has long cmp */
    /* NOTE(review): jgt is a signed test; an order with the top bit set
       would index the table below out of range -- confirm callers never
       pass such a value.
    */
    jgt         .Lgeneric                   /* order > 8: no unrolled loop */
    lea.l       .Lorder_table, %a4
    move.l      (%a4, %d2.l*4), %a4
    jmp         (%a4)
    .align 4                                /* avoid unaligned fetch */
.Lorder_table:
    .long       .Ldone                      /* order 0 */
    .long       .Lsetup1
    .long       .Lsetup2
    .long       .Lsetup3
    .long       .Lsetup4
    .long       .Lsetup5
    .long       .Lsetup6
    .long       .Lsetup7
    .long       .Lsetup8

/* Unrolled loops for order 2..8 all share one pattern: the coefficients are
   loaded once into d3 upwards (qlp_coef[0] in d3), the oldest history sample
   is multiplied by the highest coefficient first, and every mac.l fetches
   the sample needed by the following multiply. The last mac.l of a sample
   fetches history[1], which is history[0] of the next sample.
*/
.Lsetup8:
    movem.l     (%a1), %d3-%d7/%a3-%a5      /* qlp_coef[0..7] */
    movea.l     (%a2), %a6                  /* oldest history sample */
.Lsample8:
    mac.l       %a6, %a5, (4, %a2), %a6, %acc0
    mac.l       %a6, %a4, (8, %a2), %a6, %acc0
    mac.l       %a6, %a3, (12, %a2), %a6, %acc0
    mac.l       %a6, %d7, (16, %a2), %a6, %acc0
    mac.l       %a6, %d6, (20, %a2), %a6, %acc0
    mac.l       %a6, %d5, (24, %a2), %a6, %acc0
    mac.l       %a6, %d4, (28, %a2), %a6, %acc0
    mac.l       %a6, %d3, (4, %a2), %a6, %acc0  /* fetch for next sample */
    addq.l      #4, %a2                     /* slide the history window */
    movclr.l    %acc0, %d2                  /* read the sum, zero acc0 */
    asr.l       %d1, %d2                    /* sum >> lp_quantization */
    add.l       (%a0)+, %d2                 /* + residual, advance residual */
    move.l      %d2, (7*4, %a2)             /* write data[i] */
    subq.l      #1, %d0
    jne         .Lsample8                   /* more samples left? */
    jra         .Ldone

.Lsetup7:
    movem.l     (%a1), %d3-%d7/%a3-%a4      /* qlp_coef[0..6] */
    movea.l     (%a2), %a6
.Lsample7:
    mac.l       %a6, %a4, (4, %a2), %a6, %acc0
    mac.l       %a6, %a3, (8, %a2), %a6, %acc0
    mac.l       %a6, %d7, (12, %a2), %a6, %acc0
    mac.l       %a6, %d6, (16, %a2), %a6, %acc0
    mac.l       %a6, %d5, (20, %a2), %a6, %acc0
    mac.l       %a6, %d4, (24, %a2), %a6, %acc0
    mac.l       %a6, %d3, (4, %a2), %a6, %acc0
    addq.l      #4, %a2
    movclr.l    %acc0, %d2
    asr.l       %d1, %d2
    add.l       (%a0)+, %d2
    move.l      %d2, (6*4, %a2)
    subq.l      #1, %d0
    jne         .Lsample7
    jra         .Ldone

.Lsetup6:
    movem.l     (%a1), %d3-%d7/%a3          /* qlp_coef[0..5] */
    movea.l     (%a2), %a6
.Lsample6:
    mac.l       %a6, %a3, (4, %a2), %a6, %acc0
    mac.l       %a6, %d7, (8, %a2), %a6, %acc0
    mac.l       %a6, %d6, (12, %a2), %a6, %acc0
    mac.l       %a6, %d5, (16, %a2), %a6, %acc0
    mac.l       %a6, %d4, (20, %a2), %a6, %acc0
    mac.l       %a6, %d3, (4, %a2), %a6, %acc0
    addq.l      #4, %a2
    movclr.l    %acc0, %d2
    asr.l       %d1, %d2
    add.l       (%a0)+, %d2
    move.l      %d2, (5*4, %a2)
    subq.l      #1, %d0
    jne         .Lsample6
    jra         .Ldone

.Lsetup5:
    movem.l     (%a1), %d3-%d7              /* qlp_coef[0..4] */
    movea.l     (%a2), %a6
.Lsample5:
    mac.l       %a6, %d7, (4, %a2), %a6, %acc0
    mac.l       %a6, %d6, (8, %a2), %a6, %acc0
    mac.l       %a6, %d5, (12, %a2), %a6, %acc0
    mac.l       %a6, %d4, (16, %a2), %a6, %acc0
    mac.l       %a6, %d3, (4, %a2), %a6, %acc0
    addq.l      #4, %a2
    movclr.l    %acc0, %d2
    asr.l       %d1, %d2
    add.l       (%a0)+, %d2
    move.l      %d2, (4*4, %a2)
    subq.l      #1, %d0
    jne         .Lsample5
    jra         .Ldone

.Lsetup4:
    movem.l     (%a1), %d3-%d6              /* qlp_coef[0..3] */
    movea.l     (%a2), %a6
.Lsample4:
    mac.l       %a6, %d6, (4, %a2), %a6, %acc0
    mac.l       %a6, %d5, (8, %a2), %a6, %acc0
    mac.l       %a6, %d4, (12, %a2), %a6, %acc0
    mac.l       %a6, %d3, (4, %a2), %a6, %acc0
    addq.l      #4, %a2
    movclr.l    %acc0, %d2
    asr.l       %d1, %d2
    add.l       (%a0)+, %d2
    move.l      %d2, (3*4, %a2)
    subq.l      #1, %d0
    jne         .Lsample4
    jra         .Ldone

.Lsetup3:
    movem.l     (%a1), %d3-%d5              /* qlp_coef[0..2] */
    movea.l     (%a2), %a6
.Lsample3:
    mac.l       %a6, %d5, (4, %a2), %a6, %acc0
    mac.l       %a6, %d4, (8, %a2), %a6, %acc0
    mac.l       %a6, %d3, (4, %a2), %a6, %acc0
    addq.l      #4, %a2
    movclr.l    %acc0, %d2
    asr.l       %d1, %d2
    add.l       (%a0)+, %d2
    move.l      %d2, (2*4, %a2)
    subq.l      #1, %d0
    jne         .Lsample3
    jra         .Ldone

.Lsetup2:
    movem.l     (%a1), %d3-%d4              /* qlp_coef[0..1] */
    movea.l     (%a2), %a6
.Lsample2:
    mac.l       %a6, %d4, (4, %a2), %a6, %acc0
    /* a6 now holds history[1], which is also the first operand of the next
       sample, so this multiply needs no parallel load */
    mac.l       %a6, %d3, %acc0
    addq.l      #4, %a2
    movclr.l    %acc0, %d2
    asr.l       %d1, %d2
    add.l       (%a0)+, %d2
    move.l      %d2, (1*4, %a2)
    subq.l      #1, %d0
    jne         .Lsample2
    jra         .Ldone

.Lsetup1:
    /* a single product per sample: plain muls.l, the EMAC is not used */
    move.l      (%a1), %d3                  /* qlp_coef[0] */
.Lsample1:
    move.l      %d3, %d2
    muls.l      (%a2)+, %d2                 /* coef * data[i-1]; a2 -> data[i] */
    asr.l       %d1, %d2
    add.l       (%a0)+, %d2
    move.l      %d2, (%a2)                  /* write data[i] */
    subq.l      #1, %d0
    jne         .Lsample1
    jra         .Ldone

.Lgeneric:
    /* Any order above 8. Executed once per output sample: the coefficients
       are consumed from the end of the array backwards, four at a time for
       as long as possible, and then one by one for the remaining order & 3.
    */
    lea.l       (%a1, %d2.l*4), %a3         /* one past the last coef */
    movea.l     %a2, %a4                    /* working copy of history pointer */
    move.l      %d2, %d3
    lsr.l       #2, %d3                     /* order / 4 passes of the quad loop */
    movea.l     (%a4)+, %a6                 /* preload oldest history sample */
.Lgeneric_quad:
    lea.l       (-16, %a3), %a3             /* step back four coefs */
    movem.l     (%a3), %d4-%d7              /* load them */
    mac.l       %a6, %d7, (%a4)+, %a6, %acc0
    mac.l       %a6, %d6, (%a4)+, %a6, %acc0
    mac.l       %a6, %d5, (%a4)+, %a6, %acc0
    mac.l       %a6, %d4, (%a4)+, %a6, %acc0
    subq.l      #1, %d3                     /* another group of four left? */
    jne         .Lgeneric_quad

    move.l      %d2, %d3
    moveq.l     #3, %d4                     /* mask 0x00000003 */
    and.l       %d4, %d3                    /* taps not covered by the quad loop */
    jeq         .Lgeneric_store             /* order was a multiple of four */
.Lgeneric_tail:
    move.l      -(%a3), %d4                 /* next coef, walking backwards */
    mac.l       %a6, %d4, (%a4)+, %a6, %acc0
    subq.l      #1, %d3                     /* any taps left? */
    jne         .Lgeneric_tail
.Lgeneric_store:
    movclr.l    %acc0, %d3                  /* read the sum, zero acc0 */
    asr.l       %d1, %d3                    /* sum >> lp_quantization */
    add.l       (%a0)+, %d3                 /* + residual */
    /* the final parallel load moved a4 one word beyond data[i] */
    move.l      %d3, (-4, %a4)              /* write data[i] */
    addq.l      #4, %a2                     /* slide the history window */
    subq.l      #1, %d0                     /* one sample fewer to go */
    jne         .Lgeneric                   /* more samples left? */
    /* otherwise fall through to the exit */

.Ldone:
    movem.l     (%sp), %d2-%d7/%a2-%a6
    lea.l       (44, %sp), %sp
    rts