summaryrefslogtreecommitdiff
path: root/apps/codecs/libffmpegFLAC/coldfire.S
diff options
context:
space:
mode:
Diffstat (limited to 'apps/codecs/libffmpegFLAC/coldfire.S')
-rw-r--r--apps/codecs/libffmpegFLAC/coldfire.S535
1 files changed, 0 insertions, 535 deletions
diff --git a/apps/codecs/libffmpegFLAC/coldfire.S b/apps/codecs/libffmpegFLAC/coldfire.S
deleted file mode 100644
index efbb907..0000000
--- a/apps/codecs/libffmpegFLAC/coldfire.S
+++ /dev/null
@@ -1,535 +0,0 @@
-/***************************************************************************
- * __________ __ ___.
- * Open \______ \ ____ ____ | | _\_ |__ _______ ___
- * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
- * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
- * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
- * \/ \/ \/ \/ \/
- * $Id$
- *
- * Copyright (C) 2005 by Thom Johansen
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
- * KIND, either express or implied.
- *
- ****************************************************************************/
-
-/* The following are assembler optimised version of the LPC filtering
- routines needed for FLAC decoding. They is optimised for use with the
- MCF5249 processor, or any other similar ColdFire core with the EMAC unit.
- */
-
-/* This routine deals with sample widths 16 and lower. All LPC filtering up to
- order 10 is done in specially optimised unrolled loops, while every order
- above this is handled by a slower default routine.
- */
- .section .icode,"ax",@progbits
- .global lpc_decode_emac
- .align 2
-lpc_decode_emac:
- lea.l (-44, %sp), %sp
- movem.l %d2-%d7/%a2-%a6, (%sp)
- movem.l (44+4, %sp), %d0-%d2/%a0-%a1
- /* d0 = blocksize, d1 = qlevel, d2 = pred_order
- a0 = data, a1 = coeffs
- */
-
- /* the data pointer always lags behind history pointer by 'pred_order'
- samples. since we have one loop for each order, we can hard code this
- and free a register by not saving data pointer.
- */
- move.l %d2, %d3
- neg.l %d3
- lea.l (%a0, %d3.l*4), %a0 | history
- clr.l %d3
- move.l %d3, %macsr | we'll need integer mode for this
- tst.l %d0
- jeq .exit | zero samples to process, exit
- moveq.l #10, %d3
- cmp.l %d3, %d2
- jgt .default | order is over 10, jump to default case
- jmp.l (2, %pc, %d2.l*4) | jump to loop corresponding to pred_order
-| jumptable:
- bra.w .exit | zero order filter isn't possible, exit function
- bra.w .order1
- bra.w .order2
- bra.w .order3
- bra.w .order4
- bra.w .order5
- bra.w .order6
- bra.w .order7
- bra.w .order8
- bra.w .order9
-
-| last jump table entry coincides with target, so leave it out
-.order10:
- movem.l (%a1), %d3-%d7/%a1-%a5 | load lpc coefs
- move.l (%a0)+, %a6 | load first history sample
-1:
- mac.l %a6, %a5, (%a0)+, %a6, %acc0
- mac.l %a6, %a4, (%a0)+, %a6, %acc0
- mac.l %a6, %a3, (%a0)+, %a6, %acc0
- mac.l %a6, %a2, (%a0)+, %a6, %acc0
- mac.l %a6, %a1, (%a0)+, %a6, %acc0
- mac.l %a6, %d7, (%a0)+, %a6, %acc0
- mac.l %a6, %d6, (%a0)+, %a6, %acc0
- mac.l %a6, %d5, (%a0)+, %a6, %acc0
- mac.l %a6, %d4, (%a0)+, %a6, %acc0
- mac.l %a6, %d3, (-9*4, %a0), %a6, %acc0 | load for the next iteration
- movclr.l %acc0, %d2 | get sum
- asr.l %d1, %d2 | shift sum by qlevel bits
- add.l %d2, (%a0) | add residual and save
- lea.l (-8*4, %a0), %a0 | point history back at second element
- subq.l #1, %d0 | decrement sample count
- jne 1b | are we done?
- jra .exit
-
-.order9:
- movem.l (%a1), %d4-%d7/%a1-%a5
- move.l (%a0)+, %a6
-1:
- mac.l %a6, %a5, (%a0)+, %a6, %acc0
- mac.l %a6, %a4, (%a0)+, %a6, %acc0
- mac.l %a6, %a3, (%a0)+, %a6, %acc0
- mac.l %a6, %a2, (%a0)+, %a6, %acc0
- mac.l %a6, %a1, (%a0)+, %a6, %acc0
- mac.l %a6, %d7, (%a0)+, %a6, %acc0
- mac.l %a6, %d6, (%a0)+, %a6, %acc0
- mac.l %a6, %d5, (%a0)+, %a6, %acc0
- mac.l %a6, %d4, (-8*4, %a0), %a6, %acc0
- movclr.l %acc0, %d2
- asr.l %d1, %d2
- add.l %d2, (%a0)
- lea.l (-7*4, %a0), %a0
- subq.l #1, %d0
- jne 1b
- jra .exit
-
-.order8:
- movem.l (%a1), %d5-%d7/%a1-%a5
- move.l (%a0)+, %a6
-1:
- mac.l %a6, %a5, (%a0)+, %a6, %acc0
- mac.l %a6, %a4, (%a0)+, %a6, %acc0
- mac.l %a6, %a3, (%a0)+, %a6, %acc0
- mac.l %a6, %a2, (%a0)+, %a6, %acc0
- mac.l %a6, %a1, (%a0)+, %a6, %acc0
- mac.l %a6, %d7, (%a0)+, %a6, %acc0
- mac.l %a6, %d6, (%a0)+, %a6, %acc0
- mac.l %a6, %d5, (-7*4, %a0), %a6, %acc0
- movclr.l %acc0, %d2
- asr.l %d1, %d2
- add.l %d2, (%a0)
- lea.l (-6*4, %a0), %a0
- subq.l #1, %d0
- jne 1b
- jra .exit
-
-.order7:
- movem.l (%a1), %d6-%d7/%a1-%a5
- move.l (%a0)+, %a6
-1:
- mac.l %a6, %a5, (%a0)+, %a6, %acc0
- mac.l %a6, %a4, (%a0)+, %a6, %acc0
- mac.l %a6, %a3, (%a0)+, %a6, %acc0
- mac.l %a6, %a2, (%a0)+, %a6, %acc0
- mac.l %a6, %a1, (%a0)+, %a6, %acc0
- mac.l %a6, %d7, (%a0)+, %a6, %acc0
- mac.l %a6, %d6, (-6*4, %a0), %a6, %acc0
- movclr.l %acc0, %d2
- asr.l %d1, %d2
- add.l %d2, (%a0)
- lea.l (-5*4, %a0), %a0
- subq.l #1, %d0
- jne 1b
- jra .exit
-
-.order6:
- movem.l (%a1), %d7/%a1-%a5
- move.l (%a0)+, %a6
-1:
- mac.l %a6, %a5, (%a0)+, %a6, %acc0
- mac.l %a6, %a4, (%a0)+, %a6, %acc0
- mac.l %a6, %a3, (%a0)+, %a6, %acc0
- mac.l %a6, %a2, (%a0)+, %a6, %acc0
- mac.l %a6, %a1, (%a0)+, %a6, %acc0
- mac.l %a6, %d7, (-5*4, %a0), %a6, %acc0
- movclr.l %acc0, %d2
- asr.l %d1, %d2
- add.l %d2, (%a0)
- lea.l (-4*4, %a0), %a0
- subq.l #1, %d0
- jne 1b
- jra .exit
-
-.order5:
- movem.l (%a1), %a1-%a5
- move.l (%a0)+, %a6
-1:
- mac.l %a6, %a5, (%a0)+, %a6, %acc0
- mac.l %a6, %a4, (%a0)+, %a6, %acc0
- mac.l %a6, %a3, (%a0)+, %a6, %acc0
- mac.l %a6, %a2, (%a0)+, %a6, %acc0
- mac.l %a6, %a1, (-4*4, %a0), %a6, %acc0
- movclr.l %acc0, %d2
- asr.l %d1, %d2
- add.l %d2, (%a0)
- lea.l (-3*4, %a0), %a0
- subq.l #1, %d0
- jne 1b
- jra .exit
-
-.order4:
- movem.l (%a1), %a2-%a5
- move.l (%a0)+, %a6
-1:
- mac.l %a6, %a5, (%a0)+, %a6, %acc0
- mac.l %a6, %a4, (%a0)+, %a6, %acc0
- mac.l %a6, %a3, (%a0)+, %a6, %acc0
- mac.l %a6, %a2, (-3*4, %a0), %a6, %acc0
- movclr.l %acc0, %d2
- asr.l %d1, %d2
- add.l %d2, (%a0)
- subq.l #8, %a0
- subq.l #1, %d0
- jne 1b
- jra .exit
-
-.order3:
- movem.l (%a1), %a3-%a5
- move.l (%a0)+, %a6
-1:
- mac.l %a6, %a5, (%a0)+, %a6, %acc0
- mac.l %a6, %a4, (%a0)+, %a6, %acc0
- mac.l %a6, %a3, (-2*4, %a0), %a6, %acc0
- movclr.l %acc0, %d2
- asr.l %d1, %d2
- add.l %d2, (%a0)
- subq.l #4, %a0
- subq.l #1, %d0
- jne 1b
- jra .exit
-
-.order2:
- movem.l (%a1), %a4-%a5
- move.l (%a0)+, %a6
-1:
- mac.l %a6, %a5, (%a0)+, %a6, %acc0
- mac.l %a6, %a4, %acc0 | data for next iteration is already loaded
- movclr.l %acc0, %d2
- asr.l %d1, %d2
- add.l %d2, (%a0)
- subq.l #1, %d0
- jne 1b
- jra .exit
-
-.order1:
- | no point in using mac here
- move.l (%a1), %a5
-1:
- move.l %a5, %d2
- muls.l (%a0)+, %d2
- asr.l %d1, %d2
- add.l %d2, (%a0)
- subq.l #1, %d0
- jne 1b
- jra .exit
-
-.default:
- /* we do the filtering in an unrolled by 4 loop as far as we can, and then
- do the rest by jump table. */
- lea.l (%a1, %d2.l*4), %a2 | need to start in the other end of coefs
- move.l %a0, %a3 | working copy of history pointer
- move.l %d2, %d3
- lsr.l #2, %d3 | coefs/4, num of iterations needed in next loop
- move.l (%a3)+, %a5 | preload data for loop
-1:
- lea.l (-4*4, %a2), %a2 | move lpc coef pointer four samples backwards
- movem.l (%a2), %d4-%d7 | load four coefs
- mac.l %a5, %d7, (%a3)+, %a5, %acc0
- mac.l %a5, %d6, (%a3)+, %a5, %acc0
- mac.l %a5, %d5, (%a3)+, %a5, %acc0
- mac.l %a5, %d4, (%a3)+, %a5, %acc0
- subq.l #1, %d3 | any more unrolled loop operations left?
- jne 1b
-
- moveq.l #3, %d3 | mask 0x00000003
- and.l %d2, %d3 | get the remaining samples to be filtered
- jmp.l (2, %pc, %d3*2) | then jump into mac.l chain
-| jumptable:
- bra.b 3f | none left
- bra.b 2f | one left
- bra.b 1f | two left
-| three left
- move.l -(%a2), %d4
- mac.l %a5, %d4, (%a3)+, %a5, %acc0
-1:
- move.l -(%a2), %d4
- mac.l %a5, %d4, (%a3)+, %a5, %acc0
-2:
- move.l -(%a2), %d4
- mac.l %a5, %d4, (%a3)+, %a5, %acc0
-3:
- movclr.l %acc0, %d3 | get result
- asr.l %d1, %d3 | shift qlevel bits right
- add.l %a5, %d3 | add residual, which is in a5 by now
- move.l %d3, -(%a3) | save, a3 is also one past save location
- addq.l #4, %a0 | increment history pointer
- subq.l #1, %d0 | decrement sample count
- jne .default | are we done?
- jra .exit | if so, fall through to exit
-
-
-/* This routine deals with sample widths 24 and lower. All LPC filtering up to
- order 8 is done in specially optimised unrolled loops, while every order
- above this is handled by a slower default routine.
- */
- .global lpc_decode_emac_wide
- .align 2
-lpc_decode_emac_wide:
- lea.l (-44, %sp), %sp
- movem.l %d2-%d7/%a2-%a6, (%sp)
- movem.l (44+4, %sp), %d0-%d1/%d3/%a0-%a1
- /* d0 = blocksize, d1 = qlevel, d3 = pred_order
- a0 = data, a1 = coeffs
- */
-
- /* the data pointer always lags behind history pointer by 'pred_order'
- samples. since we have one loop for each order, we can hard code this
- and free a register by not saving data pointer.
- */
- move.l %d3, %d2
- neg.l %d2
- lea.l (%a0, %d2.l*4), %a0 | history
- clr.l %d2
- move.l %d2, %macsr | we'll need integer mode for this
- tst.l %d0
- jeq .exit | zero samples to process, exit
- moveq.l #32, %d2
- sub.l %d1, %d2 | calculate shift amount for extension byte
- moveq.l #8, %d4
- cmp.l %d4, %d3
- jgt .wdefault | order is over 8, jump to default case
- jmp.l (2, %pc, %d3.l*4) | jump to loop corresponding to pred_order
-| jumptable:
- bra.w .exit | zero order filter isn't possible, exit function
- bra.w .worder1
- bra.w .worder2
- bra.w .worder3
- bra.w .worder4
- bra.w .worder5
- bra.w .worder6
- bra.w .worder7
-
-| last jump table entry coincides with target, so leave it out
-.worder8:
- movem.l (%a1), %d5-%d7/%a1-%a5 | load lpc coefs
- move.l (%a0)+, %a6 | load first history sample
-1:
- mac.l %a6, %a5, (%a0)+, %a6, %acc0
- mac.l %a6, %a4, (%a0)+, %a6, %acc0
- mac.l %a6, %a3, (%a0)+, %a6, %acc0
- mac.l %a6, %a2, (%a0)+, %a6, %acc0
- mac.l %a6, %a1, (%a0)+, %a6, %acc0
- mac.l %a6, %d7, (%a0)+, %a6, %acc0
- mac.l %a6, %d6, (%a0)+, %a6, %acc0
- mac.l %a6, %d5, (-7*4, %a0), %a6, %acc0 | load for the next iteration
- move.l %accext01, %d4 | get top 8 bits of sum
- movclr.l %acc0, %d3 | then botten 32 bits
- lsr.l %d1, %d3 | shift bottom bits qlevel bits right
- asl.l %d2, %d4 | shift top bits 32 - qlevel bits left
- or.l %d4, %d3 | now combine results
- add.l %d3, (%a0) | add residual and save
- lea.l (-6*4, %a0), %a0 | point history back at second element
- subq.l #1, %d0 | decrement sample count
- jne 1b | are we done?
- jra .exit
-
-.worder7:
- movem.l (%a1), %d6-%d7/%a1-%a5
- move.l (%a0)+, %a6
-1:
- mac.l %a6, %a5, (%a0)+, %a6, %acc0
- mac.l %a6, %a4, (%a0)+, %a6, %acc0
- mac.l %a6, %a3, (%a0)+, %a6, %acc0
- mac.l %a6, %a2, (%a0)+, %a6, %acc0
- mac.l %a6, %a1, (%a0)+, %a6, %acc0
- mac.l %a6, %d7, (%a0)+, %a6, %acc0
- mac.l %a6, %d6, (-6*4, %a0), %a6, %acc0
- move.l %accext01, %d4
- movclr.l %acc0, %d3
- lsr.l %d1, %d3
- asl.l %d2, %d4
- or.l %d4, %d3
- add.l %d3, (%a0)
- lea.l (-5*4, %a0), %a0
- subq.l #1, %d0
- jne 1b
- jra .exit
-
-.worder6:
- movem.l (%a1), %d7/%a1-%a5
- move.l (%a0)+, %a6
-1:
- mac.l %a6, %a5, (%a0)+, %a6, %acc0
- mac.l %a6, %a4, (%a0)+, %a6, %acc0
- mac.l %a6, %a3, (%a0)+, %a6, %acc0
- mac.l %a6, %a2, (%a0)+, %a6, %acc0
- mac.l %a6, %a1, (%a0)+, %a6, %acc0
- mac.l %a6, %d7, (-5*4, %a0), %a6, %acc0
- move.l %accext01, %d4
- movclr.l %acc0, %d3
- lsr.l %d1, %d3
- asl.l %d2, %d4
- or.l %d4, %d3
- add.l %d3, (%a0)
- lea.l (-4*4, %a0), %a0
- subq.l #1, %d0
- jne 1b
- jra .exit
-
-.worder5:
- movem.l (%a1), %a1-%a5
- move.l (%a0)+, %a6
-1:
- mac.l %a6, %a5, (%a0)+, %a6, %acc0
- mac.l %a6, %a4, (%a0)+, %a6, %acc0
- mac.l %a6, %a3, (%a0)+, %a6, %acc0
- mac.l %a6, %a2, (%a0)+, %a6, %acc0
- mac.l %a6, %a1, (-4*4, %a0), %a6, %acc0
- move.l %accext01, %d4
- movclr.l %acc0, %d3
- lsr.l %d1, %d3
- asl.l %d2, %d4
- or.l %d4, %d3
- add.l %d3, (%a0)
- lea.l (-3*4, %a0), %a0
- subq.l #1, %d0
- jne 1b
- jra .exit
-
-.worder4:
- movem.l (%a1), %a2-%a5
- move.l (%a0)+, %a6
-1:
- mac.l %a6, %a5, (%a0)+, %a6, %acc0
- mac.l %a6, %a4, (%a0)+, %a6, %acc0
- mac.l %a6, %a3, (%a0)+, %a6, %acc0
- mac.l %a6, %a2, (-3*4, %a0), %a6, %acc0
- move.l %accext01, %d4
- movclr.l %acc0, %d3
- lsr.l %d1, %d3
- asl.l %d2, %d4
- or.l %d4, %d3
- add.l %d3, (%a0)
- subq.l #8, %a0
- subq.l #1, %d0
- jne 1b
- jra .exit
-
-.worder3:
- movem.l (%a1), %a3-%a5
- move.l (%a0)+, %a6
-1:
- mac.l %a6, %a5, (%a0)+, %a6, %acc0
- mac.l %a6, %a4, (%a0)+, %a6, %acc0
- mac.l %a6, %a3, (-2*4, %a0), %a6, %acc0
- move.l %accext01, %d4
- movclr.l %acc0, %d3
- lsr.l %d1, %d3
- asl.l %d2, %d4
- or.l %d4, %d3
- add.l %d3, (%a0)
- subq.l #4, %a0
- subq.l #1, %d0
- jne 1b
- jra .exit
-
-.worder2:
- movem.l (%a1), %a4-%a5
- move.l (%a0)+, %a6
-1:
- mac.l %a6, %a5, (%a0)+, %a6, %acc0
- mac.l %a6, %a4, %acc0 | data for next iteration is already loaded
- move.l %accext01, %d4
- movclr.l %acc0, %d3
- lsr.l %d1, %d3
- asl.l %d2, %d4
- or.l %d4, %d3
- add.l %d3, (%a0)
- subq.l #1, %d0
- jne 1b
- jra .exit
-
-.worder1:
- move.l (%a1), %a5
- move.l (%a0)+, %a6
-1:
- mac.l %a6, %a5, (%a0), %a6, %acc0
- move.l %accext01, %d4
- movclr.l %acc0, %d3
- lsr.l %d1, %d3
- asl.l %d2, %d4
- or.l %d4, %d3
- add.l %a6, %d3 | residual is already in a6
- move.l %d3, (%a0)+
- subq.l #1, %d0
- jne 1b
- jra .exit
-
-.wdefault:
- /* we do the filtering in an unrolled by 4 loop as far as we can, and then
- do the rest by jump table. */
- lea.l (%a1, %d3.l*4), %a2 | need to start in the other end of coefs
- move.l %a0, %a3 | working copy of history pointer
- move.l %d3, %d4
- lsr.l #2, %d4 | coefs/4, num of iterations needed in next loop
- move.l (%a3)+, %a5 | preload data for loop
-1:
- lea.l (-4*4, %a2), %a2 | move lpc coef pointer four samples backwards
- movem.l (%a2), %d5-%d7/%a4 | load four coefs
- mac.l %a5, %a4, (%a3)+, %a5, %acc0
- mac.l %a5, %d7, (%a3)+, %a5, %acc0
- mac.l %a5, %d6, (%a3)+, %a5, %acc0
- mac.l %a5, %d5, (%a3)+, %a5, %acc0
- subq.l #1, %d4 | any more unrolled loop operations left?
- jne 1b
-
- moveq.l #3, %d4 | mask 0x00000003
- and.l %d3, %d4 | get the remaining samples to be filtered
- jmp.l (2, %pc, %d4*2) | then jump into mac.l chain
-| jumptable:
- bra.b 3f | none left
- bra.b 2f | one left
- bra.b 1f | two left
-| three left
- move.l -(%a2), %d4
- mac.l %a5, %d4, (%a3)+, %a5, %acc0
-1:
- move.l -(%a2), %d4
- mac.l %a5, %d4, (%a3)+, %a5, %acc0
-2:
- move.l -(%a2), %d4
- mac.l %a5, %d4, (%a3)+, %a5, %acc0
-3:
- move.l %accext01, %d5 | get high 32 bits of result
- movclr.l %acc0, %d4 | get low 32 bits of result
- lsr.l %d1, %d4 | shift qlevel bits right
- asl.l %d2, %d5 | shift 32 - qlevel bits left
- or.l %d5, %d4 | combine top and low bits after shift
- add.l %a5, %d4 | add residual, which is in a5 by now
- move.l %d4, -(%a3) | save, a3 is also one past save location
- addq.l #4, %a0 | increment history pointer
- subq.l #1, %d0 | decrement sample count
- jne .wdefault | are we done?
- | if so, fall through to exit
-
-.exit:
- movem.l (%sp), %d2-%d7/%a2-%a6
- lea.l (44, %sp), %sp
- rts