diff options
| author | Michael Sevakis <jethead71@rockbox.org> | 2007-02-19 02:49:26 +0000 |
|---|---|---|
| committer | Michael Sevakis <jethead71@rockbox.org> | 2007-02-19 02:49:26 +0000 |
| commit | 36175ac9453999d2d079c521126ecc5ac7a8d984 (patch) | |
| tree | a37e87b5fd7283d1456b7a346e16c1a5ed590a2c /apps/dsp_cf.S | |
| parent | 2801a87d543f38cadd076330f329c84e23852997 (diff) | |
| download | rockbox-36175ac9453999d2d079c521126ecc5ac7a8d984.zip rockbox-36175ac9453999d2d079c521126ecc5ac7a8d984.tar.gz rockbox-36175ac9453999d2d079c521126ecc5ac7a8d984.tar.bz2 rockbox-36175ac9453999d2d079c521126ecc5ac7a8d984.tar.xz | |
SWCODEC: DSP optimizations for conversion to internal format and resampling. Assembly resampling for Coldfire. Word has it ARM will get that soon.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@12399 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/dsp_cf.S')
| -rw-r--r-- | apps/dsp_cf.S | 145 |
1 files changed, 141 insertions, 4 deletions
diff --git a/apps/dsp_cf.S b/apps/dsp_cf.S index 719d1db..233be82 100644 --- a/apps/dsp_cf.S +++ b/apps/dsp_cf.S @@ -17,8 +17,11 @@ * ****************************************************************************/ - .section .text - .global apply_crossfeed +/**************************************************************************** + * apply_crossfeed(int32_t* src[], int count) + */ + .section .text + .global apply_crossfeed apply_crossfeed: lea.l (-44, %sp), %sp movem.l %d2-%d7/%a2-%a6, (%sp) | save all regs @@ -67,11 +70,11 @@ apply_crossfeed: addq.l #1, %d4 | index++ moveq.l #13, %d6 cmp.l %d6, %d4 | wrap index to 0 if it overflows - jlt .nowrap + jlt .cfnowrap moveq.l #13*8, %d4 sub.l %d4, %a0 | wrap back delay line ptr as well clr.l %d4 -.nowrap: +.cfnowrap: subq.l #1, %d7 jne .cfloop | save data back to struct @@ -81,4 +84,138 @@ apply_crossfeed: movem.l (%sp), %d2-%d7/%a2-%a6 lea.l (44, %sp), %sp rts +.cfend: + .size apply_crossfeed,.cfend-apply_crossfeed +/**************************************************************************** + * dsp_downsample(int channels, int count, struct resample_data *r, + * int32_t **src, int32_t **dst) + */ + .section .text + .global dsp_downsample +dsp_downsample: + lea.l -40(%sp), %sp | save non-clobberables + movem.l %d2-%d7/%a2-%a5, (%sp) | + movem.l 44(%sp), %d2-%d3/%a0-%a2| %d2 = ch = channels + | %d3 = count + | %a0 = r + | %a1 = src + | %a2 = dst + move.l 4(%a0), %d4 | %d4 = delta = r->delta + move.l #16, %d7 | %d7 = shift +.dschannel_loop: + move.l (%a0), %d5 | %d5 = phase = r->phase + move.l -4(%a1, %d2.l*4), %a3 | %a3 = s = src[ch-1] + move.l -4(%a2, %d2.l*4), %a4 | %a4 = d = dst[ch-1] + lea.l 4(%a0, %d2.l*4), %a5 | %a5 = &r->last_sample[ch-1] + move.l (%a5), %d0 | %d0 = last = r->last_sample[ch-1] + move.l -4(%a3, %d3.l*4), %d1 | r->last_sample[ch-1] = s[count-1] + move.l %d1, (%a5) | + move.l %d5, %d6 | %d6 = pos = phase >> 16 + lsr.l %d7, %d6 | + cmp.l %d3, %d6 | past end of samples? + bge.b .dsloop_skip | yes? 
skip loop + tst.l %d6 | need last sample of prev. frame? + bne.b .dsloop | no? start main loop + move.l (%a3, %d6.l*4), %d1 | %d1 = s[pos] + bra.b .dsuse_last_start | start with last (last in %d0) +.dsloop: + lea.l -4(%a3, %d6.l*4), %a5 | load s[pos-1] and s[pos] + movem.l (%a5), %d0-%d1 | +.dsuse_last_start: + sub.l %d0, %d1 | %d1 = diff = s[pos] - s[pos-1] + move.l %d0, %acc0 | %acc0 = previous sample + move.l %d5, %d0 | frac = (phase << 16) >> 1 + lsl.l %d7, %d0 | + lsr.l #1, %d0 | + mac.l %d0, %d1, %acc0 | %acc0 += frac * diff + move.l %acc0, %d0 | + add.l %d4, %d5 | phase += delta + move.l %d5, %d6 | pos = phase >> 16 + lsr.l %d7, %d6 | + move.l %d0, (%a4)+ | *d++ = %d0 + cmp.l %d3, %d6 | pos < count? + blt.b .dsloop | yes? continue resampling +.dsloop_skip: + subq.l #1, %d2 | ch > 0? + bgt.b .dschannel_loop | yes? process next channel + asl.l %d7, %d3 | wrap phase to start of next frame + sub.l %d3, %d5 | r->phase = phase - (count << 16) + move.l %d5, (%a0) | + move.l %a4, %d0 | return d - dst[0] + sub.l (%a2), %d0 | + asr.l #2, %d0 | convert bytes->samples + movem.l (%sp), %d2-%d7/%a2-%a5 | restore non-clobberables + move.l %acc1, %acc0 | copy %acc1 to %acc0 to clear it (assumes %acc1 is 0) + lea.l 40(%sp), %sp | cleanup stack + rts | buh-bye +.dsend: + .size dsp_downsample,.dsend-dsp_downsample + +/**************************************************************************** + * dsp_upsample(int channels, int count, struct resample_data *r, + * int32_t **src, int32_t **dst) + */ + .section .text + .global dsp_upsample +dsp_upsample: + lea.l -40(%sp), %sp | save non-clobberables + movem.l %d2-%d7/%a2-%a5, (%sp) | + movem.l 44(%sp), %d2-%d3/%a0-%a2| %d2 = ch = channels + | %d3 = count + | %a0 = r + | %a1 = src + | %a2 = dst + move.l 4(%a0), %d4 | %d4 = delta = r->delta + swap %d4 | swap delta to high word to use + | carries to increment position +.uschannel_loop: + move.l (%a0), %d5 | %d5 = phase = r->phase + move.l -4(%a1, %d2.l*4), %a3 | %a3 = s = src[ch-1] + move.l -4(%a2, %d2.l*4), %a4 | %a4 = d = 
dst[ch-1] + lea.l 4(%a0, %d2.l*4), %a5 | %a5 = &r->last_sample[ch-1] + move.l (%a5), %d0 | %d0 = last = r->last_sample[ch-1] + move.l -4(%a3, %d3.l*4), %d1 | r->last_sample[ch-1] = s[count-1] + move.l %d1, (%a5) | + moveq.l #16, %d1 | %d1 = shift + move.l %d5, %d6 | %d6 = pos = phase >> 16 + lsl.l %d1, %d5 | swap phase to high word to use + | carries to increment position + lsr.l %d1, %d6 | pos == 0? + bne.b .usstart_1 | no? transition from down + move.l (%a3), %d1 | %d1 = s[0] + sub.l %d0, %d1 | diff = s[pos] - last + bra.b .usloop_0 | jump to typical start point +.usstart_1: + cmp.l %d3, %d6 | past end of samples? + bge.b .usloop_skip | yes? skip loop +.usloop_1: + lea.l -4(%a3, %d6.l*4), %a5 | load s[pos-1] and s[pos] + movem.l (%a5), %d0-%d1 | + sub.l %d0, %d1 | %d1 = diff = s[pos] - s[pos-1] +.usloop_0: + move.l %d0, %acc0 | %acc0 = previous sample + lsr.l #1, %d5 | make phase into frac + mac.l %d1, %d5, %acc0 | %acc0 += diff * frac + move.l %acc0, %d7 | + lsl.l #1, %d5 | restore frac to phase + move.l %d7, (%a4)+ | *d++ = %d7 + add.l %d4, %d5 | phase += delta + bcc.b .usloop_0 | load next values? + addq.l #1, %d6 | increment position + cmp.l %d3, %d6 | pos < count? + blt.b .usloop_1 | yes? continue resampling +.usloop_skip: + subq.l #1, %d2 | ch > 0? + bgt.b .uschannel_loop | yes? process next channel + swap %d5 | wrap phase to start of next frame + move.l %d5, (%a0) | ...and save in r->phase + move.l %a4, %d0 | return d - dst[0] + sub.l (%a2), %d0 | + asr.l #2, %d0 | convert bytes->samples + movem.l (%sp), %d2-%d7/%a2-%a5 | restore non-clobberables + move.l %acc1, %acc0 | copy %acc1 to %acc0 to clear it (assumes %acc1 is 0) + lea.l 40(%sp), %sp | cleanup stack + rts | buh-bye +.usend: + .size dsp_upsample,.usend-dsp_upsample |