diff options
Diffstat (limited to 'apps/codecs')
| -rw-r--r-- | apps/codecs/libwma/mdct.c | 63 |
| -rw-r--r-- | apps/codecs/libwma/mdct.h | 2 |
2 files changed, 40 insertions, 25 deletions
diff --git a/apps/codecs/libwma/mdct.c b/apps/codecs/libwma/mdct.c index e66be0e..a764f47 100644 --- a/apps/codecs/libwma/mdct.c +++ b/apps/codecs/libwma/mdct.c @@ -20,7 +20,14 @@ #include "wmafixed.h" #include "mdct.h" -fixed32 tcos0[1024], tsin0[1024]; //these are the sin and cos rotations used by the MDCT +/*these are the sin and cos rotations used by the MDCT*/ + +/*accessed too infrequently to give much speedup in IRAM*/ + +fixed32 *tcosarray[5], *tsinarray[5]; +fixed32 tcos0[1024], tcos1[512], tcos2[256], tcos3[128], tcos4[64]; +fixed32 tsin0[1024], tsin1[512], tsin2[256], tsin3[128], tsin4[64]; + uint16_t revtab0[1024]; /** @@ -28,16 +35,28 @@ uint16_t revtab0[1024]; */ int ff_mdct_init(MDCTContext *s, int nbits, int inverse) { - int n; - // fixed32 alpha; + int n, n4, i; memset(s, 0, sizeof(*s)); n = 1 << nbits; //nbits ranges from 12 to 8 inclusive - s->nbits = nbits; s->n = n; + n4 = n >> 2; + s->tcos = tcosarray[12-nbits]; + s->tsin = tsinarray[12-nbits]; + for(i=0;i<n4;i++) + { + + fixed32 ip = itofix32(i) + 0x2000; + ip = ip >> nbits; + + /*I can't remember why this works, but it seems to agree for ~24 bits, maybe more!*/ + s->tsin[i] = - fsincos(ip<<16, &(s->tcos[i])); + s->tcos[i] *=-1; + } (&s->fft)->nbits = nbits-2; + (&s->fft)->inverse = inverse; return 0; @@ -55,6 +74,8 @@ void ff_imdct_calc(MDCTContext *s, fixed32 *input) { int k, n8, n4, n2, n, j,scale; + const fixed32 *tcos = s->tcos; + const fixed32 *tsin = s->tsin; const fixed32 *in1, *in2; FFTComplex *z1 = (FFTComplex *)output; FFTComplex *z2 = (FFTComplex *)input; @@ -73,21 +94,19 @@ void ff_imdct_calc(MDCTContext *s, for(k = 0; k < n4; k++) { - int kshift = k<<revtabshift; - j=revtab0[kshift]; - CMUL(&z1[j].re, &z1[j].im, *in2, *in1, tcos0[kshift], tsin0[kshift]); + j=revtab0[k<<revtabshift]; + CMUL(&z1[j].re, &z1[j].im, *in2, *in1, tcos[k], tsin[k]); in1 += 2; in2 -= 2; } - scale = fft_calc_unscaled(&s->fft, z1); + scale = fft_calc_unscaled(&s->fft, z1); /* post rotation + reordering */ 
for(k = 0; k < n4; k++) { - int kshift = k<<revtabshift; - CMUL(&z2[k].re, &z2[k].im, (z1[k].re), (z1[k].im), tcos0[kshift], tsin0[kshift]); + CMUL(&z2[k].re, &z2[k].im, (z1[k].re), (z1[k].im), tcos[k], tsin[k]); } for(k = 0; k < n8; k++) @@ -116,9 +135,18 @@ void ff_imdct_calc(MDCTContext *s, } } +/* init MDCT */ + int mdct_init_global(void) { int i,j,m; + + /* although seemingly degenerate, these cannot actually be merged together without + a substantial increase in error which is unjustified by the tiny memory savings*/ + + tcosarray[0] = tcos0; tcosarray[1] = tcos1; tcosarray[2] = tcos2; tcosarray[3] = tcos3;tcosarray[4] = tcos4; + tsinarray[0] = tsin0; tsinarray[1] = tsin1; tsinarray[2] = tsin2; tsinarray[3] = tsin3;tsinarray[4] = tsin4; + /* init the MDCT bit reverse table here rather then in fft_init */ for(i=0;i<1024;i++) /*hard coded to a 2048 bit rotation*/ @@ -132,21 +160,6 @@ int mdct_init_global(void) revtab0[i]=m; } - for(i=0;i<1024;i++) - { - //fixed32 pi2 = fixmul32(0x20000, M_PI_F); - fixed32 ip = itofix32(i) + 0x2000; - ip = ip >> 12; - //ip = fixdiv32(ip,itofix32(n)); // PJJ optimize - //alpha = fixmul32(TWO_M_PI_F, ip); - //s->tcos[i] = -fixcos32(alpha); //alpha between 0 and pi/2 - //s->tsin[i] = -fixsin32(alpha); - - //I can't remember why this works, but it seems to agree for ~24 bits, maybe more! - tsin0[i] = - fsincos(ip<<16, &(tcos0[i])); - tcos0[i] *=-1; - } - fft_init_global(); return 0; diff --git a/apps/codecs/libwma/mdct.h b/apps/codecs/libwma/mdct.h index 67f5101..57d65ae 100644 --- a/apps/codecs/libwma/mdct.h +++ b/apps/codecs/libwma/mdct.h @@ -25,6 +25,8 @@ typedef struct MDCTContext int n; /* size of MDCT (i.e. number of input data * 2) */ int nbits; /* n = 2^nbits */ /* pre/post rotation tables */ + fixed32 *tcos; + fixed32 *tsin; FFTContext fft; } MDCTContext; |