diff options
Diffstat (limited to 'lib/rbcodec/codecs/libopus/celt')
27 files changed, 1388 insertions, 916 deletions
diff --git a/lib/rbcodec/codecs/libopus/celt/_kiss_fft_guts.h b/lib/rbcodec/codecs/libopus/celt/_kiss_fft_guts.h index cc52f37..8ddb9ad 100644 --- a/lib/rbcodec/codecs/libopus/celt/_kiss_fft_guts.h +++ b/lib/rbcodec/codecs/libopus/celt/_kiss_fft_guts.h @@ -65,10 +65,6 @@ do{ (m).r = ADD32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \ (m).i = SUB32(S_MUL((a).i,(b).r) , S_MUL((a).r,(b).i)); }while(0) -# define C_MUL4(m,a,b) \ - do{ (m).r = SHR32(SUB32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)),2); \ - (m).i = SHR32(ADD32(S_MUL((a).r,(b).i) , S_MUL((a).i,(b).r)),2); }while(0) - # define C_MULBYSCALAR( c, s ) \ do{ (c).r = S_MUL( (c).r , s ) ;\ (c).i = S_MUL( (c).i , s ) ; }while(0) diff --git a/lib/rbcodec/codecs/libopus/celt/arch.h b/lib/rbcodec/codecs/libopus/celt/arch.h index b2d26c4..035b92f 100644 --- a/lib/rbcodec/codecs/libopus/celt/arch.h +++ b/lib/rbcodec/codecs/libopus/celt/arch.h @@ -69,11 +69,9 @@ static OPUS_INLINE void _celt_fatal(const char *str, const char *file, int line) #define IMUL32(a,b) ((a)*(b)) -#define ABS(x) ((x) < 0 ? (-(x)) : (x)) /**< Absolute integer value. */ -#define ABS16(x) ((x) < 0 ? (-(x)) : (x)) /**< Absolute 16-bit value. */ +#define ABS(x) ((x) < 0 ? (-(x)) : (x)) #define MIN16(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum 16-bit value. */ #define MAX16(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum 16-bit value. */ -#define ABS32(x) ((x) < 0 ? (-(x)) : (x)) /**< Absolute 32-bit value. */ #define MIN32(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum 32-bit value. */ #define MAX32(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum 32-bit value. */ #define IMIN(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum int value. */ @@ -108,6 +106,13 @@ typedef opus_val32 celt_ener; #define SCALEIN(a) (a) #define SCALEOUT(a) (a) +#define ABS16(x) ((x) < 0 ? (-(x)) : (x)) +#define ABS32(x) ((x) < 0 ? (-(x)) : (x)) + +static OPUS_INLINE opus_int16 SAT16(opus_int32 x) { + return x > 32767 ? 32767 : x < -32768 ? -32768 : (opus_int16)x; +} + #ifdef FIXED_DEBUG #include "fixed_debug.h" #else @@ -139,6 +144,22 @@ typedef float celt_sig; typedef float celt_norm; typedef float celt_ener; +#ifdef FLOAT_APPROX +/* This code should reliably detect NaN/inf even when -ffast-math is used. + Assumes IEEE 754 format. */ +static OPUS_INLINE int celt_isnan(float x) +{ + union {float f; opus_uint32 i;} in; + in.f = x; + return ((in.i>>23)&0xFF)==0xFF && (in.i&0x007FFFFF)!=0; +} +#else +#ifdef __FAST_MATH__ +#error Cannot build libopus with -ffast-math unless FLOAT_APPROX is defined. This could result in crashes on extreme (e.g. NaN) input +#endif +#define celt_isnan(x) ((x)!=(x)) +#endif + #define Q15ONE 1.0f #define NORM_SCALING 1.f @@ -148,6 +169,10 @@ typedef float celt_ener; #define VERY_LARGE16 1e15f #define Q15_ONE ((opus_val16)1.f) +/* This appears to be the same speed as C99's fabsf() but it's more portable. */ +#define ABS16(x) ((float)fabs(x)) +#define ABS32(x) ((float)fabs(x)) + #define QCONST16(x,bits) (x) #define QCONST32(x,bits) (x) @@ -186,6 +211,7 @@ typedef float celt_ener; #define MULT32_32_Q31(a,b) ((a)*(b)) #define MAC16_32_Q15(c,a,b) ((c)+(a)*(b)) +#define MAC16_32_Q16(c,a,b) ((c)+(a)*(b)) #define MULT16_16_Q11_32(a,b) ((a)*(b)) #define MULT16_16_Q11(a,b) ((a)*(b)) @@ -203,6 +229,8 @@ typedef float celt_ener; #define SCALEIN(a) ((a)*CELT_SIG_SCALE) #define SCALEOUT(a) ((a)*(1/CELT_SIG_SCALE)) +#define SIG2WORD16(x) (x) + #endif /* !FIXED_POINT */ #ifndef GLOBAL_STACK_SIZE diff --git a/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv4.h b/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv4.h index b690bc8..efb3b18 100644 --- a/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv4.h +++ b/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv4.h @@ -68,6 +68,10 @@ static OPUS_INLINE opus_val32 MULT16_32_Q15_armv4(opus_val16 a, opus_val32 b) #undef MAC16_32_Q15 #define MAC16_32_Q15(c, a, b) ADD32(c, MULT16_32_Q15(a, b)) +/** 16x32 multiply, followed by a 16-bit shift right and 32-bit add. + Result fits in 32 bits. */ +#undef MAC16_32_Q16 +#define MAC16_32_Q16(c, a, b) ADD32(c, MULT16_32_Q16(a, b)) /** 32x32 multiplication, followed by a 31-bit shift right. Results fits in 32 bits */ #undef MULT32_32_Q31 diff --git a/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv5e.h b/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv5e.h index 1194a7d..36a6321 100644 --- a/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv5e.h +++ b/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv5e.h @@ -82,6 +82,23 @@ static OPUS_INLINE opus_val32 MAC16_32_Q15_armv5e(opus_val32 c, opus_val16 a, } #define MAC16_32_Q15(c, a, b) (MAC16_32_Q15_armv5e(c, a, b)) +/** 16x32 multiply, followed by a 16-bit shift right and 32-bit add. + Result fits in 32 bits. */ +#undef MAC16_32_Q16 +static OPUS_INLINE opus_val32 MAC16_32_Q16_armv5e(opus_val32 c, opus_val16 a, + opus_val32 b) +{ + int res; + __asm__( + "#MAC16_32_Q16\n\t" + "smlawb %0, %1, %2, %3;\n" + : "=r"(res) + : "r"(b), "r"(a), "r"(c) + ); + return res; +} +#define MAC16_32_Q16(c, a, b) (MAC16_32_Q16_armv5e(c, a, b)) + /** 16x16 multiply-add where the result fits in 32 bits */ #undef MAC16_16 static OPUS_INLINE opus_val32 MAC16_16_armv5e(opus_val32 c, opus_val16 a, @@ -113,4 +130,22 @@ static OPUS_INLINE opus_val32 MULT16_16_armv5e(opus_val16 a, opus_val16 b) } #define MULT16_16(a, b) (MULT16_16_armv5e(a, b)) +#ifdef OPUS_ARM_INLINE_MEDIA + +#undef SIG2WORD16 +static OPUS_INLINE opus_val16 SIG2WORD16_armv6(opus_val32 x) +{ + celt_sig res; + __asm__( + "#SIG2WORD16\n\t" + "ssat %0, #16, %1, ASR #12\n\t" + : "=r"(res) + : "r"(x+2048) + ); + return EXTRACT16(res); +} +#define SIG2WORD16(x) (SIG2WORD16_armv6(x)) + +#endif /* OPUS_ARM_INLINE_MEDIA */ + #endif diff --git a/lib/rbcodec/codecs/libopus/celt/bands.c b/lib/rbcodec/codecs/libopus/celt/bands.c index 1ad786d..caa7016 100644 --- a/lib/rbcodec/codecs/libopus/celt/bands.c +++ b/lib/rbcodec/codecs/libopus/celt/bands.c @@ -93,11 +93,11 @@ static int bitexact_log2tan(int isin,int icos) #if 0 #ifdef FIXED_POINT /* Compute the amplitude (sqrt energy) in each of the bands */ -void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int M) +void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM) { int i, c, N; const opus_int16 *eBands = m->eBands; - N = M*m->shortMdctSize; + N = m->shortMdctSize<<LM; c=0; do { for (i=0;i<end;i++) { @@ -105,18 +105,23 @@ void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *band opus_val32 maxval=0; opus_val32 sum = 0; - j=M*eBands[i]; do { - maxval = MAX32(maxval, X[j+c*N]); - maxval = MAX32(maxval, -X[j+c*N]); - } while (++j<M*eBands[i+1]); - + maxval = celt_maxabs32(&X[c*N+(eBands[i]<<LM)], (eBands[i+1]-eBands[i])<<LM); if (maxval > 0) { - int shift = celt_ilog2(maxval)-10; - j=M*eBands[i]; do { - sum = MAC16_16(sum, EXTRACT16(VSHR32(X[j+c*N],shift)), - EXTRACT16(VSHR32(X[j+c*N],shift))); - } while (++j<M*eBands[i+1]); + int shift = celt_ilog2(maxval) - 14 + (((m->logN[i]>>BITRES)+LM+1)>>1); + j=eBands[i]<<LM; + if (shift>0) + { + do { + sum = MAC16_16(sum, EXTRACT16(SHR32(X[j+c*N],shift)), + EXTRACT16(SHR32(X[j+c*N],shift))); + } while (++j<eBands[i+1]<<LM); + } else { + do { + sum = MAC16_16(sum, EXTRACT16(SHL32(X[j+c*N],-shift)), + EXTRACT16(SHL32(X[j+c*N],-shift))); + } while (++j<eBands[i+1]<<LM); + } /* We're adding one here to ensure the normalized band isn't larger than unity norm */ bandE[i+c*m->nbEBands] = EPSILON+VSHR32(EXTEND32(celt_sqrt(sum)),-shift); } else { @@ -151,18 +156,16 @@ void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, cel #else /* FIXED_POINT */ /* Compute the amplitude (sqrt energy) in each of the bands */ -void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int M) +void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM) { int i, c, N; const opus_int16 *eBands = m->eBands; - N = M*m->shortMdctSize; + N = m->shortMdctSize<<LM; c=0; do { for (i=0;i<end;i++) { - int j; - opus_val32 sum = 1e-27f; - for (j=M*eBands[i];j<M*eBands[i+1];j++) - sum += X[j+c*N]*X[j+c*N]; + opus_val32 sum; + sum = 1e-27f + celt_inner_prod(&X[c*N+(eBands[i]<<LM)], &X[c*N+(eBands[i]<<LM)], (eBands[i+1]-eBands[i])<<LM); bandE[i+c*m->nbEBands] = celt_sqrt(sum); /*printf ("%f ", bandE[i+c*m->nbEBands]);*/ } @@ -192,74 +195,80 @@ void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, cel /* De-normalise the energy to produce the synthesis from the unit-energy bands */ void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X, - celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandLogE, int start, int end, int C, int M) + celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandLogE, int start, + int end, int M, int downsample, int silence) { - int i, c, N; + int i, N; + int bound; + celt_sig * OPUS_RESTRICT f; + const celt_norm * OPUS_RESTRICT x; const opus_int16 *eBands = m->eBands; N = M*m->shortMdctSize; - celt_assert2(C<=2, "denormalise_bands() not implemented for >2 channels"); - c=0; do { - celt_sig * OPUS_RESTRICT f; - const celt_norm * OPUS_RESTRICT x; - f = freq+c*N; - x = X+c*N+M*eBands[start]; - for (i=0;i<M*eBands[start];i++) - *f++ = 0; - for (i=start;i<end;i++) - { - int j, band_end; - opus_val16 g; - opus_val16 lg; + bound = M*eBands[end]; + if (downsample!=1) + bound = IMIN(bound, N/downsample); + if (silence) + { + bound = 0; + start = end = 0; + } + f = freq; + x = X+M*eBands[start]; + for (i=0;i<M*eBands[start];i++) + *f++ = 0; + for (i=start;i<end;i++) + { + int j, band_end; + opus_val16 g; + opus_val16 lg; #ifdef FIXED_POINT - int shift; + int shift; #endif - j=M*eBands[i]; - band_end = M*eBands[i+1]; - lg = ADD16(bandLogE[i+c*m->nbEBands], SHL16((opus_val16)eMeans[i],6)); + j=M*eBands[i]; + band_end = M*eBands[i+1]; + lg = ADD16(bandLogE[i], SHL16((opus_val16)eMeans[i],6)); #ifndef FIXED_POINT - g = celt_exp2(lg); + g = celt_exp2(lg); #else - /* Handle the integer part of the log energy */ - shift = 16-(lg>>DB_SHIFT); - if (shift>31) - { - shift=0; - g=0; - } else { - /* Handle the fractional part. */ - g = celt_exp2_frac(lg&((1<<DB_SHIFT)-1)); - } - /* Handle extreme gains with negative shift. */ - if (shift<0) - { - /* For shift < -2 we'd be likely to overflow, so we're capping + /* Handle the integer part of the log energy */ + shift = 16-(lg>>DB_SHIFT); + if (shift>31) + { + shift=0; + g=0; + } else { + /* Handle the fractional part. */ + g = celt_exp2_frac(lg&((1<<DB_SHIFT)-1)); + } + /* Handle extreme gains with negative shift. */ + if (shift<0) + { + /* For shift < -2 we'd be likely to overflow, so we're capping the gain here. This shouldn't happen unless the bitstream is already corrupted. */ - if (shift < -2) - { - g = 32767; - shift = -2; - } - do { - *f++ = SHL32(MULT16_16(*x++, g), -shift); - } while (++j<band_end); - } else + if (shift < -2) + { + g = 32767; + shift = -2; + } + do { + *f++ = SHL32(MULT16_16(*x++, g), -shift); + } while (++j<band_end); + } else #endif /* Be careful of the fixed-point "else" just above when changing this code */ do { *f++ = SHR32(MULT16_16(*x++, g), shift); } while (++j<band_end); - } - celt_assert(start <= end); - for (i=M*eBands[end];i<N;i++) - *f++ = 0; - } while (++c<C); + } + celt_assert(start <= end); + OPUS_CLEAR(&freq[bound], N-bound); } /* This prevents energy collapse for transients with multiple short MDCTs */ void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_masks, int LM, int C, int size, - int start, int end, opus_val16 *logE, opus_val16 *prev1logE, - opus_val16 *prev2logE, int *pulses, opus_uint32 seed) + int start, int end, const opus_val16 *logE, const opus_val16 *prev1logE, + const opus_val16 *prev2logE, const int *pulses, opus_uint32 seed) { int c, i, j, k; for (i=start;i<end;i++) @@ -274,7 +283,8 @@ void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_mas N0 = m->eBands[i+1]-m->eBands[i]; /* depth in 1/8 bits */ - depth = (1+pulses[i])/((m->eBands[i+1]-m->eBands[i])<<LM); + celt_assert(pulses[i]>=0); + depth = celt_udiv(1+pulses[i], (m->eBands[i+1]-m->eBands[i]))>>LM; #ifdef FIXED_POINT thresh32 = SHR32(celt_exp2(-SHL16(depth, 10-BITRES)),1); @@ -352,7 +362,7 @@ void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_mas } } -static void intensity_stereo(const CELTMode *m, celt_norm *X, celt_norm *Y, const celt_ener *bandE, int bandID, int N) +static void intensity_stereo(const CELTMode *m, celt_norm * OPUS_RESTRICT X, const celt_norm * OPUS_RESTRICT Y, const celt_ener *bandE, int bandID, int N) { int i = bandID; int j; @@ -372,25 +382,25 @@ static void intensity_stereo(const CELTMode *m, celt_norm *X, celt_norm *Y, cons celt_norm r, l; l = X[j]; r = Y[j]; - X[j] = MULT16_16_Q14(a1,l) + MULT16_16_Q14(a2,r); + X[j] = EXTRACT16(SHR32(MAC16_16(MULT16_16(a1, l), a2, r), 14)); /* Side is not encoded, no need to calculate */ } } -static void stereo_split(celt_norm *X, celt_norm *Y, int N) +static void stereo_split(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT Y, int N) { int j; for (j=0;j<N;j++) { - celt_norm r, l; - l = MULT16_16_Q15(QCONST16(.70710678f,15), X[j]); - r = MULT16_16_Q15(QCONST16(.70710678f,15), Y[j]); - X[j] = l+r; - Y[j] = r-l; + opus_val32 r, l; + l = MULT16_16(QCONST16(.70710678f, 15), X[j]); + r = MULT16_16(QCONST16(.70710678f, 15), Y[j]); + X[j] = EXTRACT16(SHR32(ADD32(l, r), 15)); + Y[j] = EXTRACT16(SHR32(SUB32(r, l), 15)); } } -static void stereo_merge(celt_norm *X, celt_norm *Y, opus_val16 mid, int N) +static void stereo_merge(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT Y, opus_val16 mid, int N) { int j; opus_val32 xp=0, side=0; @@ -411,8 +421,7 @@ static void stereo_merge(celt_norm *X, celt_norm *Y, opus_val16 mid, int N) Er = MULT16_16(mid2, mid2) + side + 2*xp; if (Er < QCONST32(6e-4f, 28) || El < QCONST32(6e-4f, 28)) { - for (j=0;j<N;j++) - Y[j] = X[j]; + OPUS_COPY(Y, X, N); return; } @@ -436,7 +445,7 @@ static void stereo_merge(celt_norm *X, celt_norm *Y, opus_val16 mid, int N) { celt_norm r, l; /* Apply mid scaling (side is already scaled) */ - l = MULT16_16_Q15(mid, X[j]); + l = MULT16_16_P15(mid, X[j]); r = Y[j]; X[j] = EXTRACT16(PSHR32(MULT16_16(lgain, SUB16(l,r)), kl+1)); Y[j] = EXTRACT16(PSHR32(MULT16_16(rgain, ADD16(l,r)), kr+1)); @@ -445,7 +454,7 @@ static void stereo_merge(celt_norm *X, celt_norm *Y, opus_val16 mid, int N) #if 0 /* Decide whether we should spread the pulses in the current frame */ -int spreading_decision(const CELTMode *m, celt_norm *X, int *average, +int spreading_decision(const CELTMode *m, const celt_norm *X, int *average, int last_decision, int *hf_average, int *tapset_decision, int update_hf, int end, int C, int M) { @@ -466,7 +475,7 @@ int spreading_decision(const CELTMode *m, celt_norm *X, int *average, { int j, N, tmp=0; int tcount[3] = {0,0,0}; - celt_norm * OPUS_RESTRICT x = X+M*eBands[i]+c*N0; + const celt_norm * OPUS_RESTRICT x = X+M*eBands[i]+c*N0; N = M*(eBands[i+1]-eBands[i]); if (N<=8) continue; @@ -486,7 +495,7 @@ int spreading_decision(const CELTMode *m, celt_norm *X, int *average, /* Only include four last bands (8 kHz and up) */ if (i>m->nbEBands-4) - hf_sum += 32*(tcount[1]+tcount[0])/N; + hf_sum += celt_udiv(32*(tcount[1]+tcount[0]), N); tmp = (2*tcount[2] >= N) + (2*tcount[1] >= N) + (2*tcount[0] >= N); sum += tmp*256; nbBands++; @@ -496,7 +505,7 @@ int spreading_decision(const CELTMode *m, celt_norm *X, int *average, if (update_hf) { if (hf_sum) - hf_sum /= C*(4-m->nbEBands+end); + hf_sum = celt_udiv(hf_sum, C*(4-m->nbEBands+end)); *hf_average = (*hf_average+hf_sum)>>1; hf_sum = *hf_average; if (*tapset_decision==2) @@ -512,7 +521,8 @@ int spreading_decision(const CELTMode *m, celt_norm *X, int *average, } /*printf("%d %d %d\n", hf_sum, *hf_average, *tapset_decision);*/ celt_assert(nbBands>0); /* end has to be non-zero */ - sum /= nbBands; + celt_assert(sum>=0); + sum = celt_udiv(sum, nbBands); /* Recursive averaging */ sum = (sum+*average)>>1; *average = sum; @@ -571,8 +581,7 @@ static void deinterleave_hadamard(celt_norm *X, int N0, int stride, int hadamard for (j=0;j<N0;j++) tmp[i*N0+j] = X[j*stride+i]; } - for (j=0;j<N;j++) - X[j] = tmp[j]; + OPUS_COPY(X, tmp, N); RESTORE_STACK; } @@ -595,8 +604,7 @@ static void interleave_hadamard(celt_norm *X, int N0, int stride, int hadamard) for (j=0;j<N0;j++) tmp[j*stride+i] = X[i*N0+j]; } - for (j=0;j<N;j++) - X[j] = tmp[j]; + OPUS_COPY(X, tmp, N); RESTORE_STACK; } @@ -607,11 +615,11 @@ void haar1(celt_norm *X, int N0, int stride) for (i=0;i<stride;i++) for (j=0;j<N0;j++) { - celt_norm tmp1, tmp2; - tmp1 = MULT16_16_Q15(QCONST16(.70710678f,15), X[stride*2*j+i]); - tmp2 = MULT16_16_Q15(QCONST16(.70710678f,15), X[stride*(2*j+1)+i]); - X[stride*2*j+i] = tmp1 + tmp2; - X[stride*(2*j+1)+i] = tmp1 - tmp2; + opus_val32 tmp1, tmp2; + tmp1 = MULT16_16(QCONST16(.70710678f,15), X[stride*2*j+i]); + tmp2 = MULT16_16(QCONST16(.70710678f,15), X[stride*(2*j+1)+i]); + X[stride*2*j+i] = EXTRACT16(PSHR32(ADD32(tmp1, tmp2), 15)); + X[stride*(2*j+1)+i] = EXTRACT16(PSHR32(SUB32(tmp1, tmp2), 15)); } } @@ -626,7 +634,8 @@ static int compute_qn(int N, int b, int offset, int pulse_cap, int stereo) /* The upper limit ensures that in a stereo split with itheta==16384, we'll always have enough bits left over to code at least one pulse in the side; otherwise it would collapse, since it doesn't get folded. */ - qb = IMIN(b-pulse_cap-(4<<BITRES), (b+N2*offset)/N2); + qb = celt_sudiv(b+N2*offset, N2); + qb = IMIN(b-pulse_cap-(4<<BITRES), qb); qb = IMIN(8<<BITRES, qb); @@ -773,7 +782,8 @@ static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx, ec_dec_update(ec, fl, fl+fs, ft); } } - itheta = (opus_int32)itheta*16384/qn; + celt_assert(itheta>=0); + itheta = celt_udiv((opus_int32)itheta*16384, qn); if (encode && stereo) { if (itheta==0) @@ -1025,8 +1035,7 @@ static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X, fill &= cm_mask; if (!fill) { - for (j=0;j<N;j++) - X[j] = 0; + OPUS_CLEAR(X, N); } else { if (lowband == NULL) { @@ -1088,7 +1097,7 @@ static unsigned quant_band(struct band_ctx *ctx, celt_norm *X, longBlocks = B0==1; - N_B /= B; + N_B = celt_udiv(N_B, B); /* Special case for one sample */ if (N==1) @@ -1102,9 +1111,7 @@ static unsigned quant_band(struct band_ctx *ctx, celt_norm *X, if (lowband_scratch && lowband && (recombine || ((N_B&1) == 0 && tf_change<0) || B0>1)) { - int j; - for (j=0;j<N;j++) - lowband_scratch[j] = lowband[j]; + OPUS_COPY(lowband_scratch, lowband, N); lowband = lowband_scratch; } @@ -1432,7 +1439,7 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end, ctx.remaining_bits = remaining_bits; if (i <= codedBands-1) { - curr_balance = balance / IMIN(3, codedBands-i); + curr_balance = celt_sudiv(balance, IMIN(3, codedBands-i)); b = IMAX(0, IMIN(16383, IMIN(remaining_bits+1,pulses[i]+curr_balance))); } else { b = 0; diff --git a/lib/rbcodec/codecs/libopus/celt/bands.h b/lib/rbcodec/codecs/libopus/celt/bands.h index 96ba52a..69901b1 100644 --- a/lib/rbcodec/codecs/libopus/celt/bands.h +++ b/lib/rbcodec/codecs/libopus/celt/bands.h @@ -41,7 +41,7 @@ * @param X Spectrum * @param bandE Square root of the energy for each band (returned) */ -void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int M); +void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM); /*void compute_noise_energies(const CELTMode *m, const celt_sig *X, const opus_val16 *tonality, celt_ener *bandE);*/ @@ -59,14 +59,15 @@ void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, cel * @param bandE Square root of the energy for each band */ void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X, - celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandE, int start, int end, int C, int M); + celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandE, int start, + int end, int M, int downsample, int silence); #define SPREAD_NONE (0) #define SPREAD_LIGHT (1) #define SPREAD_NORMAL (2) #define SPREAD_AGGRESSIVE (3) -int spreading_decision(const CELTMode *m, celt_norm *X, int *average, +int spreading_decision(const CELTMode *m, const celt_norm *X, int *average, int last_decision, int *hf_average, int *tapset_decision, int update_hf, int end, int C, int M); @@ -104,8 +105,8 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end, opus_int32 total_bits, opus_int32 balance, ec_ctx *ec, int M, int codedBands, opus_uint32 *seed); void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_masks, int LM, int C, int size, - int start, int end, opus_val16 *logE, opus_val16 *prev1logE, - opus_val16 *prev2logE, int *pulses, opus_uint32 seed); + int start, int end, const opus_val16 *logE, const opus_val16 *prev1logE, + const opus_val16 *prev2logE, const int *pulses, opus_uint32 seed); opus_uint32 celt_lcg_rand(opus_uint32 seed); diff --git a/lib/rbcodec/codecs/libopus/celt/celt.c b/lib/rbcodec/codecs/libopus/celt/celt.c index 3e0ce6e..c0a1e0d 100644 --- a/lib/rbcodec/codecs/libopus/celt/celt.c +++ b/lib/rbcodec/codecs/libopus/celt/celt.c @@ -54,6 +54,10 @@ #define PACKAGE_VERSION "unknown" #endif +#if defined(MIPSr1_ASM) +#include "mips/celt_mipsr1.h" +#endif + int resampling_factor(opus_int32 rate) { @@ -86,6 +90,63 @@ int resampling_factor(opus_int32 rate) } #ifndef OVERRIDE_COMB_FILTER_CONST +/* This version should be faster on ARM */ +#ifdef OPUS_ARM_ASM +static void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N, + opus_val16 g10, opus_val16 g11, opus_val16 g12) +{ + opus_val32 x0, x1, x2, x3, x4; + int i; + x4 = SHL32(x[-T-2], 1); + x3 = SHL32(x[-T-1], 1); + x2 = SHL32(x[-T], 1); + x1 = SHL32(x[-T+1], 1); + for (i=0;i<N-4;i+=5) + { + opus_val32 t; + x0=SHL32(x[i-T+2],1); + t = MAC16_32_Q16(x[i], g10, x2); + t = MAC16_32_Q16(t, g11, ADD32(x1,x3)); + t = MAC16_32_Q16(t, g12, ADD32(x0,x4)); + y[i] = t; + x4=SHL32(x[i-T+3],1); + t = MAC16_32_Q16(x[i+1], g10, x1); + t = MAC16_32_Q16(t, g11, ADD32(x0,x2)); + t = MAC16_32_Q16(t, g12, ADD32(x4,x3)); + y[i+1] = t; + x3=SHL32(x[i-T+4],1); + t = MAC16_32_Q16(x[i+2], g10, x0); + t = MAC16_32_Q16(t, g11, ADD32(x4,x1)); + t = MAC16_32_Q16(t, g12, ADD32(x3,x2)); + y[i+2] = t; + x2=SHL32(x[i-T+5],1); + t = MAC16_32_Q16(x[i+3], g10, x4); + t = MAC16_32_Q16(t, g11, ADD32(x3,x0)); + t = MAC16_32_Q16(t, g12, ADD32(x2,x1)); + y[i+3] = t; + x1=SHL32(x[i-T+6],1); + t = MAC16_32_Q16(x[i+4], g10, x3); + t = MAC16_32_Q16(t, g11, ADD32(x2,x4)); + t = MAC16_32_Q16(t, g12, ADD32(x1,x0)); + y[i+4] = t; + } +#ifdef CUSTOM_MODES + for (;i<N;i++) + { + opus_val32 t; + x0=SHL32(x[i-T+2],1); + t = MAC16_32_Q16(x[i], g10, x2); + t = MAC16_32_Q16(t, g11, ADD32(x1,x3)); + t = MAC16_32_Q16(t, g12, ADD32(x0,x4)); + y[i] = t; + x4=x3; + x3=x2; + x2=x1; + x1=x0; + } +#endif +} +#else static void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N, opus_val16 g10, opus_val16 g11, opus_val16 g12) { @@ -110,7 +171,9 @@ static void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N, } #endif +#endif +#ifndef OVERRIDE_comb_filter void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N, opus_val16 g0, opus_val16 g1, int tapset0, int tapset1, const opus_val16 *window, int overlap) @@ -131,16 +194,19 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N, OPUS_MOVE(y, x, N); return; } - g00 = MULT16_16_Q15(g0, gains[tapset0][0]); - g01 = MULT16_16_Q15(g0, gains[tapset0][1]); - g02 = MULT16_16_Q15(g0, gains[tapset0][2]); - g10 = MULT16_16_Q15(g1, gains[tapset1][0]); - g11 = MULT16_16_Q15(g1, gains[tapset1][1]); - g12 = MULT16_16_Q15(g1, gains[tapset1][2]); + g00 = MULT16_16_P15(g0, gains[tapset0][0]); + g01 = MULT16_16_P15(g0, gains[tapset0][1]); + g02 = MULT16_16_P15(g0, gains[tapset0][2]); + g10 = MULT16_16_P15(g1, gains[tapset1][0]); + g11 = MULT16_16_P15(g1, gains[tapset1][1]); + g12 = MULT16_16_P15(g1, gains[tapset1][2]); x1 = x[-T1+1]; x2 = x[-T1 ]; x3 = x[-T1-1]; x4 = x[-T1-2]; + /* If the filter didn't change, we don't need the overlap */ + if (g0==g1 && T0==T1 && tapset0==tapset1) + overlap=0; for (i=0;i<overlap;i++) { opus_val16 f; @@ -170,6 +236,7 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N, /* Compute the part with the constant filter. */ comb_filter_const(y+i, x+i, T1, N-i, g10, g11, g12); } +#endif /* OVERRIDE_comb_filter */ const signed char tf_select_table[4][8] = { {0, -1, 0, -1, 0,-1, 0,-1}, diff --git a/lib/rbcodec/codecs/libopus/celt/celt.h b/lib/rbcodec/codecs/libopus/celt/celt.h index 5deea1f..b196751 100644 --- a/lib/rbcodec/codecs/libopus/celt/celt.h +++ b/lib/rbcodec/codecs/libopus/celt/celt.h @@ -134,7 +134,8 @@ int celt_decoder_get_size(int channels); int celt_decoder_init(CELTDecoder *st, opus_int32 sampling_rate, int channels); -int celt_decode_with_ec(OpusCustomDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec); +int celt_decode_with_ec(OpusCustomDecoder * OPUS_RESTRICT st, const unsigned char *data, + int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum); #define celt_encoder_ctl opus_custom_encoder_ctl #define celt_decoder_ctl opus_custom_decoder_ctl @@ -205,10 +206,10 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N, void init_caps(const CELTMode *m,int *cap,int LM,int C); #ifdef RESYNTH -void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem, celt_sig * OPUS_RESTRICT scratch); - -void compute_inv_mdcts(const CELTMode *mode, int shortBlocks, celt_sig *X, - celt_sig * OPUS_RESTRICT out_mem[], int C, int LM); +void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem); +void celt_synthesis(const CELTMode *mode, celt_norm *X, celt_sig * out_syn[], + opus_val16 *oldBandE, int start, int effEnd, int C, int CC, int isTransient, + int LM, int downsample, int silence); #endif #ifdef __cplusplus diff --git a/lib/rbcodec/codecs/libopus/celt/celt_decoder.c b/lib/rbcodec/codecs/libopus/celt/celt_decoder.c index 77fa2d0..8af96b7 100644 --- a/lib/rbcodec/codecs/libopus/celt/celt_decoder.c +++ b/lib/rbcodec/codecs/libopus/celt/celt_decoder.c @@ -51,6 +51,9 @@ #include "celt_lpc.h" #include "vq.h" +#if defined(SMALL_FOOTPRINT) && defined(FIXED_POINT) +#define NORM_ALIASING_HACK +#endif /**********************************************************************/ /* */ /* DECODER */ @@ -175,28 +178,24 @@ void opus_custom_decoder_destroy(CELTDecoder *st) } #endif /* CUSTOM_MODES */ -static OPUS_INLINE opus_val16 SIG2WORD16(celt_sig x) -{ -#ifdef FIXED_POINT - x = PSHR32(x, SIG_SHIFT); - x = MAX32(x, -32768); - x = MIN32(x, 32767); - return EXTRACT16(x); -#else - return (opus_val16)x; -#endif -} #ifndef RESYNTH static #endif -void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem, celt_sig * OPUS_RESTRICT scratch) +void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, + celt_sig *mem, int accum) { int c; int Nd; int apply_downsampling=0; opus_val16 coef0; - + VARDECL(celt_sig, scratch); + SAVE_STACK; +#ifndef FIXED_POINT + (void)accum; + celt_assert(accum==0); +#endif + ALLOC(scratch, N, celt_sig); coef0 = coef[0]; Nd = N/downsample; c=0; do { @@ -234,11 +233,24 @@ void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, c apply_downsampling=1; } else { /* Shortcut for the standard (non-custom modes) case */ - for (j=0;j<N;j++) +#ifdef FIXED_POINT + if (accum) { - celt_sig tmp = x[j] + m + VERY_SMALL; - m = MULT16_32_Q15(coef0, tmp); - y[j*C] = SCALEOUT(SIG2WORD16(tmp)); + for (j=0;j<N;j++) + { + celt_sig tmp = x[j] + m + VERY_SMALL; + m = MULT16_32_Q15(coef0, tmp); + y[j*C] = SAT16(ADD32(y[j*C], SCALEOUT(SIG2WORD16(tmp)))); + } + } else +#endif + { + for (j=0;j<N;j++) + { + celt_sig tmp = x[j] + m + VERY_SMALL; + m = MULT16_32_Q15(coef0, tmp); + y[j*C] = SCALEOUT(SIG2WORD16(tmp)); + } } } mem[c] = m; @@ -246,41 +258,94 @@ void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, c if (apply_downsampling) { /* Perform down-sampling */ - for (j=0;j<Nd;j++) - y[j*C] = SCALEOUT(SIG2WORD16(scratch[j*downsample])); +#ifdef FIXED_POINT + if (accum) + { + for (j=0;j<Nd;j++) + y[j*C] = SAT16(ADD32(y[j*C], SCALEOUT(SIG2WORD16(scratch[j*downsample])))); + } else +#endif + { + for (j=0;j<Nd;j++) + y[j*C] = SCALEOUT(SIG2WORD16(scratch[j*downsample])); + } } } while (++c<C); + RESTORE_STACK; } -/** Compute the IMDCT and apply window for all sub-frames and - all channels in a frame */ #ifndef RESYNTH static #endif -void compute_inv_mdcts(const CELTMode *mode, int shortBlocks, celt_sig *X, - celt_sig * OPUS_RESTRICT out_mem[], int C, int LM) +void celt_synthesis(const CELTMode *mode, celt_norm *X, celt_sig * out_syn[], + opus_val16 *oldBandE, int start, int effEnd, int C, int CC, int isTransient, + int LM, int downsample, int silence) { - int b, c; + int c, i; + int M; + int b; int B; - int N; + int N, NB; int shift; - const int overlap = OVERLAP(mode); + int nbEBands; + int overlap; + VARDECL(celt_sig, freq); + SAVE_STACK; - if (shortBlocks) + overlap = mode->overlap; + nbEBands = mode->nbEBands; + N = mode->shortMdctSize<<LM; + ALLOC(freq, N, celt_sig); /**< Interleaved signal MDCTs */ + M = 1<<LM; + + if (isTransient) { - B = shortBlocks; - N = mode->shortMdctSize; + B = M; + NB = mode->shortMdctSize; shift = mode->maxLM; } else { B = 1; - N = mode->shortMdctSize<<LM; + NB = mode->shortMdctSize<<LM; shift = mode->maxLM-LM; } - c=0; do { - /* IMDCT on the interleaved the sub-frames, overlap-add is performed by the IMDCT */ + + if (CC==2&&C==1) + { + /* Copying a mono streams to two channels */ + celt_sig *freq2; + denormalise_bands(mode, X, freq, oldBandE, start, effEnd, M, + downsample, silence); + /* Store a temporary copy in the output buffer because the IMDCT destroys its input. */ + freq2 = out_syn[1]+overlap/2; + OPUS_COPY(freq2, freq, N); for (b=0;b<B;b++) - clt_mdct_backward(&mode->mdct, &X[b+c*N*B], out_mem[c]+N*b, mode->window, overlap, shift, B); - } while (++c<C); + clt_mdct_backward(&mode->mdct, &freq2[b], out_syn[0]+NB*b, mode->window, overlap, shift, B); + for (b=0;b<B;b++) + clt_mdct_backward(&mode->mdct, &freq[b], out_syn[1]+NB*b, mode->window, overlap, shift, B); + } else if (CC==1&&C==2) + { + /* Downmixing a stereo stream to mono */ + celt_sig *freq2; + freq2 = out_syn[0]+overlap/2; + denormalise_bands(mode, X, freq, oldBandE, start, effEnd, M, + downsample, silence); + /* Use the output buffer as temp array before downmixing. */ + denormalise_bands(mode, X+N, freq2, oldBandE+nbEBands, start, effEnd, M, + downsample, silence); + for (i=0;i<N;i++) + freq[i] = HALF32(ADD32(freq[i],freq2[i])); + for (b=0;b<B;b++) + clt_mdct_backward(&mode->mdct, &freq[b], out_syn[0]+NB*b, mode->window, overlap, shift, B); + } else { + /* Normal case (mono or stereo) */ + c=0; do { + denormalise_bands(mode, X+c*N, freq, oldBandE+c*nbEBands, start, effEnd, M, + downsample, silence); + for (b=0;b<B;b++) + clt_mdct_backward(&mode->mdct, &freq[b], out_syn[c]+NB*b, mode->window, overlap, shift, B); + } while (++c<CC); + } + RESTORE_STACK; } static void tf_decode(int start, int end, int isTransient, int *tf_res, int LM, ec_dec *dec) @@ -330,7 +395,23 @@ static void tf_decode(int start, int end, int isTransient, int *tf_res, int LM, pitch of 480 Hz. */ #define PLC_PITCH_LAG_MIN (100) -static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_RESTRICT pcm, int N, int LM) +static int celt_plc_pitch_search(celt_sig *decode_mem[2], int C, int arch) +{ + int pitch_index; + VARDECL( opus_val16, lp_pitch_buf ); + SAVE_STACK; + ALLOC( lp_pitch_buf, DECODE_BUFFER_SIZE>>1, opus_val16 ); + pitch_downsample(decode_mem, lp_pitch_buf, + DECODE_BUFFER_SIZE, C, arch); + pitch_search(lp_pitch_buf+(PLC_PITCH_LAG_MAX>>1), lp_pitch_buf, + DECODE_BUFFER_SIZE-PLC_PITCH_LAG_MAX, + PLC_PITCH_LAG_MAX-PLC_PITCH_LAG_MIN, &pitch_index, arch); + pitch_index = PLC_PITCH_LAG_MAX-pitch_index; + RESTORE_STACK; + return pitch_index; +} + +static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM) { int c; int i; @@ -343,11 +424,9 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R int nbEBands; int overlap; int start; - int downsample; int loss_count; int noise_based; const opus_int16 *eBands; - VARDECL(celt_sig, scratch); SAVE_STACK; mode = st->mode; @@ -367,14 +446,15 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R loss_count = st->loss_count; start = st->start; - downsample = st->downsample; noise_based = loss_count >= 5 || start != 0; - ALLOC(scratch, noise_based?N*C:N, celt_sig); if (noise_based) { /* Noise-based PLC/CNG */ - celt_sig *freq; +#ifdef NORM_ALIASING_HACK + celt_norm *X; +#else VARDECL(celt_norm, X); +#endif opus_uint32 seed; opus_val16 *plcLogE; int end; @@ -383,10 +463,13 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R end = st->end; effEnd = IMAX(start, IMIN(end, mode->effEBands)); - /* Share the interleaved signal MDCT coefficient buffer with the - deemphasis scratch buffer. */ - freq = scratch; +#ifdef NORM_ALIASING_HACK + /* This is an ugly hack that breaks aliasing rules and would be easily broken, + but it saves almost 4kB of stack. */ + X = (celt_norm*)(out_syn[C-1]+overlap/2); +#else ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */ +#endif if (loss_count >= 5) plcLogE = backgroundLogE; @@ -421,20 +504,12 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R } st->rng = seed; - denormalise_bands(mode, X, freq, plcLogE, start, effEnd, C, 1<<LM); - - c=0; do { - int bound = eBands[effEnd]<<LM; - if (downsample!=1) - bound = IMIN(bound, N/downsample); - for (i=bound;i<N;i++) - freq[c*N+i] = 0; - } while (++c<C); c=0; do { OPUS_MOVE(decode_mem[c], decode_mem[c]+N, DECODE_BUFFER_SIZE-N+(overlap>>1)); } while (++c<C); - compute_inv_mdcts(mode, 0, freq, out_syn, C, LM); + + celt_synthesis(mode, X, out_syn, plcLogE, start, effEnd, C, C, 0, LM, st->downsample, 0); } else { /* Pitch-based PLC */ const opus_val16 *window; @@ -445,15 +520,7 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R if (loss_count == 0) { - VARDECL( opus_val16, lp_pitch_buf ); - ALLOC( lp_pitch_buf, DECODE_BUFFER_SIZE>>1, opus_val16 ); - pitch_downsample(decode_mem, lp_pitch_buf, - DECODE_BUFFER_SIZE, C, st->arch); - pitch_search(lp_pitch_buf+(PLC_PITCH_LAG_MAX>>1), lp_pitch_buf, - DECODE_BUFFER_SIZE-PLC_PITCH_LAG_MAX, - PLC_PITCH_LAG_MAX-PLC_PITCH_LAG_MIN, &pitch_index, st->arch); - pitch_index = PLC_PITCH_LAG_MAX-pitch_index; - st->last_pitch_index = pitch_index; + st->last_pitch_index = pitch_index = celt_plc_pitch_search(decode_mem, C, st->arch); } else { pitch_index = st->last_pitch_index; fade = QCONST16(.8f,15); @@ -644,25 +711,23 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R } while (++c<C); } - deemphasis(out_syn, pcm, N, C, downsample, - mode->preemph, st->preemph_memD, scratch); - st->loss_count = loss_count+1; RESTORE_STACK; } -#define FREQ_X_BUF_SIZE (2*8*120) /* stereo * nbShortMdcts * shortMdctSize */ -static celt_sig s_freq[FREQ_X_BUF_SIZE] IBSS_ATTR MEM_ALIGN_ATTR; /* 7680 byte */ -static celt_norm s_X[FREQ_X_BUF_SIZE] IBSS_ATTR MEM_ALIGN_ATTR; /* 3840 byte */ -int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec) +int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, + int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum) { int c, i, N; int spread_decision; opus_int32 bits; ec_dec _dec; - VARDECL(celt_sig, freq); +#ifdef NORM_ALIASING_HACK + celt_norm *X; +#else VARDECL(celt_norm, X); +#endif VARDECL(int, fine_quant); VARDECL(int, pulses); VARDECL(int, cap); @@ -680,6 +745,8 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat int intra_ener; const int CC = st->channels; int LM, M; + int start; + int end; int effEnd; int codedBands; int alloc_trim; @@ -706,11 +773,10 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat nbEBands = mode->nbEBands; overlap = mode->overlap; eBands = mode->eBands; + start = st->start; + end = st->end; frame_size *= st->downsample; - c=0; do { - decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+overlap); - } while (++c<CC); lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+overlap)*CC); oldBandE = lpc+CC*LPC_ORDER; oldLogE = oldBandE + 2*nbEBands; @@ -728,7 +794,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat if (data0<0) return OPUS_INVALID_PACKET; } - st->end = IMAX(1, mode->effEBands-2*(data0>>5)); + st->end = end = IMAX(1, mode->effEBands-2*(data0>>5)); LM = (data0>>3)&0x3; C = 1 + ((data0>>2)&0x1); data++; @@ -755,14 +821,19 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat return OPUS_BAD_ARG; N = M*mode->shortMdctSize; + c=0; do { + decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+overlap); + out_syn[c] = decode_mem[c]+DECODE_BUFFER_SIZE-N; + } while (++c<CC); - effEnd = st->end; + effEnd = end; if (effEnd > mode->effEBands) effEnd = mode->effEBands; if (data == NULL || len<=1) { - celt_decode_lost(st, pcm, N, LM); + celt_decode_lost(st, N, LM); + deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, accum); RESTORE_STACK; return frame_size/st->downsample; } @@ -798,7 +869,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat postfilter_gain = 0; postfilter_pitch = 0; postfilter_tapset = 0; - if (st->start==0 && tell+16 <= total_bits) + if (start==0 && tell+16 <= total_bits) { if(ec_dec_bit_logp(dec, 1)) { @@ -829,11 +900,11 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat /* Decode the global flags (first symbols in the stream) */ intra_ener = tell+3<=total_bits ? ec_dec_bit_logp(dec, 3) : 0; /* Get band energies */ - unquant_coarse_energy(mode, st->start, st->end, oldBandE, + unquant_coarse_energy(mode, start, end, oldBandE, intra_ener, dec, C, LM); ALLOC(tf_res, nbEBands, int); - tf_decode(st->start, st->end, isTransient, tf_res, LM, dec); + tf_decode(start, end, isTransient, tf_res, LM, dec); tell = ec_tell(dec); spread_decision = SPREAD_NORMAL; @@ -849,7 +920,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat dynalloc_logp = 6; total_bits<<=BITRES; tell = ec_tell_frac(dec); - for (i=st->start;i<st->end;i++) + for (i=start;i<end;i++) { int width, quanta; int dynalloc_loop_logp; @@ -888,21 +959,28 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat ALLOC(pulses, nbEBands, int); ALLOC(fine_priority, nbEBands, int); - codedBands = compute_allocation(mode, st->start, st->end, offsets, cap, + codedBands = compute_allocation(mode, start, end, offsets, cap, alloc_trim, &intensity, &dual_stereo, bits, &balance, pulses, fine_quant, fine_priority, C, LM, dec, 0, 0, 0); - unquant_fine_energy(mode, st->start, st->end, oldBandE, fine_quant, dec, C); + unquant_fine_energy(mode, start, end, oldBandE, fine_quant, dec, C); + + c=0; do { + OPUS_MOVE(decode_mem[c], decode_mem[c]+N, DECODE_BUFFER_SIZE-N+overlap/2); + } while (++c<CC); /* Decode fixed codebook */ ALLOC(collapse_masks, C*nbEBands, unsigned char); - /**< Interleaved normalised MDCTs */ - if (FREQ_X_BUF_SIZE >= C*N) - X = s_X; - else - ALLOC(X, C*N, celt_norm); - quant_all_bands(0, mode, st->start, st->end, X, C==2 ? X+N : NULL, collapse_masks, +#ifdef NORM_ALIASING_HACK + /* This is an ugly hack that breaks aliasing rules and would be easily broken, + but it saves almost 4kB of stack. */ + X = (celt_norm*)(out_syn[CC-1]+overlap/2); +#else + ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */ +#endif + + quant_all_bands(0, mode, start, end, X, C==2 ? X+N : NULL, collapse_masks, NULL, pulses, shortBlocks, spread_decision, dual_stereo, intensity, tf_res, len*(8<<BITRES)-anti_collapse_rsv, balance, dec, LM, codedBands, &st->rng); @@ -911,58 +989,20 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat anti_collapse_on = ec_dec_bits(dec, 1); } - unquant_energy_finalise(mode, st->start, st->end, oldBandE, + unquant_energy_finalise(mode, start, end, oldBandE, fine_quant, fine_priority, len*8-ec_tell(dec), dec, C); if (anti_collapse_on) anti_collapse(mode, X, collapse_masks, LM, C, N, - st->start, st->end, oldBandE, oldLogE, oldLogE2, pulses, st->rng); - - /**< Interleaved signal MDCTs */ - if (FREQ_X_BUF_SIZE >= IMAX(CC,C)*N) - freq = s_freq; - else - ALLOC(freq, IMAX(CC,C)*N, celt_sig); + start, end, oldBandE, oldLogE, oldLogE2, pulses, st->rng); if (silence) { for (i=0;i<C*nbEBands;i++) oldBandE[i] = -QCONST16(28.f,DB_SHIFT); - for (i=0;i<C*N;i++) - freq[i] = 0; - } else { - /* Synthesis */ - denormalise_bands(mode, X, freq, oldBandE, st->start, effEnd, C, M); } - c=0; do { - OPUS_MOVE(decode_mem[c], decode_mem[c]+N, DECODE_BUFFER_SIZE-N+overlap/2); - } while (++c<CC); - c=0; do { - int bound = M*eBands[effEnd]; - if (st->downsample!=1) - bound = IMIN(bound, N/st->downsample); - for (i=bound;i<N;i++) - freq[c*N+i] = 0; - } while (++c<C); - - c=0; do { - out_syn[c] = decode_mem[c]+DECODE_BUFFER_SIZE-N; - } while (++c<CC); - - if (CC==2&&C==1) - { - for (i=0;i<N;i++) - freq[N+i] = freq[i]; - } - if (CC==1&&C==2) - { - for (i=0;i<N;i++) - freq[i] = HALF32(ADD32(freq[i],freq[N+i])); - } - - /* Compute inverse MDCTs */ - compute_inv_mdcts(mode, shortBlocks, freq, out_syn, CC, LM); + celt_synthesis(mode, X, out_syn, oldBandE, start, effEnd, C, CC, isTransient, LM, st->downsample, silence); c=0; do { st->postfilter_period=IMAX(st->postfilter_period, COMBFILTER_MINPERIOD); @@ -989,18 +1029,14 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat st->postfilter_tapset_old = st->postfilter_tapset; } - if (C==1) { - for (i=0;i<nbEBands;i++) - oldBandE[nbEBands+i]=oldBandE[i]; - } + if (C==1) + OPUS_COPY(&oldBandE[nbEBands], oldBandE, nbEBands); /* In case start or end were to change */ if (!isTransient) { - for (i=0;i<2*nbEBands;i++) - oldLogE2[i] = oldLogE[i]; - for (i=0;i<2*nbEBands;i++) - oldLogE[i] = oldBandE[i]; + OPUS_COPY(oldLogE2, oldLogE, 2*nbEBands); + OPUS_COPY(oldLogE, oldBandE, 2*nbEBands); for (i=0;i<2*nbEBands;i++) backgroundLogE[i] = MIN16(backgroundLogE[i] + M*QCONST16(0.001f,DB_SHIFT), oldBandE[i]); } else { @@ -1009,12 +1045,12 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat } c=0; do { - for (i=0;i<st->start;i++) + for (i=0;i<start;i++) { oldBandE[c*nbEBands+i]=0; oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT); } - for (i=st->end;i<nbEBands;i++) + for (i=end;i<nbEBands;i++) { oldBandE[c*nbEBands+i]=0; oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT); @@ -1022,8 +1058,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat } while (++c<2); st->rng = dec->rng; - /* We reuse freq[] as scratch space for the de-emphasis */ - deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, freq); + deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, accum); st->loss_count = 0; RESTORE_STACK; if (ec_tell(dec) > 8*len) @@ -1039,7 +1074,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat #ifdef FIXED_POINT int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size) { - return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL); + return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL, 0); } #ifndef DISABLE_FLOAT_API @@ -1056,7 +1091,7 @@ int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char N = frame_size; ALLOC(out, C*N, opus_int16); - ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL); + ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL, 0); if (ret>0) for (j=0;j<C*ret;j++) pcm[j]=out[j]*(1.f/32768.f); @@ -1070,7 +1105,7 @@ int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, float * OPUS_RESTRICT pcm, int frame_size) { - return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL); + return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL, 0); } int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size) @@ -1086,7 +1121,7 @@ int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data N = frame_size; ALLOC(out, C*N, celt_sig); - ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL); + ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL, 0); if (ret>0) for (j=0;j<C*ret;j++) diff --git a/lib/rbcodec/codecs/libopus/celt/cwrs.c b/lib/rbcodec/codecs/libopus/celt/cwrs.c index 03b8698..921100f 100644 --- a/lib/rbcodec/codecs/libopus/celt/cwrs.c +++ b/lib/rbcodec/codecs/libopus/celt/cwrs.c @@ -460,10 +460,12 @@ void encode_pulses(const int *_y,int _n,int _k,ec_enc *_enc){ ec_enc_uint(_enc,icwrs(_n,_y),CELT_PVQ_V(_n,_k)); } -static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y){ +static opus_val32 cwrsi(int _n,int _k,opus_uint32 _i,int *_y){ opus_uint32 p; int s; int k0; + opus_int16 val; + opus_val32 yy=0; celt_assert(_k>0); celt_assert(_n>1); while(_n>2){ @@ -487,7 +489,9 @@ static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y){ } else for(p=row[_k];p>_i;p=row[_k])_k--; _i-=p; - *_y++=(k0-_k+s)^s; + val=(k0-_k+s)^s; + *_y++=val; + yy=MAC16_16(yy,val,val); } /*Lots of dimensions case:*/ else{ @@ -507,7 +511,9 @@ static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y){ do p=CELT_PVQ_U_ROW[--_k][_n]; while(p>_i); _i-=p; - *_y++=(k0-_k+s)^s; + val=(k0-_k+s)^s; + *_y++=val; + yy=MAC16_16(yy,val,val); } } _n--; @@ -519,14 +525,19 @@ static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y){ k0=_k; _k=(_i+1)>>1; if(_k)_i-=2*_k-1; - *_y++=(k0-_k+s)^s; + val=(k0-_k+s)^s; + *_y++=val; + yy=MAC16_16(yy,val,val); /*_n==1*/ s=-(int)_i; - *_y=(_k+s)^s; + val=(_k+s)^s; + *_y=val; + yy=MAC16_16(yy,val,val); + return yy; } -void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){ - cwrsi(_n,_k,ec_dec_uint(_dec,CELT_PVQ_V(_n,_k)),_y); +opus_val32 decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){ + return cwrsi(_n,_k,ec_dec_uint(_dec,CELT_PVQ_V(_n,_k)),_y); } #else /* SMALL_FOOTPRINT */ @@ -591,8 +602,10 @@ static opus_uint32 ncwrs_urow(unsigned _n,unsigned _k,opus_uint32 *_u){ _y: Returns the vector of pulses. _u: Must contain entries [0..._k+1] of row _n of U() on input. Its contents will be destructively modified.*/ -static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y,opus_uint32 *_u){ +static opus_val32 cwrsi(int _n,int _k,opus_uint32 _i,int *_y,opus_uint32 *_u){ int j; + opus_int16 val; + opus_val32 yy=0; celt_assert(_n>0); j=0; do{ @@ -607,10 +620,13 @@ static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y,opus_uint32 *_u){ while(p>_i)p=_u[--_k]; _i-=p; yj-=_k; - _y[j]=(yj+s)^s; + val=(yj+s)^s; + _y[j]=val; + yy=MAC16_16(yy,val,val); uprev(_u,_k+2,0); } while(++j<_n); + return yy; } /*Returns the index of the given combination of K elements chosen from a set @@ -685,13 +701,15 @@ void encode_pulses(const int *_y,int _n,int _k,ec_enc *_enc){ RESTORE_STACK; } -void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){ +opus_val32 decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){ VARDECL(opus_uint32,u); + int ret; SAVE_STACK; celt_assert(_k>0); ALLOC(u,_k+2U,opus_uint32); - cwrsi(_n,_k,ec_dec_uint(_dec,ncwrs_urow(_n,_k,u)),_y,u); + ret = cwrsi(_n,_k,ec_dec_uint(_dec,ncwrs_urow(_n,_k,u)),_y,u); RESTORE_STACK; + return ret; } #endif /* SMALL_FOOTPRINT */ diff --git a/lib/rbcodec/codecs/libopus/celt/cwrs.h b/lib/rbcodec/codecs/libopus/celt/cwrs.h index 7dfbd07..7cd4717 100644 --- a/lib/rbcodec/codecs/libopus/celt/cwrs.h +++ b/lib/rbcodec/codecs/libopus/celt/cwrs.h @@ -43,6 +43,6 @@ void get_required_bits(opus_int16 *bits, int N, int K, int frac); void encode_pulses(const int *_y, int N, int K, ec_enc *enc); -void decode_pulses(int *_y, int N, int K, ec_dec *dec); +opus_val32 decode_pulses(int *_y, int N, int K, ec_dec *dec); #endif /* CWRS_H */ diff --git a/lib/rbcodec/codecs/libopus/celt/entcode.c b/lib/rbcodec/codecs/libopus/celt/entcode.c index fa5d7c7..461a36d 100644 --- a/lib/rbcodec/codecs/libopus/celt/entcode.c +++ b/lib/rbcodec/codecs/libopus/celt/entcode.c @@ -62,6 +62,27 @@ int ec_ilog(opus_uint32 _v){ } #endif +#if 1 +/* This is a faster version of ec_tell_frac() that takes advantage + of the low (1/8 bit) resolution to use just a linear function + followed by a lookup to determine the exact transition thresholds. */ +opus_uint32 ec_tell_frac(ec_ctx *_this){ + static const unsigned correction[8] = + {35733, 38967, 42495, 46340, + 50535, 55109, 60097, 65535}; + opus_uint32 nbits; + opus_uint32 r; + int l; + unsigned b; + nbits=_this->nbits_total<<BITRES; + l=EC_ILOG(_this->rng); + r=_this->rng>>(l-16); + b = (r>>12)-8; + b += r>correction[b]; + l = (l<<3)+b; + return nbits-l; +} +#else opus_uint32 ec_tell_frac(ec_ctx *_this){ opus_uint32 nbits; opus_uint32 r; @@ -91,3 +112,42 @@ opus_uint32 ec_tell_frac(ec_ctx *_this){ } return nbits-l; } +#endif + +#ifdef USE_SMALL_DIV_TABLE +/* Result of 2^32/(2*i+1), except for i=0. */ +const opus_uint32 SMALL_DIV_TABLE[129] ICONST_ATTR = { + 0xFFFFFFFF, 0x55555555, 0x33333333, 0x24924924, + 0x1C71C71C, 0x1745D174, 0x13B13B13, 0x11111111, + 0x0F0F0F0F, 0x0D79435E, 0x0C30C30C, 0x0B21642C, + 0x0A3D70A3, 0x097B425E, 0x08D3DCB0, 0x08421084, + 0x07C1F07C, 0x07507507, 0x06EB3E45, 0x06906906, + 0x063E7063, 0x05F417D0, 0x05B05B05, 0x0572620A, + 0x05397829, 0x05050505, 0x04D4873E, 0x04A7904A, + 0x047DC11F, 0x0456C797, 0x04325C53, 0x04104104, + 0x03F03F03, 0x03D22635, 0x03B5CC0E, 0x039B0AD1, + 0x0381C0E0, 0x0369D036, 0x03531DEC, 0x033D91D2, + 0x0329161F, 0x03159721, 0x03030303, 0x02F14990, + 0x02E05C0B, 0x02D02D02, 0x02C0B02C, 0x02B1DA46, + 0x02A3A0FD, 0x0295FAD4, 0x0288DF0C, 0x027C4597, + 0x02702702, 0x02647C69, 0x02593F69, 0x024E6A17, + 0x0243F6F0, 0x0239E0D5, 0x02302302, 0x0226B902, + 0x021D9EAD, 0x0214D021, 0x020C49BA, 0x02040810, + 0x01FC07F0, 0x01F44659, 0x01ECC07B, 0x01E573AC, + 0x01DE5D6E, 0x01D77B65, 0x01D0CB58, 0x01CA4B30, + 0x01C3F8F0, 0x01BDD2B8, 0x01B7D6C3, 0x01B20364, + 0x01AC5701, 0x01A6D01A, 0x01A16D3F, 0x019C2D14, + 0x01970E4F, 0x01920FB4, 0x018D3018, 0x01886E5F, + 0x0183C977, 0x017F405F, 0x017AD220, 0x01767DCE, + 0x01724287, 0x016E1F76, 0x016A13CD, 0x01661EC6, + 0x01623FA7, 0x015E75BB, 0x015AC056, 0x01571ED3, + 0x01539094, 0x01501501, 0x014CAB88, 0x0149539E, + 0x01460CBC, 0x0142D662, 0x013FB013, 0x013C995A, + 0x013991C2, 0x013698DF, 0x0133AE45, 0x0130D190, + 0x012E025C, 0x012B404A, 0x01288B01, 0x0125E227, + 0x01234567, 0x0120B470, 0x011E2EF3, 0x011BB4A4, + 0x01194538, 0x0116E068, 0x011485F0, 0x0112358E, + 0x010FEF01, 0x010DB20A, 0x010B7E6E, 0x010953F3, + 0x01073260, 0x0105197F, 0x0103091B, 0x01010101 +}; +#endif diff --git a/lib/rbcodec/codecs/libopus/celt/entcode.h b/lib/rbcodec/codecs/libopus/celt/entcode.h index dd13e49..13d6c84 100644 --- a/lib/rbcodec/codecs/libopus/celt/entcode.h +++ b/lib/rbcodec/codecs/libopus/celt/entcode.h @@ -34,6 +34,12 @@ # include <stddef.h> # include "ecintrin.h" +extern const opus_uint32 SMALL_DIV_TABLE[129]; + +#ifdef OPUS_ARM_ASM +#define USE_SMALL_DIV_TABLE +#endif + /*OPT: ec_window must be at least 32 bits, but if you have fast arithmetic on a larger type, you can speed up the decoder by using it here.*/ typedef opus_uint32 ec_window; @@ -114,4 +120,33 @@ static OPUS_INLINE int ec_tell(ec_ctx *_this){ rounding error is in the positive direction).*/ opus_uint32 ec_tell_frac(ec_ctx *_this); +/* Tested exhaustively for all n and for 1<=d<=256 */ +static OPUS_INLINE opus_uint32 celt_udiv(opus_uint32 n, opus_uint32 d) { + celt_assert(d>0); +#ifdef USE_SMALL_DIV_TABLE + if (d>256) + return n/d; + else { + opus_uint32 t, q; + t = EC_ILOG(d&-d); + q = (opus_uint64)SMALL_DIV_TABLE[d>>t]*(n>>(t-1))>>32; + return q+(n-q*d >= d); + } +#else + return n/d; +#endif +} + +static OPUS_INLINE opus_int32 celt_sudiv(opus_int32 n, opus_int32 d) { + celt_assert(d>0); +#ifdef USE_SMALL_DIV_TABLE + if (n<0) + return -(opus_int32)celt_udiv(-n, d); + else + return celt_udiv(n, d); +#else + return n/d; +#endif +} + #endif diff --git a/lib/rbcodec/codecs/libopus/celt/entdec.c b/lib/rbcodec/codecs/libopus/celt/entdec.c index 3c26468..0b3433e 100644 --- a/lib/rbcodec/codecs/libopus/celt/entdec.c +++ b/lib/rbcodec/codecs/libopus/celt/entdec.c @@ -138,7 +138,7 @@ void ec_dec_init(ec_dec *_this,unsigned char *_buf,opus_uint32 _storage){ unsigned ec_decode(ec_dec *_this,unsigned _ft){ unsigned s; - _this->ext=_this->rng/_ft; + _this->ext=celt_udiv(_this->rng,_ft); s=(unsigned)(_this->val/_this->ext); return _ft-EC_MINI(s+1,_ft); } diff --git a/lib/rbcodec/codecs/libopus/celt/entenc.c b/lib/rbcodec/codecs/libopus/celt/entenc.c index a7e34ec..271e4d3 100644 --- a/lib/rbcodec/codecs/libopus/celt/entenc.c +++ b/lib/rbcodec/codecs/libopus/celt/entenc.c @@ -127,7 +127,7 @@ void ec_enc_init(ec_enc *_this,unsigned char *_buf,opus_uint32 _size){ void ec_encode(ec_enc *_this,unsigned _fl,unsigned _fh,unsigned _ft){ opus_uint32 r; - r=_this->rng/_ft; + r=celt_udiv(_this->rng,_ft); if(_fl>0){ _this->val+=_this->rng-IMUL32(r,(_ft-_fl)); _this->rng=IMUL32(r,(_fh-_fl)); diff --git a/lib/rbcodec/codecs/libopus/celt/fixed_generic.h b/lib/rbcodec/codecs/libopus/celt/fixed_generic.h index ecf018a..ac67d37 100644 --- a/lib/rbcodec/codecs/libopus/celt/fixed_generic.h +++ b/lib/rbcodec/codecs/libopus/celt/fixed_generic.h @@ -113,7 +113,11 @@ /** 16x32 multiply, followed by a 15-bit shift right and 32-bit add. b must fit in 31 bits. Result fits in 32 bits. */ -#define MAC16_32_Q15(c,a,b) ADD32(c,ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15))) +#define MAC16_32_Q15(c,a,b) ADD32((c),ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15))) + +/** 16x32 multiplication, followed by a 16-bit shift right and 32-bit add. + Results fits in 32 bits */ +#define MAC16_32_Q16(c,a,b) ADD32((c),ADD32(MULT16_16((a),SHR((b),16)), SHR(MULT16_16SU((a),((b)&0x0000ffff)),16))) #define MULT16_16_Q11_32(a,b) (SHR(MULT16_16((a),(b)),11)) #define MULT16_16_Q11(a,b) (SHR(MULT16_16((a),(b)),11)) @@ -131,4 +135,17 @@ /** Divide a 32-bit value by a 32-bit value. Result fits in 32 bits */ #define DIV32(a,b) (((opus_val32)(a))/((opus_val32)(b))) +#if defined(MIPSr1_ASM) +#include "mips/fixed_generic_mipsr1.h" +#endif + +static OPUS_INLINE opus_val16 SIG2WORD16_generic(celt_sig x) +{ + x = PSHR32(x, SIG_SHIFT); + x = MAX32(x, -32768); + x = MIN32(x, 32767); + return EXTRACT16(x); +} +#define SIG2WORD16(x) (SIG2WORD16_generic(x)) + #endif diff --git a/lib/rbcodec/codecs/libopus/celt/kiss_fft.c b/lib/rbcodec/codecs/libopus/celt/kiss_fft.c index e2b8f3b..833ef5a 100644 --- a/lib/rbcodec/codecs/libopus/celt/kiss_fft.c +++ b/lib/rbcodec/codecs/libopus/celt/kiss_fft.c @@ -45,73 +45,62 @@ complex numbers. It also delares the kf_ internal functions. */ -#if 0 static void kf_bfly2( kiss_fft_cpx * Fout, - const size_t fstride, - const kiss_fft_state *st, int m, - int N, - int mm + int N ) { kiss_fft_cpx * Fout2; - const kiss_twiddle_cpx * tw1; - int i,j; - kiss_fft_cpx * Fout_beg = Fout; - for (i=0;i<N;i++) + int i; + (void)m; +#ifdef CUSTOM_MODES + if (m==1) { - Fout = Fout_beg + i*mm; - Fout2 = Fout + m; - tw1 = st->twiddles; - for(j=0;j<m;j++) + celt_assert(m==1); + for (i=0;i<N;i++) { kiss_fft_cpx t; - Fout->r = SHR32(Fout->r, 1);Fout->i = SHR32(Fout->i, 1); - Fout2->r = SHR32(Fout2->r, 1);Fout2->i = SHR32(Fout2->i, 1); - C_MUL (t, *Fout2 , *tw1); - tw1 += fstride; + Fout2 = Fout + 1; + t = *Fout2; C_SUB( *Fout2 , *Fout , t ); C_ADDTO( *Fout , t ); - ++Fout2; - ++Fout; + Fout += 2; } - } -} + } else #endif - -static void ki_bfly2( - kiss_fft_cpx * Fout, - const size_t fstride, - const kiss_fft_state *st, - int m, - int N, - int mm - ) -{ - kiss_fft_cpx * Fout2; - const kiss_twiddle_cpx * tw1; - kiss_fft_cpx t; - int i,j; - kiss_fft_cpx * Fout_beg = Fout; - for (i=0;i<N;i++) { - Fout = Fout_beg + i*mm; - Fout2 = Fout + m; - tw1 = st->twiddles; - for(j=0;j<m;j++) + opus_val16 tw; + tw = QCONST16(0.7071067812f, 15); + /* We know that m==4 here because the radix-2 is just after a radix-4 */ + celt_assert(m==4); + for (i=0;i<N;i++) { - C_MULC (t, *Fout2 , *tw1); - tw1 += fstride; - C_SUB( *Fout2 , *Fout , t ); - C_ADDTO( *Fout , t ); - ++Fout2; - ++Fout; + kiss_fft_cpx t; + Fout2 = Fout + 4; + t = Fout2[0]; + C_SUB( Fout2[0] , Fout[0] , t ); + C_ADDTO( Fout[0] , t ); + + t.r = S_MUL(Fout2[1].r+Fout2[1].i, tw); + t.i = S_MUL(Fout2[1].i-Fout2[1].r, tw); + C_SUB( Fout2[1] , Fout[1] , t ); + C_ADDTO( Fout[1] , t ); + + t.r = Fout2[2].i; + t.i = -Fout2[2].r; + C_SUB( Fout2[2] , Fout[2] , t ); + C_ADDTO( Fout[2] , t ); + + t.r = S_MUL(Fout2[3].i-Fout2[3].r, tw); + t.i = S_MUL(-Fout2[3].i-Fout2[3].r, tw); + C_SUB( Fout2[3] , Fout[3] , t ); + C_ADDTO( Fout[3] , t ); + Fout += 8; } } } -#if 0 static void kf_bfly4( kiss_fft_cpx * Fout, const size_t fstride, @@ -121,93 +110,69 @@ static void kf_bfly4( int mm ) { - const kiss_twiddle_cpx *tw1,*tw2,*tw3; - kiss_fft_cpx scratch[6]; - const size_t m2=2*m; - const size_t m3=3*m; - int i, j; + int i; - kiss_fft_cpx * Fout_beg = Fout; - for (i=0;i<N;i++) + if (m==1) { - Fout = Fout_beg + i*mm; - tw3 = tw2 = tw1 = st->twiddles; - for (j=0;j<m;j++) + /* Degenerate case where all the twiddles are 1. */ + for (i=0;i<N;i++) { - C_MUL4(scratch[0],Fout[m] , *tw1 ); - C_MUL4(scratch[1],Fout[m2] , *tw2 ); - C_MUL4(scratch[2],Fout[m3] , *tw3 ); - - Fout->r = PSHR32(Fout->r, 2); - Fout->i = PSHR32(Fout->i, 2); - C_SUB( scratch[5] , *Fout, scratch[1] ); - C_ADDTO(*Fout, scratch[1]); - C_ADD( scratch[3] , scratch[0] , scratch[2] ); - C_SUB( scratch[4] , scratch[0] , scratch[2] ); - C_SUB( Fout[m2], *Fout, scratch[3] ); - tw1 += fstride; - tw2 += fstride*2; - tw3 += fstride*3; - C_ADDTO( *Fout , scratch[3] ); - - Fout[m].r = scratch[5].r + scratch[4].i; - Fout[m].i = scratch[5].i - scratch[4].r; - Fout[m3].r = scratch[5].r - scratch[4].i; - Fout[m3].i = scratch[5].i + scratch[4].r; - ++Fout; + kiss_fft_cpx scratch0, scratch1; + + C_SUB( scratch0 , *Fout, Fout[2] ); + C_ADDTO(*Fout, Fout[2]); + C_ADD( scratch1 , Fout[1] , Fout[3] ); + C_SUB( Fout[2], *Fout, scratch1 ); + C_ADDTO( *Fout , scratch1 ); + C_SUB( scratch1 , Fout[1] , Fout[3] ); + + Fout[1].r = scratch0.r + scratch1.i; + Fout[1].i = scratch0.i - scratch1.r; + Fout[3].r = scratch0.r - scratch1.i; + Fout[3].i = scratch0.i + scratch1.r; + Fout+=4; } - } -} -#endif - -static void ki_bfly4( - kiss_fft_cpx * Fout, - const size_t fstride, - const kiss_fft_state *st, - int m, - int N, - int mm - ) -{ - const kiss_twiddle_cpx *tw1,*tw2,*tw3; - kiss_fft_cpx scratch[6]; - const size_t m2=2*m; - const size_t m3=3*m; - int i, j; - - kiss_fft_cpx * Fout_beg = Fout; - for (i=0;i<N;i++) - { - Fout = Fout_beg + i*mm; - tw3 = tw2 = tw1 = st->twiddles; - for (j=0;j<m;j++) + } else { + int j; + kiss_fft_cpx scratch[6]; + const kiss_twiddle_cpx *tw1,*tw2,*tw3; + const int m2=2*m; + const int m3=3*m; + kiss_fft_cpx * Fout_beg = Fout; + for (i=0;i<N;i++) { - C_MULC(scratch[0],Fout[m] , *tw1 ); - C_MULC(scratch[1],Fout[m2] , *tw2 ); - C_MULC(scratch[2],Fout[m3] , *tw3 ); - - C_SUB( scratch[5] , *Fout, scratch[1] ); - C_ADDTO(*Fout, scratch[1]); - C_ADD( scratch[3] , scratch[0] , scratch[2] ); - C_SUB( scratch[4] , scratch[0] , scratch[2] ); - C_SUB( Fout[m2], *Fout, scratch[3] ); - tw1 += fstride; - tw2 += fstride*2; - tw3 += fstride*3; - C_ADDTO( *Fout , scratch[3] ); - - Fout[m].r = scratch[5].r - scratch[4].i; - Fout[m].i = scratch[5].i + scratch[4].r; - Fout[m3].r = scratch[5].r + scratch[4].i; - Fout[m3].i = scratch[5].i - scratch[4].r; - ++Fout; + Fout = Fout_beg + i*mm; + tw3 = tw2 = tw1 = st->twiddles; + /* m is guaranteed to be a multiple of 4. */ + for (j=0;j<m;j++) + { + C_MUL(scratch[0],Fout[m] , *tw1 ); + C_MUL(scratch[1],Fout[m2] , *tw2 ); + C_MUL(scratch[2],Fout[m3] , *tw3 ); + + C_SUB( scratch[5] , *Fout, scratch[1] ); + C_ADDTO(*Fout, scratch[1]); + C_ADD( scratch[3] , scratch[0] , scratch[2] ); + C_SUB( scratch[4] , scratch[0] , scratch[2] ); + C_SUB( Fout[m2], *Fout, scratch[3] ); + tw1 += fstride; + tw2 += fstride*2; + tw3 += fstride*3; + C_ADDTO( *Fout , scratch[3] ); + + Fout[m].r = scratch[5].r + scratch[4].i; + Fout[m].i = scratch[5].i - scratch[4].r; + Fout[m3].r = scratch[5].r - scratch[4].i; + Fout[m3].i = scratch[5].i + scratch[4].r; + ++Fout; + } } } } + #ifndef RADIX_TWO_ONLY -#if 0 static void kf_bfly3( kiss_fft_cpx * Fout, const size_t fstride, @@ -225,14 +190,19 @@ static void kf_bfly3( kiss_twiddle_cpx epi3; kiss_fft_cpx * Fout_beg = Fout; +#ifdef FIXED_POINT + epi3.r = -16384; + epi3.i = -28378; +#else epi3 = st->twiddles[fstride*m]; +#endif for (i=0;i<N;i++) { Fout = Fout_beg + i*mm; tw1=tw2=st->twiddles; + /* For non-custom modes, m is guaranteed to be a multiple of 4. */ k=m; do { - C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3); C_MUL(scratch[1],Fout[m] , *tw1); C_MUL(scratch[2],Fout[m2] , *tw2); @@ -259,59 +229,9 @@ static void kf_bfly3( } while(--k); } } -#endif - -static void ki_bfly3( - kiss_fft_cpx * Fout, - const size_t fstride, - const kiss_fft_state *st, - int m, - int N, - int mm - ) -{ - int i, k; - const size_t m2 = 2*m; - const kiss_twiddle_cpx *tw1,*tw2; - kiss_fft_cpx scratch[5]; - kiss_twiddle_cpx epi3; - - kiss_fft_cpx * Fout_beg = Fout; - epi3 = st->twiddles[fstride*m]; - for (i=0;i<N;i++) - { - Fout = Fout_beg + i*mm; - tw1=tw2=st->twiddles; - k=m; - do{ - - C_MULC(scratch[1],Fout[m] , *tw1); - C_MULC(scratch[2],Fout[m2] , *tw2); - - C_ADD(scratch[3],scratch[1],scratch[2]); - C_SUB(scratch[0],scratch[1],scratch[2]); - tw1 += fstride; - tw2 += fstride*2; - - Fout[m].r = Fout->r - HALF_OF(scratch[3].r); - Fout[m].i = Fout->i - HALF_OF(scratch[3].i); - - C_MULBYSCALAR( scratch[0] , -epi3.i ); - - C_ADDTO(*Fout,scratch[3]); - - Fout[m2].r = Fout[m].r + scratch[0].i; - Fout[m2].i = Fout[m].i - scratch[0].r; - - Fout[m].r -= scratch[0].i; - Fout[m].i += scratch[0].r; - ++Fout; - }while(--k); - } -} -#if 0 +#ifndef OVERRIDE_kf_bfly5 static void kf_bfly5( kiss_fft_cpx * Fout, const size_t fstride, @@ -324,13 +244,19 @@ static void kf_bfly5( kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; int i, u; kiss_fft_cpx scratch[13]; - const kiss_twiddle_cpx * twiddles = st->twiddles; const kiss_twiddle_cpx *tw; kiss_twiddle_cpx ya,yb; kiss_fft_cpx * Fout_beg = Fout; - ya = twiddles[fstride*m]; - yb = twiddles[fstride*2*m]; +#ifdef FIXED_POINT + ya.r = 10126; + ya.i = -31164; + yb.r = -26510; + yb.i = -19261; +#else + ya = st->twiddles[fstride*m]; + yb = st->twiddles[fstride*2*m]; +#endif tw=st->twiddles; for (i=0;i<N;i++) @@ -342,8 +268,8 @@ static void kf_bfly5( Fout3=Fout0+3*m; Fout4=Fout0+4*m; + /* For non-custom modes, m is guaranteed to be a multiple of 4. */ for ( u=0; u<m; ++u ) { - C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5); scratch[0] = *Fout0; C_MUL(scratch[1] ,*Fout1, tw[u*fstride]); @@ -380,75 +306,8 @@ static void kf_bfly5( } } } -#endif - -static void ki_bfly5( - kiss_fft_cpx * Fout, - const size_t fstride, - const kiss_fft_state *st, - int m, - int N, - int mm - ) -{ - kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; - int i, u; - kiss_fft_cpx scratch[13]; - const kiss_twiddle_cpx * twiddles = st->twiddles; - const kiss_twiddle_cpx *tw; - kiss_twiddle_cpx ya,yb; - kiss_fft_cpx * Fout_beg = Fout; +#endif /* OVERRIDE_kf_bfly5 */ - ya = twiddles[fstride*m]; - yb = twiddles[fstride*2*m]; - tw=st->twiddles; - - for (i=0;i<N;i++) - { - Fout = Fout_beg + i*mm; - Fout0=Fout; - Fout1=Fout0+m; - Fout2=Fout0+2*m; - Fout3=Fout0+3*m; - Fout4=Fout0+4*m; - - for ( u=0; u<m; ++u ) { - scratch[0] = *Fout0; - - C_MULC(scratch[1] ,*Fout1, tw[u*fstride]); - C_MULC(scratch[2] ,*Fout2, tw[2*u*fstride]); - C_MULC(scratch[3] ,*Fout3, tw[3*u*fstride]); - C_MULC(scratch[4] ,*Fout4, tw[4*u*fstride]); - - C_ADD( scratch[7],scratch[1],scratch[4]); - C_SUB( scratch[10],scratch[1],scratch[4]); - C_ADD( scratch[8],scratch[2],scratch[3]); - C_SUB( scratch[9],scratch[2],scratch[3]); - - Fout0->r += scratch[7].r + scratch[8].r; - Fout0->i += scratch[7].i + scratch[8].i; - - scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r); - scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r); - - scratch[6].r = -S_MUL(scratch[10].i,ya.i) - S_MUL(scratch[9].i,yb.i); - scratch[6].i = S_MUL(scratch[10].r,ya.i) + S_MUL(scratch[9].r,yb.i); - - C_SUB(*Fout1,scratch[5],scratch[6]); - C_ADD(*Fout4,scratch[5],scratch[6]); - - scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r); - scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r); - scratch[12].r = S_MUL(scratch[10].i,yb.i) - S_MUL(scratch[9].i,ya.i); - scratch[12].i = -S_MUL(scratch[10].r,yb.i) + S_MUL(scratch[9].r,ya.i); - - C_ADD(*Fout2,scratch[11],scratch[12]); - C_SUB(*Fout3,scratch[11],scratch[12]); - - ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; - } - } -} #endif @@ -496,6 +355,9 @@ static int kf_factor(int n,opus_int16 * facbuf) { int p=4; + int i; + int stages=0; + int nbak = n; /*factor out powers of 4, powers of 2, then any remaining primes */ do { @@ -517,9 +379,30 @@ int kf_factor(int n,opus_int16 * facbuf) { return 0; } - *facbuf++ = p; - *facbuf++ = n; + facbuf[2*stages] = p; + if (p==2 && stages > 1) + { + facbuf[2*stages] = 4; + facbuf[2] = 2; + } + stages++; } while (n > 1); + n = nbak; + /* Reverse the order to get the radix 4 at the end, so we can use the + fast degenerate case. It turns out that reversing the order also + improves the noise behaviour. */ + for (i=0;i<stages/2;i++) + { + int tmp; + tmp = facbuf[2*i]; + facbuf[2*i] = facbuf[2*(stages-i-1)]; + facbuf[2*(stages-i-1)] = tmp; + } + for (i=0;i<stages;i++) + { + n /= facbuf[2*i]; + facbuf[2*i+1] = n; + } return 1; } @@ -563,14 +446,20 @@ kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, co kiss_twiddle_cpx *twiddles; st->nfft=nfft; -#ifndef FIXED_POINT +#ifdef FIXED_POINT + st->scale_shift = celt_ilog2(st->nfft); + if (st->nfft == 1<<st->scale_shift) + st->scale = Q15ONE; + else + st->scale = (1073741824+st->nfft/2)/st->nfft>>(15-st->scale_shift); +#else st->scale = 1.f/nfft; #endif if (base != NULL) { st->twiddles = base->twiddles; st->shift = 0; - while (nfft<<st->shift != base->nfft && st->shift < 32) + while (st->shift < 32 && nfft<<st->shift != base->nfft) st->shift++; if (st->shift>=32) goto fail; @@ -614,8 +503,7 @@ void opus_fft_free(const kiss_fft_state *cfg) #endif /* CUSTOM_MODES */ -#if 0 -void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) +void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout) { int m2, m; int p; @@ -627,17 +515,6 @@ void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fou /* st->shift can be -1 */ shift = st->shift>0 ? st->shift : 0; - celt_assert2 (fin != fout, "In-place FFT not supported"); - /* Bit-reverse the input */ - for (i=0;i<st->nfft;i++) - { - fout[st->bitrev[i]] = fin[i]; -#ifndef FIXED_POINT - fout[st->bitrev[i]].r *= st->scale; - fout[st->bitrev[i]].i *= st->scale; -#endif - } - fstride[0] = 1; L=0; do { @@ -656,7 +533,7 @@ void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fou switch (st->factors[2*i]) { case 2: - kf_bfly2(fout,fstride[i]<<shift,st,m, fstride[i], m2); + kf_bfly2(fout, m, fstride[i]); break; case 4: kf_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2); @@ -673,57 +550,44 @@ void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fou m = m2; } } -#endif -void opus_ifft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) +#if 0 +void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) { - int m2, m; - int p; - int L; - int fstride[MAXFACTORS]; int i; - int shift; + opus_val16 scale; +#ifdef FIXED_POINT + /* Allows us to scale with MULT16_32_Q16(), which is faster than + MULT16_32_Q15() on ARM. */ + int scale_shift = st->scale_shift-1; +#endif + scale = st->scale; - /* st->shift can be -1 */ - shift = st->shift>0 ? st->shift : 0; celt_assert2 (fin != fout, "In-place FFT not supported"); /* Bit-reverse the input */ for (i=0;i<st->nfft;i++) - fout[st->bitrev[i]] = fin[i]; - - fstride[0] = 1; - L=0; - do { - p = st->factors[2*L]; - m = st->factors[2*L+1]; - fstride[L+1] = fstride[L]*p; - L++; - } while(m!=1); - m = st->factors[2*L-1]; - for (i=L-1;i>=0;i--) { - if (i!=0) - m2 = st->factors[2*i-1]; - else - m2 = 1; - switch (st->factors[2*i]) - { - case 2: - ki_bfly2(fout,fstride[i]<<shift,st,m, fstride[i], m2); - break; - case 4: - ki_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2); - break; -#ifndef RADIX_TWO_ONLY - case 3: - ki_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2); - break; - case 5: - ki_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2); - break; -#endif - } - m = m2; + kiss_fft_cpx x = fin[i]; + fout[st->bitrev[i]].r = SHR32(MULT16_32_Q16(scale, x.r), scale_shift); + fout[st->bitrev[i]].i = SHR32(MULT16_32_Q16(scale, x.i), scale_shift); } + opus_fft_impl(st, fout); } +#endif + +#ifdef TEST_UNIT_DFT_C +void opus_ifft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) +{ + int i; + celt_assert2 (fin != fout, "In-place FFT not supported"); + /* Bit-reverse the input */ + for (i=0;i<st->nfft;i++) + fout[st->bitrev[i]] = fin[i]; + for (i=0;i<st->nfft;i++) + fout[i].i = -fout[i].i; + opus_fft_impl(st, fout); + for (i=0;i<st->nfft;i++) + fout[i].i = -fout[i].i; +} +#endif diff --git a/lib/rbcodec/codecs/libopus/celt/kiss_fft.h b/lib/rbcodec/codecs/libopus/celt/kiss_fft.h index 66cf1f2..390b54d 100644 --- a/lib/rbcodec/codecs/libopus/celt/kiss_fft.h +++ b/lib/rbcodec/codecs/libopus/celt/kiss_fft.h @@ -79,8 +79,9 @@ typedef struct { typedef struct kiss_fft_state{ int nfft; -#ifndef FIXED_POINT - kiss_fft_scalar scale; + opus_val16 scale; +#ifdef FIXED_POINT + int scale_shift; #endif int shift; opus_int16 factors[2*MAXFACTORS]; @@ -128,14 +129,10 @@ kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem); f[k].r and f[k].i * */ void opus_fft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout); +void opus_ifft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout); -#if defined(CPU_COLDFIRE) -#define IFFT_ICODE ICODE_ATTR -#else -#define IFFT_ICODE -#endif - -void opus_ifft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) IFFT_ICODE; +void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout); +void opus_ifft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout); void opus_fft_free(const kiss_fft_state *cfg); diff --git a/lib/rbcodec/codecs/libopus/celt/mdct.c b/lib/rbcodec/codecs/libopus/celt/mdct.c index 72ea180..7fa8eaf 100644 --- a/lib/rbcodec/codecs/libopus/celt/mdct.c +++ b/lib/rbcodec/codecs/libopus/celt/mdct.c @@ -53,18 +53,20 @@ #include "mathops.h" #include "stack_alloc.h" +#if defined(MIPSr1_ASM) +#include "mips/mdct_mipsr1.h" +#endif + + #ifdef CUSTOM_MODES int clt_mdct_init(mdct_lookup *l,int N, int maxshift) { int i; - int N4; kiss_twiddle_scalar *trig; -#if defined(FIXED_POINT) + int shift; int N2=N>>1; -#endif l->n = N; - N4 = N>>2; l->maxshift = maxshift; for (i=0;i<=maxshift;i++) { @@ -77,17 +79,28 @@ int clt_mdct_init(mdct_lookup *l,int N, int maxshift) return 0; #endif } - l->trig = trig = (kiss_twiddle_scalar*)opus_alloc((N4+1)*sizeof(kiss_twiddle_scalar)); + l->trig = trig = (kiss_twiddle_scalar*)opus_alloc((N-(N2>>maxshift))*sizeof(kiss_twiddle_scalar)); if (l->trig==NULL) return 0; - /* We have enough points that sine isn't necessary */ + for (shift=0;shift<=maxshift;shift++) + { + /* We have enough points that sine isn't necessary */ #if defined(FIXED_POINT) - for (i=0;i<=N4;i++) - trig[i] = TRIG_UPSCALE*celt_cos_norm(DIV32(ADD32(SHL32(EXTEND32(i),17),N2),N)); +#if 1 + for (i=0;i<N2;i++) + trig[i] = TRIG_UPSCALE*celt_cos_norm(DIV32(ADD32(SHL32(EXTEND32(i),17),N2+16384),N)); #else - for (i=0;i<=N4;i++) - trig[i] = (kiss_twiddle_scalar)cos(2*PI*i/N); + for (i=0;i<N2;i++) + trig[i] = (kiss_twiddle_scalar)MAX32(-32767,MIN32(32767,floor(.5+32768*cos(2*M_PI*(i+.125)/N)))); #endif +#else + for (i=0;i<N2;i++) + trig[i] = (kiss_twiddle_scalar)cos(2*PI*(i+.125)/N); +#endif + trig += N2; + N2 >>= 1; + N >>= 1; + } return 1; } @@ -103,27 +116,37 @@ void clt_mdct_clear(mdct_lookup *l) #if 0 /* Forward MDCT trashes the input array */ +#ifndef OVERRIDE_clt_mdct_forward void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, const opus_val16 *window, int overlap, int shift, int stride) { int i; int N, N2, N4; - kiss_twiddle_scalar sine; VARDECL(kiss_fft_scalar, f); - VARDECL(kiss_fft_scalar, f2); + VARDECL(kiss_fft_cpx, f2); + const kiss_fft_state *st = l->kfft[shift]; + const kiss_twiddle_scalar *trig; + opus_val16 scale; +#ifdef FIXED_POINT + /* Allows us to scale with MULT16_32_Q16(), which is faster than + MULT16_32_Q15() on ARM. */ + int scale_shift = st->scale_shift-1; +#endif SAVE_STACK; + scale = st->scale; + N = l->n; - N >>= shift; + trig = l->trig; + for (i=0;i<shift;i++) + { + N >>= 1; + trig += N; + } N2 = N>>1; N4 = N>>2; + ALLOC(f, N2, kiss_fft_scalar); - ALLOC(f2, N2, kiss_fft_scalar); - /* sin(x) ~= x here */ -#ifdef FIXED_POINT - sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N; -#else - sine = (kiss_twiddle_scalar)2*PI*(.125f)/N; -#endif + ALLOC(f2, N4, kiss_fft_cpx); /* Consider the input to be composed of four blocks: [a, b, c, d] */ /* Window, shuffle, fold */ @@ -168,125 +191,131 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar /* Pre-rotation */ { kiss_fft_scalar * OPUS_RESTRICT yp = f; - const kiss_twiddle_scalar *t = &l->trig[0]; + const kiss_twiddle_scalar *t = &trig[0]; for(i=0;i<N4;i++) { + kiss_fft_cpx yc; + kiss_twiddle_scalar t0, t1; kiss_fft_scalar re, im, yr, yi; - re = yp[0]; - im = yp[1]; - yr = -S_MUL(re,t[i<<shift]) - S_MUL(im,t[(N4-i)<<shift]); - yi = -S_MUL(im,t[i<<shift]) + S_MUL(re,t[(N4-i)<<shift]); - /* works because the cos is nearly one */ - *yp++ = yr + S_MUL(yi,sine); - *yp++ = yi - S_MUL(yr,sine); + t0 = t[i]; + t1 = t[N4+i]; + re = *yp++; + im = *yp++; + yr = S_MUL(re,t0) - S_MUL(im,t1); + yi = S_MUL(im,t0) + S_MUL(re,t1); + yc.r = yr; + yc.i = yi; + yc.r = PSHR32(MULT16_32_Q16(scale, yc.r), scale_shift); + yc.i = PSHR32(MULT16_32_Q16(scale, yc.i), scale_shift); + f2[st->bitrev[i]] = yc; } } - /* N/4 complex FFT, down-scales by 4/N */ - opus_fft(l->kfft[shift], (kiss_fft_cpx *)f, (kiss_fft_cpx *)f2); + /* N/4 complex FFT, does not downscale anymore */ + opus_fft_impl(st, f2); /* Post-rotate */ { /* Temp pointers to make it really clear to the compiler what we're doing */ - const kiss_fft_scalar * OPUS_RESTRICT fp = f2; + const kiss_fft_cpx * OPUS_RESTRICT fp = f2; kiss_fft_scalar * OPUS_RESTRICT yp1 = out; kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1); - const kiss_twiddle_scalar *t = &l->trig[0]; + const kiss_twiddle_scalar *t = &trig[0]; /* Temp pointers to make it really clear to the compiler what we're doing */ for(i=0;i<N4;i++) { kiss_fft_scalar yr, yi; - yr = S_MUL(fp[1],t[(N4-i)<<shift]) + S_MUL(fp[0],t[i<<shift]); - yi = S_MUL(fp[0],t[(N4-i)<<shift]) - S_MUL(fp[1],t[i<<shift]); - /* works because the cos is nearly one */ - *yp1 = yr - S_MUL(yi,sine); - *yp2 = yi + S_MUL(yr,sine);; - fp += 2; + yr = S_MUL(fp->i,t[N4+i]) - S_MUL(fp->r,t[i]); + yi = S_MUL(fp->r,t[N4+i]) + S_MUL(fp->i,t[i]); + *yp1 = yr; + *yp2 = yi; + fp++; yp1 += 2*stride; yp2 -= 2*stride; } } RESTORE_STACK; } +#endif /* OVERRIDE_clt_mdct_forward */ #endif +#ifndef OVERRIDE_clt_mdct_backward void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride) { int i; int N, N2, N4; - kiss_twiddle_scalar sine; -/* VARDECL(kiss_fft_scalar, f2); - SAVE_STACK; */ + const kiss_twiddle_scalar *trig; + N = l->n; - N >>= shift; + trig = l->trig; + for (i=0;i<shift;i++) + { + N >>= 1; + trig += N; + } N2 = N>>1; N4 = N>>2; -/* ALLOC(f2, N2, kiss_fft_scalar); */ - kiss_fft_scalar f2[N2]; /* worst case 3840b */ - /* sin(x) ~= x here */ -#ifdef FIXED_POINT - sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N; -#else - sine = (kiss_twiddle_scalar)2*PI*(.125f)/N; -#endif /* Pre-rotate */ { /* Temp pointers to make it really clear to the compiler what we're doing */ const kiss_fft_scalar * OPUS_RESTRICT xp1 = in; const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1); - kiss_fft_scalar * OPUS_RESTRICT yp = f2; - const kiss_twiddle_scalar *t = &l->trig[0]; + kiss_fft_scalar * OPUS_RESTRICT yp = out+(overlap>>1); + const kiss_twiddle_scalar * OPUS_RESTRICT t = &trig[0]; + const opus_int16 * OPUS_RESTRICT bitrev = l->kfft[shift]->bitrev; for(i=0;i<N4;i++) { + int rev; kiss_fft_scalar yr, yi; - yr = -S_MUL(*xp2, t[i<<shift]) + S_MUL(*xp1,t[(N4-i)<<shift]); - yi = -S_MUL(*xp2, t[(N4-i)<<shift]) - S_MUL(*xp1,t[i<<shift]); - /* works because the cos is nearly one */ - *yp++ = yr - S_MUL(yi,sine); - *yp++ = yi + S_MUL(yr,sine); + rev = *bitrev++; + yr = S_MUL(*xp2, t[i]) + S_MUL(*xp1, t[N4+i]); + yi = S_MUL(*xp1, t[i]) - S_MUL(*xp2, t[N4+i]); + /* We swap real and imag because we use an FFT instead of an IFFT. */ + yp[2*rev+1] = yr; + yp[2*rev] = yi; + /* Storing the pre-rotation directly in the bitrev order. */ xp1+=2*stride; xp2-=2*stride; } } - /* Inverse N/4 complex FFT. This one should *not* downscale even in fixed-point */ - opus_ifft(l->kfft[shift], (kiss_fft_cpx *)f2, (kiss_fft_cpx *)(out+(overlap>>1))); + opus_fft_impl(l->kfft[shift], (kiss_fft_cpx*)(out+(overlap>>1))); /* Post-rotate and de-shuffle from both ends of the buffer at once to make it in-place. */ { - kiss_fft_scalar * OPUS_RESTRICT yp0 = out+(overlap>>1); - kiss_fft_scalar * OPUS_RESTRICT yp1 = out+(overlap>>1)+N2-2; - const kiss_twiddle_scalar *t = &l->trig[0]; + kiss_fft_scalar * yp0 = out+(overlap>>1); + kiss_fft_scalar * yp1 = out+(overlap>>1)+N2-2; + const kiss_twiddle_scalar *t = &trig[0]; /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the middle pair will be computed twice. */ for(i=0;i<(N4+1)>>1;i++) { kiss_fft_scalar re, im, yr, yi; kiss_twiddle_scalar t0, t1; - re = yp0[0]; - im = yp0[1]; - t0 = t[i<<shift]; - t1 = t[(N4-i)<<shift]; + /* We swap real and imag because we're using an FFT instead of an IFFT. */ + re = yp0[1]; + im = yp0[0]; + t0 = t[i]; + t1 = t[N4+i]; /* We'd scale up by 2 here, but instead it's done when mixing the windows */ - yr = S_MUL(re,t0) - S_MUL(im,t1); - yi = S_MUL(im,t0) + S_MUL(re,t1); - re = yp1[0]; - im = yp1[1]; - /* works because the cos is nearly one */ - yp0[0] = -(yr - S_MUL(yi,sine)); - yp1[1] = yi + S_MUL(yr,sine); + yr = S_MUL(re,t0) + S_MUL(im,t1); + yi = S_MUL(re,t1) - S_MUL(im,t0); + /* We swap real and imag because we're using an FFT instead of an IFFT. */ + re = yp1[1]; + im = yp1[0]; + yp0[0] = yr; + yp1[1] = yi; - t0 = t[(N4-i-1)<<shift]; - t1 = t[(i+1)<<shift]; + t0 = t[(N4-i-1)]; + t1 = t[(N2-i-1)]; /* We'd scale up by 2 here, but instead it's done when mixing the windows */ - yr = S_MUL(re,t0) - S_MUL(im,t1); - yi = S_MUL(im,t0) + S_MUL(re,t1); - /* works because the cos is nearly one */ - yp1[0] = -(yr - S_MUL(yi,sine)); - yp0[1] = yi + S_MUL(yr,sine); + yr = S_MUL(re,t0) + S_MUL(im,t1); + yi = S_MUL(re,t1) - S_MUL(im,t0); + yp1[0] = yr; + yp0[1] = yi; yp0 += 2; yp1 -= 2; } @@ -310,5 +339,5 @@ void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scala wp2--; } } -/* RESTORE_STACK; */ } +#endif /* OVERRIDE_clt_mdct_backward */ diff --git a/lib/rbcodec/codecs/libopus/celt/modes.h b/lib/rbcodec/codecs/libopus/celt/modes.h index c8340f9..be813cc 100644 --- a/lib/rbcodec/codecs/libopus/celt/modes.h +++ b/lib/rbcodec/codecs/libopus/celt/modes.h @@ -39,14 +39,6 @@ #define MAX_PERIOD 1024 -#ifndef OVERLAP -#define OVERLAP(mode) ((mode)->overlap) -#endif - -#ifndef FRAMESIZE -#define FRAMESIZE(mode) ((mode)->mdctSize) -#endif - typedef struct { int size; const opus_int16 *index; diff --git a/lib/rbcodec/codecs/libopus/celt/pitch.c b/lib/rbcodec/codecs/libopus/celt/pitch.c index c288572..ee56a43 100644 --- a/lib/rbcodec/codecs/libopus/celt/pitch.c +++ b/lib/rbcodec/codecs/libopus/celt/pitch.c @@ -252,15 +252,15 @@ void #endif celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch) { - int i,j; + int i; /*The EDSP version requires that max_pitch is at least 1, and that _x is 32-bit aligned. Since it's hard to put asserts in assembly, put them here.*/ - celt_assert(max_pitch>0); - celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0); #ifdef FIXED_POINT opus_val32 maxcorr=1; #endif + celt_assert(max_pitch>0); + celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0); for (i=0;i<max_pitch-3;i+=4) { opus_val32 sum[4]={0,0,0,0}; @@ -279,9 +279,8 @@ celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr /* In case max_pitch isn't a multiple of 4, do non-unrolled version. */ for (;i<max_pitch;i++) { - opus_val32 sum = 0; - for (j=0;j<len;j++) - sum = MAC16_16(sum, _x[j],_y[i+j]); + opus_val32 sum; + sum = celt_inner_prod(_x, _y+i, len); xcorr[i] = sum; #ifdef FIXED_POINT maxcorr = MAX32(maxcorr, sum); @@ -361,12 +360,17 @@ void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTR #endif for (i=0;i<max_pitch>>1;i++) { - opus_val32 sum=0; + opus_val32 sum; xcorr[i] = 0; if (abs(i-2*best_pitch[0])>2 && abs(i-2*best_pitch[1])>2) continue; +#ifdef FIXED_POINT + sum = 0; for (j=0;j<len>>1;j++) sum += SHR32(MULT16_16(x_lp[j],y[i+j]), shift); +#else + sum = celt_inner_prod(x_lp, y+i, len>>1); +#endif xcorr[i] = MAX32(-1, sum); #ifdef FIXED_POINT maxcorr = MAX32(maxcorr, sum); @@ -457,7 +461,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, opus_val16 g1; opus_val16 cont=0; opus_val16 thresh; - T1 = (2*T0+k)/(2*k); + T1 = celt_udiv(2*T0+k, 2*k); if (T1 < minperiod) break; /* Look for another strong correlation at T1b */ @@ -469,7 +473,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, T1b = T0+T1; } else { - T1b = (2*second_check[k]*T0+k)/(2*k); + T1b = celt_udiv(2*second_check[k]*T0+k, 2*k); } dual_inner_prod(x, &x[-T1], &x[-T1b], N, &xy, &xy2); xy += xy2; @@ -514,13 +518,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, pg = SHR32(frac_div32(best_xy,best_yy+1),16); for (k=0;k<3;k++) - { - int T1 = T+k-1; - xy = 0; - for (i=0;i<N;i++) - xy = MAC16_16(xy, x[i], x[i-T1]); - xcorr[k] = xy; - } + xcorr[k] = celt_inner_prod(x, x-(T+k-1), N); if ((xcorr[2]-xcorr[0]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[0])) offset = 1; else if ((xcorr[0]-xcorr[2]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[2])) diff --git a/lib/rbcodec/codecs/libopus/celt/pitch.h b/lib/rbcodec/codecs/libopus/celt/pitch.h index df317ec..96dbc0d 100644 --- a/lib/rbcodec/codecs/libopus/celt/pitch.h +++ b/lib/rbcodec/codecs/libopus/celt/pitch.h @@ -41,8 +41,12 @@ #include "x86/pitch_sse.h" #endif +#if defined(MIPSr1_ASM) +#include "mips/pitch_mipsr1.h" +#endif + #if defined(OPUS_ARM_ASM) && defined(FIXED_POINT) -# include "arm/pitch_arm.h" +//# include "arm/pitch_arm.h" #endif void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x_lp, @@ -141,6 +145,18 @@ static OPUS_INLINE void dual_inner_prod(const opus_val16 *x, const opus_val16 *y } #endif +#ifndef OVERRIDE_CELT_INNER_PROD +static OPUS_INLINE opus_val32 celt_inner_prod(const opus_val16 *x, const opus_val16 *y, + int N) +{ + int i; + opus_val32 xy=0; + for (i=0;i<N;i++) + xy = MAC16_16(xy, x[i], y[i]); + return xy; +} +#endif + #ifdef FIXED_POINT opus_val32 #else diff --git a/lib/rbcodec/codecs/libopus/celt/rate.c b/lib/rbcodec/codecs/libopus/celt/rate.c index e13d839..f85c3ee 100644 --- a/lib/rbcodec/codecs/libopus/celt/rate.c +++ b/lib/rbcodec/codecs/libopus/celt/rate.c @@ -333,7 +333,7 @@ static OPUS_INLINE int interp_bits2pulses(const CELTMode *m, int start, int end, /*Figure out how many left-over bits we would be adding to this band. This can include bits we've stolen back from higher, skipped bands.*/ left = total-psum; - percoeff = left/(m->eBands[codedBands]-m->eBands[start]); + percoeff = celt_udiv(left, m->eBands[codedBands]-m->eBands[start]); left -= (m->eBands[codedBands]-m->eBands[start])*percoeff; rem = IMAX(left-(m->eBands[j]-m->eBands[start]),0); band_width = m->eBands[codedBands]-m->eBands[j]; @@ -414,7 +414,7 @@ static OPUS_INLINE int interp_bits2pulses(const CELTMode *m, int start, int end, /* Allocate the remaining bits */ left = total-psum; - percoeff = left/(m->eBands[codedBands]-m->eBands[start]); + percoeff = celt_udiv(left, m->eBands[codedBands]-m->eBands[start]); left -= (m->eBands[codedBands]-m->eBands[start])*percoeff; for (j=start;j<codedBands;j++) bits[j] += ((int)percoeff*(m->eBands[j+1]-m->eBands[j])); @@ -465,7 +465,8 @@ static OPUS_INLINE int interp_bits2pulses(const CELTMode *m, int start, int end, offset += NClogN>>3; /* Divide with rounding */ - ebits[j] = IMAX(0, (bits[j] + offset + (den<<(BITRES-1))) / (den<<BITRES)); + ebits[j] = IMAX(0, (bits[j] + offset + (den<<(BITRES-1)))); + ebits[j] = celt_udiv(ebits[j], den)>>BITRES; /* Make sure not to bust */ if (C*ebits[j] > (bits[j]>>BITRES)) diff --git a/lib/rbcodec/codecs/libopus/celt/stack_alloc.h b/lib/rbcodec/codecs/libopus/celt/stack_alloc.h index 316a6ce..2b51c8d 100644 --- a/lib/rbcodec/codecs/libopus/celt/stack_alloc.h +++ b/lib/rbcodec/codecs/libopus/celt/stack_alloc.h @@ -116,9 +116,11 @@ #else #ifdef CELT_C +char *scratch_ptr=0; char *global_stack=0; #else extern char *global_stack; +extern char *scratch_ptr; #endif /* CELT_C */ #ifdef ENABLE_VALGRIND @@ -140,8 +142,12 @@ extern char *global_stack_top; #define ALIGN(stack, size) ((stack) += ((size) - (long)(stack)) & ((size) - 1)) #define PUSH(stack, size, type) (ALIGN((stack),sizeof(type)/sizeof(char)),(stack)+=(size)*(sizeof(type)/sizeof(char)),(type*)((stack)-(size)*(sizeof(type)/sizeof(char)))) +#if 0 /* Set this to 1 to instrument pseudostack usage */ +#define RESTORE_STACK (printf("%ld %s:%d\n", global_stack-scratch_ptr, __FILE__, __LINE__),global_stack = _saved_stack) +#else #define RESTORE_STACK (global_stack = _saved_stack) -#define ALLOC_STACK char *_saved_stack; (global_stack = (global_stack==0) ? opus_alloc_scratch(GLOBAL_STACK_SIZE) : global_stack); _saved_stack = global_stack; +#endif +#define ALLOC_STACK char *_saved_stack; (global_stack = (global_stack==0) ? (scratch_ptr=opus_alloc_scratch(GLOBAL_STACK_SIZE)) : global_stack); _saved_stack = global_stack; #endif /* ENABLE_VALGRIND */ diff --git a/lib/rbcodec/codecs/libopus/celt/static_modes_fixed.h b/lib/rbcodec/codecs/libopus/celt/static_modes_fixed.h index 92e5fe5..0396ce3 100644 --- a/lib/rbcodec/codecs/libopus/celt/static_modes_fixed.h +++ b/lib/rbcodec/codecs/libopus/celt/static_modes_fixed.h @@ -341,84 +341,84 @@ static const kiss_twiddle_cpx fft_twiddles48000_960[480] ICONST_ATTR = { #ifndef FFT_BITREV480 #define FFT_BITREV480 static const opus_int16 fft_bitrev480[480] = { -0, 120, 240, 360, 30, 150, 270, 390, 60, 180, 300, 420, 90, 210, 330, -450, 15, 135, 255, 375, 45, 165, 285, 405, 75, 195, 315, 435, 105, 225, -345, 465, 5, 125, 245, 365, 35, 155, 275, 395, 65, 185, 305, 425, 95, -215, 335, 455, 20, 140, 260, 380, 50, 170, 290, 410, 80, 200, 320, 440, -110, 230, 350, 470, 10, 130, 250, 370, 40, 160, 280, 400, 70, 190, 310, -430, 100, 220, 340, 460, 25, 145, 265, 385, 55, 175, 295, 415, 85, 205, -325, 445, 115, 235, 355, 475, 1, 121, 241, 361, 31, 151, 271, 391, 61, -181, 301, 421, 91, 211, 331, 451, 16, 136, 256, 376, 46, 166, 286, 406, -76, 196, 316, 436, 106, 226, 346, 466, 6, 126, 246, 366, 36, 156, 276, -396, 66, 186, 306, 426, 96, 216, 336, 456, 21, 141, 261, 381, 51, 171, -291, 411, 81, 201, 321, 441, 111, 231, 351, 471, 11, 131, 251, 371, 41, -161, 281, 401, 71, 191, 311, 431, 101, 221, 341, 461, 26, 146, 266, 386, -56, 176, 296, 416, 86, 206, 326, 446, 116, 236, 356, 476, 2, 122, 242, -362, 32, 152, 272, 392, 62, 182, 302, 422, 92, 212, 332, 452, 17, 137, -257, 377, 47, 167, 287, 407, 77, 197, 317, 437, 107, 227, 347, 467, 7, -127, 247, 367, 37, 157, 277, 397, 67, 187, 307, 427, 97, 217, 337, 457, -22, 142, 262, 382, 52, 172, 292, 412, 82, 202, 322, 442, 112, 232, 352, -472, 12, 132, 252, 372, 42, 162, 282, 402, 72, 192, 312, 432, 102, 222, -342, 462, 27, 147, 267, 387, 57, 177, 297, 417, 87, 207, 327, 447, 117, -237, 357, 477, 3, 123, 243, 363, 33, 153, 273, 393, 63, 183, 303, 423, -93, 213, 333, 453, 18, 138, 258, 378, 48, 168, 288, 408, 78, 198, 318, -438, 108, 228, 348, 468, 8, 128, 248, 368, 38, 158, 278, 398, 68, 188, -308, 428, 98, 218, 338, 458, 23, 143, 263, 383, 53, 173, 293, 413, 83, -203, 323, 443, 113, 233, 353, 473, 13, 133, 253, 373, 43, 163, 283, 403, -73, 193, 313, 433, 103, 223, 343, 463, 28, 148, 268, 388, 58, 178, 298, -418, 88, 208, 328, 448, 118, 238, 358, 478, 4, 124, 244, 364, 34, 154, -274, 394, 64, 184, 304, 424, 94, 214, 334, 454, 19, 139, 259, 379, 49, -169, 289, 409, 79, 199, 319, 439, 109, 229, 349, 469, 9, 129, 249, 369, -39, 159, 279, 399, 69, 189, 309, 429, 99, 219, 339, 459, 24, 144, 264, -384, 54, 174, 294, 414, 84, 204, 324, 444, 114, 234, 354, 474, 14, 134, -254, 374, 44, 164, 284, 404, 74, 194, 314, 434, 104, 224, 344, 464, 29, -149, 269, 389, 59, 179, 299, 419, 89, 209, 329, 449, 119, 239, 359, 479, +0, 96, 192, 288, 384, 32, 128, 224, 320, 416, 64, 160, 256, 352, 448, +8, 104, 200, 296, 392, 40, 136, 232, 328, 424, 72, 168, 264, 360, 456, +16, 112, 208, 304, 400, 48, 144, 240, 336, 432, 80, 176, 272, 368, 464, +24, 120, 216, 312, 408, 56, 152, 248, 344, 440, 88, 184, 280, 376, 472, +4, 100, 196, 292, 388, 36, 132, 228, 324, 420, 68, 164, 260, 356, 452, +12, 108, 204, 300, 396, 44, 140, 236, 332, 428, 76, 172, 268, 364, 460, +20, 116, 212, 308, 404, 52, 148, 244, 340, 436, 84, 180, 276, 372, 468, +28, 124, 220, 316, 412, 60, 156, 252, 348, 444, 92, 188, 284, 380, 476, +1, 97, 193, 289, 385, 33, 129, 225, 321, 417, 65, 161, 257, 353, 449, +9, 105, 201, 297, 393, 41, 137, 233, 329, 425, 73, 169, 265, 361, 457, +17, 113, 209, 305, 401, 49, 145, 241, 337, 433, 81, 177, 273, 369, 465, +25, 121, 217, 313, 409, 57, 153, 249, 345, 441, 89, 185, 281, 377, 473, +5, 101, 197, 293, 389, 37, 133, 229, 325, 421, 69, 165, 261, 357, 453, +13, 109, 205, 301, 397, 45, 141, 237, 333, 429, 77, 173, 269, 365, 461, +21, 117, 213, 309, 405, 53, 149, 245, 341, 437, 85, 181, 277, 373, 469, +29, 125, 221, 317, 413, 61, 157, 253, 349, 445, 93, 189, 285, 381, 477, +2, 98, 194, 290, 386, 34, 130, 226, 322, 418, 66, 162, 258, 354, 450, +10, 106, 202, 298, 394, 42, 138, 234, 330, 426, 74, 170, 266, 362, 458, +18, 114, 210, 306, 402, 50, 146, 242, 338, 434, 82, 178, 274, 370, 466, +26, 122, 218, 314, 410, 58, 154, 250, 346, 442, 90, 186, 282, 378, 474, +6, 102, 198, 294, 390, 38, 134, 230, 326, 422, 70, 166, 262, 358, 454, +14, 110, 206, 302, 398, 46, 142, 238, 334, 430, 78, 174, 270, 366, 462, +22, 118, 214, 310, 406, 54, 150, 246, 342, 438, 86, 182, 278, 374, 470, +30, 126, 222, 318, 414, 62, 158, 254, 350, 446, 94, 190, 286, 382, 478, +3, 99, 195, 291, 387, 35, 131, 227, 323, 419, 67, 163, 259, 355, 451, +11, 107, 203, 299, 395, 43, 139, 235, 331, 427, 75, 171, 267, 363, 459, +19, 115, 211, 307, 403, 51, 147, 243, 339, 435, 83, 179, 275, 371, 467, +27, 123, 219, 315, 411, 59, 155, 251, 347, 443, 91, 187, 283, 379, 475, +7, 103, 199, 295, 391, 39, 135, 231, 327, 423, 71, 167, 263, 359, 455, +15, 111, 207, 303, 399, 47, 143, 239, 335, 431, 79, 175, 271, 367, 463, +23, 119, 215, 311, 407, 55, 151, 247, 343, 439, 87, 183, 279, 375, 471, +31, 127, 223, 319, 415, 63, 159, 255, 351, 447, 95, 191, 287, 383, 479, }; #endif #ifndef FFT_BITREV240 #define FFT_BITREV240 static const opus_int16 fft_bitrev240[240] = { -0, 60, 120, 180, 15, 75, 135, 195, 30, 90, 150, 210, 45, 105, 165, -225, 5, 65, 125, 185, 20, 80, 140, 200, 35, 95, 155, 215, 50, 110, -170, 230, 10, 70, 130, 190, 25, 85, 145, 205, 40, 100, 160, 220, 55, -115, 175, 235, 1, 61, 121, 181, 16, 76, 136, 196, 31, 91, 151, 211, -46, 106, 166, 226, 6, 66, 126, 186, 21, 81, 141, 201, 36, 96, 156, -216, 51, 111, 171, 231, 11, 71, 131, 191, 26, 86, 146, 206, 41, 101, -161, 221, 56, 116, 176, 236, 2, 62, 122, 182, 17, 77, 137, 197, 32, -92, 152, 212, 47, 107, 167, 227, 7, 67, 127, 187, 22, 82, 142, 202, -37, 97, 157, 217, 52, 112, 172, 232, 12, 72, 132, 192, 27, 87, 147, -207, 42, 102, 162, 222, 57, 117, 177, 237, 3, 63, 123, 183, 18, 78, -138, 198, 33, 93, 153, 213, 48, 108, 168, 228, 8, 68, 128, 188, 23, -83, 143, 203, 38, 98, 158, 218, 53, 113, 173, 233, 13, 73, 133, 193, -28, 88, 148, 208, 43, 103, 163, 223, 58, 118, 178, 238, 4, 64, 124, -184, 19, 79, 139, 199, 34, 94, 154, 214, 49, 109, 169, 229, 9, 69, -129, 189, 24, 84, 144, 204, 39, 99, 159, 219, 54, 114, 174, 234, 14, -74, 134, 194, 29, 89, 149, 209, 44, 104, 164, 224, 59, 119, 179, 239, +0, 48, 96, 144, 192, 16, 64, 112, 160, 208, 32, 80, 128, 176, 224, +4, 52, 100, 148, 196, 20, 68, 116, 164, 212, 36, 84, 132, 180, 228, +8, 56, 104, 152, 200, 24, 72, 120, 168, 216, 40, 88, 136, 184, 232, +12, 60, 108, 156, 204, 28, 76, 124, 172, 220, 44, 92, 140, 188, 236, +1, 49, 97, 145, 193, 17, 65, 113, 161, 209, 33, 81, 129, 177, 225, +5, 53, 101, 149, 197, 21, 69, 117, 165, 213, 37, 85, 133, 181, 229, +9, 57, 105, 153, 201, 25, 73, 121, 169, 217, 41, 89, 137, 185, 233, +13, 61, 109, 157, 205, 29, 77, 125, 173, 221, 45, 93, 141, 189, 237, +2, 50, 98, 146, 194, 18, 66, 114, 162, 210, 34, 82, 130, 178, 226, +6, 54, 102, 150, 198, 22, 70, 118, 166, 214, 38, 86, 134, 182, 230, +10, 58, 106, 154, 202, 26, 74, 122, 170, 218, 42, 90, 138, 186, 234, +14, 62, 110, 158, 206, 30, 78, 126, 174, 222, 46, 94, 142, 190, 238, +3, 51, 99, 147, 195, 19, 67, 115, 163, 211, 35, 83, 131, 179, 227, +7, 55, 103, 151, 199, 23, 71, 119, 167, 215, 39, 87, 135, 183, 231, +11, 59, 107, 155, 203, 27, 75, 123, 171, 219, 43, 91, 139, 187, 235, +15, 63, 111, 159, 207, 31, 79, 127, 175, 223, 47, 95, 143, 191, 239, }; #endif #ifndef FFT_BITREV120 #define FFT_BITREV120 static const opus_int16 fft_bitrev120[120] = { -0, 30, 60, 90, 15, 45, 75, 105, 5, 35, 65, 95, 20, 50, 80, -110, 10, 40, 70, 100, 25, 55, 85, 115, 1, 31, 61, 91, 16, 46, -76, 106, 6, 36, 66, 96, 21, 51, 81, 111, 11, 41, 71, 101, 26, -56, 86, 116, 2, 32, 62, 92, 17, 47, 77, 107, 7, 37, 67, 97, -22, 52, 82, 112, 12, 42, 72, 102, 27, 57, 87, 117, 3, 33, 63, -93, 18, 48, 78, 108, 8, 38, 68, 98, 23, 53, 83, 113, 13, 43, -73, 103, 28, 58, 88, 118, 4, 34, 64, 94, 19, 49, 79, 109, 9, -39, 69, 99, 24, 54, 84, 114, 14, 44, 74, 104, 29, 59, 89, 119, +0, 24, 48, 72, 96, 8, 32, 56, 80, 104, 16, 40, 64, 88, 112, +4, 28, 52, 76, 100, 12, 36, 60, 84, 108, 20, 44, 68, 92, 116, +1, 25, 49, 73, 97, 9, 33, 57, 81, 105, 17, 41, 65, 89, 113, +5, 29, 53, 77, 101, 13, 37, 61, 85, 109, 21, 45, 69, 93, 117, +2, 26, 50, 74, 98, 10, 34, 58, 82, 106, 18, 42, 66, 90, 114, +6, 30, 54, 78, 102, 14, 38, 62, 86, 110, 22, 46, 70, 94, 118, +3, 27, 51, 75, 99, 11, 35, 59, 83, 107, 19, 43, 67, 91, 115, +7, 31, 55, 79, 103, 15, 39, 63, 87, 111, 23, 47, 71, 95, 119, }; #endif #ifndef FFT_BITREV60 #define FFT_BITREV60 static const opus_int16 fft_bitrev60[60] = { -0, 15, 30, 45, 5, 20, 35, 50, 10, 25, 40, 55, 1, 16, 31, -46, 6, 21, 36, 51, 11, 26, 41, 56, 2, 17, 32, 47, 7, 22, -37, 52, 12, 27, 42, 57, 3, 18, 33, 48, 8, 23, 38, 53, 13, -28, 43, 58, 4, 19, 34, 49, 9, 24, 39, 54, 14, 29, 44, 59, +0, 12, 24, 36, 48, 4, 16, 28, 40, 52, 8, 20, 32, 44, 56, +1, 13, 25, 37, 49, 5, 17, 29, 41, 53, 9, 21, 33, 45, 57, +2, 14, 26, 38, 50, 6, 18, 30, 42, 54, 10, 22, 34, 46, 58, +3, 15, 27, 39, 51, 7, 19, 31, 43, 55, 11, 23, 35, 47, 59, }; #endif @@ -426,8 +426,10 @@ static const opus_int16 fft_bitrev60[60] = { #define FFT_STATE48000_960_0 static const kiss_fft_state fft_state48000_960_0 ICONST_ATTR = { 480, /* nfft */ +17476, /* scale */ +8, /* scale_shift */ -1, /* shift */ -{4, 120, 4, 30, 2, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, }, /* factors */ +{5, 96, 3, 32, 4, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, }, /* factors */ fft_bitrev480, /* bitrev */ fft_twiddles48000_960, /* bitrev */ }; @@ -437,8 +439,10 @@ fft_twiddles48000_960, /* bitrev */ #define FFT_STATE48000_960_1 static const kiss_fft_state fft_state48000_960_1 ICONST_ATTR = { 240, /* nfft */ +17476, /* scale */ +7, /* scale_shift */ 1, /* shift */ -{4, 60, 4, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ +{5, 48, 3, 16, 4, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ fft_bitrev240, /* bitrev */ fft_twiddles48000_960, /* bitrev */ }; @@ -448,8 +452,10 @@ fft_twiddles48000_960, /* bitrev */ #define FFT_STATE48000_960_2 static const kiss_fft_state fft_state48000_960_2 ICONST_ATTR = { 120, /* nfft */ +17476, /* scale */ +6, /* scale_shift */ 2, /* shift */ -{4, 30, 2, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ +{5, 24, 3, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ fft_bitrev120, /* bitrev */ fft_twiddles48000_960, /* bitrev */ }; @@ -459,8 +465,10 @@ fft_twiddles48000_960, /* bitrev */ #define FFT_STATE48000_960_3 static const kiss_fft_state fft_state48000_960_3 ICONST_ATTR = { 60, /* nfft */ +17476, /* scale */ +5, /* scale_shift */ 3, /* shift */ -{4, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ +{5, 12, 3, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ fft_bitrev60, /* bitrev */ fft_twiddles48000_960, /* bitrev */ }; @@ -470,104 +478,368 @@ fft_twiddles48000_960, /* bitrev */ #ifndef MDCT_TWIDDLES960 #define MDCT_TWIDDLES960 -static const opus_val16 mdct_twiddles960[481] ICONST_ATTR = { -32767, 32767, 32767, 32767, 32766, -32763, 32762, 32759, 32757, 32753, -32751, 32747, 32743, 32738, 32733, -32729, 32724, 32717, 32711, 32705, -32698, 32690, 32683, 32676, 32667, -32658, 32650, 32640, 32631, 32620, -32610, 32599, 32588, 32577, 32566, -32554, 32541, 32528, 32515, 32502, -32487, 32474, 32459, 32444, 32429, -32413, 32397, 32381, 32364, 32348, -32331, 32313, 32294, 32277, 32257, -32239, 32219, 32200, 32180, 32159, -32138, 32118, 32096, 32074, 32051, -32029, 32006, 31984, 31960, 31936, -31912, 31888, 31863, 31837, 31812, -31786, 31760, 31734, 31707, 31679, -31652, 31624, 31596, 31567, 31539, -31508, 31479, 31450, 31419, 31388, -31357, 31326, 31294, 31262, 31230, -31198, 31164, 31131, 31097, 31063, -31030, 30994, 30959, 30924, 30889, -30853, 30816, 30779, 30743, 30705, -30668, 30629, 30592, 30553, 30515, -30475, 30435, 30396, 30356, 30315, -30274, 30233, 30191, 30149, 30107, -30065, 30022, 29979, 29936, 29891, -29847, 29803, 29758, 29713, 29668, -29622, 29577, 29529, 29483, 29436, -29390, 29341, 29293, 29246, 29197, -29148, 29098, 29050, 29000, 28949, -28899, 28848, 28797, 28746, 28694, -28642, 28590, 28537, 28485, 28432, -28378, 28324, 28271, 28217, 28162, -28106, 28051, 27995, 27940, 27884, -27827, 27770, 27713, 27657, 27598, -27540, 27481, 27423, 27365, 27305, -27246, 27187, 27126, 27066, 27006, -26945, 26883, 26822, 26760, 26698, -26636, 26574, 26510, 26448, 26383, -26320, 26257, 26191, 26127, 26062, -25997, 25931, 25866, 25800, 25734, -25667, 25601, 25533, 25466, 25398, -25330, 25262, 25194, 25125, 25056, -24987, 24917, 24848, 24778, 24707, -24636, 24566, 24495, 24424, 24352, -24280, 24208, 24135, 24063, 23990, -23917, 23842, 23769, 23695, 23622, -23546, 23472, 23398, 23322, 23246, -23171, 23095, 23018, 22942, 22866, -22788, 22711, 22634, 22557, 22478, -22400, 22322, 22244, 22165, 22085, -22006, 21927, 21846, 21766, 21687, -21606, 21524, 21443, 21363, 21282, -21199, 21118, 21035, 20954, 20870, -20788, 20705, 20621, 20538, 20455, -20371, 20286, 20202, 20118, 20034, -19947, 19863, 19777, 19692, 19606, -19520, 19434, 19347, 19260, 19174, -19088, 18999, 18911, 18825, 18737, -18648, 18560, 18472, 18384, 18294, -18205, 18116, 18025, 17936, 17846, -17757, 17666, 17576, 17485, 17395, -17303, 17212, 17122, 17030, 16937, -16846, 16755, 16662, 16569, 16477, -16385, 16291, 16198, 16105, 16012, -15917, 15824, 15730, 15636, 15541, -15447, 15352, 15257, 15162, 15067, -14973, 14875, 14781, 14685, 14589, -14493, 14396, 14300, 14204, 14107, -14010, 13914, 13815, 13718, 13621, -13524, 13425, 13328, 13230, 13133, -13033, 12935, 12836, 12738, 12638, -12540, 12441, 12341, 12241, 12142, -12044, 11943, 11843, 11744, 11643, -11542, 11442, 11342, 11241, 11139, -11039, 10939, 10836, 10736, 10635, -10534, 10431, 10330, 10228, 10127, -10024, 9921, 9820, 9718, 9614, -9512, 9410, 9306, 9204, 9101, -8998, 8895, 8791, 8689, 8585, -8481, 8377, 8274, 8171, 8067, -7962, 7858, 7753, 7650, 7545, -7441, 7336, 7231, 7129, 7023, -6917, 6813, 6709, 6604, 6498, -6393, 6288, 6182, 6077, 5973, -5867, 5760, 5656, 5549, 5445, -5339, 5232, 5127, 5022, 4914, -4809, 4703, 4596, 4490, 4384, -4278, 4171, 4065, 3958, 3852, -3745, 3640, 3532, 3426, 3318, -3212, 3106, 2998, 2891, 2786, -2679, 2570, 2465, 2358, 2251, -2143, 2037, 1929, 1823, 1715, -1609, 1501, 1393, 1287, 1180, -1073, 964, 858, 751, 644, -535, 429, 322, 214, 107, -0, }; +static const opus_val16 mdct_twiddles960[1800] ICONST_ATTR = { +32767, 32767, 32767, 32766, 32765, +32763, 32761, 32759, 32756, 32753, +32750, 32746, 32742, 32738, 32733, +32728, 32722, 32717, 32710, 32704, +32697, 32690, 32682, 32674, 32666, +32657, 32648, 32639, 32629, 32619, +32609, 32598, 32587, 32576, 32564, +32552, 32539, 32526, 32513, 32500, +32486, 32472, 32457, 32442, 32427, +32411, 32395, 32379, 32362, 32345, +32328, 32310, 32292, 32274, 32255, +32236, 32217, 32197, 32177, 32157, +32136, 32115, 32093, 32071, 32049, +32027, 32004, 31981, 31957, 31933, +31909, 31884, 31859, 31834, 31809, +31783, 31756, 31730, 31703, 31676, +31648, 31620, 31592, 31563, 31534, +31505, 31475, 31445, 31415, 31384, +31353, 31322, 31290, 31258, 31226, +31193, 31160, 31127, 31093, 31059, +31025, 30990, 30955, 30920, 30884, +30848, 30812, 30775, 30738, 30701, +30663, 30625, 30587, 30548, 30509, +30470, 30430, 30390, 30350, 30309, +30269, 30227, 30186, 30144, 30102, +30059, 30016, 29973, 29930, 29886, +29842, 29797, 29752, 29707, 29662, +29616, 29570, 29524, 29477, 29430, +29383, 29335, 29287, 29239, 29190, +29142, 29092, 29043, 28993, 28943, +28892, 28842, 28791, 28739, 28688, +28636, 28583, 28531, 28478, 28425, +28371, 28317, 28263, 28209, 28154, +28099, 28044, 27988, 27932, 27876, +27820, 27763, 27706, 27648, 27591, +27533, 27474, 27416, 27357, 27298, +27238, 27178, 27118, 27058, 26997, +26936, 26875, 26814, 26752, 26690, +26628, 26565, 26502, 26439, 26375, +26312, 26247, 26183, 26119, 26054, +25988, 25923, 25857, 25791, 25725, +25658, 25592, 25524, 25457, 25389, +25322, 25253, 25185, 25116, 25047, +24978, 24908, 24838, 24768, 24698, +24627, 24557, 24485, 24414, 24342, +24270, 24198, 24126, 24053, 23980, +23907, 23834, 23760, 23686, 23612, +23537, 23462, 23387, 23312, 23237, +23161, 23085, 23009, 22932, 22856, +22779, 22701, 22624, 22546, 22468, +22390, 22312, 22233, 22154, 22075, +21996, 21916, 21836, 21756, 21676, +21595, 21515, 21434, 21352, 21271, +21189, 21107, 21025, 20943, 20860, +20777, 20694, 20611, 20528, 20444, +20360, 20276, 20192, 20107, 20022, +19937, 19852, 19767, 19681, 19595, +19509, 19423, 19336, 19250, 19163, +19076, 18988, 18901, 18813, 18725, +18637, 18549, 18460, 18372, 18283, +18194, 18104, 18015, 17925, 17835, +17745, 17655, 17565, 17474, 17383, +17292, 17201, 17110, 17018, 16927, +16835, 16743, 16650, 16558, 16465, +16372, 16279, 16186, 16093, 15999, +15906, 15812, 15718, 15624, 15529, +15435, 15340, 15245, 15150, 15055, +14960, 14864, 14769, 14673, 14577, +14481, 14385, 14288, 14192, 14095, +13998, 13901, 13804, 13706, 13609, +13511, 13414, 13316, 13218, 13119, +13021, 12923, 12824, 12725, 12626, +12527, 12428, 12329, 12230, 12130, +12030, 11930, 11831, 11730, 11630, +11530, 11430, 11329, 11228, 11128, +11027, 10926, 10824, 10723, 10622, +10520, 10419, 10317, 10215, 10113, +10011, 9909, 9807, 9704, 9602, +9499, 9397, 9294, 9191, 9088, +8985, 8882, 8778, 8675, 8572, +8468, 8364, 8261, 8157, 8053, +7949, 7845, 7741, 7637, 7532, +7428, 7323, 7219, 7114, 7009, +6905, 6800, 6695, 6590, 6485, +6380, 6274, 6169, 6064, 5958, +5853, 5747, 5642, 5536, 5430, +5325, 5219, 5113, 5007, 4901, +4795, 4689, 4583, 4476, 4370, +4264, 4157, 4051, 3945, 3838, +3732, 3625, 3518, 3412, 3305, +3198, 3092, 2985, 2878, 2771, +2664, 2558, 2451, 2344, 2237, +2130, 2023, 1916, 1809, 1702, +1594, 1487, 1380, 1273, 1166, +1059, 952, 844, 737, 630, +523, 416, 308, 201, 94, +-13, -121, -228, -335, -442, +-550, -657, -764, -871, -978, +-1086, -1193, -1300, -1407, -1514, +-1621, -1728, -1835, -1942, -2049, +-2157, -2263, -2370, -2477, -2584, +-2691, -2798, -2905, -3012, -3118, +-3225, -3332, -3439, -3545, -3652, +-3758, -3865, -3971, -4078, -4184, +-4290, -4397, -4503, -4609, -4715, +-4821, -4927, -5033, -5139, -5245, +-5351, -5457, -5562, -5668, -5774, +-5879, -5985, -6090, -6195, -6301, +-6406, -6511, -6616, -6721, -6826, +-6931, -7036, -7140, -7245, -7349, +-7454, -7558, -7663, -7767, -7871, +-7975, -8079, -8183, -8287, -8390, +-8494, -8597, -8701, -8804, -8907, +-9011, -9114, -9217, -9319, -9422, +-9525, -9627, -9730, -9832, -9934, +-10037, -10139, -10241, -10342, -10444, +-10546, -10647, -10748, -10850, -10951, +-11052, -11153, -11253, -11354, -11455, +-11555, -11655, -11756, -11856, -11955, +-12055, -12155, -12254, -12354, -12453, +-12552, -12651, -12750, -12849, -12947, +-13046, -13144, -13242, -13340, -13438, +-13536, -13633, -13731, -13828, -13925, +-14022, -14119, -14216, -14312, -14409, +-14505, -14601, -14697, -14793, -14888, +-14984, -15079, -15174, -15269, -15364, +-15459, -15553, -15647, -15741, -15835, +-15929, -16023, -16116, -16210, -16303, +-16396, -16488, -16581, -16673, -16766, +-16858, -16949, -17041, -17133, -17224, +-17315, -17406, -17497, -17587, -17678, +-17768, -17858, -17948, -18037, -18127, +-18216, -18305, -18394, -18483, -18571, +-18659, -18747, -18835, -18923, -19010, +-19098, -19185, -19271, -19358, -19444, +-19531, -19617, -19702, -19788, -19873, +-19959, -20043, -20128, -20213, -20297, +-20381, -20465, -20549, -20632, -20715, +-20798, -20881, -20963, -21046, -21128, +-21210, -21291, -21373, -21454, -21535, +-21616, -21696, -21776, -21856, -21936, +-22016, -22095, -22174, -22253, -22331, +-22410, -22488, -22566, -22643, -22721, +-22798, -22875, -22951, -23028, -23104, +-23180, -23256, -23331, -23406, -23481, +-23556, -23630, -23704, -23778, -23852, +-23925, -23998, -24071, -24144, -24216, +-24288, -24360, -24432, -24503, -24574, +-24645, -24716, -24786, -24856, -24926, +-24995, -25064, -25133, -25202, -25270, +-25339, -25406, -25474, -25541, -25608, +-25675, -25742, -25808, -25874, -25939, +-26005, -26070, -26135, -26199, -26264, +-26327, -26391, -26455, -26518, -26581, +-26643, -26705, -26767, -26829, -26891, +-26952, -27013, -27073, -27133, -27193, +-27253, -27312, -27372, -27430, -27489, +-27547, -27605, -27663, -27720, -27777, +-27834, -27890, -27946, -28002, -28058, +-28113, -28168, -28223, -28277, -28331, +-28385, -28438, -28491, -28544, -28596, +-28649, -28701, -28752, -28803, -28854, +-28905, -28955, -29006, -29055, -29105, +-29154, -29203, -29251, -29299, -29347, +-29395, -29442, -29489, -29535, -29582, +-29628, -29673, -29719, -29764, -29808, +-29853, -29897, -29941, -29984, -30027, +-30070, -30112, -30154, -30196, -30238, +-30279, -30320, -30360, -30400, -30440, +-30480, -30519, -30558, -30596, -30635, +-30672, -30710, -30747, -30784, -30821, +-30857, -30893, -30929, -30964, -30999, +-31033, -31068, -31102, -31135, -31168, +-31201, -31234, -31266, -31298, -31330, +-31361, -31392, -31422, -31453, -31483, +-31512, -31541, -31570, -31599, -31627, +-31655, -31682, -31710, -31737, -31763, +-31789, -31815, -31841, -31866, -31891, +-31915, -31939, -31963, -31986, -32010, +-32032, -32055, -32077, -32099, -32120, +-32141, -32162, -32182, -32202, -32222, +-32241, -32260, -32279, -32297, -32315, +-32333, -32350, -32367, -32383, -32399, +-32415, -32431, -32446, -32461, -32475, +-32489, -32503, -32517, -32530, -32542, +-32555, -32567, -32579, -32590, -32601, +-32612, -32622, -32632, -32641, -32651, +-32659, -32668, -32676, -32684, -32692, +-32699, -32706, -32712, -32718, -32724, +-32729, -32734, -32739, -32743, -32747, +-32751, -32754, -32757, -32760, -32762, +-32764, -32765, -32767, -32767, -32767, +32767, 32767, 32765, 32761, 32756, +32750, 32742, 32732, 32722, 32710, +32696, 32681, 32665, 32647, 32628, +32608, 32586, 32562, 32538, 32512, +32484, 32455, 32425, 32393, 32360, +32326, 32290, 32253, 32214, 32174, +32133, 32090, 32046, 32001, 31954, +31906, 31856, 31805, 31753, 31700, +31645, 31588, 31530, 31471, 31411, +31349, 31286, 31222, 31156, 31089, +31020, 30951, 30880, 30807, 30733, +30658, 30582, 30504, 30425, 30345, +30263, 30181, 30096, 30011, 29924, +29836, 29747, 29656, 29564, 29471, +29377, 29281, 29184, 29086, 28987, +28886, 28784, 28681, 28577, 28471, +28365, 28257, 28147, 28037, 27925, +27812, 27698, 27583, 27467, 27349, +27231, 27111, 26990, 26868, 26744, +26620, 26494, 26367, 26239, 26110, +25980, 25849, 25717, 25583, 25449, +25313, 25176, 25038, 24900, 24760, +24619, 24477, 24333, 24189, 24044, +23898, 23751, 23602, 23453, 23303, +23152, 22999, 22846, 22692, 22537, +22380, 22223, 22065, 21906, 21746, +21585, 21423, 21261, 21097, 20933, +20767, 20601, 20434, 20265, 20096, +19927, 19756, 19584, 19412, 19239, +19065, 18890, 18714, 18538, 18361, +18183, 18004, 17824, 17644, 17463, +17281, 17098, 16915, 16731, 16546, +16361, 16175, 15988, 15800, 15612, +15423, 15234, 15043, 14852, 14661, +14469, 14276, 14083, 13889, 13694, +13499, 13303, 13107, 12910, 12713, +12515, 12317, 12118, 11918, 11718, +11517, 11316, 11115, 10913, 10710, +10508, 10304, 10100, 9896, 9691, +9486, 9281, 9075, 8869, 8662, +8455, 8248, 8040, 7832, 7623, +7415, 7206, 6996, 6787, 6577, +6366, 6156, 5945, 5734, 5523, +5311, 5100, 4888, 4675, 4463, +4251, 4038, 3825, 3612, 3399, +3185, 2972, 2758, 2544, 2330, +2116, 1902, 1688, 1474, 1260, +1045, 831, 617, 402, 188, +-27, -241, -456, -670, -885, +-1099, -1313, -1528, -1742, -1956, +-2170, -2384, -2598, -2811, -3025, +-3239, -3452, -3665, -3878, -4091, +-4304, -4516, -4728, -4941, -5153, +-5364, -5576, -5787, -5998, -6209, +-6419, -6629, -6839, -7049, -7258, +-7467, -7676, -7884, -8092, -8300, +-8507, -8714, -8920, -9127, -9332, +-9538, -9743, -9947, -10151, -10355, +-10558, -10761, -10963, -11165, -11367, +-11568, -11768, -11968, -12167, -12366, +-12565, -12762, -12960, -13156, -13352, +-13548, -13743, -13937, -14131, -14324, +-14517, -14709, -14900, -15091, -15281, +-15470, -15659, -15847, -16035, -16221, +-16407, -16593, -16777, -16961, -17144, +-17326, -17508, -17689, -17869, -18049, +-18227, -18405, -18582, -18758, -18934, +-19108, -19282, -19455, -19627, -19799, +-19969, -20139, -20308, -20475, -20642, +-20809, -20974, -21138, -21301, -21464, +-21626, -21786, -21946, -22105, -22263, +-22420, -22575, -22730, -22884, -23037, +-23189, -23340, -23490, -23640, -23788, +-23935, -24080, -24225, -24369, -24512, +-24654, -24795, -24934, -25073, -25211, +-25347, -25482, -25617, -25750, -25882, +-26013, -26143, -26272, -26399, -26526, +-26651, -26775, -26898, -27020, -27141, +-27260, -27379, -27496, -27612, -27727, +-27841, -27953, -28065, -28175, -28284, +-28391, -28498, -28603, -28707, -28810, +-28911, -29012, -29111, -29209, -29305, +-29401, -29495, -29587, -29679, -29769, +-29858, -29946, -30032, -30118, -30201, +-30284, -30365, -30445, -30524, -30601, +-30677, -30752, -30825, -30897, -30968, +-31038, -31106, -31172, -31238, -31302, +-31365, -31426, -31486, -31545, -31602, +-31658, -31713, -31766, -31818, -31869, +-31918, -31966, -32012, -32058, -32101, +-32144, -32185, -32224, -32262, -32299, +-32335, -32369, -32401, -32433, -32463, +-32491, -32518, -32544, -32568, -32591, +-32613, -32633, -32652, -32669, -32685, +-32700, -32713, -32724, -32735, -32744, +-32751, -32757, -32762, -32766, -32767, +32767, 32764, 32755, 32741, 32720, +32694, 32663, 32626, 32583, 32535, +32481, 32421, 32356, 32286, 32209, +32128, 32041, 31948, 31850, 31747, +31638, 31523, 31403, 31278, 31148, +31012, 30871, 30724, 30572, 30415, +30253, 30086, 29913, 29736, 29553, +29365, 29172, 28974, 28771, 28564, +28351, 28134, 27911, 27684, 27452, +27216, 26975, 26729, 26478, 26223, +25964, 25700, 25432, 25159, 24882, +24601, 24315, 24026, 23732, 23434, +23133, 22827, 22517, 22204, 21886, +21565, 21240, 20912, 20580, 20244, +19905, 19563, 19217, 18868, 18516, +18160, 17802, 17440, 17075, 16708, +16338, 15964, 15588, 15210, 14829, +14445, 14059, 13670, 13279, 12886, +12490, 12093, 11693, 11291, 10888, +10482, 10075, 9666, 9255, 8843, +8429, 8014, 7597, 7180, 6760, +6340, 5919, 5496, 5073, 4649, +4224, 3798, 3372, 2945, 2517, +2090, 1661, 1233, 804, 375, +-54, -483, -911, -1340, -1768, +-2197, -2624, -3052, -3479, -3905, +-4330, -4755, -5179, -5602, -6024, +-6445, -6865, -7284, -7702, -8118, +-8533, -8946, -9358, -9768, -10177, +-10584, -10989, -11392, -11793, -12192, +-12589, -12984, -13377, -13767, -14155, +-14541, -14924, -15305, -15683, -16058, +-16430, -16800, -17167, -17531, -17892, +-18249, -18604, -18956, -19304, -19649, +-19990, -20329, -20663, -20994, -21322, +-21646, -21966, -22282, -22595, -22904, +-23208, -23509, -23806, -24099, -24387, +-24672, -24952, -25228, -25499, -25766, +-26029, -26288, -26541, -26791, -27035, +-27275, -27511, -27741, -27967, -28188, +-28405, -28616, -28823, -29024, -29221, +-29412, -29599, -29780, -29957, -30128, +-30294, -30455, -30611, -30761, -30906, +-31046, -31181, -31310, -31434, -31552, +-31665, -31773, -31875, -31972, -32063, +-32149, -32229, -32304, -32373, -32437, +-32495, -32547, -32594, -32635, -32671, +-32701, -32726, -32745, -32758, -32766, +32767, 32754, 32717, 32658, 32577, +32473, 32348, 32200, 32029, 31837, +31624, 31388, 31131, 30853, 30553, +30232, 29891, 29530, 29148, 28746, +28324, 27883, 27423, 26944, 26447, +25931, 25398, 24847, 24279, 23695, +23095, 22478, 21846, 21199, 20538, +19863, 19174, 18472, 17757, 17030, +16291, 15541, 14781, 14010, 13230, +12441, 11643, 10837, 10024, 9204, +8377, 7545, 6708, 5866, 5020, +4171, 3319, 2464, 1608, 751, +-107, -965, -1822, -2678, -3532, +-4383, -5232, -6077, -6918, -7754, +-8585, -9409, -10228, -11039, -11843, +-12639, -13426, -14204, -14972, -15730, +-16477, -17213, -17937, -18648, -19347, +-20033, -20705, -21363, -22006, -22634, +-23246, -23843, -24423, -24986, -25533, +-26062, -26573, -27066, -27540, -27995, +-28431, -28848, -29245, -29622, -29979, +-30315, -30630, -30924, -31197, -31449, +-31679, -31887, -32074, -32239, -32381, +-32501, -32600, -32675, -32729, -32759, +}; #endif static const CELTMode mode48000_960_120 ICONST_ATTR = { diff --git a/lib/rbcodec/codecs/libopus/celt/vq.c b/lib/rbcodec/codecs/libopus/celt/vq.c index af991bb..b047b22 100644 --- a/lib/rbcodec/codecs/libopus/celt/vq.c +++ b/lib/rbcodec/codecs/libopus/celt/vq.c @@ -37,19 +37,27 @@ #include "os_support.h" #include "bands.h" #include "rate.h" +#include "pitch.h" +#if defined(MIPSr1_ASM) +#include "mips/vq_mipsr1.h" +#endif + +#ifndef OVERRIDE_vq_exp_rotation1 static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s) { int i; + opus_val16 ms; celt_norm *Xptr; Xptr = X; + ms = NEG16(s); for (i=0;i<len-stride;i++) { celt_norm x1, x2; x1 = Xptr[0]; x2 = Xptr[stride]; - Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15)); - *Xptr++ = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15)); + Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15)); + *Xptr++ = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15)); } Xptr = &X[len-2*stride-1]; for (i=len-2*stride-1;i>=0;i--) @@ -57,10 +65,11 @@ static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_ celt_norm x1, x2; x1 = Xptr[0]; x2 = Xptr[stride]; - Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15)); - *Xptr-- = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15)); + Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15)); + *Xptr-- = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15)); } } +#endif /* OVERRIDE_vq_exp_rotation1 */ static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread) { @@ -91,7 +100,7 @@ static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int } /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for extract_collapse_mask().*/ - len /= stride; + len = celt_udiv(len, stride); for (i=0;i<stride;i++) { if (dir < 0) @@ -140,13 +149,15 @@ static unsigned extract_collapse_mask(int *iy, int N, int B) return 1; /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for exp_rotation().*/ - N0 = N/B; + N0 = celt_udiv(N, B); collapse_mask = 0; i=0; do { int j; + unsigned tmp=0; j=0; do { - collapse_mask |= (iy[i*N0+j]!=0)<<i; + tmp |= iy[i*N0+j]; } while (++j<N0); + collapse_mask |= (tmp!=0)<<i; } while (++i<B); return collapse_mask; } @@ -322,47 +333,34 @@ unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B, ec_dec *dec, opus_val16 gain) { - int i; opus_val32 Ryy; unsigned collapse_mask; -/* VARDECL(int, iy); - SAVE_STACK; */ - - /* the difference between the last two values of eband5ms shifted by maxLM - which is 22 << 3 with the static mode */ - int iy[176]; + VARDECL(int, iy); + SAVE_STACK; celt_assert2(K>0, "alg_unquant() needs at least one pulse"); celt_assert2(N>1, "alg_unquant() needs at least two dimensions"); -/* ALLOC(iy, N, int); */ - decode_pulses(iy, N, K, dec); - Ryy = 0; - i=0; - do { - Ryy = MAC16_16(Ryy, iy[i], iy[i]); - } while (++i < N); + ALLOC(iy, N, int); + Ryy = decode_pulses(iy, N, K, dec); normalise_residual(iy, X, N, Ryy, gain); exp_rotation(X, N, -1, B, K, spread); collapse_mask = extract_collapse_mask(iy, N, B); -/* RESTORE_STACK; */ + RESTORE_STACK; return collapse_mask; } +#ifndef OVERRIDE_renormalise_vector void renormalise_vector(celt_norm *X, int N, opus_val16 gain) { int i; #ifdef FIXED_POINT int k; #endif - opus_val32 E = EPSILON; + opus_val32 E; opus_val16 g; opus_val32 t; - celt_norm *xptr = X; - for (i=0;i<N;i++) - { - E = MAC16_16(E, *xptr, *xptr); - xptr++; - } + celt_norm *xptr; + E = EPSILON + celt_inner_prod(X, X, N); #ifdef FIXED_POINT k = celt_ilog2(E)>>1; #endif @@ -377,8 +375,9 @@ void renormalise_vector(celt_norm *X, int N, opus_val16 gain) } /*return celt_sqrt(E);*/ } +#endif /* OVERRIDE_renormalise_vector */ -int stereo_itheta(celt_norm *X, celt_norm *Y, int stereo, int N) +int stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N) { int i; int itheta; @@ -397,14 +396,8 @@ int stereo_itheta(celt_norm *X, celt_norm *Y, int stereo, int N) Eside = MAC16_16(Eside, s, s); } } else { - for (i=0;i<N;i++) - { - celt_norm m, s; - m = X[i]; - s = Y[i]; - Emid = MAC16_16(Emid, m, m); - Eside = MAC16_16(Eside, s, s); - } + Emid += celt_inner_prod(X, X, N); + Eside += celt_inner_prod(Y, Y, N); } mid = celt_sqrt(Emid); side = celt_sqrt(Eside); diff --git a/lib/rbcodec/codecs/libopus/celt/vq.h b/lib/rbcodec/codecs/libopus/celt/vq.h index ffdc69c..84115cb 100644 --- a/lib/rbcodec/codecs/libopus/celt/vq.h +++ b/lib/rbcodec/codecs/libopus/celt/vq.h @@ -65,6 +65,6 @@ unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B, void renormalise_vector(celt_norm *X, int N, opus_val16 gain); -int stereo_itheta(celt_norm *X, celt_norm *Y, int stereo, int N); +int stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N); #endif /* VQ_H */ |