summaryrefslogtreecommitdiff
path: root/apps/codecs/libwmapro/wmaprodec.c
diff options
context:
space:
mode:
authorAndree Buschmann <AndreeBuschmann@t-online.de>2010-07-26 21:43:07 +0000
committerAndree Buschmann <AndreeBuschmann@t-online.de>2010-07-26 21:43:07 +0000
commit87d59ab56c30eadc4691a41ba7540cca868c9b50 (patch)
treeba0b3196b5dcabc7a4c5296c1ea9027f033f023f /apps/codecs/libwmapro/wmaprodec.c
parentdbeb1ee07269942997a3676595a795d4aee547eb (diff)
downloadrockbox-87d59ab56c30eadc4691a41ba7540cca868c9b50.zip
rockbox-87d59ab56c30eadc4691a41ba7540cca868c9b50.tar.gz
rockbox-87d59ab56c30eadc4691a41ba7540cca868c9b50.tar.bz2
rockbox-87d59ab56c30eadc4691a41ba7540cca868c9b50.tar.xz
Submit part of FS#11498. Major speedup for WMA Professional on ARM and Coldfire CPUs. Introduce asm routines for multiplications, move arrays with major impact on decoding speed to IRAM. Speeds up decoding by 25% on PP5022 and 34% on mcf5249.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27582 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/codecs/libwmapro/wmaprodec.c')
-rw-r--r--apps/codecs/libwmapro/wmaprodec.c43
1 files changed, 30 insertions, 13 deletions
diff --git a/apps/codecs/libwmapro/wmaprodec.c b/apps/codecs/libwmapro/wmaprodec.c
index b7879a2..1f65157 100644
--- a/apps/codecs/libwmapro/wmaprodec.c
+++ b/apps/codecs/libwmapro/wmaprodec.c
@@ -133,6 +133,7 @@
#define WMAPRO_BLOCK_MAX_BITS 12 ///< log2 of max block size
#define WMAPRO_BLOCK_MAX_SIZE (1 << WMAPRO_BLOCK_MAX_BITS) ///< maximum block size
#define WMAPRO_BLOCK_SIZES (WMAPRO_BLOCK_MAX_BITS - BLOCK_MIN_BITS + 1) ///< possible block sizes
+#define WMAPRO_OUT_BUF_SIZE (WMAPRO_BLOCK_MAX_SIZE + WMAPRO_BLOCK_MAX_SIZE / 2)
#define VLCBITS 9
@@ -151,6 +152,12 @@ static VLC vec1_vlc; ///< 1 coefficient per symbol
static VLC coef_vlc[2]; ///< coefficient run length vlc codes
//static float sin64[33]; ///< sinus table for decorrelation
+/* Global defined arrays to allow IRAM usage for some models. */
+static int32_t g_tmp[WMAPRO_BLOCK_MAX_SIZE] IBSS_ATTR_WMAPRO_LARGE_IRAM;
+static int32_t g_out_ch0[WMAPRO_OUT_BUF_SIZE] IBSS_ATTR;
+static int32_t g_out_ch1[WMAPRO_OUT_BUF_SIZE] IBSS_ATTR_WMAPRO_LARGE_IRAM;
+static int32_t g_out_multichannel[WMAPRO_MAX_CHANNELS-2][WMAPRO_OUT_BUF_SIZE];
+
/**
* @brief frame specific decoder context for a single channel
*/
@@ -171,8 +178,8 @@ typedef struct {
int8_t scale_factor_idx; ///< index for the transmitted scale factor values (used for resampling)
int* scale_factors; ///< pointer to the scale factor values used for decoding
uint8_t table_idx; ///< index in sf_offsets for the scale factor reference block
- int32_t* coeffs; ///< pointer to the subframe decode buffer
- DECLARE_ALIGNED(16, int32_t, out)[WMAPRO_BLOCK_MAX_SIZE + WMAPRO_BLOCK_MAX_SIZE / 2]; ///< output buffer
+ int32_t* coeffs; ///< pointer to the subframe decode buffer
+ int32_t* out; ///< output buffer
} WMAProChannelCtx;
/**
@@ -195,7 +202,7 @@ typedef struct WMAProDecodeCtx {
uint8_t frame_data[MAX_FRAMESIZE +
FF_INPUT_BUFFER_PADDING_SIZE];///< compressed frame data
PutBitContext pb; ///< context for filling the frame_data buffer
- DECLARE_ALIGNED(16, int32_t, tmp)[WMAPRO_BLOCK_MAX_SIZE]; ///< IMDCT input buffer
+ int32_t* tmp; ///< IMDCT input buffer
/* frame size dependent frame information (set during initialization) */
uint32_t decode_flags; ///< used compression features
@@ -229,8 +236,8 @@ typedef struct WMAProDecodeCtx {
uint32_t frame_num; ///< current frame number
GetBitContext gb; ///< bitstream reader context
int buf_bit_size; ///< buffer size in bits
- int32_t* samples;
- int32_t* samples_end; ///< maximum samplebuffer pointer
+ int32_t* samples;
+ int32_t* samples_end; ///< maximum samplebuffer pointer
uint8_t drc_gain; ///< gain for the DRC tool
int8_t skip_frame; ///< skip output step
int8_t parsed_all_subframes; ///< all subframes decoded?
@@ -287,6 +294,15 @@ int decode_init(asf_waveformatex_t *wfx)
int i;
int log2_max_num_subframes;
int num_possible_block_sizes;
+
+ /* Use globally defined array. Allows IRAM usage for models with large IRAM. */
+ s->tmp = g_tmp;
+
+ /* Use globally defined arrays. Allows IRAM usage for up to 2 channels. */
+ s->channel[0].out = g_out_ch0;
+ s->channel[1].out = g_out_ch1;
+ for (i=2; i<WMAPRO_MAX_CHANNELS; ++i)
+ s->channel[i].out = g_out_multichannel[i-2];
#if defined(CPU_COLDFIRE)
coldfire_set_macsr(EMAC_FRACTIONAL | EMAC_SATURATE);
@@ -657,9 +673,9 @@ static void decode_decorrelation_matrix(WMAProDecodeCtx *s,
chgroup->decorrelation_matrix[y + i * chgroup->num_channels] =
(v1 * cosv) + (v2 * sinv);
chgroup->fixdecorrelation_matrix[y + x * chgroup->num_channels] =
- fixmulshift(f1, fixsinv, 31) - fixmulshift(f2, fixcosv, 31);
+ fixmul31(f1, fixsinv) - fixmul31(f2, fixcosv);
chgroup->fixdecorrelation_matrix[y + i * chgroup->num_channels] =
- fixmulshift(f1, fixcosv, 31) + fixmulshift(f2, fixsinv, 31);
+ fixmul31(f1, fixcosv) + fixmul31(f2, fixsinv);
}
}
@@ -1009,20 +1025,21 @@ static void inverse_channel_transform(WMAProDecodeCtx *s)
data_ptr = data;
while (data_ptr < data_end)
- sum += fixmulshift(*data_ptr++, *mat++, 16);
+ sum += fixmul16(*data_ptr++, *mat++);
(*ch)[y] = sum;
}
}
} else if (s->num_channels == 2) {
+ /* Scale with sqrt(2). 0x016A09E6 = (sqrt(2)*(1<<24)) */
int len = FFMIN(sfb[1], s->subframe_len) - sfb[0];
vector_fixmul_scalar(ch_data[0] + sfb[0],
ch_data[0] + sfb[0],
- 0x00016A00, len,16);
+ 0x016A09E6, len);
vector_fixmul_scalar(ch_data[1] + sfb[0],
- ch_data[1] + sfb[0],
- 0x00016A00, len,16);
+ ch_data[1] + sfb[0],
+ 0x016A09E6, len);
}
}
@@ -1049,7 +1066,7 @@ static void wmapro_window(WMAProDecodeCtx *s)
winlen = s->subframe_len;
}
- window = sine_windows[av_log2(winlen) - BLOCK_MIN_BITS];
+ window = sine_windows[av_log2(winlen) - BLOCK_MIN_BITS];
winlen >>= 1;
@@ -1261,7 +1278,7 @@ static int decode_subframe(WMAProDecodeCtx *s)
vector_fixmul_scalar(s->tmp+start,
s->channel[c].coeffs + start,
- quant, end-start, 24);
+ quant, end-start);
}