summary | refs | log | tree | commit | diff
path: root/apps/codecs
diff options
context:
space:
mode:
author Dave Chapman <dave@dchapman.com> 2006-02-07 22:16:35 +0000
committer Dave Chapman <dave@dchapman.com> 2006-02-07 22:16:35 +0000
commit 8e46ab85a9a1c50589920897763ce53e593c9369 (patch)
tree 0a0fe757659050c39d6b5160c22311915557abbd /apps/codecs
parent 6099dc8b77e1b536ff47b4b74edf20f1fafda5b6 (diff)
download rockbox-8e46ab85a9a1c50589920897763ce53e593c9369.zip
rockbox-8e46ab85a9a1c50589920897763ce53e593c9369.tar.gz
rockbox-8e46ab85a9a1c50589920897763ce53e593c9369.tar.bz2
rockbox-8e46ab85a9a1c50589920897763ce53e593c9369.tar.xz
Patch #1426489 - Shorten codec optimisations from Mark Arigo
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@8615 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/codecs')
-rw-r--r-- apps/codecs/libffmpegFLAC/shndec.c 190
-rw-r--r-- apps/codecs/libffmpegFLAC/shndec.h 44
-rw-r--r-- apps/codecs/shorten.c 92
3 files changed, 212 insertions, 114 deletions
diff --git a/apps/codecs/libffmpegFLAC/shndec.c b/apps/codecs/libffmpegFLAC/shndec.c
index 6dde8f7..d7fc6a1 100644
--- a/apps/codecs/libffmpegFLAC/shndec.c
+++ b/apps/codecs/libffmpegFLAC/shndec.c
@@ -28,12 +28,6 @@
#include "golomb.h"
#include "shndec.h"
-/* These seem reasonable from my test files.
- Does MAX_HEADER_SIZE really need to be 16384? */
-#define MAX_PRED_ORDER 16
-#define MAX_HEADER_SIZE DEFAULT_BLOCK_SIZE*4
-//#define MAX_HEADER_SIZE 16384
-
#define ULONGSIZE 2
#define WAVE_FORMAT_PCM 0x0001
@@ -54,16 +48,6 @@
#define V2LPCQOFFSET (1 << LPCQUANT)
#define FNSIZE 2
-#define FN_DIFF0 0
-#define FN_DIFF1 1
-#define FN_DIFF2 2
-#define FN_DIFF3 3
-#define FN_QUIT 4
-#define FN_BLOCKSIZE 5
-#define FN_BITSHIFT 6
-#define FN_QLPC 7
-#define FN_ZERO 8
-#define FN_VERBATIM 9
#define VERBATIM_CKSIZE_SIZE 5
#define VERBATIM_BYTE_SIZE 8
@@ -76,22 +60,21 @@
#define get_le16(gb) bswap_16(get_bits_long(gb, 16))
#define get_le32(gb) bswap_32(get_bits_long(gb, 32))
-static inline uint32_t bswap_32(uint32_t x){
+static uint32_t bswap_32(uint32_t x){
x= ((x<<8)&0xFF00FF00) | ((x>>8)&0x00FF00FF);
return (x>>16) | (x<<16);
}
-static inline uint16_t bswap_16(uint16_t x){
+static uint16_t bswap_16(uint16_t x){
return (x>>8) | (x<<8);
}
/* converts fourcc string to int */
-static inline int ff_get_fourcc(const char *s){
+static int ff_get_fourcc(const char *s){
//assert( strlen(s)==4 );
return (s[0]) + (s[1]<<8) + (s[2]<<16) + (s[3]<<24);
}
-static unsigned int get_uint(ShortenContext *s, int k) ICODE_ATTR;
static unsigned int get_uint(ShortenContext *s, int k)
{
if (s->version != 0)
@@ -99,10 +82,77 @@ static unsigned int get_uint(ShortenContext *s, int k)
return get_ur_golomb_shorten(&s->gb, k);
}
-static void decode_subframe_lpc(ShortenContext *s, int32_t *decoded,
- int residual_size, int pred_order) ICODE_ATTR;
-static void decode_subframe_lpc(ShortenContext *s, int32_t *decoded,
- int residual_size, int pred_order)
+#if defined(CPU_COLDFIRE) && !defined(SIMULATOR)
+static void coldfire_lshift_samples(int n, int shift, int32_t *samples) ICODE_ATTR;
+static void coldfire_lshift_samples(int n, int shift, int32_t *samples)
+{
+/*
+ for (i = 0; i < n; i++)
+ samples[i] <<= shift;
+*/
+ asm volatile (
+ "move.l %[n], %%d0 \n" /* d0 = loop counter */
+ "asr.l #2, %%d0 \n"
+ "beq l1_shift \n"
+ "l2_shift:" /* main loop (unroll by 4) */
+ "movem.l (%[x]), %%d4-%%d7 \n"
+ "asl.l %[s], %%d4 \n"
+ "asl.l %[s], %%d5 \n"
+ "asl.l %[s], %%d6 \n"
+ "asl.l %[s], %%d7 \n"
+ "movem.l %%d4-%%d7, (%[x]) \n"
+ "add.l #16, %[x] \n"
+
+ "subq.l #1, %%d0 \n"
+ "bne l2_shift \n"
+ "l1_shift:" /* any loops left? */
+ "and.l #3, %[n] \n"
+ "beq l4_shift \n"
+ "l3_shift:" /* remaining loops */
+ "move.l (%[x]), %%d4 \n"
+ "asl.l %[s], %%d4 \n"
+ "move.l %%d4, (%[x])+ \n"
+
+ "subq.l #1, %[n] \n"
+ "bne l3_shift \n"
+ "l4_shift:" /* exit */
+ : [n] "+d" (n), /* d1 */
+ [s] "+d" (shift), /* d2 */
+ [x] "+a" (samples) /* a0 */
+ :
+ : "%d0", "%d4", "%d5", "%d6", "%d7"
+ );
+}
+#endif
+
+static inline void fix_bitshift(ShortenContext *s, int32_t *samples)
+{
+ int i;
+
+ /* Wrapped samples don't get bitshifted, so we'll do them during
+ the next iteration. */
+ if (s->bitshift != 0) {
+#if defined(CPU_COLDFIRE) && !defined(SIMULATOR)
+ coldfire_lshift_samples(s->blocksize, s->bitshift, samples - s->nwrap);
+#else
+ for (i = -s->nwrap; i < (s->blocksize - s->nwrap); i++)
+ samples[i] <<= s->bitshift;
+#endif
+ }
+
+ /* Also, we have to remember to fix the wrapped samples when
+ the bitshift changes. */
+ if (s->bitshift != s->last_bitshift) {
+ if (s->last_bitshift != 0)
+ for (i = -s->nwrap; i < 0; i++)
+ samples[i] <<= s->last_bitshift;
+
+ s->last_bitshift = s->bitshift;
+ }
+}
+
+static inline void decode_subframe_lpc(ShortenContext *s, int32_t *decoded,
+ int residual_size, int pred_order)
{
int sum, i, j;
int coeffs[MAX_PRED_ORDER];
@@ -121,18 +171,12 @@ static void decode_subframe_lpc(ShortenContext *s, int32_t *decoded,
}
}
-int shorten_decode_frame(ShortenContext *s,
- int32_t *decoded,
- int32_t *offset,
- uint8_t *buf,
- int buf_size)
+static inline int shorten_decode_frame(ShortenContext *s, int32_t *decoded,
+ int32_t *offset)
{
int i;
int32_t sum;
- init_get_bits(&s->gb, buf, buf_size*8);
- get_bits(&s->gb, s->bitindex);
-
int cmd = get_ur_golomb_shorten(&s->gb, FNSIZE);
switch (cmd) {
case FN_ZERO:
@@ -201,10 +245,6 @@ int shorten_decode_frame(ShortenContext *s,
case FN_QLPC:
{
int pred_order = get_ur_golomb_shorten(&s->gb, LPCQSIZE);
- if (pred_order > MAX_PRED_ORDER) {
- return -2;
- }
-
for (i=0; i<pred_order; i++)
decoded[i - pred_order] -= coffset;
decode_subframe_lpc(s, decoded, residual_size, pred_order);
@@ -231,12 +271,7 @@ int shorten_decode_frame(ShortenContext *s,
}
}
- for (i=-s->nwrap; i<0; i++)
- decoded[i] = decoded[i + s->blocksize];
-
- int scale = s->bitshift + SHN_OUTPUT_DEPTH - s->bits_per_sample;
- for (i = 0; i < s->blocksize; i++)
- decoded[i] <<= scale;
+ fix_bitshift(s, decoded);
break;
}
@@ -244,29 +279,88 @@ int shorten_decode_frame(ShortenContext *s,
i = get_ur_golomb_shorten(&s->gb, VERBATIM_CKSIZE_SIZE);
while (i--)
get_ur_golomb_shorten(&s->gb, VERBATIM_BYTE_SIZE);
- return 4;
break;
case FN_BITSHIFT:
s->bitshift = get_ur_golomb_shorten(&s->gb, BITSHIFTSIZE);
- return 3;
break;
case FN_BLOCKSIZE:
s->blocksize = get_uint(s, av_log2(s->blocksize));
- return 2;
break;
case FN_QUIT:
- return 1;
break;
default:
- return -1;
+ return FN_ERROR;
break;
}
- return 0;
+ return cmd;
+}
+
+int shorten_decode_frames(ShortenContext *s, int *nsamples,
+ int32_t *decoded0, int32_t *decoded1,
+ int32_t *offset0, int32_t *offset1,
+ uint8_t *buf, int buf_size,
+ void (*yield)(void))
+{
+ int32_t *decoded, *offset;
+ int cmd;
+
+ *nsamples = 0;
+
+ init_get_bits(&s->gb, buf, buf_size*8);
+ get_bits(&s->gb, s->bitindex);
+
+ int n = 0;
+ while (n < NUM_DEC_LOOPS) {
+ int chan = n%2;
+ if (chan == 0) {
+ decoded = decoded0 + s->nwrap + *nsamples;
+ offset = offset0;
+ } else {
+ decoded = decoded1 + s->nwrap + *nsamples;
+ offset = offset1;
+ }
+
+ yield();
+
+ cmd = shorten_decode_frame(s, decoded, offset);
+
+ if (cmd == FN_VERBATIM || cmd == FN_BITSHIFT || cmd == FN_BLOCKSIZE) {
+ continue;
+ } else if (cmd == FN_QUIT || cmd == FN_ERROR) {
+ break;
+ }
+
+ *nsamples += chan * s->blocksize;
+ n++;
+ }
+
+ if (*nsamples) {
+ /* Wrap the samples for the next loop */
+ int i;
+ for (i = 0; i < s->nwrap; i++) {
+ decoded0[i] = decoded0[*nsamples + i];
+ decoded1[i] = decoded1[*nsamples + i];
+ }
+
+ /* Scale the samples for the pcmbuf */
+ int scale = SHN_OUTPUT_DEPTH - s->bits_per_sample;
+#if defined(CPU_COLDFIRE) && !defined(SIMULATOR)
+ coldfire_lshift_samples(*nsamples, scale, decoded0 + s->nwrap);
+ coldfire_lshift_samples(*nsamples, scale, decoded1 + s->nwrap);
+#else
+ for (i = 0; i < *nsamples; i++) {
+ decoded0[i + s->nwrap] <<= scale;
+ decoded1[i + s->nwrap] <<= scale;
+ }
+#endif
+ }
+
+ return cmd;
}
static int decode_wave_header(ShortenContext *s,
diff --git a/apps/codecs/libffmpegFLAC/shndec.h b/apps/codecs/libffmpegFLAC/shndec.h
index 6b830dc..713a5b5 100644
--- a/apps/codecs/libffmpegFLAC/shndec.h
+++ b/apps/codecs/libffmpegFLAC/shndec.h
@@ -1,11 +1,31 @@
#include "bitstream.h"
#define SHN_OUTPUT_DEPTH 28
+
+#define MAX_CHANNELS 2
+#define MAX_PRED_ORDER 16
+#define MAX_NWRAP MAX_PRED_ORDER
+#define MAX_NMEAN 4
+
+/* NUM_DEC_LOOPS should be an even number */
+#define NUM_DEC_LOOPS 26
#define DEFAULT_BLOCK_SIZE 256
-#define MAX_FRAMESIZE 1024
-#define MAX_CHANNELS 2
-#define MAX_NWRAP 3
-#define MAX_NMEAN 4
+#define MAX_HEADER_SIZE DEFAULT_BLOCK_SIZE*4
+#define MAX_BUFFER_SIZE 2*DEFAULT_BLOCK_SIZE*NUM_DEC_LOOPS
+#define MAX_DECODE_SIZE ((DEFAULT_BLOCK_SIZE*NUM_DEC_LOOPS/2) + MAX_NWRAP)
+#define MAX_OFFSET_SIZE MAX_NMEAN
+
+#define FN_DIFF0 0
+#define FN_DIFF1 1
+#define FN_DIFF2 2
+#define FN_DIFF3 3
+#define FN_QUIT 4
+#define FN_BLOCKSIZE 5
+#define FN_BITSHIFT 6
+#define FN_QLPC 7
+#define FN_ZERO 8
+#define FN_VERBATIM 9
+#define FN_ERROR 10
typedef struct ShortenContext {
GetBitContext gb;
@@ -17,20 +37,16 @@ typedef struct ShortenContext {
int bits_per_sample;
int version;
int bitshift;
+ int last_bitshift;
int nmean;
int nwrap;
int blocksize;
int bitindex;
-/* Not needed...
- int bit_rate;
- int block_align;
- int chunk_size;
-*/
} ShortenContext;
int shorten_init(ShortenContext* s, uint8_t *buf, int buf_size);
-int shorten_decode_frame(ShortenContext *s,
- int32_t *decoded,
- int32_t *offset,
- uint8_t *buf,
- int buf_size) ICODE_ATTR;
+int shorten_decode_frames(ShortenContext *s, int *nsamples,
+ int32_t *decoded0, int32_t *decoded1,
+ int32_t *offset0, int32_t *offset1,
+ uint8_t *buf, int buf_size,
+ void (*yield)(void)) ICODE_ATTR;
diff --git a/apps/codecs/shorten.c b/apps/codecs/shorten.c
index 290686e..3edc143 100644
--- a/apps/codecs/shorten.c
+++ b/apps/codecs/shorten.c
@@ -33,13 +33,13 @@ extern char iend[];
struct codec_api* rb;
struct codec_api* ci;
-#define MAX_DECODED (DEFAULT_BLOCK_SIZE + MAX_NWRAP)
-int32_t decoded0[MAX_DECODED] IBSS_ATTR;
-int32_t decoded1[MAX_DECODED] IBSS_ATTR;
+int32_t decoded0[MAX_DECODE_SIZE] IBSS_ATTR;
+int32_t decoded1[MAX_DECODE_SIZE] IBSS_ATTR;
-#define MAX_OFFSETS MAX_NMEAN
-int32_t offset0[MAX_OFFSETS] IBSS_ATTR;
-int32_t offset1[MAX_OFFSETS] IBSS_ATTR;
+int32_t offset0[MAX_OFFSET_SIZE] IBSS_ATTR;
+int32_t offset1[MAX_OFFSET_SIZE] IBSS_ATTR;
+
+int8_t ibuf[MAX_BUFFER_SIZE] IBSS_ATTR;
/* this is the codec entry point */
enum codec_status codec_start(struct codec_api* api)
@@ -48,9 +48,8 @@ enum codec_status codec_start(struct codec_api* api)
uint32_t samplesdone;
uint32_t elapsedtime;
int8_t *buf;
- int cur_chan, consumed, res;
+ int consumed, res, nsamples;
long bytesleft;
- int retval;
/* Generic codec initialisation */
rb = api;
@@ -72,9 +71,8 @@ enum codec_status codec_start(struct codec_api* api)
next_track:
/* Codec initialization */
if (codec_init(api)) {
- LOGF("Shorten: Error initialising codec\n");
- retval = CODEC_ERROR;
- goto exit;
+ LOGF("Shorten: codec_init error\n");
+ return CODEC_ERROR;
}
while (!*ci->taginfo_ready)
@@ -90,12 +88,11 @@ next_track:
}
/* Read the shorten & wave headers */
- buf = ci->request_buffer(&bytesleft, MAX_FRAMESIZE);
+ buf = ci->request_buffer(&bytesleft, MAX_HEADER_SIZE);
res = shorten_init(&sc, (unsigned char *)buf, bytesleft);
if (res < 0) {
- LOGF("shorten_init error: %d\n", res);
- retval = CODEC_ERROR;
- goto exit;
+ LOGF("Shorten: shorten_init error: %d\n", res);
+ return CODEC_ERROR;
}
ci->id3->frequency = sc.sample_rate;
@@ -117,14 +114,13 @@ next_track:
seek_start:
/* The main decoding loop */
- ci->memset(&decoded0, 0, sizeof(int32_t)*MAX_DECODED);
- ci->memset(&decoded1, 0, sizeof(int32_t)*MAX_DECODED);
- ci->memset(&offset0, 0, sizeof(int32_t)*MAX_OFFSETS);
- ci->memset(&offset1, 0, sizeof(int32_t)*MAX_OFFSETS);
+ ci->memset(&decoded0, 0, sizeof(int32_t)*MAX_DECODE_SIZE);
+ ci->memset(&decoded1, 0, sizeof(int32_t)*MAX_DECODE_SIZE);
+ ci->memset(&offset0, 0, sizeof(int32_t)*MAX_OFFSET_SIZE);
+ ci->memset(&offset1, 0, sizeof(int32_t)*MAX_OFFSET_SIZE);
- cur_chan = 0;
samplesdone = 0;
- buf = ci->request_buffer(&bytesleft, MAX_FRAMESIZE);
+ buf = ci->request_buffer(&bytesleft, MAX_BUFFER_SIZE);
while (bytesleft) {
ci->yield();
if (ci->stop_codec || ci->reload_codec) {
@@ -143,51 +139,43 @@ seek_start:
}
/* Decode a frame */
- ci->yield();
- if (cur_chan == 0) {
- res = shorten_decode_frame(&sc, decoded0 + sc.nwrap, offset0,
- (unsigned char *)buf, bytesleft);
+ ci->memcpy(ibuf, buf, bytesleft); /* copy buf to iram */
+ res = shorten_decode_frames(&sc, &nsamples, decoded0, decoded1,
+ offset0, offset1, (unsigned char *)ibuf,
+ bytesleft, ci->yield);
+
+ if (res == FN_ERROR) {
+ LOGF("Shorten: shorten_decode_frames error (%d)\n", samplesdone);
+ return CODEC_ERROR;
} else {
- res = shorten_decode_frame(&sc, decoded1 + sc.nwrap, offset1,
- (unsigned char *)buf, bytesleft);
- }
- cur_chan++;
-
- if (res == 0 && cur_chan == sc.channels) {
- cur_chan = 0;
-
/* Insert decoded samples in pcmbuf */
- ci->yield();
- while (!ci->pcmbuf_insert_split((char*)(decoded0 + sc.nwrap),
- (char*)(decoded1 + sc.nwrap), sc.blocksize*4)) {
+ if (nsamples) {
ci->yield();
+ while (!ci->pcmbuf_insert_split((char*)(decoded0 + sc.nwrap),
+ (char*)(decoded1 + sc.nwrap),
+ 4*nsamples)) {
+ ci->yield();
+ }
+
+ /* Update the elapsed-time indicator */
+ samplesdone += nsamples;
+ elapsedtime = (samplesdone*10) / (sc.sample_rate/100);
+ ci->set_elapsed(elapsedtime);
}
- /* Update the elapsed-time indicator */
- samplesdone += sc.blocksize;
- elapsedtime = (samplesdone*10) / (sc.sample_rate/100);
- ci->set_elapsed(elapsedtime);
- } else if (res == 1) {
/* End of shorten stream...go to next track */
- break;
- } else if (res < 0) {
- LOGF("shorten_decode_frame error: \n", res);
- retval = CODEC_ERROR;
- goto exit;
+ if (res == FN_QUIT)
+ break;
}
consumed = sc.gb.index/8;
ci->advance_buffer(consumed);
+ buf = ci->request_buffer(&bytesleft, MAX_BUFFER_SIZE);
sc.bitindex = sc.gb.index - 8*consumed;
- buf = ci->request_buffer(&bytesleft, MAX_FRAMESIZE);
}
- LOGF("Shorten: Decoded %d samples\n", samplesdone);
-
if (ci->request_next_track())
goto next_track;
- retval = CODEC_OK;
-exit:
- return retval;
+ return CODEC_OK;
}