From f4774bf5bf8fff2b45a6cee8e0b064b560b45809 Mon Sep 17 00:00:00 2001 From: Andree Buschmann Date: Sat, 29 Aug 2009 19:44:27 +0000 Subject: Submit interim version of FS#10565. Performance optimization of atrac3 decoder for ARM. Introduce ASM routines for multiplications and two synthesis loops, refactored parts of synthesis and windowing. Speeds up decoding by a factor of 2.4 on PP502x. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@22548 a1c6a512-1295-4272-9138-f99709370657 --- apps/codecs/libatrac/fixp_math.h | 92 +++++++++++++++++++++++++++++++--------- 1 file changed, 71 insertions(+), 21 deletions(-) (limited to 'apps/codecs/libatrac/fixp_math.h') diff --git a/apps/codecs/libatrac/fixp_math.h b/apps/codecs/libatrac/fixp_math.h index 8a734f6..88cb5e4 100644 --- a/apps/codecs/libatrac/fixp_math.h +++ b/apps/codecs/libatrac/fixp_math.h @@ -10,27 +10,77 @@ /* Fixed point math routines for use in atrac3.c */ -static inline int32_t fixmul16(int32_t x, int32_t y) -{ - int64_t temp; - temp = x; - temp *= y; - - temp >>= 16; - - return (int32_t)temp; -} - -static inline int32_t fixmul31(int32_t x, int32_t y) -{ - int64_t temp; - temp = x; - temp *= y; - - temp >>= 31; //16+31-16 = 31 bits - - return (int32_t)temp; -} +#if defined(CPU_ARM) + #define fixmul16(X,Y) \ + ({ \ + int32_t low; \ + int32_t high; \ + asm volatile ( /* calculates: result = (X*Y)>>16 */ \ + "smull %0,%1,%2,%3 \n\t" /* 64 = 32x32 multiply */ \ + "mov %0, %0, lsr #16 \n\t" /* %0 = %0 >> 16 */ \ + "orr %0, %0, %1, lsl #16 \n\t"/* result = %0 OR (%1 << 16) */ \ + : "=&r"(low), "=&r" (high) \ + : "r"(X),"r"(Y)); \ + low; \ + }) + + #define fixmul31(X,Y) \ + ({ \ + int32_t low; \ + int32_t high; \ + asm volatile ( /* calculates: result = (X*Y)>>31 */ \ + "smull %0,%1,%2,%3 \n\t" /* 64 = 32x32 multiply */ \ + "mov %0, %0, lsr #31 \n\t" /* %0 = %0 >> 31 */ \ + "orr %0, %0, %1, lsl #1 \n\t" /* result = %0 OR (%1 << 1) */ \ + : "=&r"(low), "=&r" (high) \ + : "r"(X),"r"(Y)); \ + low; \ + }) + + #define 
fixmul32(X,Y) \ + ({ \ + int32_t low; \ + int32_t high; \ + asm volatile ( /* calculates: result = (X*Y)>>32 */ \ + "smull %0,%1,%2,%3 \n\t" /* 64 = 32x32 multiply */ \ + : "=&r"(low), "=&r" (high) \ + : "r"(X),"r"(Y)); \ + high; \ + }) +#else + static inline int32_t fixmul16(int32_t x, int32_t y) + { + int64_t temp; + temp = x; + temp *= y; + + temp >>= 16; + + return (int32_t)temp; + } + + static inline int32_t fixmul31(int32_t x, int32_t y) + { + int64_t temp; + temp = x; + temp *= y; + + temp >>= 31; //16+31-16 = 31 bits + + return (int32_t)temp; + } + + static inline int32_t fixmul32(int32_t x, int32_t y) + { + int64_t temp; + temp = x; + temp *= y; + + temp >>= 32; // keep the upper 32 bits of the 64-bit product + + return (int32_t)temp; + } +#endif static inline int32_t fixdiv16(int32_t x, int32_t y) { -- cgit v1.1