summaryrefslogtreecommitdiff
path: root/apps/recorder/resize.h
diff options
context:
space:
mode:
authorAndrew Mahone <andrew.mahone@gmail.com>2009-05-26 20:00:47 +0000
committerAndrew Mahone <andrew.mahone@gmail.com>2009-05-26 20:00:47 +0000
commit92785b8f2f20b0fc16de7e771e5eb55fd8497ff8 (patch)
tree3af2399c1e1be8e56cb1b5e6787efd738dad6d52 /apps/recorder/resize.h
parentc4ed88f59302882749023268ac456c415a4b1243 (diff)
downloadrockbox-92785b8f2f20b0fc16de7e771e5eb55fd8497ff8.zip
rockbox-92785b8f2f20b0fc16de7e771e5eb55fd8497ff8.tar.gz
rockbox-92785b8f2f20b0fc16de7e771e5eb55fd8497ff8.tar.bz2
rockbox-92785b8f2f20b0fc16de7e771e5eb55fd8497ff8.tar.xz
Use pre-multiplication in scaler to save one multiply per color component on ARM and Coldfire, at the cost of an extra add/shift in the horizontal scaler to reduce values to a workable range. SH-1 retains the same basic math, as
the use of 16x16->32 hardware multiplication in the earlier scaler stages saves more than removing the 32x32->40 multiply to descale output. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@21091 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/recorder/resize.h')
-rw-r--r--apps/recorder/resize.h100
1 files changed, 50 insertions, 50 deletions
diff --git a/apps/recorder/resize.h b/apps/recorder/resize.h
index 2964fcd..ef32066 100644
--- a/apps/recorder/resize.h
+++ b/apps/recorder/resize.h
@@ -43,67 +43,61 @@
#define MAX_SC_STACK_ALLOC 0
#define HAVE_UPSCALER 1
-#if defined(CPU_COLDFIRE)
-#define SC_MUL_INIT \
- unsigned long macsr_st = coldfire_get_macsr(); \
- coldfire_set_macsr(EMAC_UNSIGNED);
-#define SC_MUL_END coldfire_set_macsr(macsr_st);
-#define SC_MUL(x, y) \
-({ \
- unsigned long t; \
- asm ("mac.l %[a], %[b], %%acc0\n\t" \
- "move.l %%accext01, %[t]\n\t" \
- "move.l #0, %%acc0\n\t" \
- : [t] "=r" (t) : [a] "r" (x), [b] "r" (y)); \
- t; \
-})
-#elif (CONFIG_CPU == SH7034)
-/* multiply two unsigned 32 bit values and return the top 32 bit
- * of the 64 bit result */
-static inline unsigned sc_mul32(unsigned a, unsigned b)
+#if defined(CPU_SH)
+/* perform 32x32->40 unsigned multiply, round off and return top 8 bits */
+static inline uint32_t sc_mul_u32_rnd(uint32_t m, uint32_t n)
{
unsigned r, t1, t2, t3;
-
+ unsigned h = 1 << 15;
+ /* notation:
+ m = ab, n = cd
+ final result is (((a *c) << 32) + ((b * c + a * d) << 16) + b * d +
+ (1 << 31)) >> 32
+ */
asm (
- "swap.w %[a], %[t1] \n" /* t1 = ba */
- "mulu %[t1], %[b] \n" /* a * d */
- "swap.w %[b], %[t3] \n" /* t3 = dc */
- "sts macl, %[t2] \n" /* t2 = a * d */
- "mulu %[t1], %[t3] \n" /* a * c */
- "sts macl, %[r] \n" /* hi = a * c */
- "mulu %[a], %[t3] \n" /* b * c */
- "clrt \n"
- "sts macl, %[t3] \n" /* t3 = b * c */
- "addc %[t2], %[t3] \n" /* t3 += t2, carry -> t2 */
- "movt %[t2] \n"
- "mulu %[a], %[b] \n" /* b * d */
- "mov %[t3], %[t1] \n" /* t1t3 = t2t3 << 16 */
- "xtrct %[t2], %[t1] \n"
- "shll16 %[t3] \n"
- "sts macl, %[t2] \n" /* lo = b * d */
- "clrt \n" /* hi.lo += t1t3 */
- "addc %[t3], %[t2] \n"
- "addc %[t1], %[r] \n"
+ "swap.w %[m], %[t1]\n\t" /* t1 = ba */
+ "mulu %[m], %[n]\n\t" /* b * d */
+ "swap.w %[n], %[t3]\n\t" /* t3 = dc */
+ "sts macl, %[r]\n\t" /* r = b * d */
+ "mulu %[m], %[t3]\n\t" /* b * c */
+ "shlr16 %[r]\n\t"
+ "sts macl, %[t2]\n\t" /* t2 = b * c */
+ "mulu %[t1], %[t3]\n\t" /* a * c */
+ "add %[t2], %[r]\n\t"
+ "sts macl, %[t3]\n\t" /* t3 = a * c */
+ "mulu %[t1], %[n]\n\t" /* a * d */
+ "shll16 %[t3]\n\t"
+ "sts macl, %[t2]\n\t" /* t2 = a * d */
+ "add %[t2], %[r]\n\t"
+ "add %[t3], %[r]\n\t" /* r = ((b * d) >> 16) + (b * c + a * d) +
+ ((a * c) << 16) */
+ "add %[h], %[r]\n\t" /* round result */
+ "shlr16 %[r]\n\t" /* truncate result */
: /* outputs */
[r] "=&r"(r),
[t1]"=&r"(t1),
[t2]"=&r"(t2),
[t3]"=&r"(t3)
: /* inputs */
- [a] "r" (a),
- [b] "r" (b)
+ [h] "r" (h),
+ [m] "r" (m),
+ [n] "r" (n)
);
return r;
}
-#define SC_MUL(x, y) sc_mul32(x, y)
-#define SC_MUL_INIT
-#define SC_MUL_END
+#elif defined(TEST_SH_MATH)
+static inline uint32_t sc_mul_u32_rnd(uint32_t op1, uint32_t op2)
+{
+ uint64_t tmp = (uint64_t)op1 * op2;
+ tmp += 1LU << 31;
+ tmp >>= 32;
+ return tmp;
+}
+#else
+#define SC_OUT(n, c) (((n) + (1 << 23)) >> 24)
#endif
-
-#ifndef SC_MUL
-#define SC_MUL(x, y) ((x) * (uint64_t)(y) >> 32)
-#define SC_MUL_INIT
-#define SC_MUL_END
+#ifndef SC_OUT
+#define SC_OUT(n, c) (sc_mul_u32_rnd(n, (c)->recip))
#endif
struct img_part {
@@ -130,8 +124,14 @@ struct uint32_rgb {
horizontal scaler, and row output
*/
struct scaler_context {
- uint32_t divisor;
- uint32_t round;
+#if defined(CPU_SH) || defined(TEST_SH_MATH)
+ uint32_t recip;
+#else
+ uint32_t h_i_val;
+ uint32_t h_o_val;
+ uint32_t v_i_val;
+ uint32_t v_o_val;
+#endif
struct bitmap *bm;
struct dim *src;
unsigned char *buf;