From 92785b8f2f20b0fc16de7e771e5eb55fd8497ff8 Mon Sep 17 00:00:00 2001
From: Andrew Mahone <andrew.mahone@gmail.com>
Date: Tue, 26 May 2009 20:00:47 +0000
Subject: Use pre-multiplication in scaler to save one multiply per color
 component on ARM and Coldfire, at the cost of an extra add/shift in the
 horizontal scaler to reduce values to a workable range. SH-1 retains the same
 basic math, as the use of 16x16->32 hardware multiplication in the earlier
 scaler stages saves more than removing the 32x32->40 multiply to descale
 output.

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@21091 a1c6a512-1295-4272-9138-f99709370657
---
 apps/plugins/bench_scaler.c            |  4 +--
 apps/plugins/lib/grey_draw.c           |  2 +-
 apps/plugins/pictureflow/pictureflow.c | 49 +++++++++++-----------------------
 3 files changed, 19 insertions(+), 36 deletions(-)

(limited to 'apps/plugins')

diff --git a/apps/plugins/bench_scaler.c b/apps/plugins/bench_scaler.c
index c24807d..246271d 100644
--- a/apps/plugins/bench_scaler.c
+++ b/apps/plugins/bench_scaler.c
@@ -49,8 +49,8 @@ static void output_row_null(uint32_t row, void * row_in,
 #else
     uint32_t *lim = in + ctx->bm->width;
 #endif
-    for (; in < lim; in++)
-        output = SC_MUL(*in + ctx->round, ctx->divisor);
+    while (in < lim)
+        output = SC_OUT(*in++, ctx);
     return;
 }
 
diff --git a/apps/plugins/lib/grey_draw.c b/apps/plugins/lib/grey_draw.c
index 6315ad9..c1e6376 100644
--- a/apps/plugins/lib/grey_draw.c
+++ b/apps/plugins/lib/grey_draw.c
@@ -733,7 +733,7 @@ static void output_row_grey_32(uint32_t row, void * row_in,
     uint32_t *qp = (uint32_t*)row_in;
     uint8_t *dest = (uint8_t*)ctx->bm->data + ctx->bm->width * row;
     for (col = 0; col < ctx->bm->width; col++)
-        *dest++ = SC_MUL((*qp++) + ctx->round,ctx->divisor);
+        *dest++ = SC_OUT(*qp++, ctx);
 }
 
 static unsigned int get_size_grey(struct bitmap *bm)
diff --git a/apps/plugins/pictureflow/pictureflow.c b/apps/plugins/pictureflow/pictureflow.c
index a1ad3d2..bbe2541 100644
--- a/apps/plugins/pictureflow/pictureflow.c
+++ b/apps/plugins/pictureflow/pictureflow.c
@@ -592,25 +592,12 @@ static inline PFreal fcos(int iangle)
     return fsin(iangle + (IANGLE_MAX >> 2));
 }
 
-static inline uint32_t div255(uint32_t val)
+static inline unsigned scale_val(unsigned val, unsigned bits)
 {
-    return ((((val >> 8) + val) >> 8) + val) >> 8;
+    val = val * ((1 << bits) - 1);
+    return ((val >> 8) + val + 128) >> 8;
 }
 
-#define SCALE_VAL(val,out) div255((val) * (out) + 127)
-#define SCALE_VAL32(val, out) \
-({ \
-    uint32_t val__ = (val) * (out); \
-    val__ = ((((val__ >> 8) + val__) >> 8) + val__ + 128) >> 8; \
-    val__; \
-})
-#define SCALE_VAL8(val, out) \
-({ \
-    unsigned val__ = (val) * (out); \
-    val__ = ((val__ >> 8) + val__ + 128) >> 8; \
-    val__; \
-})
-
 static void output_row_8_transposed(uint32_t row, void * row_in,
                                        struct scaler_context *ctx)
 {
@@ -625,9 +612,9 @@ static void output_row_8_transposed(uint32_t row, void * row_in,
     unsigned r, g, b;
     for (; dest < end; dest += ctx->bm->height)
     {
-        r = SCALE_VAL8(qp->red, 31);
-        g = SCALE_VAL8(qp->green, 63);
-        b = SCALE_VAL8((qp++)->blue, 31);
+        r = scale_val(qp->red, 5);
+        g = scale_val(qp->green, 6);
+        b = scale_val((qp++)->blue, 5);
         *dest = LCD_RGBPACK_LCD(r,g,b);
     }
 #endif
@@ -641,19 +628,15 @@ static void output_row_32_transposed(uint32_t row, void * row_in,
 #ifdef USEGSLIB
     uint32_t *qp = (uint32_t*)row_in;
     for (; dest < end; dest += ctx->bm->height)
-        *dest = SC_MUL((*qp++) + ctx->round, ctx->divisor);
+        *dest = SC_OUT(*qp++, ctx);
 #else
     struct uint32_rgb *qp = (struct uint32_rgb*)row_in;
-    uint32_t rb_mul = SCALE_VAL32(ctx->divisor, 31),
-             rb_rnd = SCALE_VAL32(ctx->round, 31),
-             g_mul = SCALE_VAL32(ctx->divisor, 63),
-             g_rnd = SCALE_VAL32(ctx->round, 63);
     int r, g, b;
     for (; dest < end; dest += ctx->bm->height)
     {
-        r = SC_MUL(qp->r + rb_rnd, rb_mul);
-        g = SC_MUL(qp->g + g_rnd, g_mul);
-        b = SC_MUL(qp->b + rb_rnd, rb_mul);
+        r = scale_val(SC_OUT(qp->r, ctx), 5);
+        g = scale_val(SC_OUT(qp->g, ctx), 6);
+        b = scale_val(SC_OUT(qp->b, ctx), 5);
         qp++;
         *dest = LCD_RGBPACK_LCD(r,g,b);
     }
@@ -670,14 +653,14 @@ static void output_row_32_transposed_fromyuv(uint32_t row, void * row_in,
     for (; dest < end; dest += ctx->bm->height)
     {
         unsigned r, g, b, y, u, v;
-        y = SC_MUL(qp->b + ctx->round, ctx->divisor);
-        u = SC_MUL(qp->g + ctx->round, ctx->divisor);
-        v = SC_MUL(qp->r + ctx->round, ctx->divisor);
+        y = SC_OUT(qp->b, ctx);
+        u = SC_OUT(qp->g, ctx);
+        v = SC_OUT(qp->r, ctx);
         qp++;
         yuv_to_rgb(y, u, v, &r, &g, &b);
-        r = (31 * r + (r >> 3) + 127) >> 8;
-        g = (63 * g + (g >> 2) + 127) >> 8;
-        b = (31 * b + (b >> 3) + 127) >> 8;
+        r = scale_val(r, 5);
+        g = scale_val(g, 6);
+        b = scale_val(b, 5);
         *dest = LCD_RGBPACK_LCD(r, g, b);
     }
 }
-- 
cgit v1.1