mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-11-29 07:42:04 +00:00
79f4605242
This gives significantly faster bilinear sampling when we don't have NEON. --HG-- extra : rebase_source : 4473dd220020e65e4bc22b84fa780c7828b357a7
288 lines
10 KiB
Diff
288 lines
10 KiB
Diff
changeset: 94061:73a9b24d863a
|
|
tag: bilin
|
|
tag: qbase
|
|
tag: qtip
|
|
tag: tip
|
|
user: Jeff Muizelaar <jmuizelaar@mozilla.com>
|
|
date: Tue May 15 18:26:16 2012 -0400
|
|
summary: Bug 754364. Add bilinear non-repeat and repeat fast paths. r=joe
|
|
|
|
diff --git a/gfx/cairo/libpixman/src/pixman-fast-path.c b/gfx/cairo/libpixman/src/pixman-fast-path.c
|
|
--- a/gfx/cairo/libpixman/src/pixman-fast-path.c
|
|
+++ b/gfx/cairo/libpixman/src/pixman-fast-path.c
|
|
@@ -1186,16 +1186,228 @@ FAST_NEAREST (8888_565_none, 8888, 0565,
|
|
FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, SRC, PAD)
|
|
FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, SRC, NORMAL)
|
|
FAST_NEAREST (565_565_normal, 0565, 0565, uint16_t, uint16_t, SRC, NORMAL)
|
|
FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, OVER, COVER)
|
|
FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, OVER, NONE)
|
|
FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, OVER, PAD)
|
|
FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, OVER, NORMAL)
|
|
|
|
+static force_inline void
|
|
+scaled_bilinear_scanline_8888_565_OVER (uint16_t * dst,
|
|
+ const uint32_t * mask,
|
|
+ const uint32_t * src_top,
|
|
+ const uint32_t * src_bottom,
|
|
+ int32_t w,
|
|
+ int wt,
|
|
+ int wb,
|
|
+ pixman_fixed_t vx,
|
|
+ pixman_fixed_t unit_x,
|
|
+ pixman_fixed_t max_vx,
|
|
+ pixman_bool_t zero_src)
|
|
+{
|
|
+ while ((w -= 1) >= 0)
|
|
+ {
|
|
+ uint32_t tl = src_top [pixman_fixed_to_int (vx)];
|
|
+ uint32_t tr = src_top [pixman_fixed_to_int (vx) + 1];
|
|
+ uint32_t bl = src_bottom [pixman_fixed_to_int (vx)];
|
|
+ uint32_t br = src_bottom [pixman_fixed_to_int (vx) + 1];
|
|
+ uint32_t src, result;
|
|
+ uint16_t d;
|
|
+ d = *dst;
|
|
+ src = bilinear_interpolation (tl, tr,
|
|
+ bl, br,
|
|
+ interpolation_coord(vx),
|
|
+ wb >> (8 - INTERPOLATION_PRECISION_BITS));
|
|
+ vx += unit_x;
|
|
+ result = over (src, CONVERT_0565_TO_0888 (d));
|
|
+ *dst++ = CONVERT_8888_TO_0565(result);
|
|
+ }
|
|
+}
|
|
+
|
|
+static force_inline void
|
|
+scaled_bilinear_scanline_8888_8888_OVER (uint32_t * dst,
|
|
+ const uint32_t * mask,
|
|
+ const uint32_t * src_top,
|
|
+ const uint32_t * src_bottom,
|
|
+ int32_t w,
|
|
+ int wt,
|
|
+ int wb,
|
|
+ pixman_fixed_t vx,
|
|
+ pixman_fixed_t unit_x,
|
|
+ pixman_fixed_t max_vx,
|
|
+ pixman_bool_t zero_src)
|
|
+{
|
|
+ while ((w -= 1) >= 0)
|
|
+ {
|
|
+ uint32_t tl = src_top [pixman_fixed_to_int (vx)];
|
|
+ uint32_t tr = src_top [pixman_fixed_to_int (vx) + 1];
|
|
+ uint32_t bl = src_bottom [pixman_fixed_to_int (vx)];
|
|
+ uint32_t br = src_bottom [pixman_fixed_to_int (vx) + 1];
|
|
+ uint32_t src;
|
|
+ uint32_t d;
|
|
+ uint32_t result;
|
|
+ d = *dst;
|
|
+ src = bilinear_interpolation (tl, tr,
|
|
+ bl, br,
|
|
+ interpolation_coord(vx),
|
|
+ wb >> (8 - INTERPOLATION_PRECISION_BITS));
|
|
+ vx += unit_x;
|
|
+ *dst++ = over (src, d);
|
|
+ }
|
|
+}
|
|
+
|
|
+#if 1
|
|
+
|
|
+static force_inline void
|
|
+scaled_bilinear_scanline_565_565_SRC (uint16_t * dst,
|
|
+ const uint32_t * mask,
|
|
+ const uint16_t * src_top,
|
|
+ const uint16_t * src_bottom,
|
|
+ int32_t w,
|
|
+ int wt,
|
|
+ int wb,
|
|
+ pixman_fixed_t vx,
|
|
+ pixman_fixed_t unit_x,
|
|
+ pixman_fixed_t max_vx,
|
|
+ pixman_bool_t zero_src)
|
|
+{
|
|
+ while ((w -= 1) >= 0)
|
|
+ {
|
|
+ uint16_t tl = src_top [pixman_fixed_to_int (vx)];
|
|
+ uint16_t tr = src_top [pixman_fixed_to_int (vx) + 1];
|
|
+ uint16_t bl = src_bottom [pixman_fixed_to_int (vx)];
|
|
+ uint16_t br = src_bottom [pixman_fixed_to_int (vx) + 1];
|
|
+ uint32_t d;
|
|
+ d = bilinear_interpolation(CONVERT_0565_TO_8888(tl),
|
|
+ CONVERT_0565_TO_8888(tr),
|
|
+ CONVERT_0565_TO_8888(bl),
|
|
+ CONVERT_0565_TO_8888(br),
|
|
+ interpolation_coord(vx),
|
|
+ wb >> (8 - INTERPOLATION_PRECISION_BITS));
|
|
+ vx += unit_x;
|
|
+ *dst++ = CONVERT_8888_TO_0565(d);
|
|
+ }
|
|
+}
|
|
+
|
|
+#else
|
|
+
|
|
+#define SK_G16_MASK_IN_PLACE 0xfc0
|
|
+
|
|
+static inline uint32_t SkExpand_rgb_16(uint16_t c) {
|
|
+
|
|
+ return ((c & SK_G16_MASK_IN_PLACE) << 16) | (c & ~SK_G16_MASK_IN_PLACE);
|
|
+}
|
|
+
|
|
+/** Compress an expanded value (from SkExpand_rgb_16) back down to a 16-bit
|
|
+ color value. The intermediate computation is 32 bits wide and only its low
|
|
+ 16 bits are valid; the upper 16 bits can contain garbage. Because this
|
|
+ version returns uint16_t, that garbage is truncated away on return, so
|
|
+ callers do not need to mask the result themselves.
|
|
+*/
|
|
+static inline uint16_t SkCompact_rgb_16(uint32_t c) {
|
|
+ return ((c >> 16) & SK_G16_MASK_IN_PLACE) | (c & ~SK_G16_MASK_IN_PLACE);
|
|
+}
|
|
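+// Returns the bilinear blend in expanded form scaled by 32 (5 extra bits): the four weights sum to 32, so the caller shifts right by 5.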
|
|
+static inline uint32_t Filter_565_Expanded(unsigned x, unsigned y,
|
|
+ uint32_t a00, uint32_t a01,
|
|
+ uint32_t a10, uint32_t a11) {
|
|
+ a00 = SkExpand_rgb_16(a00);
|
|
+ a01 = SkExpand_rgb_16(a01);
|
|
+ a10 = SkExpand_rgb_16(a10);
|
|
+ a11 = SkExpand_rgb_16(a11);
|
|
+
|
|
+ int xy = x * y >> 3;
|
|
+ return a00 * (32 - 2*y - 2*x + xy) +
|
|
+ a01 * (2*x - xy) +
|
|
+ a10 * (2*y - xy) +
|
|
+ a11 * xy;
|
|
+}
|
|
+
|
|
+
|
|
+
|
|
+static force_inline void
|
|
+scaled_bilinear_scanline_565_565_SRC (uint16_t * dst,
|
|
+ const uint32_t * mask,
|
|
+ const uint16_t * src_top,
|
|
+ const uint16_t * src_bottom,
|
|
+ int32_t w,
|
|
+ int wt,
|
|
+ int wb,
|
|
+ pixman_fixed_t vx,
|
|
+ pixman_fixed_t unit_x,
|
|
+ pixman_fixed_t max_vx,
|
|
+ pixman_bool_t zero_src)
|
|
+{
|
|
+ while ((w -= 1) >= 0)
|
|
+ {
|
|
+ uint16_t tl = src_top [pixman_fixed_to_int (vx)];
|
|
+ uint16_t tr = src_top [pixman_fixed_to_int (vx) + 1];
|
|
+ uint16_t bl = src_bottom [pixman_fixed_to_int (vx)];
|
|
+ uint16_t br = src_bottom [pixman_fixed_to_int (vx) + 1];
|
|
+
|
|
+ uint32_t tmp = Filter_565_Expanded((vx>>12)&0xf, wb>>4, tl, tr, bl, br);
|
|
+ vx += unit_x;
|
|
+ *dst++ = SkCompact_rgb_16((tmp) >> 5);
|
|
+ }
|
|
+}
|
|
+
|
|
+
|
|
+#endif
|
|
+FAST_BILINEAR_MAINLOOP_COMMON (565_565_cover_SRC,
|
|
+ scaled_bilinear_scanline_565_565_SRC,
|
|
+ uint16_t, uint32_t, uint16_t,
|
|
+ COVER, FLAG_NONE)
|
|
+FAST_BILINEAR_MAINLOOP_COMMON (565_565_pad_SRC,
|
|
+ scaled_bilinear_scanline_565_565_SRC,
|
|
+ uint16_t, uint32_t, uint16_t,
|
|
+ PAD, FLAG_NONE)
|
|
+FAST_BILINEAR_MAINLOOP_COMMON (565_565_none_SRC,
|
|
+ scaled_bilinear_scanline_565_565_SRC,
|
|
+ uint16_t, uint32_t, uint16_t,
|
|
+ NONE, FLAG_NONE)
|
|
+FAST_BILINEAR_MAINLOOP_COMMON (565_565_normal_SRC,
|
|
+ scaled_bilinear_scanline_565_565_SRC,
|
|
+ uint16_t, uint32_t, uint16_t,
|
|
+ NORMAL, FLAG_NONE)
|
|
+
|
|
+FAST_BILINEAR_MAINLOOP_COMMON (8888_565_cover_OVER,
|
|
+ scaled_bilinear_scanline_8888_565_OVER,
|
|
+ uint32_t, uint32_t, uint16_t,
|
|
+ COVER, FLAG_NONE)
|
|
+FAST_BILINEAR_MAINLOOP_COMMON (8888_565_pad_OVER,
|
|
+ scaled_bilinear_scanline_8888_565_OVER,
|
|
+ uint32_t, uint32_t, uint16_t,
|
|
+ PAD, FLAG_NONE)
|
|
+FAST_BILINEAR_MAINLOOP_COMMON (8888_565_none_OVER,
|
|
+ scaled_bilinear_scanline_8888_565_OVER,
|
|
+ uint32_t, uint32_t, uint16_t,
|
|
+ NONE, FLAG_NONE)
|
|
+FAST_BILINEAR_MAINLOOP_COMMON (8888_565_normal_OVER,
|
|
+ scaled_bilinear_scanline_8888_565_OVER,
|
|
+ uint32_t, uint32_t, uint16_t,
|
|
+ NORMAL, FLAG_NONE)
|
|
+
|
|
+FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_cover_OVER,
|
|
+ scaled_bilinear_scanline_8888_8888_OVER,
|
|
+ uint32_t, uint32_t, uint32_t,
|
|
+ COVER, FLAG_NONE)
|
|
+FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_pad_OVER,
|
|
+ scaled_bilinear_scanline_8888_8888_OVER,
|
|
+ uint32_t, uint32_t, uint32_t,
|
|
+ PAD, FLAG_NONE)
|
|
+FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_none_OVER,
|
|
+ scaled_bilinear_scanline_8888_8888_OVER,
|
|
+ uint32_t, uint32_t, uint32_t,
|
|
+ NONE, FLAG_NONE)
|
|
+FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_normal_OVER,
|
|
+ scaled_bilinear_scanline_8888_8888_OVER,
|
|
+ uint32_t, uint32_t, uint32_t,
|
|
+ NORMAL, FLAG_NONE)
|
|
+
|
|
#define REPEAT_MIN_WIDTH 32
|
|
|
|
static void
|
|
fast_composite_tiled_repeat (pixman_implementation_t *imp,
|
|
pixman_composite_info_t *info)
|
|
{
|
|
PIXMAN_COMPOSITE_ARGS (info);
|
|
pixman_composite_func_t func;
|
|
@@ -1960,16 +2172,20 @@ static const pixman_fast_path_t c_fast_p
|
|
PIXMAN_any,
|
|
(FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | FAST_PATH_BITS_IMAGE |
|
|
FAST_PATH_NORMAL_REPEAT),
|
|
PIXMAN_any, 0,
|
|
PIXMAN_any, FAST_PATH_STD_DEST_FLAGS,
|
|
fast_composite_tiled_repeat
|
|
},
|
|
|
|
+ SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, r5g6b5, 565_565),
|
|
+ SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, r5g6b5, 8888_565),
|
|
+ SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, 8888_8888),
|
|
+
|
|
{ PIXMAN_OP_NONE },
|
|
};
|
|
|
|
#ifdef WORDS_BIGENDIAN
|
|
#define A1_FILL_MASK(n, offs) (((1U << (n)) - 1) << (32 - (offs) - (n)))
|
|
#else
|
|
#define A1_FILL_MASK(n, offs) (((1U << (n)) - 1) << (offs))
|
|
#endif
|
|
diff --git a/gfx/cairo/libpixman/src/pixman-inlines.h b/gfx/cairo/libpixman/src/pixman-inlines.h
|
|
--- a/gfx/cairo/libpixman/src/pixman-inlines.h
|
|
+++ b/gfx/cairo/libpixman/src/pixman-inlines.h
|
|
@@ -80,16 +80,21 @@ repeat (pixman_repeat_t repeat, int *c,
|
|
}
|
|
return TRUE;
|
|
}
|
|
|
|
#ifdef MOZ_GFX_OPTIMIZE_MOBILE
|
|
#define LOW_QUALITY_INTERPOLATION
|
|
#endif
|
|
|
|
+#ifdef LOW_QUALITY_INTERPOLATION
|
|
+#define INTERPOLATION_PRECISION_BITS 4
|
|
+#else
|
|
+#define INTERPOLATION_PRECISION_BITS 8
|
|
+#endif
|
|
static force_inline int32_t
|
|
interpolation_coord(pixman_fixed_t t)
|
|
{
|
|
#ifdef LOW_QUALITY_INTERPOLATION
|
|
return (t >> 12) & 0xf;
|
|
#else
|
|
return (t >> 8) & 0xff;
|
|
#endif
|