bug 488951. Fix YUV conversion to deal with odd-size video frames. rs=roc

--HG--
extra : rebase_source : fdeb49a21a33103fe1591a3399b44cf4107c90d4
This commit is contained in:
Matthew Gregan 2009-05-20 14:46:58 +12:00
parent a45c2d01f1
commit bd64741ff1
11 changed files with 824 additions and 372 deletions

View File

@ -5,12 +5,12 @@ skip-if(MOZ_WIDGET_TOOLKIT=="gtk2") HTTP(..) == aspect-ratio-2b.xhtml aspect-rat
HTTP(..) == aspect-ratio-3a.xhtml aspect-ratio-3-ref.xhtml
HTTP(..) == aspect-ratio-3b.xhtml aspect-ratio-3-ref.xhtml
HTTP(..) == basic-1.xhtml basic-1-ref.html
random HTTP(..) == canvas-1a.xhtml basic-1-ref.html
random HTTP(..) == canvas-1b.xhtml basic-1-ref.html
HTTP(..) == canvas-1a.xhtml basic-1-ref.html
HTTP(..) == canvas-1b.xhtml basic-1-ref.html
== empty-1a.html empty-1-ref.html
== empty-1b.html empty-1-ref.html
random HTTP(..) == object-aspect-ratio-1a.xhtml aspect-ratio-1-ref.html
random HTTP(..) == object-aspect-ratio-1b.xhtml aspect-ratio-1-ref.html
random HTTP(..) == object-aspect-ratio-2a.xhtml aspect-ratio-2-ref.html
random HTTP(..) == object-aspect-ratio-2b.xhtml aspect-ratio-2-ref.html
HTTP(..) == object-aspect-ratio-1a.xhtml aspect-ratio-1-ref.html
HTTP(..) == object-aspect-ratio-1b.xhtml aspect-ratio-1-ref.html
skip-if(MOZ_WIDGET_TOOLKIT=="gtk2") HTTP(..) == object-aspect-ratio-2a.xhtml aspect-ratio-2-ref.html
skip-if(MOZ_WIDGET_TOOLKIT=="gtk2") HTTP(..) == object-aspect-ratio-2b.xhtml aspect-ratio-2-ref.html
skip-if(MOZ_WIDGET_TOOLKIT=="gtk2") HTTP(..) == zoomed-1.xhtml zoomed-1-ref.html

View File

@ -9,9 +9,6 @@ The git commit ID used was b4a7efa06d46596515071490cb255c3548d90371.
The following local patches have been applied:
bug485291_yuv_align: only use optimized YUV routines if video dimensions are a multiple of the
optimized routine's supported alignment.
endian: pick up NSPR's little/big endian defines in oggplay's config.h.
bug481921: fix a crash in oggplay_callback_info_prepare().
@ -22,3 +19,9 @@ bug492436: Fix for that bug cherry picked from liboggplay git commit 4b97ad.
bug493140: Fix for offsets not being used.
aspect-ratio: Adds oggplay_get_video_aspect_ratio, used for bug 480058.
bug488951: Fix for YUV conversion of odd-sized frames. Cherry-picked from
upstream commits dabde8, 683f23, and 4d7581.
bug488951_fix_yuv: Additional fixes to YUV conversion that have not been
upstreamed yet.

View File

@ -1,241 +0,0 @@
diff --git a/media/liboggplay/src/liboggplay/oggplay_yuv2rgb.c b/media/liboggplay/src/liboggplay/oggplay_yuv2rgb.c
--- a/media/liboggplay/src/liboggplay/oggplay_yuv2rgb.c
+++ b/media/liboggplay/src/liboggplay/oggplay_yuv2rgb.c
@@ -55,32 +55,18 @@
#include "oggplay_yuv2rgb_x86.c"
#elif defined(__ppc__) || defined(__ppc64__)
//altivec intristics only working with -maltivec gcc flag,
//but we want runtime altivec detection, hence this has to be
//fixed!
//#include "oggplay_yuv2rgb_altivec.c"
#endif
-/**
- * yuv_convert_fptr type is a function pointer type for
- * the various yuv-rgb converters
- */
-typedef void (*yuv_convert_fptr) (const OggPlayYUVChannels *yuv,
- OggPlayRGBChannels *rgb);
-
-/* it is useless to determine each YUV conversion run
- * the cpu type/featurs, thus we save the conversion function
- * pointers
- */
-static struct OggPlayYUVConverters {
- yuv_convert_fptr yuv2rgba; /**< YUV420 to RGBA */
- yuv_convert_fptr yuv2bgra; /**< YUV420 to BGRA */
- yuv_convert_fptr yuv2argb; /**< YUV420 to ARGB */
-} yuv_conv = {NULL, NULL, NULL};
+static int yuv_initialized;
+static ogg_uint32_t cpu_features;
/**
* vanilla implementation of YUV-to-RGB conversion.
*
* - using table-lookups instead of multiplication
* - avoid CLAMPing by incorporating
*
*/
@@ -89,38 +75,42 @@ static struct OggPlayYUVConverters {
#define prec 15
static const int CoY = (int)(1.164 * (1 << prec) + 0.5);
static const int CoRV = (int)(1.596 * (1 << prec) + 0.5);
static const int CoGU = (int)(0.391 * (1 << prec) + 0.5);
static const int CoGV = (int)(0.813 * (1 << prec) + 0.5);
static const int CoBU = (int)(2.018 * (1 << prec) + 0.5);
-static int CoefsGU[256] = {0};
+static int CoefsGU[256];
static int CoefsGV[256];
static int CoefsBU[256];
static int CoefsRV[256];
static int CoefsY[256];
/**
- * Initialize the lookup-table for vanilla yuv to rgb conversion.
+ * Initialize the lookup-table for vanilla yuv to rgb conversion
+ * and the cpu_features global.
*/
static void
-init_tables()
+init_yuv_converters()
{
int i;
for(i = 0; i < 256; ++i)
{
CoefsGU[i] = -CoGU * (i - 128);
CoefsGV[i] = -CoGV * (i - 128);
CoefsBU[i] = CoBU * (i - 128);
CoefsRV[i] = CoRV * (i - 128);
CoefsY[i] = CoY * (i - 16) + (prec/2);
}
+
+ cpu_features = oc_cpu_flags_get();
+ yuv_initialized = 1;
}
#define VANILLA_YUV2RGB_PIXEL(y, ruv, guv, buv) \
r = (CoefsY[y] + ruv) >> prec; \
g = (CoefsY[y] + guv) >> prec; \
b = (CoefsY[y] + buv) >> prec; \
#define VANILLA_RGBA_OUT(out, r, g, b) \
@@ -164,102 +154,83 @@ out[3] = CLAMP(r);
YUV_CONVERT(yuv420_to_rgba_vanilla, CONVERT(VANILLA_RGBA_OUT), 2, 8, 2, 1)
YUV_CONVERT(yuv420_to_bgra_vanilla, CONVERT(VANILLA_BGRA_OUT), 2, 8, 2, 1)
YUV_CONVERT(yuv420_to_abgr_vanilla, CONVERT(VANILLA_ABGR_OUT), 2, 8, 2, 1)
YUV_CONVERT(yuv420_to_argb_vanilla, CONVERT(VANILLA_ARGB_OUT), 2, 8, 2, 1)
#undef CONVERT
#undef CLEANUP
-/**
- * Initialize the function pointers in yuv_conv.
- *
- * Initialize the function pointers in yuv_conv, based on the
- * the available CPU extensions.
- */
-static void
-init_yuv_converters(void)
-{
- ogg_uint32_t features = 0;
-
- if ( yuv_conv.yuv2rgba == NULL )
- {
- features = oc_cpu_flags_get();
-#if defined(i386) || defined(__x86__) || defined(__x86_64__) || defined(_M_IX86)
-#if defined(ATTRIBUTE_ALIGNED_MAX) && ATTRIBUTE_ALIGNED_MAX >= 16
- if (features & (OC_CPU_X86_SSE2|OC_CPU_X86_MMX|OC_CPU_X86_SSE))
- {
- yuv_conv.yuv2rgba = yuv420_to_rgba_sse2;
- yuv_conv.yuv2bgra = yuv420_to_bgra_sse2;
- yuv_conv.yuv2argb = yuv420_to_argb_sse2;
- return;
- }
- else
-#endif /* ATTRIBUTE_ALIGNED_MAX */
- if (features & (OC_CPU_X86_MMX|OC_CPU_X86_SSE))
- {
- yuv_conv.yuv2rgba = yuv420_to_rgba_mmx;
- yuv_conv.yuv2bgra = yuv420_to_bgra_mmx;
- yuv_conv.yuv2argb = yuv420_to_argb_mmx;
- return;
- }
- else if (features & OC_CPU_X86_MMX)
- {
- yuv_conv.yuv2rgba = yuv420_to_rgba_mmx;
- yuv_conv.yuv2bgra = yuv420_to_bgra_mmx;
- yuv_conv.yuv2argb = yuv420_to_argb_mmx;
- return;
- }
-#elif defined(__ppc__) || defined(__ppc64__)
- if (features & OC_CPU_PPC_ALTIVEC)
- {
- init_tables();
- yuv_conv.yuv2rgba = yuv420_to_abgr_vanilla;
- yuv_conv.yuv2bgra = yuv420_to_argb_vanilla;
- yuv_conv.yuv2argb = yuv420_to_bgra_vanilla;
- return;
- }
-#endif
- /*
- * no CPU extension was found... using vanilla converter, with respect
- * to the endianness of the host
- */
- init_tables();
-#if WORDS_BIGENDIAN || IS_BIG_ENDIAN
- yuv_conv.yuv2rgba = yuv420_to_abgr_vanilla;
- yuv_conv.yuv2bgra = yuv420_to_argb_vanilla;
- yuv_conv.yuv2argb = yuv420_to_bgra_vanilla;
-#else
- yuv_conv.yuv2rgba = yuv420_to_rgba_vanilla;
- yuv_conv.yuv2bgra = yuv420_to_bgra_vanilla;
- yuv_conv.yuv2argb = yuv420_to_argb_vanilla;
-#endif
- }
-}
-
-
void
oggplay_yuv2rgba(const OggPlayYUVChannels* yuv, OggPlayRGBChannels* rgb)
{
- if (yuv_conv.yuv2rgba == NULL)
+ if (!yuv_initialized)
init_yuv_converters();
- yuv_conv.yuv2rgba(yuv, rgb);
+#if defined(i386) || defined(__x86__) || defined(__x86_64__) || defined(_M_IX86)
+#if defined(_MSC_VER) || (defined(ATTRIBUTE_ALIGNED_MAX) && ATTRIBUTE_ALIGNED_MAX >= 16)
+ if (yuv->y_width % 16 == 0 && cpu_features & OC_CPU_X86_SSE2)
+ return yuv420_to_rgba_sse2(yuv, rgb);
+#endif
+ if (yuv->y_width % 8 == 0 && cpu_features & OC_CPU_X86_MMX)
+ return yuv420_to_rgba_mmx(yuv, rgb);
+#elif defined(__ppc__) || defined(__ppc64__)
+ if (yuv->y_width % 16 == 0 && yuv->y_height % 2 == 0 && cpu_features & OC_CPU_PPC_ALTIVEC)
+ return yuv420_to_abgr_vanilla(yuv, rgb);
+#endif
+
+#if WORDS_BIGENDIAN || IS_BIG_ENDIAN
+ return yuv420_to_abgr_vanilla(yuv, rgb);
+#else
+ return yuv420_to_rgba_vanilla(yuv, rgb);
+#endif
}
void
oggplay_yuv2bgra(const OggPlayYUVChannels* yuv, OggPlayRGBChannels * rgb)
{
- if (yuv_conv.yuv2bgra == NULL)
+ if (!yuv_initialized)
init_yuv_converters();
- yuv_conv.yuv2bgra(yuv, rgb);
+#if defined(i386) || defined(__x86__) || defined(__x86_64__) || defined(_M_IX86)
+#if defined(_MSC_VER) || (defined(ATTRIBUTE_ALIGNED_MAX) && ATTRIBUTE_ALIGNED_MAX >= 16)
+ if (yuv->y_width % 16 == 0 && cpu_features & OC_CPU_X86_SSE2)
+ return yuv420_to_bgra_sse2(yuv, rgb);
+#endif
+ if (yuv->y_width % 8 == 0 && cpu_features & OC_CPU_X86_MMX)
+ return yuv420_to_bgra_mmx(yuv, rgb);
+#elif defined(__ppc__) || defined(__ppc64__)
+ if (yuv->y_width % 16 == 0 && yuv->y_height % 2 == 0 && cpu_features & OC_CPU_PPC_ALTIVEC)
+ return yuv420_to_argb_vanilla(yuv, rgb);
+#endif
+
+#if WORDS_BIGENDIAN || IS_BIG_ENDIAN
+ return yuv420_to_argb_vanilla(yuv, rgb);
+#else
+ return yuv420_to_bgra_vanilla(yuv, rgb);
+#endif
}
void
oggplay_yuv2argb(const OggPlayYUVChannels* yuv, OggPlayRGBChannels * rgb)
{
- if (yuv_conv.yuv2argb == NULL)
+ if (!yuv_initialized)
init_yuv_converters();
- yuv_conv.yuv2argb(yuv, rgb);
+#if defined(i386) || defined(__x86__) || defined(__x86_64__) || defined(_M_IX86)
+#if defined(_MSC_VER) || (defined(ATTRIBUTE_ALIGNED_MAX) && ATTRIBUTE_ALIGNED_MAX >= 16)
+ if (yuv->y_width % 16 == 0 && cpu_features & OC_CPU_X86_SSE2)
+ return yuv420_to_argb_sse2(yuv, rgb);
+#endif
+ if (yuv->y_width % 8 == 0 && cpu_features & OC_CPU_X86_MMX)
+ return yuv420_to_argb_mmx(yuv, rgb);
+#elif defined(__ppc__) || defined(__ppc64__)
+ if (yuv->y_width % 16 == 0 && yuv->y_height % 2 == 0 && cpu_features & OC_CPU_PPC_ALTIVEC)
+ return yuv420_to_bgra_vanilla(yuv, rgb);
+#endif
+
+#if WORDS_BIGENDIAN || IS_BIG_ENDIAN
+ return yuv420_to_bgra_vanilla(yuv, rgb);
+#else
+ return yuv420_to_argb_vanilla(yuv, rgb);
+#endif
}

View File

@ -0,0 +1,564 @@
diff --git a/media/liboggplay/src/liboggplay/oggplay_yuv2rgb.c b/media/liboggplay/src/liboggplay/oggplay_yuv2rgb.c
--- a/media/liboggplay/src/liboggplay/oggplay_yuv2rgb.c
+++ b/media/liboggplay/src/liboggplay/oggplay_yuv2rgb.c
@@ -42,76 +42,55 @@
*/
#include "oggplay_private.h"
#include "oggplay_yuv2rgb_template.h"
/* cpu extension detection */
#include "cpu.c"
-/* although we use cpu runtime detection, we still need these
- * macros as there's no way e.g. we could compile a x86 asm code
- * on a ppc machine and vica-versa
+/**
+ * yuv_convert_fptr type is a function pointer type for
+ * the various yuv-rgb converters
*/
-#if defined(i386) || defined(__x86__) || defined(__x86_64__) || defined(_M_IX86)
-#include "oggplay_yuv2rgb_x86.c"
-#elif defined(__ppc__) || defined(__ppc64__)
-//altivec intristics only working with -maltivec gcc flag,
-//but we want runtime altivec detection, hence this has to be
-//fixed!
-//#include "oggplay_yuv2rgb_altivec.c"
-#endif
+typedef void (*yuv_convert_fptr) (const OggPlayYUVChannels *yuv,
+ OggPlayRGBChannels *rgb);
-static int yuv_initialized;
-static ogg_uint32_t cpu_features;
+/* it is useless to determine each YUV conversion run
+ * the cpu type/featurs, thus we save the conversion function
+ * pointers
+ */
+static struct OggPlayYUVConverters {
+ yuv_convert_fptr yuv2rgba; /**< YUV420 to RGBA */
+ yuv_convert_fptr yuv2bgra; /**< YUV420 to BGRA */
+ yuv_convert_fptr yuv2argb; /**< YUV420 to ARGB */
+} yuv_conv = {NULL, NULL, NULL};
/**
* vanilla implementation of YUV-to-RGB conversion.
*
* - using table-lookups instead of multiplication
* - avoid CLAMPing by incorporating
*
*/
-#define CLAMP(v) ((v) > 255 ? 255 : (v) < 0 ? 0 : (v))
-
#define prec 15
static const int CoY = (int)(1.164 * (1 << prec) + 0.5);
static const int CoRV = (int)(1.596 * (1 << prec) + 0.5);
static const int CoGU = (int)(0.391 * (1 << prec) + 0.5);
static const int CoGV = (int)(0.813 * (1 << prec) + 0.5);
static const int CoBU = (int)(2.018 * (1 << prec) + 0.5);
-static int CoefsGU[256];
+static int CoefsGU[256] = {0};
static int CoefsGV[256];
static int CoefsBU[256];
static int CoefsRV[256];
static int CoefsY[256];
-/**
- * Initialize the lookup-table for vanilla yuv to rgb conversion
- * and the cpu_features global.
- */
-static void
-init_yuv_converters()
-{
- int i;
-
- for(i = 0; i < 256; ++i)
- {
- CoefsGU[i] = -CoGU * (i - 128);
- CoefsGV[i] = -CoGV * (i - 128);
- CoefsBU[i] = CoBU * (i - 128);
- CoefsRV[i] = CoRV * (i - 128);
- CoefsY[i] = CoY * (i - 16) + (prec/2);
- }
-
- cpu_features = oc_cpu_flags_get();
- yuv_initialized = 1;
-}
+#define CLAMP(v) ((v) > 255 ? 255 : (v) < 0 ? 0 : (v))
#define VANILLA_YUV2RGB_PIXEL(y, ruv, guv, buv) \
r = (CoefsY[y] + ruv) >> prec; \
g = (CoefsY[y] + guv) >> prec; \
b = (CoefsY[y] + buv) >> prec; \
#define VANILLA_RGBA_OUT(out, r, g, b) \
out[0] = CLAMP(r); \
@@ -132,105 +111,155 @@ out[2] = CLAMP(g); \
out[3] = CLAMP(b);
#define VANILLA_ABGR_OUT(out, r, g, b) \
out[0] = 255; \
out[1] = CLAMP(b); \
out[2] = CLAMP(g); \
out[3] = CLAMP(r);
-/* yuv420p -> */
#define LOOKUP_COEFFS int ruv = CoefsRV[*pv]; \
int guv = CoefsGU[*pu] + CoefsGV[*pv]; \
int buv = CoefsBU[*pu]; \
int r, g, b;
+/* yuv420p -> */
#define CONVERT(OUTPUT_FUNC) LOOKUP_COEFFS \
- VANILLA_YUV2RGB_PIXEL(py[0], ruv, guv, buv);\
- OUTPUT_FUNC(dst, r, g, b); \
- VANILLA_YUV2RGB_PIXEL(py[1], ruv, guv, buv);\
- OUTPUT_FUNC((dst+4), r, g, b);
+ VANILLA_YUV2RGB_PIXEL(py[0], ruv, guv, buv) \
+ OUTPUT_FUNC(dst, r, g, b) \
+ VANILLA_YUV2RGB_PIXEL(py[1], ruv, guv, buv) \
+ OUTPUT_FUNC((dst+4), r, g, b)
#define CLEANUP
-YUV_CONVERT(yuv420_to_rgba_vanilla, CONVERT(VANILLA_RGBA_OUT), 2, 8, 2, 1)
-YUV_CONVERT(yuv420_to_bgra_vanilla, CONVERT(VANILLA_BGRA_OUT), 2, 8, 2, 1)
-YUV_CONVERT(yuv420_to_abgr_vanilla, CONVERT(VANILLA_ABGR_OUT), 2, 8, 2, 1)
-YUV_CONVERT(yuv420_to_argb_vanilla, CONVERT(VANILLA_ARGB_OUT), 2, 8, 2, 1)
+YUV_CONVERT(yuv420_to_rgba_vanilla, CONVERT(VANILLA_RGBA_OUT), VANILLA_RGBA_OUT, 2, 8, 2, 1)
+YUV_CONVERT(yuv420_to_bgra_vanilla, CONVERT(VANILLA_BGRA_OUT), VANILLA_BGRA_OUT, 2, 8, 2, 1)
+YUV_CONVERT(yuv420_to_abgr_vanilla, CONVERT(VANILLA_ABGR_OUT), VANILLA_ABGR_OUT, 2, 8, 2, 1)
+YUV_CONVERT(yuv420_to_argb_vanilla, CONVERT(VANILLA_ARGB_OUT), VANILLA_ARGB_OUT, 2, 8, 2, 1)
#undef CONVERT
#undef CLEANUP
+/* although we use cpu runtime detection, we still need these
+ * macros as there's no way e.g. we could compile a x86 asm code
+ * on a ppc machine and vica-versa
+ */
+#if defined(i386) || defined(__x86__) || defined(__x86_64__) || defined(_M_IX86)
+#include "x86/oggplay_yuv2rgb_x86.c"
+#elif defined(__ppc__) || defined(__ppc64__)
+//altivec intristics only working with -maltivec gcc flag,
+//but we want runtime altivec detection, hence this has to be
+//fixed!
+//#include "oggplay_yuv2rgb_altivec.c"
+#endif
+
+
+/**
+ * Initialize the lookup-table for vanilla yuv to rgb conversion.
+ */
+static void
+init_vanilla_coeffs (void)
+{
+ int i;
+
+ for(i = 0; i < 256; ++i)
+ {
+ CoefsGU[i] = -CoGU * (i - 128);
+ CoefsGV[i] = -CoGV * (i - 128);
+ CoefsBU[i] = CoBU * (i - 128);
+ CoefsRV[i] = CoRV * (i - 128);
+ CoefsY[i] = CoY * (i - 16) + (prec/2);
+ }
+}
+
+/**
+ * Initialize the function pointers in yuv_conv.
+ *
+ * Initialize the function pointers in yuv_conv, based on the
+ * the available CPU extensions.
+ */
+static void
+init_yuv_converters(void)
+{
+ ogg_uint32_t features = 0;
+
+ if ( yuv_conv.yuv2rgba == NULL )
+ {
+ init_vanilla_coeffs ();
+ features = oc_cpu_flags_get();
+#if defined(i386) || defined(__x86__) || defined(__x86_64__) || defined(_M_IX86)
+#if defined(ATTRIBUTE_ALIGNED_MAX) && ATTRIBUTE_ALIGNED_MAX >= 16
+ if (features & OC_CPU_X86_SSE2)
+ {
+ yuv_conv.yuv2rgba = yuv420_to_rgba_sse2;
+ yuv_conv.yuv2bgra = yuv420_to_bgra_sse2;
+ yuv_conv.yuv2argb = yuv420_to_argb_sse2;
+ return;
+ }
+ else
+#endif /* ATTRIBUTE_ALIGNED_MAX */
+ if (features & OC_CPU_X86_MMXEXT)
+ {
+ yuv_conv.yuv2rgba = yuv420_to_rgba_sse;
+ yuv_conv.yuv2bgra = yuv420_to_bgra_sse;
+ yuv_conv.yuv2argb = yuv420_to_argb_sse;
+ return;
+ }
+ else if (features & OC_CPU_X86_MMX)
+ {
+ yuv_conv.yuv2rgba = yuv420_to_rgba_mmx;
+ yuv_conv.yuv2bgra = yuv420_to_bgra_mmx;
+ yuv_conv.yuv2argb = yuv420_to_argb_mmx;
+ return;
+ }
+#elif defined(__ppc__) || defined(__ppc64__)
+ if (features & OC_CPU_PPC_ALTIVEC)
+ {
+ yuv_conv.yuv2rgba = yuv420_to_abgr_vanilla;
+ yuv_conv.yuv2bgra = yuv420_to_argb_vanilla;
+ yuv_conv.yuv2argb = yuv420_to_bgra_vanilla;
+ return;
+ }
+#endif
+ /*
+ * no CPU extension was found... using vanilla converter, with respect
+ * to the endianness of the host
+ */
+#if WORDS_BIGENDIAN || IS_BIG_ENDIAN
+ yuv_conv.yuv2rgba = yuv420_to_abgr_vanilla;
+ yuv_conv.yuv2bgra = yuv420_to_argb_vanilla;
+ yuv_conv.yuv2argb = yuv420_to_bgra_vanilla;
+#else
+ yuv_conv.yuv2rgba = yuv420_to_rgba_vanilla;
+ yuv_conv.yuv2bgra = yuv420_to_bgra_vanilla;
+ yuv_conv.yuv2argb = yuv420_to_argb_vanilla;
+#endif
+ }
+}
+
+
void
oggplay_yuv2rgba(const OggPlayYUVChannels* yuv, OggPlayRGBChannels* rgb)
{
- if (!yuv_initialized)
+ if (yuv_conv.yuv2rgba == NULL)
init_yuv_converters();
-#if defined(i386) || defined(__x86__) || defined(__x86_64__) || defined(_M_IX86)
-#if defined(_MSC_VER) || (defined(ATTRIBUTE_ALIGNED_MAX) && ATTRIBUTE_ALIGNED_MAX >= 16)
- if (yuv->y_width % 16 == 0 && cpu_features & OC_CPU_X86_SSE2)
- return yuv420_to_rgba_sse2(yuv, rgb);
-#endif
- if (yuv->y_width % 8 == 0 && cpu_features & OC_CPU_X86_MMX)
- return yuv420_to_rgba_mmx(yuv, rgb);
-#elif defined(__ppc__) || defined(__ppc64__)
- if (yuv->y_width % 16 == 0 && yuv->y_height % 2 == 0 && cpu_features & OC_CPU_PPC_ALTIVEC)
- return yuv420_to_abgr_vanilla(yuv, rgb);
-#endif
-
-#if WORDS_BIGENDIAN || IS_BIG_ENDIAN
- return yuv420_to_abgr_vanilla(yuv, rgb);
-#else
- return yuv420_to_rgba_vanilla(yuv, rgb);
-#endif
+ yuv_conv.yuv2rgba(yuv, rgb);
}
void
oggplay_yuv2bgra(const OggPlayYUVChannels* yuv, OggPlayRGBChannels * rgb)
{
- if (!yuv_initialized)
+ if (yuv_conv.yuv2bgra == NULL)
init_yuv_converters();
-#if defined(i386) || defined(__x86__) || defined(__x86_64__) || defined(_M_IX86)
-#if defined(_MSC_VER) || (defined(ATTRIBUTE_ALIGNED_MAX) && ATTRIBUTE_ALIGNED_MAX >= 16)
- if (yuv->y_width % 16 == 0 && cpu_features & OC_CPU_X86_SSE2)
- return yuv420_to_bgra_sse2(yuv, rgb);
-#endif
- if (yuv->y_width % 8 == 0 && cpu_features & OC_CPU_X86_MMX)
- return yuv420_to_bgra_mmx(yuv, rgb);
-#elif defined(__ppc__) || defined(__ppc64__)
- if (yuv->y_width % 16 == 0 && yuv->y_height % 2 == 0 && cpu_features & OC_CPU_PPC_ALTIVEC)
- return yuv420_to_argb_vanilla(yuv, rgb);
-#endif
-
-#if WORDS_BIGENDIAN || IS_BIG_ENDIAN
- return yuv420_to_argb_vanilla(yuv, rgb);
-#else
- return yuv420_to_bgra_vanilla(yuv, rgb);
-#endif
+ yuv_conv.yuv2bgra(yuv, rgb);
}
void
oggplay_yuv2argb(const OggPlayYUVChannels* yuv, OggPlayRGBChannels * rgb)
{
- if (!yuv_initialized)
+ if (yuv_conv.yuv2argb == NULL)
init_yuv_converters();
-#if defined(i386) || defined(__x86__) || defined(__x86_64__) || defined(_M_IX86)
-#if defined(_MSC_VER) || (defined(ATTRIBUTE_ALIGNED_MAX) && ATTRIBUTE_ALIGNED_MAX >= 16)
- if (yuv->y_width % 16 == 0 && cpu_features & OC_CPU_X86_SSE2)
- return yuv420_to_argb_sse2(yuv, rgb);
-#endif
- if (yuv->y_width % 8 == 0 && cpu_features & OC_CPU_X86_MMX)
- return yuv420_to_argb_mmx(yuv, rgb);
-#elif defined(__ppc__) || defined(__ppc64__)
- if (yuv->y_width % 16 == 0 && yuv->y_height % 2 == 0 && cpu_features & OC_CPU_PPC_ALTIVEC)
- return yuv420_to_bgra_vanilla(yuv, rgb);
-#endif
-
-#if WORDS_BIGENDIAN || IS_BIG_ENDIAN
- return yuv420_to_bgra_vanilla(yuv, rgb);
-#else
- return yuv420_to_argb_vanilla(yuv, rgb);
-#endif
+ yuv_conv.yuv2argb(yuv, rgb);
}
diff --git a/media/liboggplay/src/liboggplay/oggplay_yuv2rgb_template.h b/media/liboggplay/src/liboggplay/oggplay_yuv2rgb_template.h
--- a/media/liboggplay/src/liboggplay/oggplay_yuv2rgb_template.h
+++ b/media/liboggplay/src/liboggplay/oggplay_yuv2rgb_template.h
@@ -8,55 +8,80 @@
#define restrict __restrict__
#endif
#endif
/**
* Template for YUV to RGB conversion
*
* @param FUNC function name
- * @param CONVERT a macro that defines
+ * @param CONVERT a macro that defines the actual conversion function
+ * @param VANILLA_OUT per-pixel output macro used for the leftover pixels when y_width is not a multiple of NUM_PIXELS
* @param NUM_PIXELS number of pixels processed in one iteration
* @param OUT_SHIFT number of pixels to shift after one iteration in rgb data stream
* @param Y_SHIFT number of pixels to shift after one iteration in Y data stream
* @param UV_SHIFT
*/
-#define YUV_CONVERT(FUNC, CONVERT, NUM_PIXELS, OUT_SHIFT, Y_SHIFT, UV_SHIFT)\
+#define YUV_CONVERT(FUNC, CONVERT, VANILLA_OUT, NUM_PIXELS, OUT_SHIFT, Y_SHIFT, UV_SHIFT)\
static void \
(FUNC)(const OggPlayYUVChannels* yuv, OggPlayRGBChannels* rgb) \
{ \
- int i,j, w, h; \
+ int i,j, w, h, r; \
unsigned char* restrict ptry; \
unsigned char* restrict ptru; \
unsigned char* restrict ptrv; \
unsigned char* restrict ptro; \
unsigned char *dst, *py, *pu, *pv; \
\
ptro = rgb->ptro; \
ptry = yuv->ptry; \
ptru = yuv->ptru; \
ptrv = yuv->ptrv; \
\
- w = yuv->y_width/NUM_PIXELS; \
+ w = yuv->y_width / NUM_PIXELS; \
h = yuv->y_height; \
+ r = yuv->y_width % NUM_PIXELS; \
for (i = 0; i < h; ++i) \
{ \
py = ptry; \
pu = ptru; \
pv = ptrv; \
dst = ptro; \
for (j = 0; j < w; ++j, \
dst += OUT_SHIFT, \
py += Y_SHIFT, \
pu += UV_SHIFT, \
pv += UV_SHIFT) \
{ \
/* use the given conversion function */ \
CONVERT \
} \
+ /* \
+ * the video frame is not the multiple of NUM_PIXELS, \
+ * thus we have to deal with remaning pixels using \
+ * vanilla implementation. \
+ */ \
+ if (r) { \
+ for \
+ ( \
+ j=(yuv->y_width-r); j < yuv->y_width; \
+ ++j, \
+ dst += 4, \
+ py += 1 \
+ ) \
+ { \
+ LOOKUP_COEFFS \
+ VANILLA_YUV2RGB_PIXEL(py[0], ruv, guv, buv) \
+ VANILLA_OUT(dst, r, g, b) \
+ if (!(j%2)) { \
+ pu += 1; pv += 1; \
+ } \
+ } \
+ } \
+ \
ptro += rgb->rgb_width * 4; \
ptry += yuv->y_width; \
\
if (i & 0x1) \
{ \
ptru += yuv->uv_width; \
ptrv += yuv->uv_width; \
} \
diff --git a/media/liboggplay/src/liboggplay/oggplay_yuv2rgb_x86.c b/media/liboggplay/src/liboggplay/x86/oggplay_yuv2rgb_x86.c
rename from media/liboggplay/src/liboggplay/oggplay_yuv2rgb_x86.c
rename to media/liboggplay/src/liboggplay/x86/oggplay_yuv2rgb_x86.c
--- a/media/liboggplay/src/liboggplay/oggplay_yuv2rgb_x86.c
+++ b/media/liboggplay/src/liboggplay/x86/oggplay_yuv2rgb_x86.c
@@ -28,16 +28,19 @@
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
* YUV to RGB conversion using x86 CPU extensions
*/
+#include "oggplay_private.h"
+#include "oggplay_yuv2rgb_template.h"
+#include "cpu.h"
#if defined(_MSC_VER)
#include "yuv2rgb_x86_vs.h"
#elif defined(__GNUC__)
#include "yuv2rgb_x86.h"
#endif
typedef union
@@ -78,59 +81,72 @@ static const simd_t simd_table[9] = {
{{ALFA, ALFA}}
};
/**
* the conversion functions using MMX instructions
*/
/* template for the MMX conversion functions */
-#define YUV_CONVERT_MMX(FUNC, CONVERT) YUV_CONVERT(FUNC, CONVERT, 8, 32, 8, 4)
+#define YUV_CONVERT_MMX(FUNC, CONVERT, CONV_BY_PIXEL) YUV_CONVERT(FUNC, CONVERT, CONV_BY_PIXEL, 8, 32, 8, 4)
+
#define CLEANUP emms()
#define OUT_RGBA_32 OUTPUT_RGBA_32(movq, mm, 8, 16, 24)
#define OUT_ARGB_32 OUTPUT_ARGB_32(movq, mm, 8, 16, 24)
#define OUT_BGRA_32 OUTPUT_BGRA_32(movq, mm, 8, 16, 24)
#define MOVNTQ MMX_MOVNTQ
/* yuv420 -> */
#define CONVERT(OUTPUT_FUNC) LOAD_YUV_PLANAR_2(movq, mm) \
- YUV_2_RGB(movq, mm) \
- OUTPUT_FUNC
+ YUV_2_RGB(movq, mm) \
+ OUTPUT_FUNC
-YUV_CONVERT_MMX(yuv420_to_rgba_mmx, CONVERT(OUT_RGBA_32))
-YUV_CONVERT_MMX(yuv420_to_bgra_mmx, CONVERT(OUT_BGRA_32))
-YUV_CONVERT_MMX(yuv420_to_argb_mmx, CONVERT(OUT_ARGB_32))
+YUV_CONVERT_MMX(yuv420_to_rgba_mmx, CONVERT(OUT_RGBA_32), VANILLA_RGBA_OUT)
+YUV_CONVERT_MMX(yuv420_to_bgra_mmx, CONVERT(OUT_BGRA_32), VANILLA_BGRA_OUT)
+YUV_CONVERT_MMX(yuv420_to_argb_mmx, CONVERT(OUT_ARGB_32), VANILLA_ARGB_OUT)
+
+#undef MOVNTQ
+
+
+/* template for the SSE conversion functions */
+#define MOVNTQ SSE_MOVNTQ
+
+YUV_CONVERT_MMX(yuv420_to_rgba_sse, CONVERT(OUT_RGBA_32), VANILLA_RGBA_OUT)
+YUV_CONVERT_MMX(yuv420_to_bgra_sse, CONVERT(OUT_BGRA_32), VANILLA_BGRA_OUT)
+YUV_CONVERT_MMX(yuv420_to_argb_sse, CONVERT(OUT_ARGB_32), VANILLA_ARGB_OUT)
+
#undef CONVERT
-
#undef CLEANUP
#undef OUT_RGBA_32
#undef OUT_ARGB_32
#undef OUT_BGRA_32
#undef MOVNTQ
+
/**
* the conversion functions using SSE2 instructions
*/
/* template for the SSE2 conversion functions */
-#define YUV_CONVERT_SSE2(FUNC, CONVERT) YUV_CONVERT(FUNC, CONVERT, 16, 64, 16, 8)
+#define YUV_CONVERT_SSE2(FUNC, CONVERT, CONV_BY_PIX) YUV_CONVERT(FUNC, CONVERT, CONV_BY_PIX, 16, 64, 16, 8)
+
#define OUT_RGBA_32 OUTPUT_RGBA_32(movdqa, xmm, 16, 32, 48)
#define OUT_ARGB_32 OUTPUT_ARGB_32(movdqa, xmm, 16, 32, 48)
#define OUT_BGRA_32 OUTPUT_BGRA_32(movdqa, xmm, 16, 32, 48)
#define MOVNTQ SSE2_MOVNTQ
#define CLEANUP
/* yuv420 -> */
#define CONVERT(OUTPUT_FUNC) LOAD_YUV_PLANAR_2(movdqu, xmm) \
- YUV_2_RGB(movdqa, xmm) \
- OUTPUT_FUNC
+ YUV_2_RGB(movdqa, xmm) \
+ OUTPUT_FUNC
-YUV_CONVERT_SSE2(yuv420_to_rgba_sse2, CONVERT(OUT_RGBA_32))
-YUV_CONVERT_SSE2(yuv420_to_bgra_sse2, CONVERT(OUT_BGRA_32))
-YUV_CONVERT_SSE2(yuv420_to_argb_sse2, CONVERT(OUT_ARGB_32))
+YUV_CONVERT_SSE2(yuv420_to_rgba_sse2, CONVERT(OUT_RGBA_32), VANILLA_RGBA_OUT)
+YUV_CONVERT_SSE2(yuv420_to_bgra_sse2, CONVERT(OUT_BGRA_32), VANILLA_BGRA_OUT)
+YUV_CONVERT_SSE2(yuv420_to_argb_sse2, CONVERT(OUT_ARGB_32), VANILLA_ARGB_OUT)
+
#undef CONVERT
-
#undef OUT_RGBA_32
#undef OUT_ARGB_32
#undef OUT_BGRA_32
#undef MOVNTQ
-#undef CLEANUP
+#undef CLEANUP
diff --git a/media/liboggplay/src/liboggplay/yuv2rgb_x86.h b/media/liboggplay/src/liboggplay/x86/yuv2rgb_x86.h
rename from media/liboggplay/src/liboggplay/yuv2rgb_x86.h
rename to media/liboggplay/src/liboggplay/x86/yuv2rgb_x86.h
--- a/media/liboggplay/src/liboggplay/yuv2rgb_x86.h
+++ b/media/liboggplay/src/liboggplay/x86/yuv2rgb_x86.h
@@ -3,17 +3,18 @@
# ifdef ATTRIBUTE_ALIGNED_MAX
#define ATTR_ALIGN(align) __attribute__ ((__aligned__ ((ATTRIBUTE_ALIGNED_MAX < align) ? ATTRIBUTE_ALIGNED_MAX : align)))
# else
#define ATTR_ALIGN(align)
# endif
#define emms() __asm__ __volatile__ ( "emms;" );
-#define MMX_MOVNTQ "movntq"
+#define MMX_MOVNTQ "movq"
+#define SSE_MOVNTQ "movntq"
#define SSE2_MOVNTQ "movdqu"
#define YUV_2_RGB(mov_instr, reg_type) \
__asm__ __volatile__ ( \
"punpcklbw %%"#reg_type"4, %%"#reg_type"0;" /* mm0 = u3 u2 u1 u0 */\
"punpcklbw %%"#reg_type"4, %%"#reg_type"1;" /* mm1 = v3 v2 v1 v0 */\
"psubsw (%0), %%"#reg_type"0;" /* u -= 128 */\
"psubsw (%0), %%"#reg_type"1;" /* v -= 128 */\
diff --git a/media/liboggplay/src/liboggplay/yuv2rgb_x86_vs.h b/media/liboggplay/src/liboggplay/x86/yuv2rgb_x86_vs.h
rename from media/liboggplay/src/liboggplay/yuv2rgb_x86_vs.h
rename to media/liboggplay/src/liboggplay/x86/yuv2rgb_x86_vs.h
--- a/media/liboggplay/src/liboggplay/yuv2rgb_x86_vs.h
+++ b/media/liboggplay/src/liboggplay/x86/yuv2rgb_x86_vs.h
@@ -1,15 +1,16 @@
#ifndef __OGGPLAY_YUV2RGB_VS_H__
#define __OGGPLAY_YUV2RGB_VS_H__
#define ATTR_ALIGN(_align) __declspec(align(_align))
#define emms() __asm emms
-#define MMX_MOVNTQ movntq
+#define MMX_MOVNTQ movq
+#define SSE_MOVNTQ movntq
#define SSE2_MOVNTQ movdqu
#define LOAD_YUV_PLANAR_2(mov_instr, reg_type) \
__asm { \
__asm mov eax, py \
__asm mov edx, pu \
__asm mov_instr reg_type##6, [eax] \
__asm mov_instr reg_type##0, [edx] \

View File

@ -0,0 +1,43 @@
diff --git a/media/liboggplay/src/liboggplay/oggplay_yuv2rgb_template.h b/media/liboggplay/src/liboggplay/oggplay_yuv2rgb_template.h
--- a/media/liboggplay/src/liboggplay/oggplay_yuv2rgb_template.h
+++ b/media/liboggplay/src/liboggplay/oggplay_yuv2rgb_template.h
@@ -55,28 +55,38 @@ static void
CONVERT \
} \
/* \
* the video frame is not the multiple of NUM_PIXELS, \
* thus we have to deal with remaning pixels using \
* vanilla implementation. \
*/ \
if (r) { \
+ /* if there's only 1 remaining pixel to process \
+ and the luma width is odd, the for loop above \
+ has already advanced pu and pv too far. */ \
+ if (r==1 && yuv->y_width&1) { \
+ pu -= 1; pv -= 1; \
+ } \
for \
( \
j=(yuv->y_width-r); j < yuv->y_width; \
++j, \
dst += 4, \
py += 1 \
) \
{ \
LOOKUP_COEFFS \
VANILLA_YUV2RGB_PIXEL(py[0], ruv, guv, buv) \
VANILLA_OUT(dst, r, g, b) \
- if (!(j%2)) { \
+ /* advance chroma ptrs every second sample, except \
+ when the luma width is odd, in which case the \
+ chroma samples are truncated and we must reuse \
+ the previous chroma sample */ \
+ if (j%2 && !(j+1==yuv->y_width-1 && yuv->y_width&1)) { \
pu += 1; pv += 1; \
} \
} \
} \
\
ptro += rgb->rgb_width * 4; \
ptry += yuv->y_width; \
\

View File

@ -47,21 +47,22 @@
/* cpu extension detection */
#include "cpu.c"
/* although we use cpu runtime detection, we still need these
* macros as there's no way e.g. we could compile a x86 asm code
* on a ppc machine and vica-versa
/**
* yuv_convert_fptr type is a function pointer type for
* the various yuv-rgb converters
*/
#if defined(i386) || defined(__x86__) || defined(__x86_64__) || defined(_M_IX86)
#include "oggplay_yuv2rgb_x86.c"
#elif defined(__ppc__) || defined(__ppc64__)
//altivec intristics only working with -maltivec gcc flag,
//but we want runtime altivec detection, hence this has to be
//fixed!
//#include "oggplay_yuv2rgb_altivec.c"
#endif
typedef void (*yuv_convert_fptr) (const OggPlayYUVChannels *yuv,
OggPlayRGBChannels *rgb);
static int yuv_initialized;
static ogg_uint32_t cpu_features;
/* it is useless to determine each YUV conversion run
* the cpu type/featurs, thus we save the conversion function
* pointers
*/
static struct OggPlayYUVConverters {
yuv_convert_fptr yuv2rgba; /**< YUV420 to RGBA */
yuv_convert_fptr yuv2bgra; /**< YUV420 to BGRA */
yuv_convert_fptr yuv2argb; /**< YUV420 to ARGB */
} yuv_conv = {NULL, NULL, NULL};
/**
* vanilla implementation of YUV-to-RGB conversion.
@ -71,8 +72,6 @@ static ogg_uint32_t cpu_features;
*
*/
#define CLAMP(v) ((v) > 255 ? 255 : (v) < 0 ? 0 : (v))
#define prec 15
static const int CoY = (int)(1.164 * (1 << prec) + 0.5);
static const int CoRV = (int)(1.596 * (1 << prec) + 0.5);
@ -80,33 +79,13 @@ static const int CoGU = (int)(0.391 * (1 << prec) + 0.5);
static const int CoGV = (int)(0.813 * (1 << prec) + 0.5);
static const int CoBU = (int)(2.018 * (1 << prec) + 0.5);
static int CoefsGU[256];
static int CoefsGU[256] = {0};
static int CoefsGV[256];
static int CoefsBU[256];
static int CoefsRV[256];
static int CoefsY[256];
/**
* Initialize the lookup-table for vanilla yuv to rgb conversion
* and the cpu_features global.
*/
static void
init_yuv_converters()
{
int i;
for(i = 0; i < 256; ++i)
{
CoefsGU[i] = -CoGU * (i - 128);
CoefsGV[i] = -CoGV * (i - 128);
CoefsBU[i] = CoBU * (i - 128);
CoefsRV[i] = CoRV * (i - 128);
CoefsY[i] = CoY * (i - 16) + (prec/2);
}
cpu_features = oc_cpu_flags_get();
yuv_initialized = 1;
}
#define CLAMP(v) ((v) > 255 ? 255 : (v) < 0 ? 0 : (v))
#define VANILLA_YUV2RGB_PIXEL(y, ruv, guv, buv) \
r = (CoefsY[y] + ruv) >> prec; \
@ -137,100 +116,150 @@ out[1] = CLAMP(b); \
out[2] = CLAMP(g); \
out[3] = CLAMP(r);
/* yuv420p -> */
#define LOOKUP_COEFFS int ruv = CoefsRV[*pv]; \
int guv = CoefsGU[*pu] + CoefsGV[*pv]; \
int buv = CoefsBU[*pu]; \
int r, g, b;
/* yuv420p -> */
#define CONVERT(OUTPUT_FUNC) LOOKUP_COEFFS \
VANILLA_YUV2RGB_PIXEL(py[0], ruv, guv, buv);\
OUTPUT_FUNC(dst, r, g, b); \
VANILLA_YUV2RGB_PIXEL(py[1], ruv, guv, buv);\
OUTPUT_FUNC((dst+4), r, g, b);
VANILLA_YUV2RGB_PIXEL(py[0], ruv, guv, buv) \
OUTPUT_FUNC(dst, r, g, b) \
VANILLA_YUV2RGB_PIXEL(py[1], ruv, guv, buv) \
OUTPUT_FUNC((dst+4), r, g, b)
#define CLEANUP
YUV_CONVERT(yuv420_to_rgba_vanilla, CONVERT(VANILLA_RGBA_OUT), 2, 8, 2, 1)
YUV_CONVERT(yuv420_to_bgra_vanilla, CONVERT(VANILLA_BGRA_OUT), 2, 8, 2, 1)
YUV_CONVERT(yuv420_to_abgr_vanilla, CONVERT(VANILLA_ABGR_OUT), 2, 8, 2, 1)
YUV_CONVERT(yuv420_to_argb_vanilla, CONVERT(VANILLA_ARGB_OUT), 2, 8, 2, 1)
YUV_CONVERT(yuv420_to_rgba_vanilla, CONVERT(VANILLA_RGBA_OUT), VANILLA_RGBA_OUT, 2, 8, 2, 1)
YUV_CONVERT(yuv420_to_bgra_vanilla, CONVERT(VANILLA_BGRA_OUT), VANILLA_BGRA_OUT, 2, 8, 2, 1)
YUV_CONVERT(yuv420_to_abgr_vanilla, CONVERT(VANILLA_ABGR_OUT), VANILLA_ABGR_OUT, 2, 8, 2, 1)
YUV_CONVERT(yuv420_to_argb_vanilla, CONVERT(VANILLA_ARGB_OUT), VANILLA_ARGB_OUT, 2, 8, 2, 1)
#undef CONVERT
#undef CLEANUP
/* although we use cpu runtime detection, we still need these
* macros as there's no way we could e.g. compile x86 asm code
* on a ppc machine and vice versa
*/
#if defined(i386) || defined(__x86__) || defined(__x86_64__) || defined(_M_IX86)
#include "x86/oggplay_yuv2rgb_x86.c"
#elif defined(__ppc__) || defined(__ppc64__)
//altivec intrinsics only work with the -maltivec gcc flag,
//but we want runtime altivec detection, hence this has to be
//fixed!
//#include "oggplay_yuv2rgb_altivec.c"
#endif
/**
* Initialize the lookup-table for vanilla yuv to rgb conversion.
*
* Fills all 256 entries of CoefsGU, CoefsGV, CoefsBU, CoefsRV and CoefsY.
* Takes no arguments and returns nothing; it only writes those five
* file-scope tables.
*/
static void
init_vanilla_coeffs (void)
{
int i;
for(i = 0; i < 256; ++i)
{
/* chroma tables: 128 is subtracted from the index before scaling;
   the two green contributions are stored negated */
CoefsGU[i] = -CoGU * (i - 128);
CoefsGV[i] = -CoGV * (i - 128);
CoefsBU[i] = CoBU * (i - 128);
CoefsRV[i] = CoRV * (i - 128);
/* luma table: 16 is subtracted from the index before scaling and
   prec/2 is added to every entry */
CoefsY[i] = CoY * (i - 16) + (prec/2);
}
}
/**
* Initialize the function pointers in yuv_conv.
*
* Initialize the function pointers in yuv_conv, based on
* the available CPU extensions.
*/
/*
 * Selects the yuv2rgba / yuv2bgra / yuv2argb converters stored in yuv_conv.
 * Does nothing when yuv_conv.yuv2rgba is already non-NULL. Otherwise it
 * fills the vanilla coefficient tables, queries oc_cpu_flags_get() and
 * installs one set of three converters.
 */
static void
init_yuv_converters(void)
{
ogg_uint32_t features = 0;
/* yuv2rgba doubles as the "already initialized" marker: every path
   below that assigns it also assigns yuv2bgra and yuv2argb */
if ( yuv_conv.yuv2rgba == NULL )
{
/* the tables are filled unconditionally, before any converter is chosen */
init_vanilla_coeffs ();
features = oc_cpu_flags_get();
#if defined(i386) || defined(__x86__) || defined(__x86_64__) || defined(_M_IX86)
/* x86: tried in the order SSE2, MMXEXT, MMX; each match returns at once.
   The SSE2 branch is only compiled when ATTRIBUTE_ALIGNED_MAX >= 16. */
#if defined(ATTRIBUTE_ALIGNED_MAX) && ATTRIBUTE_ALIGNED_MAX >= 16
if (features & OC_CPU_X86_SSE2)
{
yuv_conv.yuv2rgba = yuv420_to_rgba_sse2;
yuv_conv.yuv2bgra = yuv420_to_bgra_sse2;
yuv_conv.yuv2argb = yuv420_to_argb_sse2;
return;
}
else
#endif /* ATTRIBUTE_ALIGNED_MAX */
/* the MMXEXT flag selects the *_sse converters */
if (features & OC_CPU_X86_MMXEXT)
{
yuv_conv.yuv2rgba = yuv420_to_rgba_sse;
yuv_conv.yuv2bgra = yuv420_to_bgra_sse;
yuv_conv.yuv2argb = yuv420_to_argb_sse;
return;
}
else if (features & OC_CPU_X86_MMX)
{
yuv_conv.yuv2rgba = yuv420_to_rgba_mmx;
yuv_conv.yuv2bgra = yuv420_to_bgra_mmx;
yuv_conv.yuv2argb = yuv420_to_argb_mmx;
return;
}
#elif defined(__ppc__) || defined(__ppc64__)
/* AltiVec flag set: the vanilla converters are still what gets installed
   here, with the same three assignments as the big-endian fallback below */
if (features & OC_CPU_PPC_ALTIVEC)
{
yuv_conv.yuv2rgba = yuv420_to_abgr_vanilla;
yuv_conv.yuv2bgra = yuv420_to_argb_vanilla;
yuv_conv.yuv2argb = yuv420_to_bgra_vanilla;
return;
}
#endif
/*
* no CPU extension was found... using vanilla converter, with respect
* to the endianness of the host
*/
#if WORDS_BIGENDIAN || IS_BIG_ENDIAN
/* big-endian: each entry point is mapped to the byte-reversed vanilla
   converter (rgba -> abgr, bgra -> argb, argb -> bgra) */
yuv_conv.yuv2rgba = yuv420_to_abgr_vanilla;
yuv_conv.yuv2bgra = yuv420_to_argb_vanilla;
yuv_conv.yuv2argb = yuv420_to_bgra_vanilla;
#else
yuv_conv.yuv2rgba = yuv420_to_rgba_vanilla;
yuv_conv.yuv2bgra = yuv420_to_bgra_vanilla;
yuv_conv.yuv2argb = yuv420_to_argb_vanilla;
#endif
}
}
void
oggplay_yuv2rgba(const OggPlayYUVChannels* yuv, OggPlayRGBChannels* rgb)
{
if (!yuv_initialized)
if (yuv_conv.yuv2rgba == NULL)
init_yuv_converters();
#if defined(i386) || defined(__x86__) || defined(__x86_64__) || defined(_M_IX86)
#if defined(_MSC_VER) || (defined(ATTRIBUTE_ALIGNED_MAX) && ATTRIBUTE_ALIGNED_MAX >= 16)
if (yuv->y_width % 16 == 0 && cpu_features & OC_CPU_X86_SSE2)
return yuv420_to_rgba_sse2(yuv, rgb);
#endif
if (yuv->y_width % 8 == 0 && cpu_features & OC_CPU_X86_MMX)
return yuv420_to_rgba_mmx(yuv, rgb);
#elif defined(__ppc__) || defined(__ppc64__)
if (yuv->y_width % 16 == 0 && yuv->y_height % 2 == 0 && cpu_features & OC_CPU_PPC_ALTIVEC)
return yuv420_to_abgr_vanilla(yuv, rgb);
#endif
#if WORDS_BIGENDIAN || IS_BIG_ENDIAN
return yuv420_to_abgr_vanilla(yuv, rgb);
#else
return yuv420_to_rgba_vanilla(yuv, rgb);
#endif
yuv_conv.yuv2rgba(yuv, rgb);
}
void
oggplay_yuv2bgra(const OggPlayYUVChannels* yuv, OggPlayRGBChannels * rgb)
{
if (!yuv_initialized)
if (yuv_conv.yuv2bgra == NULL)
init_yuv_converters();
#if defined(i386) || defined(__x86__) || defined(__x86_64__) || defined(_M_IX86)
#if defined(_MSC_VER) || (defined(ATTRIBUTE_ALIGNED_MAX) && ATTRIBUTE_ALIGNED_MAX >= 16)
if (yuv->y_width % 16 == 0 && cpu_features & OC_CPU_X86_SSE2)
return yuv420_to_bgra_sse2(yuv, rgb);
#endif
if (yuv->y_width % 8 == 0 && cpu_features & OC_CPU_X86_MMX)
return yuv420_to_bgra_mmx(yuv, rgb);
#elif defined(__ppc__) || defined(__ppc64__)
if (yuv->y_width % 16 == 0 && yuv->y_height % 2 == 0 && cpu_features & OC_CPU_PPC_ALTIVEC)
return yuv420_to_argb_vanilla(yuv, rgb);
#endif
#if WORDS_BIGENDIAN || IS_BIG_ENDIAN
return yuv420_to_argb_vanilla(yuv, rgb);
#else
return yuv420_to_bgra_vanilla(yuv, rgb);
#endif
yuv_conv.yuv2bgra(yuv, rgb);
}
void
oggplay_yuv2argb(const OggPlayYUVChannels* yuv, OggPlayRGBChannels * rgb)
{
if (!yuv_initialized)
if (yuv_conv.yuv2argb == NULL)
init_yuv_converters();
#if defined(i386) || defined(__x86__) || defined(__x86_64__) || defined(_M_IX86)
#if defined(_MSC_VER) || (defined(ATTRIBUTE_ALIGNED_MAX) && ATTRIBUTE_ALIGNED_MAX >= 16)
if (yuv->y_width % 16 == 0 && cpu_features & OC_CPU_X86_SSE2)
return yuv420_to_argb_sse2(yuv, rgb);
#endif
if (yuv->y_width % 8 == 0 && cpu_features & OC_CPU_X86_MMX)
return yuv420_to_argb_mmx(yuv, rgb);
#elif defined(__ppc__) || defined(__ppc64__)
if (yuv->y_width % 16 == 0 && yuv->y_height % 2 == 0 && cpu_features & OC_CPU_PPC_ALTIVEC)
return yuv420_to_bgra_vanilla(yuv, rgb);
#endif
#if WORDS_BIGENDIAN || IS_BIG_ENDIAN
return yuv420_to_bgra_vanilla(yuv, rgb);
#else
return yuv420_to_argb_vanilla(yuv, rgb);
#endif
yuv_conv.yuv2argb(yuv, rgb);
}

View File

@ -13,17 +13,18 @@
* Template for YUV to RGB conversion
*
* @param FUNC function name
* @param CONVERT a macro that defines
* @param CONVERT a macro that defines the actual conversion function
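* @param VANILLA_OUT macro that writes one output pixel; used for the pixels left over when the width is not a multiple of NUM_PIXELS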
* @param NUM_PIXELS number of pixels processed in one iteration
* @param OUT_SHIFT number of pixels to shift after one iteration in rgb data stream
* @param Y_SHIFT number of pixels to shift after one iteration in Y data stream
* @param UV_SHIFT
*/
#define YUV_CONVERT(FUNC, CONVERT, NUM_PIXELS, OUT_SHIFT, Y_SHIFT, UV_SHIFT)\
#define YUV_CONVERT(FUNC, CONVERT, VANILLA_OUT, NUM_PIXELS, OUT_SHIFT, Y_SHIFT, UV_SHIFT)\
static void \
(FUNC)(const OggPlayYUVChannels* yuv, OggPlayRGBChannels* rgb) \
{ \
int i,j, w, h; \
int i,j, w, h, r; \
unsigned char* restrict ptry; \
unsigned char* restrict ptru; \
unsigned char* restrict ptrv; \
@ -35,8 +36,9 @@ static void \
ptru = yuv->ptru; \
ptrv = yuv->ptrv; \
\
w = yuv->y_width/NUM_PIXELS; \
w = yuv->y_width / NUM_PIXELS; \
h = yuv->y_height; \
r = yuv->y_width % NUM_PIXELS; \
for (i = 0; i < h; ++i) \
{ \
py = ptry; \
@ -52,6 +54,39 @@ static void \
/* use the given conversion function */ \
CONVERT \
} \
/* \
* the video frame width is not a multiple of NUM_PIXELS, \
* thus we have to deal with the remaining pixels using \
* the vanilla implementation. \
*/ \
if (r) { \
/* if there's only 1 remaining pixel to process \
and the luma width is odd, the for loop above \
has already advanced pu and pv too far. */ \
if (r==1 && yuv->y_width&1) { \
pu -= 1; pv -= 1; \
} \
for \
( \
j=(yuv->y_width-r); j < yuv->y_width; \
++j, \
dst += 4, \
py += 1 \
) \
{ \
LOOKUP_COEFFS \
VANILLA_YUV2RGB_PIXEL(py[0], ruv, guv, buv) \
VANILLA_OUT(dst, r, g, b) \
/* advance chroma ptrs every second sample, except \
when the luma width is odd, in which case the \
chroma samples are truncated and we must reuse \
the previous chroma sample */ \
if (j%2 && !(j+1==yuv->y_width-1 && yuv->y_width&1)) { \
pu += 1; pv += 1; \
} \
} \
} \
\
ptro += rgb->rgb_width * 4; \
ptry += yuv->y_width; \
\

View File

@ -33,6 +33,9 @@
/**
* YUV to RGB conversion using x86 CPU extensions
*/
#include "oggplay_private.h"
#include "oggplay_yuv2rgb_template.h"
#include "cpu.h"
#if defined(_MSC_VER)
#include "yuv2rgb_x86_vs.h"
@ -83,7 +86,8 @@ static const simd_t simd_table[9] = {
*/
/* template for the MMX conversion functions */
#define YUV_CONVERT_MMX(FUNC, CONVERT) YUV_CONVERT(FUNC, CONVERT, 8, 32, 8, 4)
#define YUV_CONVERT_MMX(FUNC, CONVERT, CONV_BY_PIXEL) YUV_CONVERT(FUNC, CONVERT, CONV_BY_PIXEL, 8, 32, 8, 4)
#define CLEANUP emms()
#define OUT_RGBA_32 OUTPUT_RGBA_32(movq, mm, 8, 16, 24)
#define OUT_ARGB_32 OUTPUT_ARGB_32(movq, mm, 8, 16, 24)
@ -92,26 +96,38 @@ static const simd_t simd_table[9] = {
/* yuv420 -> */
#define CONVERT(OUTPUT_FUNC) LOAD_YUV_PLANAR_2(movq, mm) \
YUV_2_RGB(movq, mm) \
OUTPUT_FUNC
YUV_2_RGB(movq, mm) \
OUTPUT_FUNC
YUV_CONVERT_MMX(yuv420_to_rgba_mmx, CONVERT(OUT_RGBA_32), VANILLA_RGBA_OUT)
YUV_CONVERT_MMX(yuv420_to_bgra_mmx, CONVERT(OUT_BGRA_32), VANILLA_BGRA_OUT)
YUV_CONVERT_MMX(yuv420_to_argb_mmx, CONVERT(OUT_ARGB_32), VANILLA_ARGB_OUT)
#undef MOVNTQ
/* template for the SSE conversion functions */
#define MOVNTQ SSE_MOVNTQ
YUV_CONVERT_MMX(yuv420_to_rgba_sse, CONVERT(OUT_RGBA_32), VANILLA_RGBA_OUT)
YUV_CONVERT_MMX(yuv420_to_bgra_sse, CONVERT(OUT_BGRA_32), VANILLA_BGRA_OUT)
YUV_CONVERT_MMX(yuv420_to_argb_sse, CONVERT(OUT_ARGB_32), VANILLA_ARGB_OUT)
YUV_CONVERT_MMX(yuv420_to_rgba_mmx, CONVERT(OUT_RGBA_32))
YUV_CONVERT_MMX(yuv420_to_bgra_mmx, CONVERT(OUT_BGRA_32))
YUV_CONVERT_MMX(yuv420_to_argb_mmx, CONVERT(OUT_ARGB_32))
#undef CONVERT
#undef CLEANUP
#undef OUT_RGBA_32
#undef OUT_ARGB_32
#undef OUT_BGRA_32
#undef MOVNTQ
/**
* the conversion functions using SSE2 instructions
*/
/* template for the SSE2 conversion functions */
#define YUV_CONVERT_SSE2(FUNC, CONVERT) YUV_CONVERT(FUNC, CONVERT, 16, 64, 16, 8)
#define YUV_CONVERT_SSE2(FUNC, CONVERT, CONV_BY_PIX) YUV_CONVERT(FUNC, CONVERT, CONV_BY_PIX, 16, 64, 16, 8)
#define OUT_RGBA_32 OUTPUT_RGBA_32(movdqa, xmm, 16, 32, 48)
#define OUT_ARGB_32 OUTPUT_ARGB_32(movdqa, xmm, 16, 32, 48)
#define OUT_BGRA_32 OUTPUT_BGRA_32(movdqa, xmm, 16, 32, 48)
@ -120,17 +136,17 @@ YUV_CONVERT_MMX(yuv420_to_argb_mmx, CONVERT(OUT_ARGB_32))
/* yuv420 -> */
#define CONVERT(OUTPUT_FUNC) LOAD_YUV_PLANAR_2(movdqu, xmm) \
YUV_2_RGB(movdqa, xmm) \
OUTPUT_FUNC
YUV_2_RGB(movdqa, xmm) \
OUTPUT_FUNC
YUV_CONVERT_SSE2(yuv420_to_rgba_sse2, CONVERT(OUT_RGBA_32), VANILLA_RGBA_OUT)
YUV_CONVERT_SSE2(yuv420_to_bgra_sse2, CONVERT(OUT_BGRA_32), VANILLA_BGRA_OUT)
YUV_CONVERT_SSE2(yuv420_to_argb_sse2, CONVERT(OUT_ARGB_32), VANILLA_ARGB_OUT)
YUV_CONVERT_SSE2(yuv420_to_rgba_sse2, CONVERT(OUT_RGBA_32))
YUV_CONVERT_SSE2(yuv420_to_bgra_sse2, CONVERT(OUT_BGRA_32))
YUV_CONVERT_SSE2(yuv420_to_argb_sse2, CONVERT(OUT_ARGB_32))
#undef CONVERT
#undef OUT_RGBA_32
#undef OUT_ARGB_32
#undef OUT_BGRA_32
#undef MOVNTQ
#undef CLEANUP
#undef CLEANUP

View File

@ -8,7 +8,8 @@
# endif
#define emms() __asm__ __volatile__ ( "emms;" );
#define MMX_MOVNTQ "movntq"
#define MMX_MOVNTQ "movq"
#define SSE_MOVNTQ "movntq"
#define SSE2_MOVNTQ "movdqu"
#define YUV_2_RGB(mov_instr, reg_type) \

View File

@ -4,7 +4,8 @@
#define ATTR_ALIGN(_align) __declspec(align(_align))
#define emms() __asm emms
#define MMX_MOVNTQ movntq
#define MMX_MOVNTQ movq
#define SSE_MOVNTQ movntq
#define SSE2_MOVNTQ movdqu
#define LOAD_YUV_PLANAR_2(mov_instr, reg_type) \

View File

@ -44,10 +44,11 @@ sed 's/#include <config.h>/#ifdef WIN32\
#endif/g' ./src/liboggplay/oggplay_private.h1 >./src/liboggplay/oggplay_private.h
rm ./src/liboggplay/oggplay_private.h1
sed s/\#ifdef\ HAVE_INTTYPES_H/\#if\ HAVE_INTTYPES_H/g $1/src/liboggplay/oggplay_data.c >./src/liboggplay/oggplay_data.c
patch -p3 < bug485291_yuv_align.patch
patch -p3 < endian.patch
patch -p3 < trac466.patch
patch -p3 < bug492436.patch
patch -p3 < bug493140.patch
patch -p3 < bug481921.patch
patch -p3 < aspect_ratio.patch
patch -p3 < bug488951.patch
patch -p3 < bug488951_yuv_fix.patch