gecko-dev/gfx/ycbcr/yv24.patch

diff --git a/gfx/ycbcr/yuv_convert.cpp b/gfx/ycbcr/yuv_convert.cpp
index b22e778..cdbb040 100644
--- a/gfx/ycbcr/yuv_convert.cpp
+++ b/gfx/ycbcr/yuv_convert.cpp
@@ -6,16 +6,17 @@
 // http://www.fourcc.org/yuv.php
 // The actual conversion is best described here
 // http://en.wikipedia.org/wiki/YUV
 // An article on optimizing YUV conversion using tables instead of multiplies
 // http://lestourtereaux.free.fr/papers/data/yuvrgb.pdf
 //
 // YV12 is a full plane of Y and a half height, half width chroma planes
 // YV16 is a full plane of Y and a full height, half width chroma planes
+// YV24 is a full plane of Y and a full height, full width chroma planes
 //
 // ARGB pixel format is output, which on little endian is stored as BGRA.
 // The alpha is set to 255, allowing the application to use RGBA or RGB32.

 #include "yuv_convert.h"

 // Header for low level row functions.
 #include "yuv_row.h"
@@ -33,50 +34,55 @@ void ConvertYCbCrToRGB32(const uint8* y_buf,
                          int pic_x,
                          int pic_y,
                          int pic_width,
                          int pic_height,
                          int y_pitch,
                          int uv_pitch,
                          int rgb_pitch,
                          YUVType yuv_type) {
-  unsigned int y_shift = yuv_type;
-  bool has_mmx = supports_mmx();
-  bool odd_pic_x = pic_x % 2 != 0;
+  unsigned int y_shift = yuv_type == YV12 ? 1 : 0;
+  unsigned int x_shift = yuv_type == YV24 ? 0 : 1;
+  // There is no optimized YV24 MMX routine so we check for this and
+  // fall back to the C code.
+  bool has_mmx = supports_mmx() && yuv_type != YV24;
+  bool odd_pic_x = yuv_type != YV24 && pic_x % 2 != 0;
   int x_width = odd_pic_x ? pic_width - 1 : pic_width;

   for (int y = pic_y; y < pic_height + pic_y; ++y) {
     uint8* rgb_row = rgb_buf + (y - pic_y) * rgb_pitch;
     const uint8* y_ptr = y_buf + y * y_pitch + pic_x;
-    const uint8* u_ptr = u_buf + (y >> y_shift) * uv_pitch + (pic_x >> 1);
-    const uint8* v_ptr = v_buf + (y >> y_shift) * uv_pitch + (pic_x >> 1);
+    const uint8* u_ptr = u_buf + (y >> y_shift) * uv_pitch + (pic_x >> x_shift);
+    const uint8* v_ptr = v_buf + (y >> y_shift) * uv_pitch + (pic_x >> x_shift);

     if (odd_pic_x) {
       // Handle the single odd pixel manually and use the
       // fast routines for the remaining.
       FastConvertYUVToRGB32Row_C(y_ptr++,
                                  u_ptr++,
                                  v_ptr++,
                                  rgb_row,
-                                 1);
+                                 1,
+                                 x_shift);
       rgb_row += 4;
     }

     if (has_mmx)
       FastConvertYUVToRGB32Row(y_ptr,
                                u_ptr,
                                v_ptr,
                                rgb_row,
                                x_width);
     else
       FastConvertYUVToRGB32Row_C(y_ptr,
                                  u_ptr,
                                  v_ptr,
                                  rgb_row,
-                                 x_width);
+                                 x_width,
+                                 x_shift);
   }

   // MMX used for FastConvertYUVToRGB32Row requires emms instruction.
   if (has_mmx)
     EMMS();
 }

 }  // namespace gfx
diff --git a/gfx/ycbcr/yuv_convert.h b/gfx/ycbcr/yuv_convert.h
index e624168..c0b678d 100644
--- a/gfx/ycbcr/yuv_convert.h
+++ b/gfx/ycbcr/yuv_convert.h
@@ -10,18 +10,19 @@

 namespace mozilla {

 namespace gfx {

 // Type of YUV surface.
-// The value of these enums matter as they are used to shift vertical indices.
+// ConvertYCbCrToRGB32 derives its chroma shifts from the type, not its value.
 enum YUVType {
-  YV16 = 0,           // YV16 is half width and full height chroma channels.
-  YV12 = 1            // YV12 is half width and half height chroma channels.
+  YV12 = 0,           // YV12: chroma planes are half width and half height.
+  YV16 = 1,           // YV16: chroma planes are half width and full height.
+  YV24 = 2            // YV24: chroma planes are full width and full height.
 };

 // Convert a frame of YUV to 32 bit ARGB.
-// Pass in YV16/YV12 depending on source format
+// Pass in YV12/YV16/YV24 depending on source format
 NS_GFX_(void) ConvertYCbCrToRGB32(const uint8* yplane,
                                   const uint8* uplane,
                                   const uint8* vplane,
                                   uint8* rgbframe,
diff --git a/gfx/ycbcr/yuv_row.h b/gfx/ycbcr/yuv_row.h
index 2a82972..d776dac 100644
--- a/gfx/ycbcr/yuv_row.h
+++ b/gfx/ycbcr/yuv_row.h
@@ -20,17 +20,18 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
                               const uint8* v_buf,
                               uint8* rgb_buf,
                               int width);

 void FastConvertYUVToRGB32Row_C(const uint8* y_buf,
                                 const uint8* u_buf,
                                 const uint8* v_buf,
                                 uint8* rgb_buf,
-                                int width);
+                                int width,
+                                unsigned int x_shift);  // 0 for YV24, else 1.


 }  // extern "C"

 // x64 uses MMX2 (SSE) so emms is not required.
 #if !defined(ARCH_CPU_X86_64) && !defined(ARCH_CPU_PPC)
 #if defined(_MSC_VER)
 #define EMMS() __asm emms
diff --git a/gfx/ycbcr/yuv_row_c.cpp b/gfx/ycbcr/yuv_row_c.cpp
index d3bdab4..36d9bda 100644
--- a/gfx/ycbcr/yuv_row_c.cpp
+++ b/gfx/ycbcr/yuv_row_c.cpp
@@ -153,24 +153,29 @@ static inline void YuvPixel(uint8 y,
                                         (clip(C298a + cr) << 16) |
                                         (0xff000000);
 }

 void FastConvertYUVToRGB32Row_C(const uint8* y_buf,
                               const uint8* u_buf,
                               const uint8* v_buf,
                               uint8* rgb_buf,
-                              int width) {
+                              int width,
+                              unsigned int x_shift) {  // Chroma index shift.
   for (int x = 0; x < width; x += 2) {
-    uint8 u = u_buf[x >> 1];
-    uint8 v = v_buf[x >> 1];
+    uint8 u = u_buf[x >> x_shift];
+    uint8 v = v_buf[x >> x_shift];
     uint8 y0 = y_buf[x];
     YuvPixel(y0, u, v, rgb_buf);
     if ((x + 1) < width) {
       uint8 y1 = y_buf[x + 1];
+      if (x_shift == 0) {  // Unshifted chroma: pixel x + 1 has its own U/V.
+        u = u_buf[x + 1];
+        v = v_buf[x + 1];
+      }
       YuvPixel(y1, u, v, rgb_buf + 4);
     }
     rgb_buf += 8;  // Advance 2 pixels.
   }
 }

 }  // extern "C"

diff --git a/gfx/ycbcr/yuv_row_linux.cpp b/gfx/ycbcr/yuv_row_linux.cpp
index ce5ee89..455dd7b 100644
--- a/gfx/ycbcr/yuv_row_linux.cpp
+++ b/gfx/ycbcr/yuv_row_linux.cpp
@@ -13,17 +13,17 @@ extern "C" {

 #if defined(ARCH_CPU_ARM_FAMILY)
 // ARM implementation uses C fallback
 void FastConvertYUVToRGB32Row(const uint8* y_buf,
                               const uint8* u_buf,
                               const uint8* v_buf,
                               uint8* rgb_buf,
                               int width) {
-  FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width);
+  FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);  // x_shift
 }

 #else

 #define RGBY(i) { \
   static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
   static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
   static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
diff --git a/gfx/ycbcr/yuv_row_mac.cpp b/gfx/ycbcr/yuv_row_mac.cpp
index 34ecdc1..2a679cc 100644
--- a/gfx/ycbcr/yuv_row_mac.cpp
+++ b/gfx/ycbcr/yuv_row_mac.cpp
@@ -13,17 +13,17 @@ extern "C" {
 // option at all.
 #if defined(ARCH_CPU_PPC) || defined(ARCH_CPU_64_BITS)
 // PPC implementation uses C fallback
 void FastConvertYUVToRGB32Row(const uint8* y_buf,
                               const uint8* u_buf,
                               const uint8* v_buf,
                               uint8* rgb_buf,
                               int width) {
-  FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width);
+  FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);  // x_shift
 }

 #else

 #define RGBY(i) { \
   static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
   static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
   static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
diff --git a/gfx/ycbcr/yuv_row_win.cpp b/gfx/ycbcr/yuv_row_win.cpp
index d2b82c4..708ef14 100644
--- a/gfx/ycbcr/yuv_row_win.cpp
+++ b/gfx/ycbcr/yuv_row_win.cpp
@@ -9,17 +9,17 @@ extern "C" {
 // needs to be fixed for 64 bit builds.
 #if defined(ARCH_CPU_64_BITS)
 // PPC implementation uses C fallback
 void FastConvertYUVToRGB32Row(const uint8* y_buf,
                               const uint8* u_buf,
                               const uint8* v_buf,
                               uint8* rgb_buf,
                               int width) {
-  FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width);
+  FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);  // x_shift
 }

 #else


 #define RGBY(i) { \
   static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
   static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \