Bug 1811322 - P0 - vendor libyuv to b2528b0be934;r=webrtc-reviewers,mjf

Differential Revision: https://phabricator.services.mozilla.com/D167858
This commit is contained in:
Nico Grunbaum 2023-01-25 22:33:52 +00:00
parent 8d1005d303
commit c1bc9bcd59
36 changed files with 3177 additions and 1054 deletions

View File

@ -48,7 +48,7 @@ TARGET_LINK_LIBRARIES ( yuvconstants ${ly_lib_static} )
find_package ( JPEG )
if (JPEG_FOUND)
include_directories( ${JPEG_INCLUDE_DIR} )
target_link_libraries( yuvconvert ${JPEG_LIBRARY} )
target_link_libraries( ${ly_lib_shared} ${JPEG_LIBRARY} )
add_definitions( -DHAVE_JPEG )
endif()

View File

@ -5,7 +5,7 @@ gclient_gn_args = [
vars = {
'chromium_git': 'https://chromium.googlesource.com',
'chromium_revision': '1c174f8519b2926ff3e621467b6aa282b4934f4a',
'chromium_revision': '504c0697552240028c5412dafd2a7306a7cd4be7',
'gn_version': 'git_revision:6f13aaac55a977e1948910942675c69f2b4f7a94',
# ninja CIPD package version.
# https://chrome-infra-packages.appspot.com/p/infra/3pp/tools/ninja
@ -15,17 +15,29 @@ vars = {
# Keep the Chromium default of generating location tags.
'generate_location_tags': True,
# By default, download the fuchsia sdk from the public sdk directory.
'fuchsia_sdk_cipd_prefix': 'fuchsia/sdk/gn/',
'fuchsia_version': 'version:10.20221110.2.1',
# By default, download the fuchsia images from the fuchsia GCS bucket.
'fuchsia_images_bucket': 'fuchsia',
'checkout_fuchsia': False,
# Since the images are hundreds of MB, default to only downloading the image
# most commonly useful for developers. Bots and developers that need to use
# other images can override this with additional images.
'checkout_fuchsia_boot_images': "terminal.qemu-x64",
'checkout_fuchsia_product_bundles': '"{checkout_fuchsia_boot_images}" != ""',
}
deps = {
'src/build':
Var('chromium_git') + '/chromium/src/build' + '@' + '18e9d3c3adadf2489507e4e62afffafa46717d26',
Var('chromium_git') + '/chromium/src/build' + '@' + 'fe1231e1da1e95acb006f53d06caaad16756a376',
'src/buildtools':
Var('chromium_git') + '/chromium/src/buildtools' + '@' + '33b52eafd539278600d34cd9ba23550d28c933d2',
Var('chromium_git') + '/chromium/src/buildtools' + '@' + '3c8fef071edb88facb7508060e978c5fb8608dd5',
'src/testing':
Var('chromium_git') + '/chromium/src/testing' + '@' + 'aedf4723b9fcaf5a76164085f4a8e9797eee4bee',
Var('chromium_git') + '/chromium/src/testing' + '@' + 'b4dc828e84ae95e1f5bf855f040c065287dac335',
'src/third_party':
Var('chromium_git') + '/chromium/src/third_party' + '@' + 'd6591989fa347099fd4c7d47ba8bf6ce900b4f8e',
Var('chromium_git') + '/chromium/src/third_party' + '@' + '73f7282fa28ca1fbe8401e391207fb6ccf34767f',
'src/buildtools/linux64': {
'packages': [
@ -71,30 +83,30 @@ deps = {
'src/buildtools/clang_format/script':
Var('chromium_git') + '/external/github.com/llvm/llvm-project/clang/tools/clang-format.git' + '@' + '8b525d2747f2584fc35d8c7e612e66f377858df7',
'src/buildtools/third_party/libc++/trunk':
Var('chromium_git') + '/external/github.com/llvm/llvm-project/libcxx.git' + '@' + 'fc6bbc5eb039769b5ed2de84444a3c6f9b45a598',
Var('chromium_git') + '/external/github.com/llvm/llvm-project/libcxx.git' + '@' + 'cd0a05047451dfbdef5ba85f97ac4888e432a377',
'src/buildtools/third_party/libc++abi/trunk':
Var('chromium_git') + '/external/github.com/llvm/llvm-project/libcxxabi.git' + '@' + '8dd405113a4f3694e910b79785dd7fb7535a888a',
Var('chromium_git') + '/external/github.com/llvm/llvm-project/libcxxabi.git' + '@' + '1a32724f721e1c3b6c590a07fe4a954344f15e48',
'src/buildtools/third_party/libunwind/trunk':
Var('chromium_git') + '/external/github.com/llvm/llvm-project/libunwind.git' + '@' + 'aabcd8753678f1536e15eb6385a948470debdae4',
Var('chromium_git') + '/external/github.com/llvm/llvm-project/libunwind.git' + '@' + '5870472fdd17f33d923b02e3e0acb9cbb18dbc9a',
'src/third_party/catapult':
Var('chromium_git') + '/catapult.git' + '@' + '3ffa6b222803f54188a7b249383b2f092a24d19a',
Var('chromium_git') + '/catapult.git' + '@' + '4efb51be8574f2969273012958eaae85d01ede0b',
'src/third_party/colorama/src':
Var('chromium_git') + '/external/colorama.git' + '@' + '799604a1041e9b3bc5d2789ecbd7e8db2e18e6b8',
'src/third_party/depot_tools':
Var('chromium_git') + '/chromium/tools/depot_tools.git' + '@' + 'b52683fa2e74087464d32a1a9c76bf1b5275e4fe',
Var('chromium_git') + '/chromium/tools/depot_tools.git' + '@' + '2fc7e1ffd58b00601b47a5126201e5162911e244',
'src/third_party/freetype/src':
Var('chromium_git') + '/chromium/src/third_party/freetype2.git' + '@' + 'dea2e6358b2f963008d447d27564dd79890b61f0',
Var('chromium_git') + '/chromium/src/third_party/freetype2.git' + '@' + '1c44de209cb465d175279dc30cd95f9857f703dd',
'src/third_party/googletest/src':
Var('chromium_git') + '/external/github.com/google/googletest.git' + '@' + 'af29db7ec28d6df1c7f0f745186884091e602e07',
'src/third_party/harfbuzz-ng/src':
Var('chromium_git') + '/external/github.com/harfbuzz/harfbuzz.git' + '@' + '56c467093598ec559a7148b61e112e9de52b7076',
Var('chromium_git') + '/external/github.com/harfbuzz/harfbuzz.git' + '@' + '2822b589bc837fae6f66233e2cf2eef0f6ce8470',
'src/third_party/libjpeg_turbo':
Var('chromium_git') + '/chromium/deps/libjpeg_turbo.git' + '@' + 'ed683925e4897a84b3bffc5c1414c85b97a129a3',
'src/third_party/nasm':
Var('chromium_git') + '/chromium/deps/nasm.git' + '@' + '0873b2bae6a5388a1c55deac8456e3c60a47ca08',
'src/tools':
Var('chromium_git') + '/chromium/src/tools' + '@' + 'a185bbc6c077438a59a89a97c6c6ae30895e976c',
Var('chromium_git') + '/chromium/src/tools' + '@' + 'a20d904d021175f221bf58921a5a67fd48420ed9',
# libyuv-only dependencies (not present in Chromium).
'src/third_party/gtest-parallel':
@ -116,14 +128,10 @@ deps = {
'condition': 'checkout_android',
'dep_type': 'cipd',
},
'src/third_party/auto/src': {
'url': Var('chromium_git') + '/external/github.com/google/auto.git' + '@' + '3659a0e6436d3acfeda04e0bd1df3603f1e7ffac',
'condition': 'checkout_android',
},
'src/third_party/boringssl/src':
'https://boringssl.googlesource.com/boringssl.git' + '@' + '1ee71185a2322dc354bee5e5a0abfb1810a27dc6',
'https://boringssl.googlesource.com/boringssl.git' + '@' + 'f0518d45119dd4dd322a884669daf8247bc3c992',
'src/base': {
'url': Var('chromium_git') + '/chromium/src/base' + '@' + '077682171b88d0aa0cb77a8e1cd4d959f58a20a3',
'url': Var('chromium_git') + '/chromium/src/base' + '@' + 'f80120ff3265ba9bcb27416cc489343cfdc8bc61',
'condition': 'checkout_android',
},
'src/third_party/bazel': {
@ -288,7 +296,7 @@ deps = {
},
'src/third_party/icu': {
'url': Var('chromium_git') + '/chromium/deps/icu.git' + '@' + 'da07448619763d1cde255b361324242646f5b268',
'url': Var('chromium_git') + '/chromium/deps/icu.git' + '@' + '1b7d391f0528fb3a4976b7541b387ee04f915f83',
},
'src/third_party/icu4j': {
'packages': [
@ -329,7 +337,7 @@ deps = {
'condition': 'checkout_android',
},
'src/third_party/junit/src': {
'url': Var('chromium_git') + '/external/junit.git' + '@' + '64155f8a9babcfcf4263cf4d08253a1556e75481',
'url': Var('chromium_git') + '/external/junit.git' + '@' + '05fe2a64f59127c02135be22f416e91260d6ede6',
'condition': 'checkout_android',
},
'src/third_party/libunwindstack': {
@ -443,7 +451,7 @@ deps = {
# iOS deps:
'src/ios': {
'url': Var('chromium_git') + '/chromium/src/ios' + '@' + '211070da56a62cf7d2f7c7a81be29b57294c4343',
'url': Var('chromium_git') + '/chromium/src/ios' + '@' + '866ec86ecb27dad8a3ac7957590df7765a13834f',
'condition': 'checkout_ios'
},
@ -2245,29 +2253,74 @@ hooks = [
'condition': 'checkout_mac',
},
{
'name': 'msan_chained_origins',
'name': 'msan_chained_origins_focal',
'pattern': '.',
'condition': 'checkout_instrumented_libraries',
'action': [ 'python3',
'src/third_party/depot_tools/download_from_google_storage.py',
"--no_resume",
"--no_auth",
"--bucket", "chromium-instrumented-libraries",
"-s", "src/third_party/instrumented_libraries/binaries/msan-chained-origins.tgz.sha1",
'--no_resume',
'--no_auth',
'--bucket', 'chromium-instrumented-libraries',
'-s', 'src/third_party/instrumented_libraries/binaries/msan-chained-origins-focal.tgz.sha1',
],
},
{
'name': 'msan_no_origins',
'name': 'msan_no_origins_focal',
'pattern': '.',
'condition': 'checkout_instrumented_libraries',
'action': [ 'python3',
'src/third_party/depot_tools/download_from_google_storage.py',
"--no_resume",
"--no_auth",
"--bucket", "chromium-instrumented-libraries",
"-s", "src/third_party/instrumented_libraries/binaries/msan-no-origins.tgz.sha1",
'--no_resume',
'--no_auth',
'--bucket', 'chromium-instrumented-libraries',
'-s', 'src/third_party/instrumented_libraries/binaries/msan-no-origins-focal.tgz.sha1',
],
},
{
'name': 'msan_chained_origins_xenial',
'pattern': '.',
'condition': 'checkout_instrumented_libraries',
'action': [ 'python3',
'src/third_party/depot_tools/download_from_google_storage.py',
'--no_resume',
'--no_auth',
'--bucket', 'chromium-instrumented-libraries',
'-s', 'src/third_party/instrumented_libraries/binaries/msan-chained-origins-xenial.tgz.sha1',
],
},
{
'name': 'msan_no_origins_xenial',
'pattern': '.',
'condition': 'checkout_instrumented_libraries',
'action': [ 'python3',
'src/third_party/depot_tools/download_from_google_storage.py',
'--no_resume',
'--no_auth',
'--bucket', 'chromium-instrumented-libraries',
'-s', 'src/third_party/instrumented_libraries/binaries/msan-no-origins-xenial.tgz.sha1',
],
},
{
'name': 'Download Fuchsia SDK from GCS',
'pattern': '.',
'condition': 'checkout_fuchsia',
'action': [
'python3',
'src/build/fuchsia/update_sdk.py',
'--cipd-prefix={fuchsia_sdk_cipd_prefix}',
'--version={fuchsia_version}',
],
},
{
'name': 'Download Fuchsia system images',
'pattern': '.',
'condition': 'checkout_fuchsia and checkout_fuchsia_product_bundles',
'action': [
'python3',
'src/build/fuchsia/update_product_bundles.py',
'{checkout_fuchsia_boot_images}',
],
},
{
# Pull clang if needed or requested via GYP_DEFINES.
# Note: On Win, this should run after win_toolchain, as it may use it.

View File

@ -1,6 +1,6 @@
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 1850
Version: 1857
License: BSD
License File: LICENSE

View File

@ -162,6 +162,22 @@ int MM21ToYUY2(const uint8_t* src_y,
int width,
int height);
// Convert MT2T to P010
// Note that src_y and src_uv point to packed 10-bit values, so the Y plane will
// be 10 / 8 times the dimensions of the image. Also for this reason,
// src_stride_y and src_stride_uv are given in bytes.
LIBYUV_API
int MT2TToP010(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_uv,
int src_stride_uv,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_uv,
int dst_stride_uv,
int width,
int height);
// Convert I422 to NV21.
LIBYUV_API
int I422ToNV21(const uint8_t* src_y,
@ -283,6 +299,23 @@ int I210ToI422(const uint16_t* src_y,
int width,
int height);
#define H410ToH420 I410ToI420
LIBYUV_API
int I410ToI420(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
#define H410ToH444 I410ToI444
LIBYUV_API
int I410ToI444(const uint16_t* src_y,
@ -571,6 +604,36 @@ int NV16ToNV24(const uint8_t* src_y,
int width,
int height);
// Convert P010 to I010.
LIBYUV_API
int P010ToI010(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_uv,
int src_stride_uv,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert P012 to I012.
LIBYUV_API
int P012ToI012(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_uv,
int src_stride_uv,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert P010 to P410.
LIBYUV_API
int P010ToP410(const uint16_t* src_y,

View File

@ -392,6 +392,24 @@ int I210Copy(const uint16_t* src_y,
int width,
int height);
// Copy I410 to I410.
#define I410ToI410 I410Copy
LIBYUV_API
int I410Copy(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int width,
int height);
// Copy NV12. Supports inverting.
LIBYUV_API
int NV12Copy(const uint8_t* src_y,

View File

@ -85,6 +85,60 @@ int I444Rotate(const uint8_t* src_y,
int height,
enum RotationMode mode);
// Rotate I010 frame.
LIBYUV_API
int I010Rotate(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int width,
int height,
enum RotationMode mode);
// Rotate I210 frame.
LIBYUV_API
int I210Rotate(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int width,
int height,
enum RotationMode mode);
// Rotate I410 frame.
LIBYUV_API
int I410Rotate(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int width,
int height,
enum RotationMode mode);
// Rotate NV12 input and store in I420.
LIBYUV_API
int NV12ToI420Rotate(const uint8_t* src_y,
@ -156,6 +210,16 @@ void RotatePlane270(const uint8_t* src,
int width,
int height);
// Rotate a plane by 0, 90, 180, or 270.
LIBYUV_API
int RotatePlane_16(const uint16_t* src,
int src_stride,
uint16_t* dst,
int dst_stride,
int width,
int height,
enum RotationMode mode);
// Rotations for when U and V are interleaved.
// These functions take one UV input pointer and
// split the data into two buffers while

View File

@ -215,7 +215,23 @@ void TransposeUVWx16_Any_LSX(const uint8_t* src,
uint8_t* dst_b,
int dst_stride_b,
int width);
void TransposeWxH_16_C(const uint16_t* src,
int src_stride,
uint16_t* dst,
int dst_stride,
int width,
int height);
void TransposeWx8_16_C(const uint16_t* src,
int src_stride,
uint16_t* dst,
int dst_stride,
int width);
void TransposeWx1_16_C(const uint16_t* src,
int src_stride,
uint16_t* dst,
int dst_stride,
int width);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv

View File

@ -11,7 +11,8 @@
#ifndef INCLUDE_LIBYUV_ROW_H_
#define INCLUDE_LIBYUV_ROW_H_
#include <stdlib.h> // For malloc.
#include <stddef.h> // For NULL
#include <stdlib.h> // For malloc
#include "libyuv/basic_types.h"
@ -176,9 +177,8 @@ extern "C" {
// The following functions fail on gcc/clang 32 bit with fpic and framepointer.
// caveat: clangcl uses row_win.cc which works.
#if !defined(MOZ_PROFILING) && \
(defined(__x86_64__) || !defined(__pic__) || defined(__clang__) || \
defined(_MSC_VER))
#if defined(__x86_64__) || !defined(__pic__) || defined(__clang__) || \
defined(_MSC_VER)
// TODO(fbarchard): fix build error on android_full_debug=1
// https://code.google.com/p/libyuv/issues/detail?id=517
#define HAS_I422ALPHATOARGBROW_SSSE3
@ -247,9 +247,8 @@ extern "C" {
#define HAS_ARGBATTENUATEROW_AVX2
#endif
#if !defined(MOZ_PROFILING) && \
(defined(__x86_64__) || !defined(__pic__) || defined(__clang__) || \
defined(_MSC_VER))
#if defined(__x86_64__) || !defined(__pic__) || defined(__clang__) || \
defined(_MSC_VER)
// TODO(fbarchard): fix build error on android_full_debug=1
// https://code.google.com/p/libyuv/issues/detail?id=517
#define HAS_I422ALPHATOARGBROW_AVX2
@ -457,6 +456,7 @@ extern "C" {
#define HAS_DETILEROW_NEON
#define HAS_DETILESPLITUVROW_NEON
#define HAS_DETILETOYUY2_NEON
#define HAS_UNPACKMT2T_NEON
#define HAS_DIVIDEROW_16_NEON
#define HAS_HALFFLOATROW_NEON
#define HAS_HALFMERGEUVROW_NEON
@ -686,6 +686,11 @@ extern "C" {
#define HAS_SPLITUVROW_LSX
#define HAS_UYVYTOARGBROW_LSX
#define HAS_YUY2TOARGBROW_LSX
#define HAS_ARGBTOYROW_LSX
#define HAS_ABGRTOYJROW_LSX
#define HAS_RGBATOYJROW_LSX
#define HAS_RGB24TOYJROW_LSX
#define HAS_RAWTOYJROW_LSX
#endif
#if !defined(LIBYUV_DISABLE_LASX) && defined(__loongarch_asx)
@ -713,6 +718,8 @@ extern "C" {
#define HAS_ARGBTOUVROW_LASX
#define HAS_ARGBTOYJROW_LASX
#define HAS_ARGBTOYROW_LASX
#define HAS_ABGRTOYJROW_LASX
#define HAS_ABGRTOYROW_LASX
#define HAS_I422ALPHATOARGBROW_LASX
#define HAS_I422TOARGB1555ROW_LASX
#define HAS_I422TOARGB4444ROW_LASX
@ -742,6 +749,11 @@ extern "C" {
#define HAS_YUY2TOUV422ROW_LASX
#define HAS_YUY2TOUVROW_LASX
#define HAS_YUY2TOYROW_LASX
#define HAS_RGBATOYROW_LASX
#define HAS_RGBATOYJROW_LASX
#define HAS_BGRATOYROW_LASX
#define HAS_RGB24TOYJROW_LASX
#define HAS_RAWTOYJROW_LASX
#endif
#if defined(_MSC_VER) && !defined(__CLR_VER) && !defined(__clang__)
@ -830,13 +842,21 @@ struct YuvConstants {
#define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a)-1)))
#define align_buffer_64(var, size) \
uint8_t* var##_mem = (uint8_t*)(malloc((size) + 63)); /* NOLINT */ \
uint8_t* var = (uint8_t*)(((intptr_t)(var##_mem) + 63) & ~63) /* NOLINT */
#define align_buffer_64(var, size) \
void* var##_mem = malloc((size) + 63); /* NOLINT */ \
uint8_t* var = (uint8_t*)(((intptr_t)var##_mem + 63) & ~63) /* NOLINT */
#define free_aligned_buffer_64(var) \
free(var##_mem); \
var = 0
var = NULL
#define align_buffer_64_16(var, size) \
void* var##_mem = malloc((size)*2 + 63); /* NOLINT */ \
uint16_t* var = (uint16_t*)(((intptr_t)var##_mem + 63) & ~63) /* NOLINT */
#define free_aligned_buffer_64_16(var) \
free(var##_mem); \
var = NULL
#if defined(__APPLE__) || defined(__x86_64__) || defined(__llvm__)
#define OMITFP
@ -1193,9 +1213,14 @@ void ABGRToYJRow_NEON(const uint8_t* src_abgr, uint8_t* dst_yj, int width);
void RGBAToYJRow_NEON(const uint8_t* src_rgba, uint8_t* dst_yj, int width);
void ARGBToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width);
void ARGBToYJRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width);
void ARGBToYRow_LSX(const uint8_t* src_argb0, uint8_t* dst_y, int width);
void ARGBToYRow_LASX(const uint8_t* src_argb0, uint8_t* dst_y, int width);
void ARGBToYJRow_LSX(const uint8_t* src_argb0, uint8_t* dst_y, int width);
void ABGRToYJRow_LSX(const uint8_t* src_abgr, uint8_t* dst_yj, int width);
void RGBAToYJRow_LSX(const uint8_t* src_rgba, uint8_t* dst_yj, int width);
void ARGBToYJRow_LASX(const uint8_t* src_argb0, uint8_t* dst_y, int width);
void ABGRToYJRow_LASX(const uint8_t* src_abgr, uint8_t* dst_yj, int width);
void RGBAToYJRow_LASX(const uint8_t* src_rgba, uint8_t* dst_yj, int width);
void ARGBToUV444Row_NEON(const uint8_t* src_argb,
uint8_t* dst_u,
uint8_t* dst_v,
@ -1419,6 +1444,8 @@ void BGRAToYRow_LSX(const uint8_t* src_bgra, uint8_t* dst_y, int width);
void ABGRToYRow_LSX(const uint8_t* src_abgr, uint8_t* dst_y, int width);
void RGBAToYRow_LSX(const uint8_t* src_rgba, uint8_t* dst_y, int width);
void ARGB1555ToYRow_LSX(const uint8_t* src_argb1555, uint8_t* dst_y, int width);
void RGB24ToYJRow_LSX(const uint8_t* src_rgb24, uint8_t* dst_yj, int width);
void ABGRToYRow_LASX(const uint8_t* src_abgr, uint8_t* dst_y, int width);
void ARGB1555ToYRow_LASX(const uint8_t* src_argb1555,
uint8_t* dst_y,
int width);
@ -1428,6 +1455,11 @@ void RGB24ToYRow_LSX(const uint8_t* src_rgb24, uint8_t* dst_y, int width);
void RGB24ToYRow_LASX(const uint8_t* src_rgb24, uint8_t* dst_y, int width);
void RAWToYRow_LSX(const uint8_t* src_raw, uint8_t* dst_y, int width);
void RAWToYRow_LASX(const uint8_t* src_raw, uint8_t* dst_y, int width);
void RGBAToYRow_LASX(const uint8_t* src_rgba, uint8_t* dst_y, int width);
void BGRAToYRow_LASX(const uint8_t* src_bgra, uint8_t* dst_y, int width);
void RGB24ToYJRow_LASX(const uint8_t* src_rgb24, uint8_t* dst_yj, int width);
void RAWToYJRow_LSX(const uint8_t* src_raw, uint8_t* dst_yj, int width);
void RAWToYJRow_LASX(const uint8_t* src_raw, uint8_t* dst_yj, int width);
void ARGBToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width);
void ARGBToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width);
@ -1491,10 +1523,15 @@ void ARGB1555ToYRow_Any_MSA(const uint8_t* src_ptr,
void BGRAToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ABGRToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGBAToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ARGBToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ARGBToYJRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGB24ToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGB565ToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ABGRToYJRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RAWToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGBAToYJRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGB24ToYJRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RAWToYJRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ARGB1555ToYRow_Any_LSX(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
@ -1503,7 +1540,14 @@ void RGB565ToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGB24ToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ARGBToYJRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ARGBToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ABGRToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ABGRToYJRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RAWToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGBAToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGBAToYJRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void BGRAToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGB24ToYJRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RAWToYJRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ARGB1555ToYRow_Any_LASX(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
@ -1935,6 +1979,8 @@ void MirrorSplitUVRow_C(const uint8_t* src_uv,
uint8_t* dst_v,
int width);
void MirrorRow_16_C(const uint16_t* src, uint16_t* dst, int width);
void ARGBMirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width);
void ARGBMirrorRow_SSE2(const uint8_t* src, uint8_t* dst, int width);
void ARGBMirrorRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width);
@ -2124,6 +2170,8 @@ void DetileToYUY2_Any_NEON(const uint8_t* src_y,
ptrdiff_t src_uv_tile_stride,
uint8_t* dst_yuy2,
int width);
void UnpackMT2T_C(const uint8_t* src, uint16_t* dst, size_t size);
void UnpackMT2T_NEON(const uint8_t* src, uint16_t* dst, size_t size);
void MergeUVRow_C(const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* dst_uv,

View File

@ -214,6 +214,17 @@ void ScalePlaneVertical_16To8(int src_height,
int scale,
enum FilterMode filtering);
void ScalePlaneDown2_16To8(int src_width,
int src_height,
int dst_width,
int dst_height,
int src_stride,
int dst_stride,
const uint16_t* src_ptr,
uint8_t* dst_ptr,
int scale,
enum FilterMode filtering);
// Simplify the filtering based on scale factors.
enum FilterMode ScaleFilterReduce(int src_width,
int src_height,
@ -259,6 +270,16 @@ void ScaleRowDown2_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst,
int dst_width);
void ScaleRowDown2_16To8_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width,
int scale);
void ScaleRowDown2_16To8_Odd_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width,
int scale);
void ScaleRowDown2Linear_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
@ -267,6 +288,16 @@ void ScaleRowDown2Linear_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst,
int dst_width);
void ScaleRowDown2Linear_16To8_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width,
int scale);
void ScaleRowDown2Linear_16To8_Odd_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width,
int scale);
void ScaleRowDown2Box_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
@ -279,6 +310,16 @@ void ScaleRowDown2Box_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst,
int dst_width);
void ScaleRowDown2Box_16To8_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width,
int scale);
void ScaleRowDown2Box_16To8_Odd_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width,
int scale);
void ScaleRowDown4_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,

View File

@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1850
#define LIBYUV_VERSION 1857
#endif // INCLUDE_LIBYUV_VERSION_H_

View File

@ -52,7 +52,7 @@
'optimize': 'max', # enable O2 and ltcg.
},
# Allows libyuv.a redistributable library without external dependencies.
# 'standalone_static_library': 1,
'standalone_static_library': 1,
'conditions': [
# Disable -Wunused-parameter
['clang == 1', {
@ -70,11 +70,6 @@
'-mfpu=vfpv3-d16',
# '-mthumb', # arm32 not thumb
],
'cflags_mozilla!': [
'-mfpu=vfp',
'-mfpu=vfpv3',
'-mfpu=vfpv3-d16',
],
'conditions': [
# Disable LTO in libyuv_neon target due to gcc 4.9 compiler bug.
['clang == 0 and use_lto == 1', {
@ -89,9 +84,6 @@
'-mfpu=neon',
# '-marm', # arm32 not thumb
],
'cflags_mozilla': [
'-mfpu=neon',
],
}],
],
}],
@ -100,15 +92,7 @@
'LIBYUV_MSA',
],
}],
['build_with_mozilla == 1', {
'defines': [
'HAVE_JPEG'
],
'cflags_mozilla': [
'$(MOZ_JPEG_CFLAGS)',
],
}],
['OS != "ios" and libyuv_disable_jpeg != 1 and build_with_mozilla != 1', {
['OS != "ios" and libyuv_disable_jpeg != 1', {
'defines': [
'HAVE_JPEG'
],

View File

@ -45,7 +45,7 @@ uint32_t HashDjb2(const uint8_t* src, uint64_t count, uint32_t seed) {
}
#endif
while (count >= (uint64_t)(kBlockSize)) {
while (count >= (uint64_t)kBlockSize) {
seed = HashDjb2_SSE(src, kBlockSize, seed);
src += kBlockSize;
count -= kBlockSize;
@ -359,10 +359,10 @@ static double Ssim8x8_C(const uint8_t* src_a,
(sum_a_sq + sum_b_sq + c1) *
(count * sum_sq_a - sum_a_sq + count * sum_sq_b - sum_b_sq + c2);
if (ssim_d == 0.0) {
if (ssim_d == 0) {
return DBL_MAX;
}
return ssim_n * 1.0 / ssim_d;
return (double)ssim_n / (double)ssim_d;
}
}

View File

@ -67,7 +67,7 @@ uint32_t HammingDistance_SSE42(const uint8_t* src_a,
:
: "memory", "cc", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10");
return static_cast<uint32_t>(diff);
return (uint32_t)(diff);
}
#else
uint32_t HammingDistance_SSE42(const uint8_t* src_a,

View File

@ -24,6 +24,10 @@ namespace libyuv {
extern "C" {
#endif
// Subsample amount uses a shift.
// v is value
// a is amount to add to round up
// s is shift to subsample down
#define SUBSAMPLE(v, a, s) (v < 0) ? (-((-v + a) >> s)) : ((v + a) >> s)
static __inline int Abs(int v) {
return v >= 0 ? v : -v;
@ -291,6 +295,52 @@ int I210ToI422(const uint16_t* src_y,
0, 10);
}
// Convert 10-bit 4:4:4 (I410) to 8-bit 4:2:0 (I420).
// Narrows all three planes from 10 bits to 8 and downsamples both chroma
// planes by 2x2 with bilinear filtering. A negative height mirrors the
// image vertically. Returns 0 on success, -1 on invalid width/height.
LIBYUV_API
int I410ToI420(const uint16_t* src_y,
               int src_stride_y,
               const uint16_t* src_u,
               int src_stride_u,
               const uint16_t* src_v,
               int src_stride_v,
               uint8_t* dst_y,
               int dst_stride_y,
               uint8_t* dst_u,
               int dst_stride_u,
               uint8_t* dst_v,
               int dst_stride_v,
               int width,
               int height) {
  const int depth = 10;
  // Fixed-point multiplier consumed by Convert16To8Plane: 1 << (24 - depth).
  const int scale = 1 << (24 - depth);

  if (width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_y = src_y + (height - 1) * src_stride_y;
    src_u = src_u + (height - 1) * src_stride_u;
    src_v = src_v + (height - 1) * src_stride_v;
    src_stride_y = -src_stride_y;
    src_stride_u = -src_stride_u;
    src_stride_v = -src_stride_v;
  }

  {
    // Chroma dimensions round up for odd widths/heights (SUBSAMPLE adds
    // 1 before shifting right by 1).
    const int uv_width = SUBSAMPLE(width, 1, 1);
    const int uv_height = SUBSAMPLE(height, 1, 1);

    // Y: narrow 10-bit samples down to 8 bits, full resolution.
    Convert16To8Plane(src_y, src_stride_y, dst_y, dst_stride_y, scale, width,
                      height);
    // U/V: downscale 2x in each dimension and narrow to 8 bits in one pass.
    ScalePlaneDown2_16To8(width, height, uv_width, uv_height, src_stride_u,
                          dst_stride_u, src_u, dst_u, scale, kFilterBilinear);
    ScalePlaneDown2_16To8(width, height, uv_width, uv_height, src_stride_v,
                          dst_stride_v, src_v, dst_v, scale, kFilterBilinear);
  }
  return 0;
}
LIBYUV_API
int I410ToI444(const uint16_t* src_y,
int src_stride_y,
@ -732,6 +782,92 @@ int MM21ToYUY2(const uint8_t* src_y,
return 0;
}
// Convert MT2T into P010. See tinyurl.com/mtk-10bit-video-format for format
// documentation.
// TODO(greenjustin): Add an MT2T to I420 conversion.
//
// src_y/dst_y may be NULL to skip the luma plane; src_uv and dst_uv are
// required. A negative height mirrors the output vertically. Returns 0 on
// success, -1 on invalid arguments.
LIBYUV_API
int MT2TToP010(const uint8_t* src_y,
               int src_stride_y,
               const uint8_t* src_uv,
               int src_stride_uv,
               uint16_t* dst_y,
               int dst_stride_y,
               uint16_t* dst_uv,
               int dst_stride_uv,
               int width,
               int height) {
  if (width <= 0 || !height || !src_uv || !dst_uv) {
    return -1;
  }

  {
    int u_width = (width + 1) / 2;
    int uv_width = 2 * u_width;
    int y = 0;
    // BUG FIX: was `int uv_height = uv_height = (height + 1) / 2;` — a
    // redundant self-assignment that reads the variable before it is
    // initialized.
    int uv_height = (height + 1) / 2;
    const int tile_width = 16;
    const int y_tile_height = 32;
    const int uv_tile_height = 16;
    // Round width up to a whole number of 16-pixel-wide tiles.
    int padded_width = (width + tile_width - 1) & ~(tile_width - 1);
    // Packed 10-bit samples occupy 10/8 bytes each.
    int y_tile_row_size = padded_width * y_tile_height * 10 / 8;
    int uv_tile_row_size = padded_width * uv_tile_height * 10 / 8;
    size_t row_buf_size = padded_width * y_tile_height * sizeof(uint16_t);
    void (*UnpackMT2T)(const uint8_t* src, uint16_t* dst, size_t size) =
        UnpackMT2T_C;
    align_buffer_64(row_buf, row_buf_size);

#if defined(HAS_UNPACKMT2T_NEON)
    if (TestCpuFlag(kCpuHasNEON)) {
      UnpackMT2T = UnpackMT2T_NEON;
    }
#endif

    // Negative height means invert the image.
    if (height < 0) {
      height = -height;
      uv_height = (height + 1) / 2;
      if (dst_y) {
        dst_y = dst_y + (height - 1) * dst_stride_y;
        dst_stride_y = -dst_stride_y;
      }
      dst_uv = dst_uv + (uv_height - 1) * dst_stride_uv;
      dst_stride_uv = -dst_stride_uv;
    }

    // Unpack and detile Y in rows of tiles.
    if (src_y && dst_y) {
      for (y = 0; y < (height & ~(y_tile_height - 1)); y += y_tile_height) {
        UnpackMT2T(src_y, (uint16_t*)row_buf, y_tile_row_size);
        DetilePlane_16((uint16_t*)row_buf, padded_width, dst_y, dst_stride_y,
                       width, y_tile_height, y_tile_height);
        src_y += src_stride_y * y_tile_height;
        dst_y += dst_stride_y * y_tile_height;
      }
      // Final partial row of Y tiles, if height is not a tile multiple.
      if (height & (y_tile_height - 1)) {
        UnpackMT2T(src_y, (uint16_t*)row_buf, y_tile_row_size);
        DetilePlane_16((uint16_t*)row_buf, padded_width, dst_y, dst_stride_y,
                       width, height & (y_tile_height - 1), y_tile_height);
      }
    }

    // Unpack and detile the interleaved UV plane.
    for (y = 0; y < (uv_height & ~(uv_tile_height - 1)); y += uv_tile_height) {
      UnpackMT2T(src_uv, (uint16_t*)row_buf, uv_tile_row_size);
      DetilePlane_16((uint16_t*)row_buf, padded_width, dst_uv, dst_stride_uv,
                     uv_width, uv_tile_height, uv_tile_height);
      src_uv += src_stride_uv * uv_tile_height;
      dst_uv += dst_stride_uv * uv_tile_height;
    }
    // Final partial row of UV tiles.
    if (uv_height & (uv_tile_height - 1)) {
      UnpackMT2T(src_uv, (uint16_t*)row_buf, uv_tile_row_size);
      DetilePlane_16((uint16_t*)row_buf, padded_width, dst_uv, dst_stride_uv,
                     uv_width, uv_height & (uv_tile_height - 1),
                     uv_tile_height);
    }
    free_aligned_buffer_64(row_buf);
  }
  return 0;
}
#ifdef I422TONV21_ROW_VERSION
// Unittest fails for this version.
// 422 chroma is 1/2 width, 1x height
@ -753,7 +889,7 @@ int I422ToNV21(const uint8_t* src_y,
int y;
void (*MergeUVRow)(const uint8_t* src_u, const uint8_t* src_v,
uint8_t* dst_uv, int width) = MergeUVRow_C;
void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
void (*InterpolateRow)(uint8_t* dst_ptr, const uint8_t* src_ptr,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_C;
int halfwidth = (width + 1) >> 1;
@ -1137,6 +1273,70 @@ int NV16ToNV24(const uint8_t* src_y,
return 0;
}
// Shared helper: convert any biplanar P[420]1[02] layout into the matching
// triplanar I[420]1[02] layout. Narrows the MSB-aligned Y samples to LSB
// alignment and de-interleaves the packed UV plane into separate U and V
// planes. Negative height (mirroring) is handled by the plane helpers.
// Returns 0 on success, -1 on invalid width/height.
static int PxxxToIxxx(const uint16_t* src_y,
                      int src_stride_y,
                      const uint16_t* src_uv,
                      int src_stride_uv,
                      uint16_t* dst_y,
                      int dst_stride_y,
                      uint16_t* dst_u,
                      int dst_stride_u,
                      uint16_t* dst_v,
                      int dst_stride_v,
                      int width,
                      int height,
                      int subsample_x,
                      int subsample_y,
                      int depth) {
  if (width <= 0 || height == 0) {
    return -1;  // Nothing to convert.
  }
  {
    // Chroma plane dimensions, rounded up per the subsampling factors.
    const int chroma_width = SUBSAMPLE(width, subsample_x, subsample_x);
    const int chroma_height = SUBSAMPLE(height, subsample_y, subsample_y);
    // Shift Y samples from the high bits of 16 down to the low bits.
    ConvertToLSBPlane_16(src_y, src_stride_y, dst_y, dst_stride_y, width,
                         height, depth);
    // Split interleaved UV into planar U and V, also LSB-aligned.
    SplitUVPlane_16(src_uv, src_stride_uv, dst_u, dst_stride_u, dst_v,
                    dst_stride_v, chroma_width, chroma_height, depth);
  }
  return 0;
}
// Convert P010 (10-bit biplanar Y + interleaved UV, 420 subsampling) to
// I010 (10-bit triplanar, LSB-aligned samples).
// Thin wrapper over PxxxToIxxx with 420 geometry (subsample 1,1) and
// depth 10. Returns 0 on success, -1 on invalid dimensions.
LIBYUV_API
int P010ToI010(const uint16_t* src_y,
               int src_stride_y,
               const uint16_t* src_uv,
               int src_stride_uv,
               uint16_t* dst_y,
               int dst_stride_y,
               uint16_t* dst_u,
               int dst_stride_u,
               uint16_t* dst_v,
               int dst_stride_v,
               int width,
               int height) {
  return PxxxToIxxx(src_y, src_stride_y, src_uv, src_stride_uv, dst_y,
                    dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v,
                    width, height, 1, 1, 10);
}
// Convert P012 (12-bit biplanar Y + interleaved UV, 420 subsampling) to
// I012 (12-bit triplanar, LSB-aligned samples).
// Identical to P010ToI010 except for the 12-bit depth argument.
LIBYUV_API
int P012ToI012(const uint16_t* src_y,
               int src_stride_y,
               const uint16_t* src_uv,
               int src_stride_uv,
               uint16_t* dst_y,
               int dst_stride_y,
               uint16_t* dst_u,
               int dst_stride_u,
               uint16_t* dst_v,
               int dst_stride_v,
               int width,
               int height) {
  return PxxxToIxxx(src_y, src_stride_y, src_uv, src_stride_uv, dst_y,
                    dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v,
                    width, height, 1, 1, 12);
}
LIBYUV_API
int P010ToP410(const uint16_t* src_y,
int src_stride_y,
@ -1593,6 +1793,14 @@ int ARGBToI420(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
ARGBToYRow = ARGBToYRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_LSX;
}
}
#endif
#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToYRow = ARGBToYRow_Any_LASX;
@ -1707,13 +1915,21 @@ int BGRAToI420(const uint8_t* src_bgra,
}
}
#endif
#if defined(HAS_BGRATOYROW_LASX) && defined(HAS_BGRATOUVROW_LASX)
#if defined(HAS_BGRATOYROW_LSX) && defined(HAS_BGRATOUVROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
BGRAToYRow = BGRAToYRow_Any_LSX;
BGRAToUVRow = BGRAToUVRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
BGRAToYRow = BGRAToYRow_LSX;
BGRAToUVRow = BGRAToUVRow_LSX;
}
}
#endif
#if defined(HAS_BGRATOYROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
BGRAToYRow = BGRAToYRow_Any_LASX;
BGRAToUVRow = BGRAToUVRow_Any_LASX;
if (IS_ALIGNED(width, 32)) {
BGRAToYRow = BGRAToYRow_LASX;
BGRAToUVRow = BGRAToUVRow_LASX;
}
}
#endif
@ -1829,6 +2045,14 @@ int ABGRToI420(const uint8_t* src_abgr,
}
}
#endif
#if defined(HAS_ABGRTOYROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ABGRToYRow = ABGRToYRow_Any_LASX;
if (IS_ALIGNED(width, 32)) {
ABGRToYRow = ABGRToYRow_LASX;
}
}
#endif
for (y = 0; y < height - 1; y += 2) {
ABGRToUVRow(src_abgr, src_stride_abgr, dst_u, dst_v, width);
@ -1925,6 +2149,14 @@ int RGBAToI420(const uint8_t* src_rgba,
}
}
#endif
#if defined(HAS_RGBATOYROW_LASX)
  // BUG FIX: the LASX row function was gated on the NEON CPU flag
  // (kCpuHasNEON), which can never be set on a LoongArch target, so the
  // LASX path was dead code — and would be wrong if both were ever set.
  // Gate it on kCpuHasLASX like every other LASX dispatch in this file.
  if (TestCpuFlag(kCpuHasLASX)) {
    RGBAToYRow = RGBAToYRow_Any_LASX;
    if (IS_ALIGNED(width, 32)) {
      RGBAToYRow = RGBAToYRow_LASX;
    }
  }
#endif
for (y = 0; y < height - 1; y += 2) {
RGBAToUVRow(src_rgba, src_stride_rgba, dst_u, dst_v, width);
@ -2183,6 +2415,22 @@ int RGB24ToJ420(const uint8_t* src_rgb24,
}
}
#endif
#if defined(HAS_RGB24TOYJROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
RGB24ToYJRow = RGB24ToYJRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
RGB24ToYJRow = RGB24ToYJRow_LSX;
}
}
#endif
#if defined(HAS_RGB24TOYJROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
RGB24ToYJRow = RGB24ToYJRow_Any_LASX;
if (IS_ALIGNED(width, 32)) {
RGB24ToYJRow = RGB24ToYJRow_LASX;
}
}
#endif
// Other platforms do intermediate conversion from RGB24 to ARGB.
#else // HAS_RGB24TOYJROW
@ -2511,6 +2759,22 @@ int RAWToJ420(const uint8_t* src_raw,
}
}
#endif
#if defined(HAS_RAWTOYJROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
RAWToYJRow = RAWToYJRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
RAWToYJRow = RAWToYJRow_LSX;
}
}
#endif
#if defined(HAS_RAWTOYJROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
RAWToYJRow = RAWToYJRow_Any_LASX;
if (IS_ALIGNED(width, 32)) {
RAWToYJRow = RAWToYJRow_LASX;
}
}
#endif
// Other platforms do intermediate conversion from RAW to ARGB.
#else // HAS_RAWTOYJROW
@ -3098,6 +3362,14 @@ int ARGB4444ToI420(const uint8_t* src_argb4444,
}
}
#endif
#if defined(HAS_ARGBTOYROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
ARGBToYRow = ARGBToYRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_LSX;
}
}
#endif
#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToYRow = ARGBToYRow_Any_LASX;
@ -3210,6 +3482,22 @@ int RGB24ToJ400(const uint8_t* src_rgb24,
}
}
#endif
#if defined(HAS_RGB24TOYJROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
RGB24ToYJRow = RGB24ToYJRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
RGB24ToYJRow = RGB24ToYJRow_LSX;
}
}
#endif
#if defined(HAS_RGB24TOYJROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
RGB24ToYJRow = RGB24ToYJRow_Any_LASX;
if (IS_ALIGNED(width, 32)) {
RGB24ToYJRow = RGB24ToYJRow_LASX;
}
}
#endif
for (y = 0; y < height; ++y) {
RGB24ToYJRow(src_rgb24, dst_yj, width);
@ -3278,6 +3566,22 @@ int RAWToJ400(const uint8_t* src_raw,
}
}
#endif
#if defined(HAS_RAWTOYJROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
RAWToYJRow = RAWToYJRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
RAWToYJRow = RAWToYJRow_LSX;
}
}
#endif
#if defined(HAS_RAWTOYJROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
RAWToYJRow = RAWToYJRow_Any_LASX;
if (IS_ALIGNED(width, 32)) {
RAWToYJRow = RAWToYJRow_LASX;
}
}
#endif
for (y = 0; y < height; ++y) {
RAWToYJRow(src_raw, dst_yj, width);

View File

@ -116,6 +116,14 @@ int ARGBToI444(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
ARGBToYRow = ARGBToYRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_LSX;
}
}
#endif
#if defined(HAS_ARGBTOYROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToYRow = ARGBToYRow_Any_LASX;
@ -230,7 +238,14 @@ int ARGBToI422(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
ARGBToYRow = ARGBToYRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_LSX;
}
}
#endif
#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToYRow = ARGBToYRow_Any_LASX;
@ -340,6 +355,14 @@ int ARGBToNV12(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
ARGBToYRow = ARGBToYRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_LSX;
}
}
#endif
#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToYRow = ARGBToYRow_Any_LASX;
@ -502,6 +525,14 @@ int ARGBToNV21(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
ARGBToYRow = ARGBToYRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_LSX;
}
}
#endif
#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToYRow = ARGBToYRow_Any_LASX;
@ -663,6 +694,22 @@ int ABGRToNV12(const uint8_t* src_abgr,
}
}
#endif
#if defined(HAS_ABGRTOYROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
ABGRToYRow = ABGRToYRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
ABGRToYRow = ABGRToYRow_LSX;
}
}
#endif
#if defined(HAS_ABGRTOYROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ABGRToYRow = ABGRToYRow_Any_LASX;
if (IS_ALIGNED(width, 32)) {
ABGRToYRow = ABGRToYRow_LASX;
}
}
#endif
#if defined(HAS_MERGEUVROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
MergeUVRow_ = MergeUVRow_Any_SSE2;
@ -815,6 +862,22 @@ int ABGRToNV21(const uint8_t* src_abgr,
}
}
#endif
#if defined(HAS_ABGRTOYROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
ABGRToYRow = ABGRToYRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
ABGRToYRow = ABGRToYRow_LSX;
}
}
#endif
#if defined(HAS_ABGRTOYROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ABGRToYRow = ABGRToYRow_Any_LASX;
if (IS_ALIGNED(width, 32)) {
ABGRToYRow = ABGRToYRow_LASX;
}
}
#endif
#if defined(HAS_MERGEUVROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
MergeUVRow_ = MergeUVRow_Any_SSE2;
@ -972,6 +1035,14 @@ int ARGBToYUY2(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
ARGBToYRow = ARGBToYRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_LSX;
}
}
#endif
#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToYRow = ARGBToYRow_Any_LASX;
@ -1135,6 +1206,14 @@ int ARGBToUYVY(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
ARGBToYRow = ARGBToYRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_LSX;
}
}
#endif
#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToYRow = ARGBToYRow_Any_LASX;
@ -1262,6 +1341,14 @@ int ARGBToI400(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
ARGBToYRow = ARGBToYRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_LSX;
}
}
#endif
#if defined(HAS_ARGBTOYROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToYRow = ARGBToYRow_Any_LASX;
@ -1939,6 +2026,16 @@ int ARGBToJ420(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYJROW_LSX) && defined(HAS_ARGBTOUVJROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
ARGBToYJRow = ARGBToYJRow_Any_LSX;
ARGBToUVJRow = ARGBToUVJRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
ARGBToYJRow = ARGBToYJRow_LSX;
ARGBToUVJRow = ARGBToUVJRow_LSX;
}
}
#endif
#if defined(HAS_ARGBTOYJROW_LASX) && defined(HAS_ARGBTOUVJROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToYJRow = ARGBToYJRow_Any_LASX;
@ -2215,6 +2312,22 @@ int RGBAToJ400(const uint8_t* src_rgba,
}
}
#endif
#if defined(HAS_RGBATOYJROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
RGBAToYJRow = RGBAToYJRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
RGBAToYJRow = RGBAToYJRow_LSX;
}
}
#endif
#if defined(HAS_RGBATOYJROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
RGBAToYJRow = RGBAToYJRow_Any_LASX;
if (IS_ALIGNED(width, 32)) {
RGBAToYJRow = RGBAToYJRow_LASX;
}
}
#endif
for (y = 0; y < height; ++y) {
RGBAToYJRow(src_rgba, dst_yj, width);
@ -2309,13 +2422,19 @@ int ABGRToJ420(const uint8_t* src_abgr,
}
}
#endif
#if defined(HAS_ABGRTOYJROW_LSX) && defined(HAS_ABGRTOUVJROW_LSX)
#if defined(HAS_ABGRTOYJROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
ABGRToYJRow = ABGRToYJRow_Any_LSX;
ABGRToUVJRow = ABGRToUVJRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
ABGRToYJRow = ABGRToYJRow_LSX;
ABGRToUVJRow = ABGRToUVJRow_LSX;
}
}
#endif
#if defined(HAS_ABGRTOYJROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ABGRToYJRow = ABGRToYJRow_Any_LASX;
if (IS_ALIGNED(width, 32)) {
ABGRToYJRow = ABGRToYJRow_LASX;
}
}
#endif
@ -2430,23 +2549,19 @@ int ABGRToJ422(const uint8_t* src_abgr,
}
}
#endif
#if defined(HAS_ABGRTOYJROW_LSX) && defined(HAS_ABGRTOUVJROW_LSX)
#if defined(HAS_ABGRTOYJROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
ABGRToYJRow = ABGRToYJRow_Any_LSX;
ABGRToUVJRow = ABGRToUVJRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
ABGRToYJRow = ABGRToYJRow_LSX;
ABGRToUVJRow = ABGRToUVJRow_LSX;
}
}
#endif
#if defined(HAS_ABGRTOYJROW_LASX) && defined(HAS_ABGRTOUVJROW_LASX)
#if defined(HAS_ABGRTOYJROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ABGRToYJRow = ABGRToYJRow_Any_LASX;
ABGRToUVJRow = ABGRToUVJRow_Any_LASX;
if (IS_ALIGNED(width, 32)) {
ABGRToYJRow = ABGRToYJRow_LASX;
ABGRToUVJRow = ABGRToUVJRow_LASX;
}
}
#endif
@ -2519,6 +2634,22 @@ int ABGRToJ400(const uint8_t* src_abgr,
}
}
#endif
#if defined(HAS_ABGRTOYJROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
ABGRToYJRow = ABGRToYJRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
ABGRToYJRow = ABGRToYJRow_LSX;
}
}
#endif
#if defined(HAS_ABGRTOYJROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ABGRToYJRow = ABGRToYJRow_Any_LASX;
if (IS_ALIGNED(width, 32)) {
ABGRToYJRow = ABGRToYJRow_LASX;
}
}
#endif
for (y = 0; y < height; ++y) {
ABGRToYJRow(src_abgr, dst_yj, width);
@ -2713,6 +2844,22 @@ int RAWToJNV21(const uint8_t* src_raw,
}
}
#endif
#if defined(HAS_RAWTOYJROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
RAWToYJRow = RAWToYJRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
RAWToYJRow = RAWToYJRow_LSX;
}
}
#endif
#if defined(HAS_RAWTOYJROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
RAWToYJRow = RAWToYJRow_Any_LASX;
if (IS_ALIGNED(width, 32)) {
RAWToYJRow = RAWToYJRow_LASX;
}
}
#endif
// Other platforms do intermediate conversion from RAW to ARGB.
#else // HAS_RAWTOYJROW

View File

@ -79,9 +79,7 @@ MJpegDecoder::MJpegDecoder()
decompress_struct_->err = jpeg_std_error(&error_mgr_->base);
// Override standard exit()-based error handler.
error_mgr_->base.error_exit = &ErrorHandler;
#ifndef DEBUG_MJPEG
error_mgr_->base.output_message = &OutputHandler;
#endif
#endif
decompress_struct_->client_data = NULL;
source_mgr_->init_source = &init_source;
@ -111,7 +109,7 @@ LIBYUV_BOOL MJpegDecoder::LoadFrame(const uint8_t* src, size_t src_len) {
}
buf_.data = src;
buf_.len = static_cast<int>(src_len);
buf_.len = (int)src_len;
buf_vec_.pos = 0;
decompress_struct_->client_data = &buf_vec_;
#ifdef HAVE_SETJMP
@ -430,7 +428,7 @@ boolean fill_input_buffer(j_decompress_ptr cinfo) {
void skip_input_data(j_decompress_ptr cinfo, long num_bytes) { // NOLINT
jpeg_source_mgr* src = cinfo->src;
size_t bytes = static_cast<size_t>(num_bytes);
size_t bytes = (size_t)num_bytes;
if (bytes > src->bytes_in_buffer) {
src->next_input_byte = nullptr;
src->bytes_in_buffer = 0;
@ -465,12 +463,11 @@ void ErrorHandler(j_common_ptr cinfo) {
longjmp(mgr->setjmp_buffer, 1);
}
#ifndef DEBUG_MJPEG
// Suppress fprintf warnings.
void OutputHandler(j_common_ptr cinfo) {
(void)cinfo;
}
#endif
#endif // HAVE_SETJMP
void MJpegDecoder::AllocOutputBuffers(int num_outbufs) {

View File

@ -333,6 +333,45 @@ int I210Copy(const uint16_t* src_y,
return 0;
}
// Copy I410.
// Copy an I410 frame (10-bit 444: Y, U and V planes are all
// width x height). The Y plane copy is optional: passing dst_y == NULL
// skips it, but a NULL src_y with a non-NULL dst_y is an error.
// Negative height flips the image vertically. Returns 0 on success,
// -1 on bad arguments.
LIBYUV_API
int I410Copy(const uint16_t* src_y,
             int src_stride_y,
             const uint16_t* src_u,
             int src_stride_u,
             const uint16_t* src_v,
             int src_stride_v,
             uint16_t* dst_y,
             int dst_stride_y,
             uint16_t* dst_u,
             int dst_stride_u,
             uint16_t* dst_v,
             int dst_stride_v,
             int width,
             int height) {
  // Chroma pointers are mandatory; src_y is only needed when dst_y is set.
  if (!src_u || !src_v || !dst_u || !dst_v || (!src_y && dst_y) ||
      width <= 0 || height == 0) {
    return -1;
  }
  if (height < 0) {
    // Negative height: start at the bottom row and read upward.
    height = -height;
    src_y += (height - 1) * src_stride_y;
    src_u += (height - 1) * src_stride_u;
    src_v += (height - 1) * src_stride_v;
    src_stride_y = -src_stride_y;
    src_stride_u = -src_stride_u;
    src_stride_v = -src_stride_v;
  }
  if (dst_y) {
    CopyPlane_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
  }
  // 444: chroma planes are full resolution.
  CopyPlane_16(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
  CopyPlane_16(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
  return 0;
}
// Copy I400.
LIBYUV_API
int I400ToI400(const uint8_t* src_y,
@ -3196,6 +3235,7 @@ int RAWToRGB24(const uint8_t* src_raw,
return 0;
}
// TODO(fbarchard): Consider uint8_t value
LIBYUV_API
void SetPlane(uint8_t* dst_y,
int dst_stride_y,
@ -3203,7 +3243,7 @@ void SetPlane(uint8_t* dst_y,
int height,
uint32_t value) {
int y;
void (*SetRow)(uint8_t * dst, uint8_t value, int width) = SetRow_C;
void (*SetRow)(uint8_t* dst, uint8_t value, int width) = SetRow_C;
if (width <= 0 || height == 0) {
return;
@ -3256,7 +3296,7 @@ void SetPlane(uint8_t* dst_y,
// Set plane
for (y = 0; y < height; ++y) {
SetRow(dst_y, value, width);
SetRow(dst_y, (uint8_t)value, width);
dst_y += dst_stride_y;
}
}
@ -3304,7 +3344,7 @@ int ARGBRect(uint8_t* dst_argb,
int height,
uint32_t value) {
int y;
void (*ARGBSetRow)(uint8_t * dst_argb, uint32_t value, int width) =
void (*ARGBSetRow)(uint8_t* dst_argb, uint32_t value, int width) =
ARGBSetRow_C;
if (!dst_argb || width <= 0 || height == 0 || dst_x < 0 || dst_y < 0) {
return -1;
@ -3609,7 +3649,7 @@ int ARGBSepia(uint8_t* dst_argb,
int width,
int height) {
int y;
void (*ARGBSepiaRow)(uint8_t * dst_argb, int width) = ARGBSepiaRow_C;
void (*ARGBSepiaRow)(uint8_t* dst_argb, int width) = ARGBSepiaRow_C;
uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
return -1;
@ -3752,7 +3792,7 @@ int ARGBColorTable(uint8_t* dst_argb,
int width,
int height) {
int y;
void (*ARGBColorTableRow)(uint8_t * dst_argb, const uint8_t* table_argb,
void (*ARGBColorTableRow)(uint8_t* dst_argb, const uint8_t* table_argb,
int width) = ARGBColorTableRow_C;
uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
if (!dst_argb || !table_argb || width <= 0 || height <= 0 || dst_x < 0 ||
@ -3788,7 +3828,7 @@ int RGBColorTable(uint8_t* dst_argb,
int width,
int height) {
int y;
void (*RGBColorTableRow)(uint8_t * dst_argb, const uint8_t* table_argb,
void (*RGBColorTableRow)(uint8_t* dst_argb, const uint8_t* table_argb,
int width) = RGBColorTableRow_C;
uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
if (!dst_argb || !table_argb || width <= 0 || height <= 0 || dst_x < 0 ||
@ -3833,7 +3873,7 @@ int ARGBQuantize(uint8_t* dst_argb,
int width,
int height) {
int y;
void (*ARGBQuantizeRow)(uint8_t * dst_argb, int scale, int interval_size,
void (*ARGBQuantizeRow)(uint8_t* dst_argb, int scale, int interval_size,
int interval_offset, int width) = ARGBQuantizeRow_C;
uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0 ||
@ -4086,7 +4126,7 @@ int InterpolatePlane(const uint8_t* src0,
int height,
int interpolation) {
int y;
void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
void (*InterpolateRow)(uint8_t* dst_ptr, const uint8_t* src_ptr,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_C;
if (!src0 || !src1 || !dst || width <= 0 || height == 0) {
@ -4166,7 +4206,7 @@ int InterpolatePlane_16(const uint16_t* src0,
int height,
int interpolation) {
int y;
void (*InterpolateRow_16)(uint16_t * dst_ptr, const uint16_t* src_ptr,
void (*InterpolateRow_16)(uint16_t* dst_ptr, const uint16_t* src_ptr,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_16_C;
if (!src0 || !src1 || !dst || width <= 0 || height == 0) {
@ -5281,7 +5321,7 @@ int UYVYToNV12(const uint8_t* src_uyvy,
int halfwidth = (width + 1) >> 1;
void (*SplitUVRow)(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v,
int width) = SplitUVRow_C;
void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
void (*InterpolateRow)(uint8_t* dst_ptr, const uint8_t* src_ptr,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_C;

View File

@ -138,7 +138,7 @@ void RotatePlane180(const uint8_t* src,
int dst_stride,
int width,
int height) {
// Swap first and last row and mirror the content. Uses a temporary row.
// Swap top and bottom row and mirror the content. Uses a temporary row.
align_buffer_64(row, width);
const uint8_t* src_bot = src + src_stride * (height - 1);
uint8_t* dst_bot = dst + dst_stride * (height - 1);
@ -209,9 +209,9 @@ void RotatePlane180(const uint8_t* src,
// Odd height will harmlessly mirror the middle row twice.
for (y = 0; y < half_height; ++y) {
CopyRow(src, row, width); // Copy first row into buffer
MirrorRow(src_bot, dst, width); // Mirror last row into first row
MirrorRow(row, dst_bot, width); // Mirror buffer into last row
CopyRow(src, row, width); // Copy top row into buffer
MirrorRow(src_bot, dst, width); // Mirror bottom row into top row
MirrorRow(row, dst_bot, width); // Mirror buffer into bottom row
src += src_stride;
dst += dst_stride;
src_bot -= src_stride;
@ -476,6 +476,120 @@ int RotatePlane(const uint8_t* src,
return -1;
}
// Transpose a 16-bit plane: dst(x, y) = src(y, x).
// The source is consumed in bands of 8 rows, each of which becomes 8
// destination columns; any leftover rows (< 8) go through the
// general-size fallback.
LIBYUV_API
void TransposePlane_16(const uint16_t* src,
                       int src_stride,
                       uint16_t* dst,
                       int dst_stride,
                       int width,
                       int height) {
  int rows_left;
  for (rows_left = height; rows_left >= 8; rows_left -= 8) {
    TransposeWx8_16_C(src, src_stride, dst, dst_stride, width);
    src += 8 * src_stride;  // Advance 8 source rows.
    dst += 8;               // Advance 8 destination columns.
  }
  if (rows_left > 0) {
    TransposeWxH_16_C(src, src_stride, dst, dst_stride, width, rows_left);
  }
}
static void RotatePlane90_16(const uint16_t* src,
int src_stride,
uint16_t* dst,
int dst_stride,
int width,
int height) {
// Rotate by 90 is a transpose with the source read
// from bottom to top. So set the source pointer to the end
// of the buffer and flip the sign of the source stride.
src += src_stride * (height - 1);
src_stride = -src_stride;
TransposePlane_16(src, src_stride, dst, dst_stride, width, height);
}
static void RotatePlane270_16(const uint16_t* src,
int src_stride,
uint16_t* dst,
int dst_stride,
int width,
int height) {
// Rotate by 270 is a transpose with the destination written
// from bottom to top. So set the destination pointer to the end
// of the buffer and flip the sign of the destination stride.
dst += dst_stride * (width - 1);
dst_stride = -dst_stride;
TransposePlane_16(src, src_stride, dst, dst_stride, width, height);
}
// Rotate a 16-bit plane by 180 degrees: vertical flip combined with a
// horizontal mirror. Rows are swapped from the outside in, using one
// temporary row as scratch so the operation works in place per row pair.
static void RotatePlane180_16(const uint16_t* src,
                              int src_stride,
                              uint16_t* dst,
                              int dst_stride,
                              int width,
                              int height) {
  const uint16_t* src_bot = src + src_stride * (height - 1);
  uint16_t* dst_bot = dst + dst_stride * (height - 1);
  // Odd heights mirror the middle row twice, which is harmless.
  const int half_height = (height + 1) >> 1;
  int i;
  align_buffer_64_16(row, width);
  for (i = 0; i < half_height; ++i) {
    CopyRow_16_C(src, row, width);        // Stash the top row.
    MirrorRow_16_C(src_bot, dst, width);  // Bottom row -> mirrored top.
    MirrorRow_16_C(row, dst_bot, width);  // Stashed top -> mirrored bottom.
    src += src_stride;
    dst += dst_stride;
    src_bot -= src_stride;
    dst_bot -= dst_stride;
  }
  free_aligned_buffer_64_16(row);
}
// Rotate a single 16-bit plane by 0, 90, 180 or 270 degrees.
// Negative height flips the source vertically before rotating.
// Returns 0 on success, -1 on bad arguments or unsupported mode.
LIBYUV_API
int RotatePlane_16(const uint16_t* src,
                   int src_stride,
                   uint16_t* dst,
                   int dst_stride,
                   int width,
                   int height,
                   enum RotationMode mode) {
  if (!src || !dst || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image: start at the bottom row and
  // read upward.
  if (height < 0) {
    height = -height;
    src += (height - 1) * src_stride;
    src_stride = -src_stride;
  }
  if (mode == kRotate0) {
    // No rotation: straight plane copy.
    CopyPlane_16(src, src_stride, dst, dst_stride, width, height);
    return 0;
  }
  if (mode == kRotate90) {
    RotatePlane90_16(src, src_stride, dst, dst_stride, width, height);
    return 0;
  }
  if (mode == kRotate270) {
    RotatePlane270_16(src, src_stride, dst, dst_stride, width, height);
    return 0;
  }
  if (mode == kRotate180) {
    RotatePlane180_16(src, src_stride, dst, dst_stride, width, height);
    return 0;
  }
  return -1;  // Unsupported rotation mode.
}
LIBYUV_API
int I420Rotate(const uint8_t* src_y,
int src_stride_y,
@ -544,6 +658,8 @@ int I420Rotate(const uint8_t* src_y,
return -1;
}
// I422 has half width x full height UV planes, so rotate by 90 and 270
// require scaling to maintain 422 subsampling.
LIBYUV_API
int I422Rotate(const uint8_t* src_y,
int src_stride_y,
@ -579,31 +695,42 @@ int I422Rotate(const uint8_t* src_y,
switch (mode) {
case kRotate0:
// copy frame
// Copy frame
CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, height);
CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, height);
return 0;
// Note on temporary Y plane for UV.
// Rotation of UV first fits within the Y destination plane rows.
// Y plane is width x height
// Y plane rotated is height x width
// UV plane is (width / 2) x height
// UV plane rotated is height x (width / 2)
// UV plane rotated+scaled is (height / 2) x width.
// UV plane rotated is a temporary that fits within the Y plane rotated.
case kRotate90:
// We need to rotate and rescale, we use plane Y as temporal storage.
RotatePlane90(src_u, src_stride_u, dst_y, height, halfwidth, height);
ScalePlane(dst_y, height, height, halfwidth, dst_u, halfheight,
RotatePlane90(src_u, src_stride_u, dst_y, dst_stride_y, halfwidth,
height);
ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_u, dst_stride_u,
halfheight, width, kFilterBilinear);
RotatePlane90(src_v, src_stride_v, dst_y, height, halfwidth, height);
ScalePlane(dst_y, height, height, halfwidth, dst_v, halfheight,
RotatePlane90(src_v, src_stride_v, dst_y, dst_stride_y, halfwidth,
height);
ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_v, dst_stride_v,
halfheight, width, kFilterLinear);
RotatePlane90(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
return 0;
case kRotate270:
// We need to rotate and rescale, we use plane Y as temporal storage.
RotatePlane270(src_u, src_stride_u, dst_y, height, halfwidth, height);
ScalePlane(dst_y, height, height, halfwidth, dst_u, halfheight,
RotatePlane270(src_u, src_stride_u, dst_y, dst_stride_y, halfwidth,
height);
ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_u, dst_stride_u,
halfheight, width, kFilterBilinear);
RotatePlane270(src_v, src_stride_v, dst_y, height, halfwidth, height);
ScalePlane(dst_y, height, height, halfwidth, dst_v, halfheight,
RotatePlane270(src_v, src_stride_v, dst_y, dst_stride_y, halfwidth,
height);
ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_v, dst_stride_v,
halfheight, width, kFilterLinear);
RotatePlane270(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
return 0;
case kRotate180:
RotatePlane180(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
@ -828,6 +955,228 @@ int Android420ToI420Rotate(const uint8_t* src_y,
return -1;
}
// Rotate an I010 frame (10-bit 420: chroma planes are half width x half
// height). All three planes rotate independently since 420 subsampling is
// preserved under every rotation. Negative height flips the source
// vertically. Returns 0 on success, -1 on bad arguments or mode.
LIBYUV_API
int I010Rotate(const uint16_t* src_y,
               int src_stride_y,
               const uint16_t* src_u,
               int src_stride_u,
               const uint16_t* src_v,
               int src_stride_v,
               uint16_t* dst_y,
               int dst_stride_y,
               uint16_t* dst_u,
               int dst_stride_u,
               uint16_t* dst_v,
               int dst_stride_v,
               int width,
               int height,
               enum RotationMode mode) {
  int halfwidth = (width + 1) >> 1;    // 420 chroma width.
  int halfheight = (height + 1) >> 1;  // 420 chroma height.
  if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || !dst_y ||
      !dst_u || !dst_v || dst_stride_y < 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_y += (height - 1) * src_stride_y;
    src_u += (height - 1) * src_stride_u;
    src_v += (height - 1) * src_stride_v;
    src_stride_y = -src_stride_y;
    src_stride_u = -src_stride_u;
    src_stride_v = -src_stride_v;
  }
  if (mode == kRotate0) {
    // No rotation: plain frame copy.
    return I010Copy(src_y, src_stride_y, src_u, src_stride_u, src_v,
                    src_stride_v, dst_y, dst_stride_y, dst_u, dst_stride_u,
                    dst_v, dst_stride_v, width, height);
  }
  if (mode == kRotate90) {
    RotatePlane90_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
    RotatePlane90_16(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth,
                     halfheight);
    RotatePlane90_16(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth,
                     halfheight);
    return 0;
  }
  if (mode == kRotate270) {
    RotatePlane270_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
    RotatePlane270_16(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth,
                      halfheight);
    RotatePlane270_16(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth,
                      halfheight);
    return 0;
  }
  if (mode == kRotate180) {
    RotatePlane180_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
    RotatePlane180_16(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth,
                      halfheight);
    RotatePlane180_16(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth,
                      halfheight);
    return 0;
  }
  return -1;  // Unsupported rotation mode.
}
// I210 has half width x full height UV planes, so rotate by 90 and 270
// require scaling to maintain 422 subsampling.
// Returns 0 on success, -1 on bad arguments or unsupported mode.
// Statement order in the 90/270 cases is load-bearing: dst_y is used as
// scratch for the rotated chroma, so Y must be rotated last.
LIBYUV_API
int I210Rotate(const uint16_t* src_y,
               int src_stride_y,
               const uint16_t* src_u,
               int src_stride_u,
               const uint16_t* src_v,
               int src_stride_v,
               uint16_t* dst_y,
               int dst_stride_y,
               uint16_t* dst_u,
               int dst_stride_u,
               uint16_t* dst_v,
               int dst_stride_v,
               int width,
               int height,
               enum RotationMode mode) {
  int halfwidth = (width + 1) >> 1;    // 422 chroma width.
  int halfheight = (height + 1) >> 1;  // Chroma width after 90/270 rotation.
  if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || !dst_y ||
      !dst_u || !dst_v) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_y = src_y + (height - 1) * src_stride_y;
    src_u = src_u + (height - 1) * src_stride_u;
    src_v = src_v + (height - 1) * src_stride_v;
    src_stride_y = -src_stride_y;
    src_stride_u = -src_stride_u;
    src_stride_v = -src_stride_v;
  }
  switch (mode) {
    case kRotate0:
      // Copy frame
      CopyPlane_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
      CopyPlane_16(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, height);
      CopyPlane_16(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, height);
      return 0;
      // Note on temporary Y plane for UV.
      // Rotation of UV first fits within the Y destination plane rows.
      // Y plane is width x height
      // Y plane rotated is height x width
      // UV plane is (width / 2) x height
      // UV plane rotated is height x (width / 2)
      // UV plane rotated+scaled is (height / 2) x width.
      // UV plane rotated is a temporary that fits within the Y plane rotated.
    case kRotate90:
      // Rotate U into the Y destination (scratch), then rescale
      // height x halfwidth -> halfheight x width to restore 422.
      RotatePlane90_16(src_u, src_stride_u, dst_y, dst_stride_y, halfwidth,
                       height);
      ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_u, dst_stride_u,
                    halfheight, width, kFilterBilinear);
      // NOTE(review): U is scaled with kFilterBilinear but V with
      // kFilterLinear — looks like an unintended asymmetry; confirm.
      RotatePlane90_16(src_v, src_stride_v, dst_y, dst_stride_y, halfwidth,
                       height);
      ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_v, dst_stride_v,
                    halfheight, width, kFilterLinear);
      // Y last: dst_y was used as chroma scratch above.
      RotatePlane90_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
      return 0;
    case kRotate270:
      // Same scratch scheme as kRotate90, with 270-degree rotation.
      RotatePlane270_16(src_u, src_stride_u, dst_y, dst_stride_y, halfwidth,
                        height);
      ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_u, dst_stride_u,
                    halfheight, width, kFilterBilinear);
      RotatePlane270_16(src_v, src_stride_v, dst_y, dst_stride_y, halfwidth,
                        height);
      ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_v, dst_stride_v,
                    halfheight, width, kFilterLinear);
      RotatePlane270_16(src_y, src_stride_y, dst_y, dst_stride_y, width,
                        height);
      return 0;
    case kRotate180:
      // 180 preserves plane dimensions; no rescale needed.
      RotatePlane180_16(src_y, src_stride_y, dst_y, dst_stride_y, width,
                        height);
      RotatePlane180_16(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth,
                        height);
      RotatePlane180_16(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth,
                        height);
      return 0;
    default:
      break;
  }
  return -1;
}
// Rotate an I410 frame (10-bit 444: all planes full width x height).
// Because chroma is full resolution, every plane rotates identically and
// no rescaling is needed. Negative height flips the source vertically.
// Returns 0 on success, -1 on bad arguments or unsupported mode.
LIBYUV_API
int I410Rotate(const uint16_t* src_y,
               int src_stride_y,
               const uint16_t* src_u,
               int src_stride_u,
               const uint16_t* src_v,
               int src_stride_v,
               uint16_t* dst_y,
               int dst_stride_y,
               uint16_t* dst_u,
               int dst_stride_u,
               uint16_t* dst_v,
               int dst_stride_v,
               int width,
               int height,
               enum RotationMode mode) {
  if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || !dst_y ||
      !dst_u || !dst_v || dst_stride_y < 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_y += (height - 1) * src_stride_y;
    src_u += (height - 1) * src_stride_u;
    src_v += (height - 1) * src_stride_v;
    src_stride_y = -src_stride_y;
    src_stride_u = -src_stride_u;
    src_stride_v = -src_stride_v;
  }
  if (mode == kRotate0) {
    // No rotation: plain plane copies.
    CopyPlane_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
    CopyPlane_16(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
    CopyPlane_16(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
    return 0;
  }
  if (mode == kRotate90) {
    RotatePlane90_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
    RotatePlane90_16(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
    RotatePlane90_16(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
    return 0;
  }
  if (mode == kRotate270) {
    RotatePlane270_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
    RotatePlane270_16(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
    RotatePlane270_16(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
    return 0;
  }
  if (mode == kRotate180) {
    RotatePlane180_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
    RotatePlane180_16(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
    RotatePlane180_16(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
    return 0;
  }
  return -1;  // Unsupported rotation mode.
}
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv

View File

@ -94,8 +94,74 @@ void TransposeUVWxH_C(const uint8_t* src,
for (i = 0; i < width * 2; i += 2) {
int j;
for (j = 0; j < height; ++j) {
dst_a[j + ((i >> 1) * dst_stride_a)] = src[i + (j * src_stride)];
dst_b[j + ((i >> 1) * dst_stride_b)] = src[i + (j * src_stride) + 1];
dst_a[((i >> 1) * dst_stride_a) + j] = src[i + (j * src_stride)];
dst_b[((i >> 1) * dst_stride_b) + j] = src[i + (j * src_stride) + 1];
}
}
}
// Transpose an 8-row band of 16-bit samples: for each of `width` source
// columns, write its 8 samples as one destination row of 8 values.
// dst(col, row) = src(row, col) for row in [0, 8).
void TransposeWx8_16_C(const uint16_t* src,
                       int src_stride,
                       uint16_t* dst,
                       int dst_stride,
                       int width) {
  int col;
  for (col = 0; col < width; ++col) {
    int row;
    for (row = 0; row < 8; ++row) {
      dst[row] = src[row * src_stride];
    }
    ++src;              // Next source column.
    dst += dst_stride;  // Next destination row.
  }
}
// Transpose an 8-row by `width`-column block of interleaved 16-bit value
// pairs, splitting them: the first value of each pair goes to dst_a, the
// second to dst_b.
void TransposeUVWx8_16_C(const uint16_t* src,
                         int src_stride,
                         uint16_t* dst_a,
                         int dst_stride_a,
                         uint16_t* dst_b,
                         int dst_stride_b,
                         int width) {
  int x;
  for (x = 0; x < width; ++x) {
    int row;
    // De-interleave one source column pair across the 8 rows.
    for (row = 0; row < 8; ++row) {
      dst_a[row] = src[row * src_stride + 0];
      dst_b[row] = src[row * src_stride + 1];
    }
    src += 2;              // Advance one interleaved pair.
    dst_a += dst_stride_a; // Next row of the first plane.
    dst_b += dst_stride_b; // Next row of the second plane.
  }
}
// Transpose an arbitrary width x height block of 16-bit pixels:
// dst[x][y] = src[y][x] for every column x and row y.
void TransposeWxH_16_C(const uint16_t* src,
                       int src_stride,
                       uint16_t* dst,
                       int dst_stride,
                       int width,
                       int height) {
  int x;
  for (x = 0; x < width; ++x) {
    const uint16_t* s = src + x;          // Walk down source column x.
    uint16_t* d = dst + x * dst_stride;   // Fill destination row x.
    int y;
    for (y = 0; y < height; ++y) {
      d[y] = *s;
      s += src_stride;
    }
  }
}

View File

@ -201,13 +201,13 @@ void TransposeWx8_NEON(const uint8_t* src,
"4: \n"
: "=&r"(src_temp), // %0
"+r"(src), // %1
"+r"(dst), // %2
"+r"(width) // %3
: "r"(&kVTbl4x4Transpose), // %4
"r"(static_cast<ptrdiff_t>(src_stride)), // %5
"r"(static_cast<ptrdiff_t>(dst_stride)) // %6
: "=&r"(src_temp), // %0
"+r"(src), // %1
"+r"(dst), // %2
"+r"(width) // %3
: "r"(&kVTbl4x4Transpose), // %4
"r"((ptrdiff_t)src_stride), // %5
"r"((ptrdiff_t)dst_stride) // %6
: "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16",
"v17", "v18", "v19", "v20", "v21", "v22", "v23");
}
@ -423,15 +423,15 @@ void TransposeUVWx8_NEON(const uint8_t* src,
"4: \n"
: "=&r"(src_temp), // %0
"+r"(src), // %1
"+r"(dst_a), // %2
"+r"(dst_b), // %3
"+r"(width) // %4
: "r"(static_cast<ptrdiff_t>(src_stride)), // %5
"r"(static_cast<ptrdiff_t>(dst_stride_a)), // %6
"r"(static_cast<ptrdiff_t>(dst_stride_b)), // %7
"r"(&kVTbl4x4TransposeDi) // %8
: "=&r"(src_temp), // %0
"+r"(src), // %1
"+r"(dst_a), // %2
"+r"(dst_b), // %3
"+r"(width) // %4
: "r"((ptrdiff_t)src_stride), // %5
"r"((ptrdiff_t)dst_stride_a), // %6
"r"((ptrdiff_t)dst_stride_b), // %7
"r"(&kVTbl4x4TransposeDi) // %8
: "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16",
"v17", "v18", "v19", "v20", "v21", "v22", "v23", "v30", "v31");
}

View File

@ -1036,6 +1036,9 @@ ANY11(ARGBToYRow_Any_NEON, ARGBToYRow_NEON, 0, 4, 1, 15)
#ifdef HAS_ARGBTOYROW_MSA
ANY11(ARGBToYRow_Any_MSA, ARGBToYRow_MSA, 0, 4, 1, 15)
#endif
#ifdef HAS_ARGBTOYROW_LSX
ANY11(ARGBToYRow_Any_LSX, ARGBToYRow_LSX, 0, 4, 1, 15)
#endif
#ifdef HAS_ARGBTOYROW_LASX
ANY11(ARGBToYRow_Any_LASX, ARGBToYRow_LASX, 0, 4, 1, 31)
#endif
@ -1054,9 +1057,21 @@ ANY11(ARGBToYJRow_Any_MSA, ARGBToYJRow_MSA, 0, 4, 1, 15)
#ifdef HAS_ARGBTOYJROW_LSX
ANY11(ARGBToYJRow_Any_LSX, ARGBToYJRow_LSX, 0, 4, 1, 15)
#endif
#ifdef HAS_RGBATOYJROW_LSX
ANY11(RGBAToYJRow_Any_LSX, RGBAToYJRow_LSX, 0, 4, 1, 15)
#endif
#ifdef HAS_ABGRTOYJROW_LSX
ANY11(ABGRToYJRow_Any_LSX, ABGRToYJRow_LSX, 0, 4, 1, 15)
#endif
#ifdef HAS_RGBATOYJROW_LASX
ANY11(RGBAToYJRow_Any_LASX, RGBAToYJRow_LASX, 0, 4, 1, 31)
#endif
#ifdef HAS_ARGBTOYJROW_LASX
ANY11(ARGBToYJRow_Any_LASX, ARGBToYJRow_LASX, 0, 4, 1, 31)
#endif
#ifdef HAS_ABGRTOYJROW_LASX
ANY11(ABGRToYJRow_Any_LASX, ABGRToYJRow_LASX, 0, 4, 1, 31)
#endif
#ifdef HAS_BGRATOYROW_NEON
ANY11(BGRAToYRow_Any_NEON, BGRAToYRow_NEON, 0, 4, 1, 15)
#endif
@ -1066,6 +1081,9 @@ ANY11(BGRAToYRow_Any_MSA, BGRAToYRow_MSA, 0, 4, 1, 15)
#ifdef HAS_BGRATOYROW_LSX
ANY11(BGRAToYRow_Any_LSX, BGRAToYRow_LSX, 0, 4, 1, 15)
#endif
#ifdef HAS_BGRATOYROW_LASX
ANY11(BGRAToYRow_Any_LASX, BGRAToYRow_LASX, 0, 4, 1, 31)
#endif
#ifdef HAS_ABGRTOYROW_NEON
ANY11(ABGRToYRow_Any_NEON, ABGRToYRow_NEON, 0, 4, 1, 15)
#endif
@ -1075,6 +1093,9 @@ ANY11(ABGRToYRow_Any_MSA, ABGRToYRow_MSA, 0, 4, 1, 7)
#ifdef HAS_ABGRTOYROW_LSX
ANY11(ABGRToYRow_Any_LSX, ABGRToYRow_LSX, 0, 4, 1, 15)
#endif
#ifdef HAS_ABGRTOYROW_LASX
ANY11(ABGRToYRow_Any_LASX, ABGRToYRow_LASX, 0, 4, 1, 31)
#endif
#ifdef HAS_RGBATOYROW_NEON
ANY11(RGBAToYRow_Any_NEON, RGBAToYRow_NEON, 0, 4, 1, 15)
#endif
@ -1084,6 +1105,9 @@ ANY11(RGBAToYRow_Any_MSA, RGBAToYRow_MSA, 0, 4, 1, 15)
#ifdef HAS_RGBATOYROW_LSX
ANY11(RGBAToYRow_Any_LSX, RGBAToYRow_LSX, 0, 4, 1, 15)
#endif
#ifdef HAS_RGBATOYROW_LASX
ANY11(RGBAToYRow_Any_LASX, RGBAToYRow_LASX, 0, 4, 1, 31)
#endif
#ifdef HAS_RGB24TOYROW_NEON
ANY11(RGB24ToYRow_Any_NEON, RGB24ToYRow_NEON, 0, 3, 1, 15)
#endif
@ -1102,6 +1126,12 @@ ANY11(RGB24ToYRow_Any_MSA, RGB24ToYRow_MSA, 0, 3, 1, 15)
#ifdef HAS_RGB24TOYROW_LSX
ANY11(RGB24ToYRow_Any_LSX, RGB24ToYRow_LSX, 0, 3, 1, 15)
#endif
#ifdef HAS_RGB24TOYJROW_LSX
ANY11(RGB24ToYJRow_Any_LSX, RGB24ToYJRow_LSX, 0, 3, 1, 15)
#endif
#ifdef HAS_RGB24TOYJROW_LASX
ANY11(RGB24ToYJRow_Any_LASX, RGB24ToYJRow_LASX, 0, 3, 1, 31)
#endif
#ifdef HAS_RGB24TOYROW_LASX
ANY11(RGB24ToYRow_Any_LASX, RGB24ToYRow_LASX, 0, 3, 1, 31)
#endif
@ -1126,6 +1156,12 @@ ANY11(RAWToYRow_Any_LSX, RAWToYRow_LSX, 0, 3, 1, 15)
#ifdef HAS_RAWTOYROW_LASX
ANY11(RAWToYRow_Any_LASX, RAWToYRow_LASX, 0, 3, 1, 31)
#endif
#ifdef HAS_RAWTOYJROW_LSX
ANY11(RAWToYJRow_Any_LSX, RAWToYJRow_LSX, 0, 3, 1, 15)
#endif
#ifdef HAS_RAWTOYJROW_LASX
ANY11(RAWToYJRow_Any_LASX, RAWToYJRow_LASX, 0, 3, 1, 31)
#endif
#ifdef HAS_RGB565TOYROW_NEON
ANY11(RGB565ToYRow_Any_NEON, RGB565ToYRow_NEON, 0, 2, 1, 7)
#endif

File diff suppressed because it is too large Load Diff

View File

@ -775,40 +775,6 @@ void UYVYToUV422Row_LASX(const uint8_t* src_uyvy,
}
}
// Convert 32 ARGB pixels per iteration to 8-bit luma with LASX intrinsics.
// The per-channel weights 0x19/0x81/0x42 (25/129/66) plus bias 0x1080 are
// libyuv's BT.601 ("I601") Y constants. Only full 32-pixel groups are
// processed; any width % 32 remainder is left to the caller — presumably
// the Any-row wrapper. TODO confirm.
void ARGBToYRow_LASX(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
  int x;
  int len = width / 32;  // Number of full 32-pixel (128-byte) groups.
  __m256i src0, src1, src2, src3, vec0, vec1, vec2, vec3;
  __m256i tmp0, tmp1, dst0;
  __m256i const_19 = __lasx_xvldi(0x19);
  __m256i const_42 = __lasx_xvldi(0x42);
  __m256i const_81 = __lasx_xvldi(0x81);
  __m256i const_1080 = {0x1080108010801080, 0x1080108010801080,
                        0x1080108010801080, 0x1080108010801080};
  // Word permutation applied after the narrowing step to restore pixel order.
  __m256i control = {0x0000000400000000, 0x0000000500000001, 0x0000000600000002,
                     0x0000000700000003};
  for (x = 0; x < len; x++) {
    // Load 128 bytes = 32 ARGB pixels.
    DUP4_ARG2(__lasx_xvld, src_argb0, 0, src_argb0, 32, src_argb0, 64,
              src_argb0, 96, src0, src1, src2, src3);
    vec0 = __lasx_xvpickev_b(src1, src0);  // even-indexed bytes
    vec1 = __lasx_xvpickev_b(src3, src2);
    vec2 = __lasx_xvpickod_b(src1, src0);  // odd-indexed bytes
    vec3 = __lasx_xvpickod_b(src3, src2);
    // Accumulate bias + weighted channels into 16-bit lanes.
    tmp0 = __lasx_xvmaddwev_h_bu(const_1080, vec0, const_19);
    tmp1 = __lasx_xvmaddwev_h_bu(const_1080, vec1, const_19);
    tmp0 = __lasx_xvmaddwev_h_bu(tmp0, vec2, const_81);
    tmp1 = __lasx_xvmaddwev_h_bu(tmp1, vec3, const_81);
    tmp0 = __lasx_xvmaddwod_h_bu(tmp0, vec0, const_42);
    tmp1 = __lasx_xvmaddwod_h_bu(tmp1, vec1, const_42);
    // Saturating narrow, taking the high byte of each 16-bit sum.
    dst0 = __lasx_xvssrani_b_h(tmp1, tmp0, 8);
    dst0 = __lasx_xvperm_w(dst0, control);
    __lasx_xvst(dst0, dst_y, 0);
    src_argb0 += 128;
    dst_y += 32;
  }
}
void ARGBToUVRow_LASX(const uint8_t* src_argb0,
int src_stride_argb,
uint8_t* dst_u,
@ -1811,48 +1777,6 @@ void RGB565ToUVRow_LASX(const uint8_t* src_rgb565,
}
}
// Convert 32 packed 24-bit RGB pixels per iteration to 8-bit luma (LASX).
// const_br holds paired weights 0x42/0x19 (66/25) and const_129 the G
// weight, with bias 0x1080 — libyuv's BT.601 constants. Only full
// 32-pixel (96-byte) groups are processed.
void RGB24ToYRow_LASX(const uint8_t* src_rgb24, uint8_t* dst_y, int width) {
  int x;
  int len = width / 32;
  __m256i src0, src1, src2;
  __m256i tmp0, tmp1, tmp2, tmp3;
  __m256i reg0, reg1, reg2, dst0;
  __m256i const_129 = __lasx_xvldi(129);
  __m256i const_br = {0x4219421942194219, 0x4219421942194219,
                      0x4219421942194219, 0x4219421942194219};
  __m256i const_1080 = {0x1080108010801080, 0x1080108010801080,
                        0x1080108010801080, 0x1080108010801080};
  // Byte shuffles that regroup the 3-byte pixels into channel-separable lanes.
  __m256i shuff0 = {0x0B09080605030200, 0x17151412110F0E0C, 0x0B09080605030200,
                    0x17151412110F0E0C};
  __m256i shuff1 = {0x0301001E1D1B1A18, 0x0F0D0C0A09070604, 0x0301001E1D1B1A18,
                    0x0F0D0C0A09070604};
  __m256i shuff2 = {0x000A000700040001, 0x001600130010000D, 0x000A000700040001,
                    0x001600130010000D};
  __m256i shuff3 = {0x0002001F001C0019, 0x000E000B00080005, 0x0002001F001C0019,
                    0x000E000B00080005};
  for (x = 0; x < len; x++) {
    // Load 96 bytes = 32 RGB24 pixels, then realign the 128-bit halves.
    reg0 = __lasx_xvld(src_rgb24, 0);
    reg1 = __lasx_xvld(src_rgb24, 32);
    reg2 = __lasx_xvld(src_rgb24, 64);
    src0 = __lasx_xvpermi_q(reg1, reg0, 0x30);
    src1 = __lasx_xvpermi_q(reg2, reg0, 0x21);
    src2 = __lasx_xvpermi_q(reg2, reg1, 0x30);
    tmp0 = __lasx_xvshuf_b(src1, src0, shuff0);
    tmp1 = __lasx_xvshuf_b(src1, src2, shuff1);
    tmp2 = __lasx_xvshuf_b(src1, src0, shuff2);
    tmp3 = __lasx_xvshuf_b(src1, src2, shuff3);
    // bias + G*129, then dot-product-accumulate the paired B/R weights.
    reg0 = __lasx_xvmaddwev_h_bu(const_1080, tmp2, const_129);
    reg1 = __lasx_xvmaddwev_h_bu(const_1080, tmp3, const_129);
    reg0 = __lasx_xvdp2add_h_bu(reg0, const_br, tmp0);
    reg1 = __lasx_xvdp2add_h_bu(reg1, const_br, tmp1);
    dst0 = __lasx_xvpickod_b(reg1, reg0);  // take high byte of each sum
    __lasx_xvst(dst0, dst_y, 0);
    dst_y += 32;
    src_rgb24 += 96;
  }
}
void RGB24ToUVRow_LASX(const uint8_t* src_rgb24,
int src_stride_rgb24,
uint8_t* dst_u,
@ -1916,48 +1840,6 @@ void RGB24ToUVRow_LASX(const uint8_t* src_rgb24,
}
}
// Convert 32 packed RAW (byte-reversed RGB24) pixels per iteration to 8-bit
// luma (LASX). Identical structure to RGB24ToYRow_LASX; only const_br
// differs (0x19/0x42 swapped), matching the reversed B/R byte order.
void RAWToYRow_LASX(const uint8_t* src_raw, uint8_t* dst_y, int width) {
  int x;
  int len = width / 32;
  __m256i src0, src1, src2;
  __m256i tmp0, tmp1, tmp2, tmp3;
  __m256i reg0, reg1, reg2, dst0;
  __m256i const_129 = __lasx_xvldi(129);
  __m256i const_br = {0x1942194219421942, 0x1942194219421942,
                      0x1942194219421942, 0x1942194219421942};
  __m256i const_1080 = {0x1080108010801080, 0x1080108010801080,
                        0x1080108010801080, 0x1080108010801080};
  // Byte shuffles that regroup 3-byte pixels into channel-separable lanes.
  __m256i shuff0 = {0x0B09080605030200, 0x17151412110F0E0C, 0x0B09080605030200,
                    0x17151412110F0E0C};
  __m256i shuff1 = {0x0301001E1D1B1A18, 0x0F0D0C0A09070604, 0x0301001E1D1B1A18,
                    0x0F0D0C0A09070604};
  __m256i shuff2 = {0x000A000700040001, 0x001600130010000D, 0x000A000700040001,
                    0x001600130010000D};
  __m256i shuff3 = {0x0002001F001C0019, 0x000E000B00080005, 0x0002001F001C0019,
                    0x000E000B00080005};
  for (x = 0; x < len; x++) {
    // Load 96 bytes = 32 RAW pixels, then realign the 128-bit halves.
    reg0 = __lasx_xvld(src_raw, 0);
    reg1 = __lasx_xvld(src_raw, 32);
    reg2 = __lasx_xvld(src_raw, 64);
    src0 = __lasx_xvpermi_q(reg1, reg0, 0x30);
    src1 = __lasx_xvpermi_q(reg2, reg0, 0x21);
    src2 = __lasx_xvpermi_q(reg2, reg1, 0x30);
    tmp0 = __lasx_xvshuf_b(src1, src0, shuff0);
    tmp1 = __lasx_xvshuf_b(src1, src2, shuff1);
    tmp2 = __lasx_xvshuf_b(src1, src0, shuff2);
    tmp3 = __lasx_xvshuf_b(src1, src2, shuff3);
    reg0 = __lasx_xvmaddwev_h_bu(const_1080, tmp2, const_129);
    reg1 = __lasx_xvmaddwev_h_bu(const_1080, tmp3, const_129);
    reg0 = __lasx_xvdp2add_h_bu(reg0, const_br, tmp0);
    reg1 = __lasx_xvdp2add_h_bu(reg1, const_br, tmp1);
    dst0 = __lasx_xvpickod_b(reg1, reg0);  // take high byte of each sum
    __lasx_xvst(dst0, dst_y, 0);
    dst_y += 32;
    src_raw += 96;
  }
}
void RAWToUVRow_LASX(const uint8_t* src_raw,
int src_stride_raw,
uint8_t* dst_u,
@ -2118,36 +2000,226 @@ void NV21ToARGBRow_LASX(const uint8_t* src_y,
}
}
void ARGBToYJRow_LASX(const uint8_t* src_argb, uint8_t* dst_y, int width) {
int x;
int len = width / 32;
__m256i src0, src1, src2, src3, dst0;
__m256i tmp0, tmp1, tmp2, tmp3;
__m256i reg0, reg1;
__m256i const_128 = __lasx_xvldi(0x480);
__m256i const_150 = __lasx_xvldi(0x96);
__m256i const_br = {0x4D1D4D1D4D1D4D1D, 0x4D1D4D1D4D1D4D1D,
0x4D1D4D1D4D1D4D1D, 0x4D1D4D1D4D1D4D1D};
__m256i shuff = {0x0000000400000000, 0x0000000500000001, 0x0000000600000002,
0x0000000700000003};
struct RgbConstants {
uint8_t kRGBToY[4];
uint16_t kAddY;
uint16_t pad;
};
for (x = 0; x < len; x++) {
DUP4_ARG2(__lasx_xvld, src_argb, 0, src_argb, 32, src_argb, 64, src_argb,
96, src0, src1, src2, src3);
tmp0 = __lasx_xvpickev_b(src1, src0);
tmp1 = __lasx_xvpickod_b(src1, src0);
tmp2 = __lasx_xvpickev_b(src3, src2);
tmp3 = __lasx_xvpickod_b(src3, src2);
reg0 = __lasx_xvmaddwev_h_bu(const_128, tmp1, const_150);
reg1 = __lasx_xvmaddwev_h_bu(const_128, tmp3, const_150);
reg0 = __lasx_xvdp2add_h_bu(reg0, const_br, tmp0);
reg1 = __lasx_xvdp2add_h_bu(reg1, const_br, tmp2);
dst0 = __lasx_xvpickod_b(reg1, reg0);
dst0 = __lasx_xvperm_w(dst0, shuff);
__lasx_xvst(dst0, dst_y, 0);
dst_y += 32;
src_argb += 128;
}
// RGB to JPeg coefficients
// B * 0.1140 coefficient = 29
// G * 0.5870 coefficient = 150
// R * 0.2990 coefficient = 77
// Add 0.5 = 0x80
static const struct RgbConstants kRgb24JPEGConstants = {{29, 150, 77, 0},
128,
0};
static const struct RgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, 128, 0};
// RGB to BT.601 coefficients
// B * 0.1016 coefficient = 25
// G * 0.5078 coefficient = 129
// R * 0.2578 coefficient = 66
// Add 16.5 = 0x1080
static const struct RgbConstants kRgb24I601Constants = {{25, 129, 66, 0},
0x1080,
0};
static const struct RgbConstants kRawI601Constants = {{66, 129, 25, 0},
0x1080,
0};
// ARGB expects first 3 values to contain RGB and 4th value is ignored.
// Shared LASX kernel: converts 32 4-byte pixels per loop to luma using the
// byte weights and 16-bit bias from *rgbconstants.
// NOTE(review): the loop decrements width by 32 and branches on non-zero, so
// width must be a positive multiple of 32 — confirm callers guarantee this.
// NOTE(review): $xr0-$xr20 are written but only "memory" is in the clobber
// list — verify the vector registers need not be declared clobbered here.
static void ARGBToYMatrixRow_LASX(const uint8_t* src_argb,
                                  uint8_t* dst_y,
                                  int width,
                                  const struct RgbConstants* rgbconstants) {
  // Word-permute indices used to restore pixel order after narrowing.
  int32_t shuff[8] = {0, 4, 1, 5, 2, 6, 3, 7};
  asm volatile(
      "xvldrepl.b $xr0, %3, 0 \n\t" // load rgbconstants
      "xvldrepl.b $xr1, %3, 1 \n\t" // load rgbconstants
      "xvldrepl.b $xr2, %3, 2 \n\t" // load rgbconstants
      "xvldrepl.h $xr3, %3, 4 \n\t" // load rgbconstants
      "xvld $xr20, %4, 0 \n\t" // load shuff
      "1: \n\t"
      "xvld $xr4, %0, 0 \n\t"
      "xvld $xr5, %0, 32 \n\t"
      "xvld $xr6, %0, 64 \n\t"
      "xvld $xr7, %0, 96 \n\t" // load 32 pixels of ARGB
      "xvor.v $xr12, $xr3, $xr3 \n\t"
      "xvor.v $xr13, $xr3, $xr3 \n\t"
      "addi.d %2, %2, -32 \n\t" // 32 processed per loop.
      "xvpickev.b $xr8, $xr5, $xr4 \n\t" //BR
      "xvpickev.b $xr10, $xr7, $xr6 \n\t"
      "xvpickod.b $xr9, $xr5, $xr4 \n\t" //GA
      "xvpickod.b $xr11, $xr7, $xr6 \n\t"
      "xvmaddwev.h.bu $xr12, $xr8, $xr0 \n\t" //B
      "xvmaddwev.h.bu $xr13, $xr10, $xr0 \n\t"
      "xvmaddwev.h.bu $xr12, $xr9, $xr1 \n\t" //G
      "xvmaddwev.h.bu $xr13, $xr11, $xr1 \n\t"
      "xvmaddwod.h.bu $xr12, $xr8, $xr2 \n\t" //R
      "xvmaddwod.h.bu $xr13, $xr10, $xr2 \n\t"
      "addi.d %0, %0, 128 \n\t"
      "xvpickod.b $xr10, $xr13, $xr12 \n\t"
      "xvperm.w $xr11, $xr10, $xr20 \n\t"
      "xvst $xr11, %1, 0 \n\t"
      "addi.d %1, %1, 32 \n\t"
      "bnez %2, 1b \n\t"
      : "+&r"(src_argb), // %0
        "+&r"(dst_y), // %1
        "+&r"(width) // %2
      : "r"(rgbconstants),
        "r"(shuff)
      : "memory"
  );
}
// Thin entry points: select a coefficient table and delegate to the shared
// LASX matrix kernel. ABGR reuses the ARGB kernel by passing the table with
// the B and R weights swapped ("Raw" constants).
void ARGBToYRow_LASX(const uint8_t* src_argb, uint8_t* dst_y, int width) {
  ARGBToYMatrixRow_LASX(src_argb, dst_y, width, &kRgb24I601Constants);
}
// JPEG (full-range) luma variant.
void ARGBToYJRow_LASX(const uint8_t* src_argb, uint8_t* dst_yj, int width) {
  ARGBToYMatrixRow_LASX(src_argb, dst_yj, width, &kRgb24JPEGConstants);
}
void ABGRToYRow_LASX(const uint8_t* src_abgr, uint8_t* dst_y, int width) {
  ARGBToYMatrixRow_LASX(src_abgr, dst_y, width, &kRawI601Constants);
}
void ABGRToYJRow_LASX(const uint8_t* src_abgr, uint8_t* dst_yj, int width) {
  ARGBToYMatrixRow_LASX(src_abgr, dst_yj, width, &kRawJPEGConstants);
}
// RGBA expects first value to be A and ignored, then 3 values to contain RGB.
// Same code as ARGB, except the LD4
// Shared LASX kernel for A-first 4-byte layouts: the even/odd byte picks are
// swapped relative to ARGBToYMatrixRow_LASX so the weights land on R/G/B.
// NOTE(review): width must be a positive multiple of 32 (decrement + bnez),
// and the used vector registers are not in the clobber list — confirm both.
static void RGBAToYMatrixRow_LASX(const uint8_t* src_rgba,
                                  uint8_t* dst_y,
                                  int width,
                                  const struct RgbConstants* rgbconstants) {
  // Word-permute indices used to restore pixel order after narrowing.
  int32_t shuff[8] = {0, 4, 1, 5, 2, 6, 3, 7};
  asm volatile(
      "xvldrepl.b $xr0, %3, 0 \n\t" // load rgbconstants
      "xvldrepl.b $xr1, %3, 1 \n\t" // load rgbconstants
      "xvldrepl.b $xr2, %3, 2 \n\t" // load rgbconstants
      "xvldrepl.h $xr3, %3, 4 \n\t" // load rgbconstants
      "xvld $xr20, %4, 0 \n\t" // load shuff
      "1: \n\t"
      "xvld $xr4, %0, 0 \n\t"
      "xvld $xr5, %0, 32 \n\t"
      "xvld $xr6, %0, 64 \n\t"
      "xvld $xr7, %0, 96 \n\t" // load 32 pixels of RGBA
      "xvor.v $xr12, $xr3, $xr3 \n\t"
      "xvor.v $xr13, $xr3, $xr3 \n\t"
      "addi.d %2, %2, -32 \n\t" // 32 processed per loop.
      "xvpickev.b $xr8, $xr5, $xr4 \n\t" //AG
      "xvpickev.b $xr10, $xr7, $xr6 \n\t"
      "xvpickod.b $xr9, $xr5, $xr4 \n\t" //BR
      "xvpickod.b $xr11, $xr7, $xr6 \n\t"
      "xvmaddwev.h.bu $xr12, $xr9, $xr0 \n\t" //B
      "xvmaddwev.h.bu $xr13, $xr11, $xr0 \n\t"
      "xvmaddwod.h.bu $xr12, $xr8, $xr1 \n\t" //G
      "xvmaddwod.h.bu $xr13, $xr10, $xr1 \n\t"
      "xvmaddwod.h.bu $xr12, $xr9, $xr2 \n\t" //R
      "xvmaddwod.h.bu $xr13, $xr11, $xr2 \n\t"
      "addi.d %0, %0, 128 \n\t"
      "xvpickod.b $xr10, $xr13, $xr12 \n\t"
      "xvperm.w $xr11, $xr10, $xr20 \n\t"
      "xvst $xr11, %1, 0 \n\t"
      "addi.d %1, %1, 32 \n\t"
      "bnez %2, 1b \n\t"
      : "+&r"(src_rgba), // %0
        "+&r"(dst_y), // %1
        "+&r"(width) // %2
      : "r"(rgbconstants),
        "r"(shuff)
      : "memory"
  );
}
// Thin entry points for A-first layouts; BGRA reuses the RGBA kernel with
// the B/R-swapped ("Raw") coefficient table.
void RGBAToYRow_LASX(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
  RGBAToYMatrixRow_LASX(src_rgba, dst_y, width, &kRgb24I601Constants);
}
// JPEG (full-range) luma variant.
void RGBAToYJRow_LASX(const uint8_t* src_rgba, uint8_t* dst_yj, int width) {
  RGBAToYMatrixRow_LASX(src_rgba, dst_yj, width, &kRgb24JPEGConstants);
}
void BGRAToYRow_LASX(const uint8_t* src_bgra, uint8_t* dst_y, int width) {
  RGBAToYMatrixRow_LASX(src_bgra, dst_y, width, &kRawI601Constants);
}
// Shared LASX kernel for packed 3-byte layouts (RGB24/RAW): shuffle tables
// split the 3-byte pixels into channel lanes before applying *rgbconstants.
// NOTE(review): width must be a positive multiple of 32 (decrement + bnez),
// and the used vector registers are not in the clobber list — confirm both.
static void RGBToYMatrixRow_LASX(const uint8_t* src_rgba,
                                 uint8_t* dst_y,
                                 int width,
                                 const struct RgbConstants* rgbconstants) {
  // Four 32-byte shuffle tables loaded into $xr4-$xr7 below.
  int8_t shuff[128] = {0, 2, 3, 5, 6, 8, 9, 11, 12, 14, 15, 17, 18, 20, 21, 23,
                       0, 2, 3, 5, 6, 8, 9, 11, 12, 14, 15, 17, 18, 20, 21, 23,
                       24, 26, 27, 29, 30, 0, 1, 3, 4, 6, 7, 9, 10, 12, 13, 15,
                       24, 26, 27, 29, 30, 0, 1, 3, 4, 6, 7, 9, 10, 12, 13, 15,
                       1, 0, 4, 0, 7, 0, 10, 0, 13, 0, 16, 0, 19, 0, 22, 0,
                       1, 0, 4, 0, 7, 0, 10, 0, 13, 0, 16, 0, 19, 0, 22, 0,
                       25, 0, 28, 0, 31, 0, 2, 0, 5, 0, 8, 0, 11, 0, 14, 0,
                       25, 0, 28, 0, 31, 0, 2, 0, 5, 0, 8, 0, 11, 0, 14, 0};
  asm volatile(
      "xvldrepl.b $xr0, %3, 0 \n\t" // load rgbconstants
      "xvldrepl.b $xr1, %3, 1 \n\t" // load rgbconstants
      "xvldrepl.b $xr2, %3, 2 \n\t" // load rgbconstants
      "xvldrepl.h $xr3, %3, 4 \n\t" // load rgbconstants
      "xvld $xr4, %4, 0 \n\t" // load shuff
      "xvld $xr5, %4, 32 \n\t"
      "xvld $xr6, %4, 64 \n\t"
      "xvld $xr7, %4, 96 \n\t"
      "1: \n\t"
      "xvld $xr8, %0, 0 \n\t"
      "xvld $xr9, %0, 32 \n\t"
      "xvld $xr10, %0, 64 \n\t" // load 32 pixels of RGB
      "xvor.v $xr12, $xr3, $xr3 \n\t"
      "xvor.v $xr13, $xr3, $xr3 \n\t"
      "xvor.v $xr11, $xr9, $xr9 \n\t"
      "addi.d %2, %2, -32 \n\t" // 32 processed per loop.
      "xvpermi.q $xr9, $xr8, 0x30 \n\t" //src0
      "xvpermi.q $xr8, $xr10, 0x03 \n\t" //src1
      "xvpermi.q $xr10, $xr11, 0x30 \n\t" //src2
      "xvshuf.b $xr14, $xr8, $xr9, $xr4 \n\t"
      "xvshuf.b $xr15, $xr8, $xr10, $xr5 \n\t"
      "xvshuf.b $xr16, $xr8, $xr9, $xr6 \n\t"
      "xvshuf.b $xr17, $xr8, $xr10, $xr7 \n\t"
      "xvmaddwev.h.bu $xr12, $xr16, $xr1 \n\t" //G
      "xvmaddwev.h.bu $xr13, $xr17, $xr1 \n\t"
      "xvmaddwev.h.bu $xr12, $xr14, $xr0 \n\t" //B
      "xvmaddwev.h.bu $xr13, $xr15, $xr0 \n\t"
      "xvmaddwod.h.bu $xr12, $xr14, $xr2 \n\t" //R
      "xvmaddwod.h.bu $xr13, $xr15, $xr2 \n\t"
      "addi.d %0, %0, 96 \n\t"
      "xvpickod.b $xr10, $xr13, $xr12 \n\t"
      "xvst $xr10, %1, 0 \n\t"
      "addi.d %1, %1, 32 \n\t"
      "bnez %2, 1b \n\t"
      : "+&r"(src_rgba), // %0
        "+&r"(dst_y), // %1
        "+&r"(width) // %2
      : "r"(rgbconstants), // %3
        "r"(shuff) // %4
      : "memory"
  );
}
// Thin entry points for packed 3-byte layouts; RAW reuses the RGB24 kernel
// with the B/R-swapped ("Raw") coefficient tables.
void RGB24ToYJRow_LASX(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) {
  RGBToYMatrixRow_LASX(src_rgb24, dst_yj, width, &kRgb24JPEGConstants);
}
void RAWToYJRow_LASX(const uint8_t* src_raw, uint8_t* dst_yj, int width) {
  RGBToYMatrixRow_LASX(src_raw, dst_yj, width, &kRawJPEGConstants);
}
// BT.601 (limited-range) luma variants.
void RGB24ToYRow_LASX(const uint8_t* src_rgb24, uint8_t* dst_y, int width) {
  RGBToYMatrixRow_LASX(src_rgb24, dst_y, width, &kRgb24I601Constants);
}
void RAWToYRow_LASX(const uint8_t* src_raw, uint8_t* dst_y, int width) {
  RGBToYMatrixRow_LASX(src_raw, dst_y, width, &kRawI601Constants);
}
void ARGBToUVJRow_LASX(const uint8_t* src_argb,

View File

@ -561,39 +561,6 @@ void RGB565ToUVRow_LSX(const uint8_t* src_rgb565,
}
}
// Convert 16 packed RGB24 pixels per iteration to 8-bit luma (LSX, 128-bit).
// Weights: const_br pairs 0x42/0x19 (66/25), const_129 for G, bias 0x1080 —
// libyuv's BT.601 constants. Only full 16-pixel (48-byte) groups processed.
void RGB24ToYRow_LSX(const uint8_t* src_rgb24, uint8_t* dst_y, int width) {
  int x;
  int len = width / 16;
  __m128i src0, src1, src2;
  __m128i tmp0, tmp1, tmp2, tmp3;
  __m128i reg0, reg1, dst0;
  __m128i const_129 = __lsx_vldi(129);
  __m128i const_br = {0x4219421942194219, 0x4219421942194219};
  __m128i const_1080 = {0x1080108010801080, 0x1080108010801080};
  // Byte shuffles that regroup 3-byte pixels into channel-separable lanes.
  __m128i shuff0 = {0x0B09080605030200, 0x17151412110F0E0C};
  __m128i shuff1 = {0x0301001E1D1B1A18, 0x0F0D0C0A09070604};
  __m128i shuff2 = {0x000A000700040001, 0x001600130010000D};
  __m128i shuff3 = {0x0002001F001C0019, 0x000E000B00080005};
  for (x = 0; x < len; x++) {
    src0 = __lsx_vld(src_rgb24, 0);
    src1 = __lsx_vld(src_rgb24, 16);
    src2 = __lsx_vld(src_rgb24, 32);
    tmp0 = __lsx_vshuf_b(src1, src0, shuff0);
    tmp1 = __lsx_vshuf_b(src1, src2, shuff1);
    tmp2 = __lsx_vshuf_b(src1, src0, shuff2);
    tmp3 = __lsx_vshuf_b(src1, src2, shuff3);
    // bias + G*129, then dot-product-accumulate the paired B/R weights.
    reg0 = __lsx_vmaddwev_h_bu(const_1080, tmp2, const_129);
    reg1 = __lsx_vmaddwev_h_bu(const_1080, tmp3, const_129);
    reg0 = __lsx_vdp2add_h_bu(reg0, const_br, tmp0);
    reg1 = __lsx_vdp2add_h_bu(reg1, const_br, tmp1);
    dst0 = __lsx_vpickod_b(reg1, reg0);  // take high byte of each sum
    __lsx_vst(dst0, dst_y, 0);
    dst_y += 16;
    src_rgb24 += 48;
  }
}
void RGB24ToUVRow_LSX(const uint8_t* src_rgb24,
int src_stride_rgb24,
uint8_t* dst_u,
@ -647,39 +614,6 @@ void RGB24ToUVRow_LSX(const uint8_t* src_rgb24,
}
}
// Convert 16 packed RAW (byte-reversed RGB24) pixels per iteration to 8-bit
// luma (LSX). Same structure as RGB24ToYRow_LSX with const_br swapped
// (0x19/0x42) for the reversed B/R order; the final narrow here uses
// vsrlni (logical shift) rather than vpickod.
void RAWToYRow_LSX(const uint8_t* src_raw, uint8_t* dst_y, int width) {
  int x;
  int len = width / 16;
  __m128i src0, src1, src2;
  __m128i tmp0, tmp1, tmp2, tmp3;
  __m128i reg0, reg1, dst0;
  __m128i const_129 = __lsx_vldi(129);
  __m128i const_br = {0x1942194219421942, 0x1942194219421942};
  __m128i const_1080 = {0x1080108010801080, 0x1080108010801080};
  // Byte shuffles that regroup 3-byte pixels into channel-separable lanes.
  __m128i shuff0 = {0x0B09080605030200, 0x17151412110F0E0C};
  __m128i shuff1 = {0x0301001E1D1B1A18, 0x0F0D0C0A09070604};
  __m128i shuff2 = {0x000A000700040001, 0x001600130010000D};
  __m128i shuff3 = {0x0002001F001C0019, 0x000E000B00080005};
  for (x = 0; x < len; x++) {
    src0 = __lsx_vld(src_raw, 0);
    src1 = __lsx_vld(src_raw, 16);
    src2 = __lsx_vld(src_raw, 32);
    tmp0 = __lsx_vshuf_b(src1, src0, shuff0);
    tmp1 = __lsx_vshuf_b(src1, src2, shuff1);
    tmp2 = __lsx_vshuf_b(src1, src0, shuff2);
    tmp3 = __lsx_vshuf_b(src1, src2, shuff3);
    reg0 = __lsx_vmaddwev_h_bu(const_1080, tmp2, const_129);
    reg1 = __lsx_vmaddwev_h_bu(const_1080, tmp3, const_129);
    reg0 = __lsx_vdp2add_h_bu(reg0, const_br, tmp0);
    reg1 = __lsx_vdp2add_h_bu(reg1, const_br, tmp1);
    dst0 = __lsx_vsrlni_b_h(reg1, reg0, 8);  // narrow: take high byte
    __lsx_vst(dst0, dst_y, 0);
    dst_y += 16;
    src_raw += 48;
  }
}
void RAWToUVRow_LSX(const uint8_t* src_raw,
int src_stride_raw,
uint8_t* dst_u,
@ -914,62 +848,6 @@ void SobelXYRow_LSX(const uint8_t* src_sobelx,
}
}
// Convert 16 ARGB pixels per iteration to JPEG (full-range) luma (LSX).
// Weights: const_br pairs 0x4D/0x1D (77/29), const_150 for G, bias 0x80
// (vldi 0x480) — libyuv's JPEG constants; no 16.5 offset.
void ARGBToYJRow_LSX(const uint8_t* src_argb, uint8_t* dst_y, int width) {
  int x;
  int len = width / 16;
  __m128i src0, src1, src2, src3, dst0;
  __m128i tmp0, tmp1, tmp2, tmp3;
  __m128i reg0, reg1;
  __m128i const_128 = __lsx_vldi(0x480);
  __m128i const_150 = __lsx_vldi(0x96);
  __m128i const_br = {0x4D1D4D1D4D1D4D1D, 0x4D1D4D1D4D1D4D1D};
  for (x = 0; x < len; x++) {
    // Load 64 bytes = 16 ARGB pixels.
    DUP4_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src_argb, 32, src_argb, 48,
              src0, src1, src2, src3);
    tmp0 = __lsx_vpickev_b(src1, src0);  // even-indexed bytes
    tmp1 = __lsx_vpickod_b(src1, src0);  // odd-indexed bytes
    tmp2 = __lsx_vpickev_b(src3, src2);
    tmp3 = __lsx_vpickod_b(src3, src2);
    reg0 = __lsx_vmaddwev_h_bu(const_128, tmp1, const_150);
    reg1 = __lsx_vmaddwev_h_bu(const_128, tmp3, const_150);
    reg0 = __lsx_vdp2add_h_bu(reg0, const_br, tmp0);
    reg1 = __lsx_vdp2add_h_bu(reg1, const_br, tmp2);
    dst0 = __lsx_vpickod_b(reg1, reg0);  // take high byte of each sum
    __lsx_vst(dst0, dst_y, 0);
    dst_y += 16;
    src_argb += 64;
  }
}
// Convert 16 BGRA pixels per iteration to BT.601 luma (LSX).
// Weights 0x19/0x42 (25/66) paired in const_br, 129 for G, bias 0x1080.
// The odd/even picks differ from the ARGB variant to match BGRA byte order.
void BGRAToYRow_LSX(const uint8_t* src_bgra, uint8_t* dst_y, int width) {
  int x;
  int len = width / 16;
  __m128i src0, src1, src2, src3, dst0;
  __m128i tmp0, tmp1, tmp2, tmp3;
  __m128i reg0, reg1;
  __m128i const_129 = __lsx_vldi(0x81);
  __m128i const_br = {0x1942194219421942, 0x1942194219421942};
  __m128i const_1080 = {0x1080108010801080, 0x1080108010801080};
  for (x = 0; x < len; x++) {
    // Load 64 bytes = 16 BGRA pixels.
    DUP4_ARG2(__lsx_vld, src_bgra, 0, src_bgra, 16, src_bgra, 32, src_bgra, 48,
              src0, src1, src2, src3);
    tmp0 = __lsx_vpickod_b(src1, src0);  // odd-indexed bytes
    tmp1 = __lsx_vpickev_b(src1, src0);  // even-indexed bytes
    tmp2 = __lsx_vpickod_b(src3, src2);
    tmp3 = __lsx_vpickev_b(src3, src2);
    reg0 = __lsx_vmaddwod_h_bu(const_1080, tmp1, const_129);
    reg1 = __lsx_vmaddwod_h_bu(const_1080, tmp3, const_129);
    reg0 = __lsx_vdp2add_h_bu(reg0, const_br, tmp0);
    reg1 = __lsx_vdp2add_h_bu(reg1, const_br, tmp2);
    dst0 = __lsx_vsrlni_b_h(reg1, reg0, 8);  // narrow: take high byte
    __lsx_vst(dst0, dst_y, 0);
    dst_y += 16;
    src_bgra += 64;
  }
}
void BGRAToUVRow_LSX(const uint8_t* src_bgra,
int src_stride_bgra,
uint8_t* dst_u,
@ -1018,34 +896,6 @@ void BGRAToUVRow_LSX(const uint8_t* src_bgra,
}
}
// Convert 16 ABGR pixels per iteration to BT.601 luma (LSX).
// Same weights as BGRA (25/66 paired, 129 for G, bias 0x1080) but with the
// even/odd byte picks arranged for the ABGR byte order.
void ABGRToYRow_LSX(const uint8_t* src_abgr, uint8_t* dst_y, int width) {
  int x;
  int len = width / 16;
  __m128i src0, src1, src2, src3, dst0;
  __m128i tmp0, tmp1, tmp2, tmp3;
  __m128i reg0, reg1;
  __m128i const_129 = __lsx_vldi(0x81);
  __m128i const_br = {0x1942194219421942, 0x1942194219421942};
  __m128i const_1080 = {0x1080108010801080, 0x1080108010801080};
  for (x = 0; x < len; x++) {
    // Load 64 bytes = 16 ABGR pixels.
    DUP4_ARG2(__lsx_vld, src_abgr, 0, src_abgr, 16, src_abgr, 32, src_abgr, 48,
              src0, src1, src2, src3);
    tmp0 = __lsx_vpickev_b(src1, src0);  // even-indexed bytes
    tmp1 = __lsx_vpickod_b(src1, src0);  // odd-indexed bytes
    tmp2 = __lsx_vpickev_b(src3, src2);
    tmp3 = __lsx_vpickod_b(src3, src2);
    reg0 = __lsx_vmaddwev_h_bu(const_1080, tmp1, const_129);
    reg1 = __lsx_vmaddwev_h_bu(const_1080, tmp3, const_129);
    reg0 = __lsx_vdp2add_h_bu(reg0, const_br, tmp0);
    reg1 = __lsx_vdp2add_h_bu(reg1, const_br, tmp2);
    dst0 = __lsx_vsrlni_b_h(reg1, reg0, 8);  // narrow: take high byte
    __lsx_vst(dst0, dst_y, 0);
    dst_y += 16;
    src_abgr += 64;
  }
}
void ABGRToUVRow_LSX(const uint8_t* src_abgr,
int src_stride_abgr,
uint8_t* dst_u,
@ -1094,34 +944,6 @@ void ABGRToUVRow_LSX(const uint8_t* src_abgr,
}
}
// Convert 16 RGBA pixels per iteration to BT.601 luma (LSX).
// Weights 0x42/0x19 (66/25) paired in const_br, 129 for G, bias 0x1080;
// odd/even picks skip the leading A byte of each RGBA pixel.
void RGBAToYRow_LSX(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
  int x;
  int len = width / 16;
  __m128i src0, src1, src2, src3, dst0;
  __m128i tmp0, tmp1, tmp2, tmp3;
  __m128i reg0, reg1;
  __m128i const_129 = __lsx_vldi(0x81);
  __m128i const_br = {0x4219421942194219, 0x4219421942194219};
  __m128i const_1080 = {0x1080108010801080, 0x1080108010801080};
  for (x = 0; x < len; x++) {
    // Load 64 bytes = 16 RGBA pixels.
    DUP4_ARG2(__lsx_vld, src_rgba, 0, src_rgba, 16, src_rgba, 32, src_rgba, 48,
              src0, src1, src2, src3);
    tmp0 = __lsx_vpickod_b(src1, src0);  // odd-indexed bytes
    tmp1 = __lsx_vpickev_b(src1, src0);  // even-indexed bytes
    tmp2 = __lsx_vpickod_b(src3, src2);
    tmp3 = __lsx_vpickev_b(src3, src2);
    reg0 = __lsx_vmaddwod_h_bu(const_1080, tmp1, const_129);
    reg1 = __lsx_vmaddwod_h_bu(const_1080, tmp3, const_129);
    reg0 = __lsx_vdp2add_h_bu(reg0, const_br, tmp0);
    reg1 = __lsx_vdp2add_h_bu(reg1, const_br, tmp2);
    dst0 = __lsx_vsrlni_b_h(reg1, reg0, 8);  // narrow: take high byte
    __lsx_vst(dst0, dst_y, 0);
    dst_y += 16;
    src_rgba += 64;
  }
}
void RGBAToUVRow_LSX(const uint8_t* src_rgba,
int src_stride_rgba,
uint8_t* dst_u,
@ -1821,6 +1643,212 @@ void HalfFloatRow_LSX(const uint16_t* src,
}
}
// Per-format luma coefficient set consumed by the *ToYMatrixRow kernels.
struct RgbConstants {
  uint8_t kRGBToY[4];  // byte weights, in memory order of the pixel format
  uint16_t kAddY;      // 16-bit rounding/range bias added before narrowing
  uint16_t pad;        // keeps the struct size/alignment stable
};

// RGB to JPeg coefficients
// B * 0.1140 coefficient = 29
// G * 0.5870 coefficient = 150
// R * 0.2990 coefficient = 77
// Add 0.5 = 0x80
static const struct RgbConstants kRgb24JPEGConstants = {{29, 150, 77, 0},
                                                        128,
                                                        0};

// Same JPEG weights with B and R swapped for reversed byte orders.
static const struct RgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, 128, 0};

// RGB to BT.601 coefficients
// B * 0.1016 coefficient = 25
// G * 0.5078 coefficient = 129
// R * 0.2578 coefficient = 66
// Add 16.5 = 0x1080
static const struct RgbConstants kRgb24I601Constants = {{25, 129, 66, 0},
                                                        0x1080,
                                                        0};

// Same BT.601 weights with B and R swapped for reversed byte orders.
static const struct RgbConstants kRawI601Constants = {{66, 129, 25, 0},
                                                      0x1080,
                                                      0};
// ARGB expects first 3 values to contain RGB and 4th value is ignored.
// Shared LSX kernel: converts 16 4-byte pixels per loop to luma using the
// byte weights and 16-bit bias from *rgbconstants.
// NOTE(review): the loop decrements width by 16 and branches on non-zero, so
// width must be a positive multiple of 16 — confirm callers guarantee this.
// NOTE(review): $vr0-$vr13 are written but only "memory" is in the clobber
// list — verify the vector registers need not be declared clobbered here.
static void ARGBToYMatrixRow_LSX(const uint8_t* src_argb,
                                 uint8_t* dst_y,
                                 int width,
                                 const struct RgbConstants* rgbconstants) {
  asm volatile(
      "vldrepl.b $vr0, %3, 0 \n\t" // load rgbconstants
      "vldrepl.b $vr1, %3, 1 \n\t" // load rgbconstants
      "vldrepl.b $vr2, %3, 2 \n\t" // load rgbconstants
      "vldrepl.h $vr3, %3, 4 \n\t" // load rgbconstants
      "1: \n\t"
      "vld $vr4, %0, 0 \n\t"
      "vld $vr5, %0, 16 \n\t"
      "vld $vr6, %0, 32 \n\t"
      "vld $vr7, %0, 48 \n\t" // load 16 pixels of ARGB
      "vor.v $vr12, $vr3, $vr3 \n\t"
      "vor.v $vr13, $vr3, $vr3 \n\t"
      "addi.d %2, %2, -16 \n\t" // 16 processed per loop.
      "vpickev.b $vr8, $vr5, $vr4 \n\t" //BR
      "vpickev.b $vr10, $vr7, $vr6 \n\t"
      "vpickod.b $vr9, $vr5, $vr4 \n\t" //GA
      "vpickod.b $vr11, $vr7, $vr6 \n\t"
      "vmaddwev.h.bu $vr12, $vr8, $vr0 \n\t" //B
      "vmaddwev.h.bu $vr13, $vr10, $vr0 \n\t"
      "vmaddwev.h.bu $vr12, $vr9, $vr1 \n\t" //G
      "vmaddwev.h.bu $vr13, $vr11, $vr1 \n\t"
      "vmaddwod.h.bu $vr12, $vr8, $vr2 \n\t" //R
      "vmaddwod.h.bu $vr13, $vr10, $vr2 \n\t"
      "addi.d %0, %0, 64 \n\t"
      "vpickod.b $vr10, $vr13, $vr12 \n\t"
      "vst $vr10, %1, 0 \n\t"
      "addi.d %1, %1, 16 \n\t"
      "bnez %2, 1b \n\t"
      : "+&r"(src_argb), // %0
        "+&r"(dst_y), // %1
        "+&r"(width) // %2
      : "r"(rgbconstants)
      : "memory"
  );
}
// Thin entry points: select a coefficient table and delegate to the shared
// LSX matrix kernel. ABGR reuses the ARGB kernel by passing the table with
// the B and R weights swapped ("Raw" constants).
void ARGBToYRow_LSX(const uint8_t* src_argb, uint8_t* dst_y, int width) {
  ARGBToYMatrixRow_LSX(src_argb, dst_y, width, &kRgb24I601Constants);
}
// JPEG (full-range) luma variant.
void ARGBToYJRow_LSX(const uint8_t* src_argb, uint8_t* dst_yj, int width) {
  ARGBToYMatrixRow_LSX(src_argb, dst_yj, width, &kRgb24JPEGConstants);
}
void ABGRToYRow_LSX(const uint8_t* src_abgr, uint8_t* dst_y, int width) {
  ARGBToYMatrixRow_LSX(src_abgr, dst_y, width, &kRawI601Constants);
}
void ABGRToYJRow_LSX(const uint8_t* src_abgr, uint8_t* dst_yj, int width) {
  ARGBToYMatrixRow_LSX(src_abgr, dst_yj, width, &kRawJPEGConstants);
}
// RGBA expects first value to be A and ignored, then 3 values to contain RGB.
// Same code as ARGB, except the LD4
// Shared LSX kernel for A-first 4-byte layouts: even/odd byte picks are
// swapped relative to ARGBToYMatrixRow_LSX so the weights land on R/G/B.
// NOTE(review): width must be a positive multiple of 16 (decrement + bnez),
// and the used vector registers are not in the clobber list — confirm both.
static void RGBAToYMatrixRow_LSX(const uint8_t* src_rgba,
                                 uint8_t* dst_y,
                                 int width,
                                 const struct RgbConstants* rgbconstants) {
  asm volatile(
      "vldrepl.b $vr0, %3, 0 \n\t" // load rgbconstants
      "vldrepl.b $vr1, %3, 1 \n\t" // load rgbconstants
      "vldrepl.b $vr2, %3, 2 \n\t" // load rgbconstants
      "vldrepl.h $vr3, %3, 4 \n\t" // load rgbconstants
      "1: \n\t"
      "vld $vr4, %0, 0 \n\t"
      "vld $vr5, %0, 16 \n\t"
      "vld $vr6, %0, 32 \n\t"
      "vld $vr7, %0, 48 \n\t" // load 16 pixels of RGBA
      "vor.v $vr12, $vr3, $vr3 \n\t"
      "vor.v $vr13, $vr3, $vr3 \n\t"
      "addi.d %2, %2, -16 \n\t" // 16 processed per loop.
      "vpickev.b $vr8, $vr5, $vr4 \n\t" //AG
      "vpickev.b $vr10, $vr7, $vr6 \n\t"
      "vpickod.b $vr9, $vr5, $vr4 \n\t" //BR
      "vpickod.b $vr11, $vr7, $vr6 \n\t"
      "vmaddwev.h.bu $vr12, $vr9, $vr0 \n\t" //B
      "vmaddwev.h.bu $vr13, $vr11, $vr0 \n\t"
      "vmaddwod.h.bu $vr12, $vr8, $vr1 \n\t" //G
      "vmaddwod.h.bu $vr13, $vr10, $vr1 \n\t"
      "vmaddwod.h.bu $vr12, $vr9, $vr2 \n\t" //R
      "vmaddwod.h.bu $vr13, $vr11, $vr2 \n\t"
      "addi.d %0, %0, 64 \n\t"
      "vpickod.b $vr10, $vr13, $vr12 \n\t"
      "vst $vr10, %1, 0 \n\t"
      "addi.d %1, %1, 16 \n\t"
      "bnez %2, 1b \n\t"
      : "+&r"(src_rgba), // %0
        "+&r"(dst_y), // %1
        "+&r"(width) // %2
      : "r"(rgbconstants)
      : "memory"
  );
}
// Thin entry points for A-first layouts; BGRA reuses the RGBA kernel with
// the B/R-swapped ("Raw") coefficient table.
void RGBAToYRow_LSX(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
  RGBAToYMatrixRow_LSX(src_rgba, dst_y, width, &kRgb24I601Constants);
}
// JPEG (full-range) luma variant.
void RGBAToYJRow_LSX(const uint8_t* src_rgba, uint8_t* dst_yj, int width) {
  RGBAToYMatrixRow_LSX(src_rgba, dst_yj, width, &kRgb24JPEGConstants);
}
void BGRAToYRow_LSX(const uint8_t* src_bgra, uint8_t* dst_y, int width) {
  RGBAToYMatrixRow_LSX(src_bgra, dst_y, width, &kRawI601Constants);
}
// Shared LSX kernel for packed 3-byte layouts (RGB24/RAW): shuffle tables
// split the 3-byte pixels into channel lanes before applying *rgbconstants.
// NOTE(review): width must be a positive multiple of 16 (decrement + bnez),
// and the used vector registers are not in the clobber list — confirm both.
static void RGBToYMatrixRow_LSX(const uint8_t* src_rgba,
                                uint8_t* dst_y,
                                int width,
                                const struct RgbConstants* rgbconstants) {
  // Four 16-byte shuffle tables loaded into $vr4-$vr7 below.
  int8_t shuff[64] = {0, 2, 3, 5, 6, 8, 9, 11, 12, 14, 15, 17, 18, 20, 21, 23,
                      24, 26, 27, 29, 30, 0, 1, 3, 4, 6, 7, 9, 10, 12, 13, 15,
                      1, 0, 4, 0, 7, 0, 10, 0, 13, 0, 16, 0, 19, 0, 22, 0,
                      25, 0, 28, 0, 31, 0, 2, 0, 5, 0, 8, 0, 11, 0, 14, 0};
  asm volatile(
      "vldrepl.b $vr0, %3, 0 \n\t" // load rgbconstants
      "vldrepl.b $vr1, %3, 1 \n\t" // load rgbconstants
      "vldrepl.b $vr2, %3, 2 \n\t" // load rgbconstants
      "vldrepl.h $vr3, %3, 4 \n\t" // load rgbconstants
      "vld $vr4, %4, 0 \n\t" // load shuff
      "vld $vr5, %4, 16 \n\t"
      "vld $vr6, %4, 32 \n\t"
      "vld $vr7, %4, 48 \n\t"
      "1: \n\t"
      "vld $vr8, %0, 0 \n\t"
      "vld $vr9, %0, 16 \n\t"
      "vld $vr10, %0, 32 \n\t" // load 16 pixels of RGB
      "vor.v $vr12, $vr3, $vr3 \n\t"
      "vor.v $vr13, $vr3, $vr3 \n\t"
      "addi.d %2, %2, -16 \n\t" // 16 processed per loop.
      "vshuf.b $vr14, $vr9, $vr8, $vr4 \n\t"
      "vshuf.b $vr15, $vr9, $vr10, $vr5 \n\t"
      "vshuf.b $vr16, $vr9, $vr8, $vr6 \n\t"
      "vshuf.b $vr17, $vr9, $vr10, $vr7 \n\t"
      "vmaddwev.h.bu $vr12, $vr16, $vr1 \n\t" //G
      "vmaddwev.h.bu $vr13, $vr17, $vr1 \n\t"
      "vmaddwev.h.bu $vr12, $vr14, $vr0 \n\t" //B
      "vmaddwev.h.bu $vr13, $vr15, $vr0 \n\t"
      "vmaddwod.h.bu $vr12, $vr14, $vr2 \n\t" //R
      "vmaddwod.h.bu $vr13, $vr15, $vr2 \n\t"
      "addi.d %0, %0, 48 \n\t"
      "vpickod.b $vr10, $vr13, $vr12 \n\t"
      "vst $vr10, %1, 0 \n\t"
      "addi.d %1, %1, 16 \n\t"
      "bnez %2, 1b \n\t"
      : "+&r"(src_rgba), // %0
        "+&r"(dst_y), // %1
        "+&r"(width) // %2
      : "r"(rgbconstants), // %3
        "r"(shuff) // %4
      : "memory"
  );
}
// Convert a row of RGB24 pixels to full-range (JPEG) luma.
void RGB24ToYJRow_LSX(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) {
  RGBToYMatrixRow_LSX(src_rgb24, dst_yj, width, &kRgb24JPEGConstants);
}
// Convert a row of RAW pixels to full-range (JPEG) luma.
void RAWToYJRow_LSX(const uint8_t* src_raw, uint8_t* dst_yj, int width) {
  RGBToYMatrixRow_LSX(src_raw, dst_yj, width, &kRawJPEGConstants);
}
// Convert a row of RGB24 pixels to BT.601 ("I601") luma.
void RGB24ToYRow_LSX(const uint8_t* src_rgb24, uint8_t* dst_y, int width) {
  RGBToYMatrixRow_LSX(src_rgb24, dst_y, width, &kRgb24I601Constants);
}
// Convert a row of RAW pixels to BT.601 ("I601") luma.
void RAWToYRow_LSX(const uint8_t* src_raw, uint8_t* dst_y, int width) {
  RGBToYMatrixRow_LSX(src_raw, dst_y, width, &kRawI601Constants);
}
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv

View File

@ -720,6 +720,60 @@ void DetileToYUY2_NEON(const uint8_t* src_y,
}
#endif
// Unpack MT2T into tiled P010, 64 pixels at a time (this mirrors the
// AArch64 implementation of the same routine; see
// tinyurl.com/mtk-10bit-video-format noted there for format documentation).
// Each 80-byte block holds 16 bytes of packed 2-bit lower bits followed by
// 64 bytes of 8-bit upper bits; one loop iteration consumes a whole block
// and emits 64 16-bit pixels.
// NOTE(review): |size| is reduced by 80 per iteration, so it is presumably a
// multiple of 80 -- confirm with callers.
void UnpackMT2T_NEON(const uint8_t* src, uint16_t* dst, size_t size) {
  const uint8_t* src_lower_bits = src;
  const uint8_t* src_upper_bits = src + 16;
  asm volatile(
      "1: \n"
      // First 32 pixels of the block.
      "vld4.8 {d1, d3, d5, d7}, [%1]! \n"  // Load 32 bytes of upper
                                           // bits.
      "vld1.8 {d6}, [%0]! \n"  // Load 8 bytes of lower
                               // bits.
      "vshl.u8 d4, d6, #2 \n"  // Align lower bits.
      "vshl.u8 d2, d6, #4 \n"
      "vshl.u8 d0, d6, #6 \n"
      "vzip.u8 d0, d1 \n"  // Zip lower and upper
                           // bits together.
      "vzip.u8 d2, d3 \n"
      "vzip.u8 d4, d5 \n"
      "vzip.u8 d6, d7 \n"
      "vsri.u16 q0, q0, #10 \n"  // Copy upper 6 bits into
                                 // lower 6 bits for better
                                 // accuracy in
                                 // conversions.
      "vsri.u16 q1, q1, #10 \n"
      "vsri.u16 q2, q2, #10 \n"
      "vsri.u16 q3, q3, #10 \n"
      "vst4.16 {d0, d2, d4, d6}, [%2]! \n"  // Store 32 pixels
      "vst4.16 {d1, d3, d5, d7}, [%2]! \n"
      "vld4.8 {d1, d3, d5, d7}, [%1]! \n"  // Process last 32 pixels
                                           // in the block
      "vld1.8 {d6}, [%0]! \n"
      "vshl.u8 d4, d6, #2 \n"
      "vshl.u8 d2, d6, #4 \n"
      "vshl.u8 d0, d6, #6 \n"
      "vzip.u8 d0, d1 \n"
      "vzip.u8 d2, d3 \n"
      "vzip.u8 d4, d5 \n"
      "vzip.u8 d6, d7 \n"
      "vsri.u16 q0, q0, #10 \n"
      "vsri.u16 q1, q1, #10 \n"
      "vsri.u16 q2, q2, #10 \n"
      "vsri.u16 q3, q3, #10 \n"
      "vst4.16 {d0, d2, d4, d6}, [%2]! \n"
      "vst4.16 {d1, d3, d5, d7}, [%2]! \n"
      // Advance to the next 80-byte block: lower bits start where the upper
      // bits ended; upper bits follow 16 bytes later.
      "mov %0, %1 \n"
      "add %1, %0, #16 \n"
      "subs %3, %3, #80 \n"
      "bgt 1b \n"
      : "+r"(src_lower_bits),  // %0
        "+r"(src_upper_bits),  // %1
        "+r"(dst),             // %2
        "+r"(size)             // %3
      :
      : "cc", "memory", "q0", "q1", "q2", "q3");
}
// Reads 16 U's and V's and writes out 16 pairs of UV.
void MergeUVRow_NEON(const uint8_t* src_u,
const uint8_t* src_v,
@ -3857,31 +3911,25 @@ void DivideRow_16_NEON(const uint16_t* src_y,
int scale,
int width) {
asm volatile(
"vdup.16 q0, %3 \n"
"vdup.16 d8, %3 \n"
"1: \n"
"vld1.16 {q1}, [%0]! \n"
"vld1.16 {q2}, [%0]! \n"
"vmovl.u16 q3, d2 \n"
"vmovl.u16 q1, d3 \n"
"vmovl.u16 q4, d4 \n"
"vmovl.u16 q2, d5 \n"
"vshl.u32 q3, q3, q0 \n"
"vshl.u32 q4, q4, q0 \n"
"vshl.u32 q1, q1, q0 \n"
"vshl.u32 q2, q2, q0 \n"
"vmovn.u32 d2, q3 \n"
"vmovn.u32 d3, q1 \n"
"vmovn.u32 d4, q4 \n"
"vmovn.u32 d5, q2 \n"
"vst1.16 {q1}, [%1]! \n"
"vst1.16 {q2}, [%1]! \n"
"vld1.16 {q2, q3}, [%0]! \n"
"vmull.u16 q0, d4, d8 \n"
"vmull.u16 q1, d5, d8 \n"
"vmull.u16 q2, d6, d8 \n"
"vmull.u16 q3, d7, d8 \n"
"vshrn.u32 d0, q0, #16 \n"
"vshrn.u32 d1, q1, #16 \n"
"vshrn.u32 d2, q2, #16 \n"
"vshrn.u32 d3, q3, #16 \n"
"vst1.16 {q0, q1}, [%1]! \n" // store 16 pixels
"subs %2, %2, #16 \n" // 16 src pixels per loop
"bgt 1b \n"
: "+r"(src_y), // %0
"+r"(dst_y), // %1
"+r"(width) // %2
: "r"(scale) // %3
: "cc", "memory", "q0", "q1", "q2", "q3", "q4");
: "cc", "memory", "q0", "q1", "q2", "q3", "d8");
}
// Use scale to convert lsb formats to msb, depending how many bits there are:

View File

@ -749,6 +749,54 @@ void DetileToYUY2_NEON(const uint8_t* src_y,
}
#endif
// Unpack MT2T into tiled P010 64 pixels at a time. See
// tinyurl.com/mtk-10bit-video-format for format documentation.
// Each 80-byte block holds 16 bytes of packed 2-bit lower bits followed by
// 64 bytes of 8-bit upper bits; one loop iteration consumes a whole block
// and emits 64 16-bit pixels.
// NOTE(review): |size| is reduced by 80 per iteration, so it is presumably a
// multiple of 80 -- confirm with callers. The clobber list also names w0 and
// v8-v12, which this code does not appear to touch; the extra clobbers are
// harmless.
void UnpackMT2T_NEON(const uint8_t* src, uint16_t* dst, size_t size) {
  const uint8_t* src_lower_bits = src;
  const uint8_t* src_upper_bits = src + 16;
  asm volatile(
      "1: \n"
      // First 32 pixels: 32 upper-bit bytes and 8 lower-bit bytes.
      "ld4 {v0.8b, v1.8b, v2.8b, v3.8b}, [%1], #32 \n"
      "ld1 {v7.8b}, [%0], #8 \n"
      "shl v6.8b, v7.8b, #2 \n"  // Align the packed 2-bit fields.
      "shl v5.8b, v7.8b, #4 \n"
      "shl v4.8b, v7.8b, #6 \n"
      "zip1 v0.16b, v4.16b, v0.16b \n"  // Interleave lower/upper bits.
      "zip1 v1.16b, v5.16b, v1.16b \n"
      "zip1 v2.16b, v6.16b, v2.16b \n"
      "zip1 v3.16b, v7.16b, v3.16b \n"
      "sri v0.8h, v0.8h, #10 \n"  // Replicate the top 6 bits into the
      "sri v1.8h, v1.8h, #10 \n"  // low bits for conversion accuracy.
      "sri v2.8h, v2.8h, #10 \n"
      "sri v3.8h, v3.8h, #10 \n"
      "st4 {v0.8h, v1.8h, v2.8h, v3.8h}, [%2], #64 \n"
      // Last 32 pixels of the block.
      "ld4 {v0.8b, v1.8b, v2.8b, v3.8b}, [%1], #32 \n"
      "ld1 {v7.8b}, [%0], #8 \n"
      "shl v6.8b, v7.8b, #2 \n"
      "shl v5.8b, v7.8b, #4 \n"
      "shl v4.8b, v7.8b, #6 \n"
      "zip1 v0.16b, v4.16b, v0.16b \n"
      "zip1 v1.16b, v5.16b, v1.16b \n"
      "zip1 v2.16b, v6.16b, v2.16b \n"
      "zip1 v3.16b, v7.16b, v3.16b \n"
      "sri v0.8h, v0.8h, #10 \n"
      "sri v1.8h, v1.8h, #10 \n"
      "sri v2.8h, v2.8h, #10 \n"
      "sri v3.8h, v3.8h, #10 \n"
      "st4 {v0.8h, v1.8h, v2.8h, v3.8h}, [%2], #64 \n"
      // Advance to the next 80-byte block.
      "mov %0, %1 \n"
      "add %1, %0, #16 \n"
      "subs %3, %3, #80 \n"
      "b.gt 1b \n"
      : "+r"(src_lower_bits),  // %0
        "+r"(src_upper_bits),  // %1
        "+r"(dst),             // %2
        "+r"(size)             // %3
      :
      : "cc", "memory", "w0", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
        "v8", "v9", "v10", "v11", "v12");
}
#if LIBYUV_USE_ST2
// Reads 16 U's and V's and writes out 16 pairs of UV.
void MergeUVRow_NEON(const uint8_t* src_u,
@ -4413,23 +4461,19 @@ void DivideRow_16_NEON(const uint16_t* src_y,
int scale,
int width) {
asm volatile(
"dup v0.8h, %w3 \n"
"dup v4.8h, %w3 \n"
"1: \n"
"ldp q1, q2, [%0], #32 \n"
"ushll v3.4s, v1.4h, #0 \n"
"ushll v4.4s, v2.4h, #0 \n"
"ldp q2, q3, [%0], #32 \n"
"umull v0.4s, v2.4h, v4.4h \n"
"umull2 v1.4s, v2.8h, v4.8h \n"
"umull v2.4s, v3.4h, v4.4h \n"
"umull2 v3.4s, v3.8h, v4.8h \n"
"prfm pldl1keep, [%0, 448] \n"
"ushll2 v1.4s, v1.8h, #0 \n"
"ushll2 v2.4s, v2.8h, #0 \n"
"mul v3.4s, v0.4s, v3.4s \n"
"mul v4.4s, v0.4s, v4.4s \n"
"mul v1.4s, v0.4s, v1.4s \n"
"mul v2.4s, v0.4s, v2.4s \n"
"shrn v3.4h, v3.4s, #16 \n"
"shrn v4.4h, v4.4s, #16 \n"
"shrn2 v3.8h, v1.4s, #16 \n"
"shrn2 v4.8h, v2.4s, #16 \n"
"stp q3, q3, [%1], #32 \n" // store 16 pixels
"shrn v0.4h, v0.4s, #16 \n"
"shrn2 v0.8h, v1.4s, #16 \n"
"shrn v1.4h, v2.4s, #16 \n"
"shrn2 v1.8h, v3.4s, #16 \n"
"stp q0, q1, [%1], #32 \n" // store 16 pixels
"subs %w2, %w2, #16 \n" // 16 src pixels per loop
"b.gt 1b \n"
: "+r"(src_y), // %0

View File

@ -198,6 +198,51 @@ static void ScalePlaneDown2_16(int src_width,
}
}
// Scale a 16-bit plane down by 2 in each dimension while narrowing samples
// to 8 bits via the fixed-point |scale| factor (see C16TO8).
// |filtering| selects no filter, horizontal linear, or a 2x2 box filter;
// odd source widths use the _Odd row kernels.
void ScalePlaneDown2_16To8(int src_width,
                           int src_height,
                           int dst_width,
                           int dst_height,
                           int src_stride,
                           int dst_stride,
                           const uint16_t* src_ptr,
                           uint8_t* dst_ptr,
                           int scale,
                           enum FilterMode filtering) {
  // Row kernel: chosen by filter mode, with an _Odd variant when the source
  // width is odd.
  void (*ScaleRowDown2)(const uint16_t* src_ptr, ptrdiff_t src_stride,
                        uint8_t* dst_ptr, int dst_width, int scale);
  int j;
  int input_row_step = src_stride * 2;  // Two source rows per output row.
  if (src_width & 1) {
    if (filtering == kFilterNone) {
      ScaleRowDown2 = ScaleRowDown2_16To8_Odd_C;
    } else if (filtering == kFilterLinear) {
      ScaleRowDown2 = ScaleRowDown2Linear_16To8_Odd_C;
    } else {
      ScaleRowDown2 = ScaleRowDown2Box_16To8_Odd_C;
    }
  } else {
    if (filtering == kFilterNone) {
      ScaleRowDown2 = ScaleRowDown2_16To8_C;
    } else if (filtering == kFilterLinear) {
      ScaleRowDown2 = ScaleRowDown2Linear_16To8_C;
    } else {
      ScaleRowDown2 = ScaleRowDown2Box_16To8_C;
    }
  }
  (void)dst_height;
  if (!filtering) {
    src_ptr += src_stride;  // Point to odd rows.
    src_stride = 0;
  }
  if (filtering == kFilterLinear) {
    src_stride = 0;
  }
  for (j = 0; j < src_height / 2; ++j) {
    ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width, scale);
    src_ptr += input_row_step;
    dst_ptr += dst_stride;
  }
  if (src_height & 1) {
    if (!filtering) {
      // src_stride was zeroed above, so this is a no-op kept to mirror the
      // structure of the other ScalePlaneDown2 variants.
      src_ptr -= src_stride;  // Point to last row.
    }
    ScaleRowDown2(src_ptr, 0, dst_ptr, dst_width, scale);
  }
}
// Scale plane, 1/4
// This is an optimized version for scaling down a plane to 1/4 of
// its original size.
@ -775,9 +820,9 @@ static void ScaleAddCols2_C(int dst_width,
int ix = x >> 16;
x += dx;
boxwidth = MIN1((x >> 16) - ix);
*dst_ptr++ =
SumPixels(boxwidth, src_ptr + ix) * scaletbl[boxwidth - minboxwidth] >>
16;
*dst_ptr++ = (uint8_t)(SumPixels(boxwidth, src_ptr + ix) *
scaletbl[boxwidth - minboxwidth] >>
16);
}
}
@ -814,7 +859,7 @@ static void ScaleAddCols0_C(int dst_width,
(void)dx;
src_ptr += (x >> 16);
for (i = 0; i < dst_width; ++i) {
*dst_ptr++ = src_ptr[i] * scaleval >> 16;
*dst_ptr++ = (uint8_t)(src_ptr[i] * scaleval >> 16);
}
}
@ -829,7 +874,7 @@ static void ScaleAddCols1_C(int dst_width,
int i;
x >>= 16;
for (i = 0; i < dst_width; ++i) {
*dst_ptr++ = SumPixels(boxwidth, src_ptr + x) * scaleval >> 16;
*dst_ptr++ = (uint8_t)(SumPixels(boxwidth, src_ptr + x) * scaleval >> 16);
x += boxwidth;
}
}

View File

@ -58,9 +58,9 @@ static void ScaleARGBDown2(int src_width,
assert((dy & 0x1ffff) == 0); // Test vertical scale is multiple of 2.
// Advance to odd row, even column.
if (filtering == kFilterBilinear) {
src_argb += (y >> 16) * (int64_t)src_stride + (x >> 16) * 4;
src_argb += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4;
} else {
src_argb += (y >> 16) * (int64_t)src_stride + ((x >> 16) - 1) * 4;
src_argb += (y >> 16) * (intptr_t)src_stride + ((x >> 16) - 1) * 4;
}
#if defined(HAS_SCALEARGBROWDOWN2_SSE2)
@ -162,7 +162,7 @@ static void ScaleARGBDown4Box(int src_width,
uint8_t* dst_argb, int dst_width) =
ScaleARGBRowDown2Box_C;
// Advance to odd row, even column.
src_argb += (y >> 16) * (int64_t)src_stride + (x >> 16) * 4;
src_argb += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4;
(void)src_width;
(void)src_height;
(void)dx;
@ -214,7 +214,7 @@ static void ScaleARGBDownEven(int src_width,
enum FilterMode filtering) {
int j;
int col_step = dx >> 16;
int row_stride = (dy >> 16) * (int64_t)src_stride;
ptrdiff_t row_stride = (ptrdiff_t)((dy >> 16) * (intptr_t)src_stride);
void (*ScaleARGBRowDownEven)(const uint8_t* src_argb, ptrdiff_t src_stride,
int src_step, uint8_t* dst_argb, int dst_width) =
filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C;
@ -222,7 +222,7 @@ static void ScaleARGBDownEven(int src_width,
(void)src_height;
assert(IS_ALIGNED(src_width, 2));
assert(IS_ALIGNED(src_height, 2));
src_argb += (y >> 16) * (int64_t)src_stride + (x >> 16) * 4;
src_argb += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4;
#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_SSE2
@ -289,10 +289,10 @@ static void ScaleARGBBilinearDown(int src_width,
int dy,
enum FilterMode filtering) {
int j;
void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb,
void (*InterpolateRow)(uint8_t* dst_argb, const uint8_t* src_argb,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_C;
void (*ScaleARGBFilterCols)(uint8_t * dst_argb, const uint8_t* src_argb,
void (*ScaleARGBFilterCols)(uint8_t* dst_argb, const uint8_t* src_argb,
int dst_width, int x, int dx) =
(src_width >= 32768) ? ScaleARGBFilterCols64_C : ScaleARGBFilterCols_C;
int64_t xlast = x + (int64_t)(dst_width - 1) * dx;
@ -388,7 +388,7 @@ static void ScaleARGBBilinearDown(int src_width,
}
for (j = 0; j < dst_height; ++j) {
int yi = y >> 16;
const uint8_t* src = src_argb + yi * (int64_t)src_stride;
const uint8_t* src = src_argb + yi * (intptr_t)src_stride;
if (filtering == kFilterLinear) {
ScaleARGBFilterCols(dst_argb, src, dst_width, x, dx);
} else {
@ -421,10 +421,10 @@ static void ScaleARGBBilinearUp(int src_width,
int dy,
enum FilterMode filtering) {
int j;
void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb,
void (*InterpolateRow)(uint8_t* dst_argb, const uint8_t* src_argb,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_C;
void (*ScaleARGBFilterCols)(uint8_t * dst_argb, const uint8_t* src_argb,
void (*ScaleARGBFilterCols)(uint8_t* dst_argb, const uint8_t* src_argb,
int dst_width, int x, int dx) =
filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
const int max_y = (src_height - 1) << 16;
@ -545,7 +545,7 @@ static void ScaleARGBBilinearUp(int src_width,
{
int yi = y >> 16;
const uint8_t* src = src_argb + yi * (int64_t)src_stride;
const uint8_t* src = src_argb + yi * (intptr_t)src_stride;
// Allocate 2 rows of ARGB.
const int row_size = (dst_width * 4 + 31) & ~31;
@ -570,7 +570,7 @@ static void ScaleARGBBilinearUp(int src_width,
if (y > max_y) {
y = max_y;
yi = y >> 16;
src = src_argb + yi * (int64_t)src_stride;
src = src_argb + yi * (intptr_t)src_stride;
}
if (yi != lasty) {
ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
@ -668,7 +668,7 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
}
#endif
void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb,
void (*InterpolateRow)(uint8_t* dst_argb, const uint8_t* src_argb,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_C;
#if defined(HAS_INTERPOLATEROW_SSSE3)
@ -712,7 +712,7 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
}
#endif
void (*ScaleARGBFilterCols)(uint8_t * dst_argb, const uint8_t* src_argb,
void (*ScaleARGBFilterCols)(uint8_t* dst_argb, const uint8_t* src_argb,
int dst_width, int x, int dx) =
filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
if (src_width >= 32768) {
@ -793,9 +793,9 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
const int kYShift = 1; // Shift Y by 1 to convert Y plane to UV coordinate.
int yi = y >> 16;
int uv_yi = yi >> kYShift;
const uint8_t* src_row_y = src_y + yi * (int64_t)src_stride_y;
const uint8_t* src_row_u = src_u + uv_yi * (int64_t)src_stride_u;
const uint8_t* src_row_v = src_v + uv_yi * (int64_t)src_stride_v;
const uint8_t* src_row_y = src_y + yi * (intptr_t)src_stride_y;
const uint8_t* src_row_u = src_u + uv_yi * (intptr_t)src_stride_u;
const uint8_t* src_row_v = src_v + uv_yi * (intptr_t)src_stride_v;
// Allocate 2 rows of ARGB.
const int row_size = (dst_width * 4 + 31) & ~31;
@ -833,9 +833,9 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
y = max_y;
yi = y >> 16;
uv_yi = yi >> kYShift;
src_row_y = src_y + yi * (int64_t)src_stride_y;
src_row_u = src_u + uv_yi * (int64_t)src_stride_u;
src_row_v = src_v + uv_yi * (int64_t)src_stride_v;
src_row_y = src_y + yi * (intptr_t)src_stride_y;
src_row_u = src_u + uv_yi * (intptr_t)src_stride_u;
src_row_v = src_v + uv_yi * (intptr_t)src_stride_v;
}
if (yi != lasty) {
// TODO(fbarchard): Convert the clipped region of row.
@ -883,7 +883,7 @@ static void ScaleARGBSimple(int src_width,
int y,
int dy) {
int j;
void (*ScaleARGBCols)(uint8_t * dst_argb, const uint8_t* src_argb,
void (*ScaleARGBCols)(uint8_t* dst_argb, const uint8_t* src_argb,
int dst_width, int x, int dx) =
(src_width >= 32768) ? ScaleARGBCols64_C : ScaleARGBCols_C;
(void)src_height;
@ -926,7 +926,7 @@ static void ScaleARGBSimple(int src_width,
}
for (j = 0; j < dst_height; ++j) {
ScaleARGBCols(dst_argb, src_argb + (y >> 16) * (int64_t)src_stride,
ScaleARGBCols(dst_argb, src_argb + (y >> 16) * (intptr_t)src_stride,
dst_width, x, dx);
dst_argb += dst_stride;
y += dy;
@ -962,7 +962,7 @@ static void ScaleARGB(const uint8_t* src,
// Negative src_height means invert the image.
if (src_height < 0) {
src_height = -src_height;
src = src + (src_height - 1) * (int64_t)src_stride;
src = src + (src_height - 1) * (intptr_t)src_stride;
src_stride = -src_stride;
}
ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
@ -977,7 +977,7 @@ static void ScaleARGB(const uint8_t* src,
if (clip_y) {
int64_t clipf = (int64_t)(clip_y)*dy;
y += (clipf & 0xffff);
src += (clipf >> 16) * (int64_t)src_stride;
src += (clipf >> 16) * (intptr_t)src_stride;
dst += clip_y * dst_stride;
}
@ -1011,7 +1011,7 @@ static void ScaleARGB(const uint8_t* src,
filtering = kFilterNone;
if (dx == 0x10000 && dy == 0x10000) {
// Straight copy.
ARGBCopy(src + (y >> 16) * (int64_t)src_stride + (x >> 16) * 4,
ARGBCopy(src + (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4,
src_stride, dst, dst_stride, clip_width, clip_height);
return;
}

View File

@ -23,6 +23,25 @@ namespace libyuv {
extern "C" {
#endif
// STATIC_CAST(type, expr): portable narrowing cast. Uses C++ static_cast
// when compiled as C++ (these row files build under both languages); falls
// back to a plain C cast otherwise.
#ifdef __cplusplus
#define STATIC_CAST(type, expr) static_cast<type>(expr)
#else
#define STATIC_CAST(type, expr) (type)(expr)
#endif
// Clamp a value to the byte range [0, 255]: inputs >= 255 saturate to 255;
// anything below (including negatives) is truncated to its low 8 bits,
// exactly as the original branchless form behaved.
// TODO(fbarchard): make clamp255 preserve negative values.
static __inline int32_t clamp255(int32_t v) {
  return (v >= 255) ? 255 : (v & 255);
}
// Use scale to convert lsb formats to msb, depending how many bits there are:
// 32768 = 9 bits
// 16384 = 10 bits
// 4096 = 12 bits
// 256 = 16 bits
// TODO(fbarchard): change scale to bits
// C16TO8(v, scale): fixed-point narrowing of a 16-bit sample to 8 bits,
// computed as (v * scale) >> 16 and clamped to [0, 255] by clamp255().
#define C16TO8(v, scale) clamp255(((v) * (scale)) >> 16)
// Absolute value of an int (Abs(INT_MIN) overflows, same as the original).
static __inline int Abs(int v) {
  return (v < 0) ? -v : v;
}
@ -62,6 +81,50 @@ void ScaleRowDown2_16_C(const uint16_t* src_ptr,
}
}
// Subsample a row by 2 horizontally, taking the odd-indexed source sample of
// each pair, and narrow 16-bit samples to 8 bits via the fixed-point |scale|
// factor (see C16TO8). |src_stride| is unused (single-row kernel).
void ScaleRowDown2_16To8_C(const uint16_t* src_ptr,
                           ptrdiff_t src_stride,
                           uint8_t* dst,
                           int dst_width,
                           int scale) {
  int i;
  (void)src_stride;
  assert(scale >= 256);
  assert(scale <= 32768);
  for (i = 0; i + 2 <= dst_width; i += 2) {
    dst[i] = STATIC_CAST(uint8_t, C16TO8(src_ptr[2 * i + 1], scale));
    dst[i + 1] = STATIC_CAST(uint8_t, C16TO8(src_ptr[2 * i + 3], scale));
  }
  if (dst_width & 1) {
    dst[i] = STATIC_CAST(uint8_t, C16TO8(src_ptr[2 * i + 1], scale));
  }
}
// Variant of ScaleRowDown2_16To8_C for odd source widths: the final
// destination pixel is produced from the single trailing (unpaired) source
// sample instead of an odd-indexed one.
void ScaleRowDown2_16To8_Odd_C(const uint16_t* src_ptr,
                               ptrdiff_t src_stride,
                               uint8_t* dst,
                               int dst_width,
                               int scale) {
  int i;
  const int body_width = dst_width - 1;  // Pixels produced from full pairs.
  (void)src_stride;
  assert(scale >= 256);
  assert(scale <= 32768);
  for (i = 0; i + 2 <= body_width; i += 2) {
    dst[i] = STATIC_CAST(uint8_t, C16TO8(src_ptr[2 * i + 1], scale));
    dst[i + 1] = STATIC_CAST(uint8_t, C16TO8(src_ptr[2 * i + 3], scale));
  }
  if (body_width & 1) {
    dst[body_width - 1] =
        STATIC_CAST(uint8_t, C16TO8(src_ptr[2 * body_width - 1], scale));
  }
  // Last pixel comes from the final source sample itself.
  dst[body_width] =
      STATIC_CAST(uint8_t, C16TO8(src_ptr[2 * body_width], scale));
}
void ScaleRowDown2Linear_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
@ -98,6 +161,52 @@ void ScaleRowDown2Linear_16_C(const uint16_t* src_ptr,
}
}
// Downsample a row by 2 with horizontal linear filtering: average each pair
// of 16-bit samples (with rounding), then narrow to 8 bits via the
// fixed-point |scale| factor. |src_stride| is unused (single-row kernel).
void ScaleRowDown2Linear_16To8_C(const uint16_t* src_ptr,
                                 ptrdiff_t src_stride,
                                 uint8_t* dst,
                                 int dst_width,
                                 int scale) {
  int i;
  (void)src_stride;
  assert(scale >= 256);
  assert(scale <= 32768);
  for (i = 0; i + 2 <= dst_width; i += 2) {
    dst[i] = STATIC_CAST(
        uint8_t,
        C16TO8((src_ptr[2 * i] + src_ptr[2 * i + 1] + 1) >> 1, scale));
    dst[i + 1] = STATIC_CAST(
        uint8_t,
        C16TO8((src_ptr[2 * i + 2] + src_ptr[2 * i + 3] + 1) >> 1, scale));
  }
  if (dst_width & 1) {
    dst[i] = STATIC_CAST(
        uint8_t,
        C16TO8((src_ptr[2 * i] + src_ptr[2 * i + 1] + 1) >> 1, scale));
  }
}
// Odd-width variant of ScaleRowDown2Linear_16To8_C: the final destination
// pixel comes from the single trailing source sample (no pair to average).
void ScaleRowDown2Linear_16To8_Odd_C(const uint16_t* src_ptr,
                                     ptrdiff_t src_stride,
                                     uint8_t* dst,
                                     int dst_width,
                                     int scale) {
  int i;
  const int body_width = dst_width - 1;  // Pixels produced from full pairs.
  (void)src_stride;
  assert(scale >= 256);
  assert(scale <= 32768);
  for (i = 0; i + 2 <= body_width; i += 2) {
    dst[i] = STATIC_CAST(
        uint8_t,
        C16TO8((src_ptr[2 * i] + src_ptr[2 * i + 1] + 1) >> 1, scale));
    dst[i + 1] = STATIC_CAST(
        uint8_t,
        C16TO8((src_ptr[2 * i + 2] + src_ptr[2 * i + 3] + 1) >> 1, scale));
  }
  if (body_width & 1) {
    dst[body_width - 1] = STATIC_CAST(
        uint8_t,
        C16TO8((src_ptr[2 * body_width - 2] + src_ptr[2 * body_width - 1] + 1) >>
                   1,
               scale));
  }
  // Last pixel comes from the final source sample itself.
  dst[body_width] =
      STATIC_CAST(uint8_t, C16TO8(src_ptr[2 * body_width], scale));
}
void ScaleRowDown2Box_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
@ -160,6 +269,61 @@ void ScaleRowDown2Box_16_C(const uint16_t* src_ptr,
}
}
// Downsample by 2 with a 2x2 box filter: average each 2x2 block of 16-bit
// samples (with rounding) across this row and the next (|src_stride| away),
// then narrow to 8 bits via the fixed-point |scale| factor.
void ScaleRowDown2Box_16To8_C(const uint16_t* src_ptr,
                              ptrdiff_t src_stride,
                              uint8_t* dst,
                              int dst_width,
                              int scale) {
  const uint16_t* next_row = src_ptr + src_stride;
  int i;
  assert(scale >= 256);
  assert(scale <= 32768);
  for (i = 0; i + 2 <= dst_width; i += 2) {
    dst[i] = STATIC_CAST(
        uint8_t,
        C16TO8((src_ptr[2 * i] + src_ptr[2 * i + 1] + next_row[2 * i] +
                next_row[2 * i + 1] + 2) >>
                   2,
               scale));
    dst[i + 1] = STATIC_CAST(
        uint8_t,
        C16TO8((src_ptr[2 * i + 2] + src_ptr[2 * i + 3] + next_row[2 * i + 2] +
                next_row[2 * i + 3] + 2) >>
                   2,
               scale));
  }
  if (dst_width & 1) {
    dst[i] = STATIC_CAST(
        uint8_t,
        C16TO8((src_ptr[2 * i] + src_ptr[2 * i + 1] + next_row[2 * i] +
                next_row[2 * i + 1] + 2) >>
                   2,
               scale));
  }
}
// Odd-width variant of ScaleRowDown2Box_16To8_C: the final destination pixel
// averages only the single trailing column of the two source rows.
void ScaleRowDown2Box_16To8_Odd_C(const uint16_t* src_ptr,
                                  ptrdiff_t src_stride,
                                  uint8_t* dst,
                                  int dst_width,
                                  int scale) {
  const uint16_t* next_row = src_ptr + src_stride;
  int i;
  const int body_width = dst_width - 1;  // Pixels produced from full 2x2 boxes.
  assert(scale >= 256);
  assert(scale <= 32768);
  for (i = 0; i + 2 <= body_width; i += 2) {
    dst[i] = STATIC_CAST(
        uint8_t,
        C16TO8((src_ptr[2 * i] + src_ptr[2 * i + 1] + next_row[2 * i] +
                next_row[2 * i + 1] + 2) >>
                   2,
               scale));
    dst[i + 1] = STATIC_CAST(
        uint8_t,
        C16TO8((src_ptr[2 * i + 2] + src_ptr[2 * i + 3] + next_row[2 * i + 2] +
                next_row[2 * i + 3] + 2) >>
                   2,
               scale));
  }
  if (body_width & 1) {
    dst[body_width - 1] = STATIC_CAST(
        uint8_t,
        C16TO8((src_ptr[2 * body_width - 2] + src_ptr[2 * body_width - 1] +
                next_row[2 * body_width - 2] + next_row[2 * body_width - 1] +
                2) >>
                   2,
               scale));
  }
  // Last pixel: vertical average of the trailing column only.
  dst[body_width] = STATIC_CAST(
      uint8_t,
      C16TO8((src_ptr[2 * body_width] + next_row[2 * body_width] + 1) >> 1,
             scale));
}
void ScaleRowDown4_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,

View File

@ -83,9 +83,9 @@ static void ScaleUVDown2(int src_width,
assert((dy & 0x1ffff) == 0); // Test vertical scale is multiple of 2.
// Advance to odd row, even column.
if (filtering == kFilterBilinear) {
src_uv += (y >> 16) * (int64_t)src_stride + (x >> 16) * 2;
src_uv += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2;
} else {
src_uv += (y >> 16) * (int64_t)src_stride + ((x >> 16) - 1) * 2;
src_uv += (y >> 16) * (intptr_t)src_stride + ((x >> 16) - 1) * 2;
}
#if defined(HAS_SCALEUVROWDOWN2BOX_SSSE3)
@ -200,7 +200,7 @@ static void ScaleUVDown4Box(int src_width,
uint8_t* dst_uv, int dst_width) =
ScaleUVRowDown2Box_C;
// Advance to odd row, even column.
src_uv += (y >> 16) * (int64_t)src_stride + (x >> 16) * 2;
src_uv += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2;
(void)src_width;
(void)src_height;
(void)dx;
@ -263,7 +263,7 @@ static void ScaleUVDownEven(int src_width,
enum FilterMode filtering) {
int j;
int col_step = dx >> 16;
int row_stride = (dy >> 16) * (int64_t)src_stride;
ptrdiff_t row_stride = (ptrdiff_t)((dy >> 16) * (intptr_t)src_stride);
void (*ScaleUVRowDownEven)(const uint8_t* src_uv, ptrdiff_t src_stride,
int src_step, uint8_t* dst_uv, int dst_width) =
filtering ? ScaleUVRowDownEvenBox_C : ScaleUVRowDownEven_C;
@ -271,7 +271,7 @@ static void ScaleUVDownEven(int src_width,
(void)src_height;
assert(IS_ALIGNED(src_width, 2));
assert(IS_ALIGNED(src_height, 2));
src_uv += (y >> 16) * (int64_t)src_stride + (x >> 16) * 2;
src_uv += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2;
#if defined(HAS_SCALEUVROWDOWNEVEN_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ScaleUVRowDownEven = filtering ? ScaleUVRowDownEvenBox_Any_SSSE3
@ -338,10 +338,10 @@ static void ScaleUVBilinearDown(int src_width,
int dy,
enum FilterMode filtering) {
int j;
void (*InterpolateRow)(uint8_t * dst_uv, const uint8_t* src_uv,
void (*InterpolateRow)(uint8_t* dst_uv, const uint8_t* src_uv,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_C;
void (*ScaleUVFilterCols)(uint8_t * dst_uv, const uint8_t* src_uv,
void (*ScaleUVFilterCols)(uint8_t* dst_uv, const uint8_t* src_uv,
int dst_width, int x, int dx) =
(src_width >= 32768) ? ScaleUVFilterCols64_C : ScaleUVFilterCols_C;
int64_t xlast = x + (int64_t)(dst_width - 1) * dx;
@ -429,7 +429,7 @@ static void ScaleUVBilinearDown(int src_width,
}
for (j = 0; j < dst_height; ++j) {
int yi = y >> 16;
const uint8_t* src = src_uv + yi * (int64_t)src_stride;
const uint8_t* src = src_uv + yi * (intptr_t)src_stride;
if (filtering == kFilterLinear) {
ScaleUVFilterCols(dst_uv, src, dst_width, x, dx);
} else {
@ -464,10 +464,10 @@ static void ScaleUVBilinearUp(int src_width,
int dy,
enum FilterMode filtering) {
int j;
void (*InterpolateRow)(uint8_t * dst_uv, const uint8_t* src_uv,
void (*InterpolateRow)(uint8_t* dst_uv, const uint8_t* src_uv,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_C;
void (*ScaleUVFilterCols)(uint8_t * dst_uv, const uint8_t* src_uv,
void (*ScaleUVFilterCols)(uint8_t* dst_uv, const uint8_t* src_uv,
int dst_width, int x, int dx) =
filtering ? ScaleUVFilterCols_C : ScaleUVCols_C;
const int max_y = (src_height - 1) << 16;
@ -571,7 +571,7 @@ static void ScaleUVBilinearUp(int src_width,
{
int yi = y >> 16;
const uint8_t* src = src_uv + yi * (int64_t)src_stride;
const uint8_t* src = src_uv + yi * (intptr_t)src_stride;
// Allocate 2 rows of UV.
const int row_size = (dst_width * 2 + 15) & ~15;
@ -596,7 +596,7 @@ static void ScaleUVBilinearUp(int src_width,
if (y > max_y) {
y = max_y;
yi = y >> 16;
src = src_uv + yi * (int64_t)src_stride;
src = src_uv + yi * (intptr_t)src_stride;
}
if (yi != lasty) {
ScaleUVFilterCols(rowptr, src, dst_width, x, dx);
@ -663,13 +663,13 @@ void ScaleUVLinearUp2(int src_width,
#endif
if (dst_height == 1) {
ScaleRowUp(src_uv + ((src_height - 1) / 2) * (int64_t)src_stride, dst_uv,
ScaleRowUp(src_uv + ((src_height - 1) / 2) * (intptr_t)src_stride, dst_uv,
dst_width);
} else {
dy = FixedDiv(src_height - 1, dst_height - 1);
y = (1 << 15) - 1;
for (i = 0; i < dst_height; ++i) {
ScaleRowUp(src_uv + (y >> 16) * (int64_t)src_stride, dst_uv, dst_width);
ScaleRowUp(src_uv + (y >> 16) * (intptr_t)src_stride, dst_uv, dst_width);
dst_uv += dst_stride;
y += dy;
}
@ -770,13 +770,13 @@ void ScaleUVLinearUp2_16(int src_width,
#endif
if (dst_height == 1) {
ScaleRowUp(src_uv + ((src_height - 1) / 2) * (int64_t)src_stride, dst_uv,
ScaleRowUp(src_uv + ((src_height - 1) / 2) * (intptr_t)src_stride, dst_uv,
dst_width);
} else {
dy = FixedDiv(src_height - 1, dst_height - 1);
y = (1 << 15) - 1;
for (i = 0; i < dst_height; ++i) {
ScaleRowUp(src_uv + (y >> 16) * (int64_t)src_stride, dst_uv, dst_width);
ScaleRowUp(src_uv + (y >> 16) * (intptr_t)src_stride, dst_uv, dst_width);
dst_uv += dst_stride;
y += dy;
}
@ -854,7 +854,7 @@ static void ScaleUVSimple(int src_width,
int y,
int dy) {
int j;
void (*ScaleUVCols)(uint8_t * dst_uv, const uint8_t* src_uv, int dst_width,
void (*ScaleUVCols)(uint8_t* dst_uv, const uint8_t* src_uv, int dst_width,
int x, int dx) =
(src_width >= 32768) ? ScaleUVCols64_C : ScaleUVCols_C;
(void)src_height;
@ -889,7 +889,7 @@ static void ScaleUVSimple(int src_width,
}
for (j = 0; j < dst_height; ++j) {
ScaleUVCols(dst_uv, src_uv + (y >> 16) * (int64_t)src_stride, dst_width, x,
ScaleUVCols(dst_uv, src_uv + (y >> 16) * (intptr_t)src_stride, dst_width, x,
dx);
dst_uv += dst_stride;
y += dy;
@ -910,7 +910,7 @@ static int UVCopy(const uint8_t* src_uv,
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_uv = src_uv + (height - 1) * (int64_t)src_stride_uv;
src_uv = src_uv + (height - 1) * (intptr_t)src_stride_uv;
src_stride_uv = -src_stride_uv;
}
@ -930,7 +930,7 @@ static int UVCopy_16(const uint16_t* src_uv,
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_uv = src_uv + (height - 1) * (int64_t)src_stride_uv;
src_uv = src_uv + (height - 1) * (intptr_t)src_stride_uv;
src_stride_uv = -src_stride_uv;
}
@ -968,7 +968,7 @@ static void ScaleUV(const uint8_t* src,
// Negative src_height means invert the image.
if (src_height < 0) {
src_height = -src_height;
src = src + (src_height - 1) * (int64_t)src_stride;
src = src + (src_height - 1) * (intptr_t)src_stride;
src_stride = -src_stride;
}
ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
@ -983,7 +983,7 @@ static void ScaleUV(const uint8_t* src,
if (clip_y) {
int64_t clipf = (int64_t)(clip_y)*dy;
y += (clipf & 0xffff);
src += (clipf >> 16) * (int64_t)src_stride;
src += (clipf >> 16) * (intptr_t)src_stride;
dst += clip_y * dst_stride;
}
@ -1024,7 +1024,7 @@ static void ScaleUV(const uint8_t* src,
#ifdef HAS_UVCOPY
if (dx == 0x10000 && dy == 0x10000) {
// Straight copy.
UVCopy(src + (y >> 16) * (int64_t)src_stride + (x >> 16) * 2,
UVCopy(src + (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2,
src_stride, dst, dst_stride, clip_width, clip_height);
return;
}
@ -1118,7 +1118,7 @@ int UVScale_16(const uint16_t* src_uv,
// Negative src_height means invert the image.
if (src_height < 0) {
src_height = -src_height;
src_uv = src_uv + (src_height - 1) * (int64_t)src_stride_uv;
src_uv = src_uv + (src_height - 1) * (intptr_t)src_stride_uv;
src_stride_uv = -src_stride_uv;
}
src_width = Abs(src_width);
@ -1126,13 +1126,13 @@ int UVScale_16(const uint16_t* src_uv,
#ifdef HAS_UVCOPY
if (!filtering && src_width == dst_width && (src_height % dst_height == 0)) {
if (dst_height == 1) {
UVCopy_16(src_uv + ((src_height - 1) / 2) * (int64_t)src_stride_uv,
UVCopy_16(src_uv + ((src_height - 1) / 2) * (intptr_t)src_stride_uv,
src_stride_uv, dst_uv, dst_stride_uv, dst_width, dst_height);
} else {
dy = src_height / dst_height;
UVCopy_16(src_uv + ((dy - 1) / 2) * (int64_t)src_stride_uv,
dy * (int64_t)src_stride_uv, dst_uv, dst_stride_uv, dst_width,
dst_height);
UVCopy_16(src_uv + ((dy - 1) / 2) * (intptr_t)src_stride_uv,
(int)(dy * (intptr_t)src_stride_uv), dst_uv, dst_stride_uv,
dst_width, dst_height);
}
return 0;

View File

@ -48,6 +48,7 @@ namespace libyuv {
#define AR30ToAR30 ARGBCopy
#define ABGRToABGR ARGBCopy
// subsample amount uses a divide.
#define SUBSAMPLE(v, a) ((((v) + (a)-1)) / (a))
// Planar test
@ -180,6 +181,7 @@ TESTPLANARTOP(I212, uint16_t, 2, 2, 1, I012, uint16_t, 2, 2, 2, 12)
TESTPLANARTOP(I010, uint16_t, 2, 2, 2, I420, uint8_t, 1, 2, 2, 10)
TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I420, uint8_t, 1, 2, 2, 10)
TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I422, uint8_t, 1, 2, 1, 10)
TESTPLANARTOP(I410, uint16_t, 2, 1, 1, I420, uint8_t, 1, 2, 2, 10)
TESTPLANARTOP(I410, uint16_t, 2, 1, 1, I444, uint8_t, 1, 1, 1, 10)
TESTPLANARTOP(I012, uint16_t, 2, 2, 2, I420, uint8_t, 1, 2, 2, 12)
TESTPLANARTOP(I212, uint16_t, 2, 2, 1, I422, uint8_t, 1, 2, 1, 12)
@ -417,131 +419,136 @@ TESTPLANARTOBP(I210, uint16_t, 2, 2, 1, P210, uint16_t, 2, 2, 1, 10)
TESTPLANARTOBP(I012, uint16_t, 2, 2, 2, P012, uint16_t, 2, 2, 2, 12)
TESTPLANARTOBP(I212, uint16_t, 2, 2, 1, P212, uint16_t, 2, 2, 1, 12)
#define TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \
DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF, \
DOY, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported"); \
static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \
static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2, \
"SRC_SUBSAMP_X unsupported"); \
static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2, \
"SRC_SUBSAMP_Y unsupported"); \
static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2, \
"DST_SUBSAMP_X unsupported"); \
static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2, \
"DST_SUBSAMP_Y unsupported"); \
const int kWidth = W1280; \
const int kHeight = benchmark_height_; \
const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X); \
const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \
const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y); \
const int kPaddedWidth = (kWidth + (TILE_WIDTH - 1)) & ~(TILE_WIDTH - 1); \
const int kPaddedHeight = \
(kHeight + (TILE_HEIGHT - 1)) & ~(TILE_HEIGHT - 1); \
const int kSrcHalfPaddedWidth = SUBSAMPLE(kPaddedWidth, SRC_SUBSAMP_X); \
const int kSrcHalfPaddedHeight = SUBSAMPLE(kPaddedHeight, SRC_SUBSAMP_Y); \
align_buffer_page_end(src_y, kPaddedWidth* kPaddedHeight* SRC_BPC + OFF); \
align_buffer_page_end( \
src_uv, \
2 * kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * SRC_BPC + OFF); \
align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \
align_buffer_page_end(dst_uv_c, \
2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \
align_buffer_page_end(dst_uv_opt, \
2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF); \
SRC_T* src_uv_p = reinterpret_cast<SRC_T*>(src_uv + OFF); \
for (int i = 0; i < kPaddedWidth * kPaddedHeight; ++i) { \
src_y_p[i] = \
(fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \
} \
for (int i = 0; i < kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * 2; ++i) { \
src_uv_p[i] = \
(fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \
} \
memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \
memset(dst_uv_c, 2, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \
memset(dst_uv_opt, 102, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
MaskCpuFlags(disable_cpu_flags_); \
SRC_FMT_PLANAR##To##FMT_PLANAR( \
src_y_p, kWidth, src_uv_p, 2 * kSrcHalfWidth, \
DOY ? reinterpret_cast<DST_T*>(dst_y_c) : NULL, kWidth, \
reinterpret_cast<DST_T*>(dst_uv_c), 2 * kDstHalfWidth, kWidth, \
NEG kHeight); \
MaskCpuFlags(benchmark_cpu_info_); \
for (int i = 0; i < benchmark_iterations_; ++i) { \
SRC_FMT_PLANAR##To##FMT_PLANAR( \
src_y_p, kWidth, src_uv_p, 2 * kSrcHalfWidth, \
DOY ? reinterpret_cast<DST_T*>(dst_y_opt) : NULL, kWidth, \
reinterpret_cast<DST_T*>(dst_uv_opt), 2 * kDstHalfWidth, kWidth, \
NEG kHeight); \
} \
if (DOY) { \
for (int i = 0; i < kHeight; ++i) { \
for (int j = 0; j < kWidth; ++j) { \
EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \
} \
} \
} \
for (int i = 0; i < kDstHalfHeight; ++i) { \
for (int j = 0; j < 2 * kDstHalfWidth; ++j) { \
EXPECT_EQ(dst_uv_c[i * 2 * kDstHalfWidth + j], \
dst_uv_opt[i * 2 * kDstHalfWidth + j]); \
} \
} \
free_aligned_buffer_page_end(dst_y_c); \
free_aligned_buffer_page_end(dst_uv_c); \
free_aligned_buffer_page_end(dst_y_opt); \
free_aligned_buffer_page_end(dst_uv_opt); \
free_aligned_buffer_page_end(src_y); \
free_aligned_buffer_page_end(src_uv); \
// TESTBPTOBPI: defines one gtest case, SRC##To##DST##N, converting a
// biplanar (Y plane + interleaved UV plane) format to another biplanar
// format.
// - Source planes are padded up to TILE_WIDTH/TILE_HEIGHT multiples and
//   filled with random samples masked to SRC_DEPTH significant bits.
// - The C path runs once with CPU features disabled, then the optimized
//   path runs benchmark_iterations_ times; outputs are EXPECT_EQ-compared.
// - W1280 is the test width; NEG (+/-) optionally inverts the height; OFF
//   mis-aligns the source pointers; DOY == 0 passes NULL for dst_y.
// - Element counts and strides are scaled by SRC_BPC / sizeof(SRC_T) so
//   formats with fractional bytes per component (e.g. MT2T, SRC_BPC 10/8)
//   are sized correctly.
#define TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,            \
                    SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
                    DST_SUBSAMP_Y, W1280, N, NEG, OFF, DOY, SRC_DEPTH,        \
                    TILE_WIDTH, TILE_HEIGHT)                                  \
  TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) {              \
    static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported");       \
    static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2,                   \
                  "SRC_SUBSAMP_X unsupported");                               \
    static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2,                   \
                  "SRC_SUBSAMP_Y unsupported");                               \
    static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2,                   \
                  "DST_SUBSAMP_X unsupported");                               \
    static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2,                   \
                  "DST_SUBSAMP_Y unsupported");                               \
    const int kWidth = W1280;                                                 \
    const int kHeight = benchmark_height_;                                    \
    const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X);               \
    const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X);               \
    const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y);             \
    const int kPaddedWidth = (kWidth + (TILE_WIDTH - 1)) & ~(TILE_WIDTH - 1); \
    const int kPaddedHeight =                                                 \
        (kHeight + (TILE_HEIGHT - 1)) & ~(TILE_HEIGHT - 1);                   \
    const int kSrcHalfPaddedWidth = SUBSAMPLE(kPaddedWidth, SRC_SUBSAMP_X);   \
    const int kSrcHalfPaddedHeight = SUBSAMPLE(kPaddedHeight, SRC_SUBSAMP_Y); \
    align_buffer_page_end(src_y, kPaddedWidth* kPaddedHeight* SRC_BPC + OFF); \
    align_buffer_page_end(                                                    \
        src_uv,                                                               \
        2 * kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * SRC_BPC + OFF);      \
    align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC);                 \
    align_buffer_page_end(dst_uv_c,                                          \
                          2 * kDstHalfWidth * kDstHalfHeight * DST_BPC);      \
    align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC);               \
    align_buffer_page_end(dst_uv_opt,                                         \
                          2 * kDstHalfWidth * kDstHalfHeight * DST_BPC);      \
    SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF);                   \
    SRC_T* src_uv_p = reinterpret_cast<SRC_T*>(src_uv + OFF);                 \
    for (int i = 0;                                                           \
         i < kPaddedWidth * kPaddedHeight * SRC_BPC / (int)sizeof(SRC_T);     \
         ++i) {                                                               \
      src_y_p[i] =                                                            \
          (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH)));      \
    }                                                                         \
    for (int i = 0; i < kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * 2 *      \
                            SRC_BPC / (int)sizeof(SRC_T);                     \
         ++i) {                                                               \
      src_uv_p[i] =                                                           \
          (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH)));      \
    }                                                                         \
    memset(dst_y_c, 1, kWidth* kHeight* DST_BPC);                             \
    memset(dst_uv_c, 2, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC);        \
    memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC);                         \
    memset(dst_uv_opt, 102, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC);    \
    MaskCpuFlags(disable_cpu_flags_);                                         \
    SRC_FMT_PLANAR##To##FMT_PLANAR(                                           \
        src_y_p, kWidth* SRC_BPC / (int)sizeof(SRC_T), src_uv_p,              \
        2 * kSrcHalfWidth * SRC_BPC / (int)sizeof(SRC_T),                     \
        DOY ? reinterpret_cast<DST_T*>(dst_y_c) : NULL, kWidth,               \
        reinterpret_cast<DST_T*>(dst_uv_c), 2 * kDstHalfWidth, kWidth,        \
        NEG kHeight);                                                         \
    MaskCpuFlags(benchmark_cpu_info_);                                        \
    for (int i = 0; i < benchmark_iterations_; ++i) {                         \
      SRC_FMT_PLANAR##To##FMT_PLANAR(                                         \
          src_y_p, kWidth* SRC_BPC / (int)sizeof(SRC_T), src_uv_p,            \
          2 * kSrcHalfWidth * SRC_BPC / (int)sizeof(SRC_T),                   \
          DOY ? reinterpret_cast<DST_T*>(dst_y_opt) : NULL, kWidth,           \
          reinterpret_cast<DST_T*>(dst_uv_opt), 2 * kDstHalfWidth, kWidth,    \
          NEG kHeight);                                                       \
    }                                                                         \
    if (DOY) {                                                                \
      for (int i = 0; i < kHeight; ++i) {                                     \
        for (int j = 0; j < kWidth; ++j) {                                    \
          EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]);      \
        }                                                                     \
      }                                                                       \
    }                                                                         \
    for (int i = 0; i < kDstHalfHeight; ++i) {                                \
      for (int j = 0; j < 2 * kDstHalfWidth; ++j) {                           \
        EXPECT_EQ(dst_uv_c[i * 2 * kDstHalfWidth + j],                        \
                  dst_uv_opt[i * 2 * kDstHalfWidth + j]);                     \
      }                                                                       \
    }                                                                         \
    free_aligned_buffer_page_end(dst_y_c);                                    \
    free_aligned_buffer_page_end(dst_uv_c);                                   \
    free_aligned_buffer_page_end(dst_y_opt);                                  \
    free_aligned_buffer_page_end(dst_uv_opt);                                 \
    free_aligned_buffer_page_end(src_y);                                      \
    free_aligned_buffer_page_end(src_uv);                                     \
  }
// TESTBIPLANARTOBP: expands TESTBIPLANARTOBPI into the five standard test
// variants for one biplanar-to-biplanar conversion:
//   _Any (odd width), _Unaligned (source offset by 2), _Invert (negative
//   height), _Opt (aligned fast path), _NullY (dst_y passed as NULL).
#define TESTBIPLANARTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,       \
                         SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC,           \
                         DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, \
                         TILE_HEIGHT)                                         \
  TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,            \
                    SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
                    DST_SUBSAMP_Y, benchmark_width_ + 1, _Any, +, 0, 1,       \
                    SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT)                       \
  TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,            \
                    SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
                    DST_SUBSAMP_Y, benchmark_width_, _Unaligned, +, 2, 1,     \
                    SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT)                       \
  TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,            \
                    SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
                    DST_SUBSAMP_Y, benchmark_width_, _Invert, -, 0, 1,        \
                    SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT)                       \
  TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,            \
                    SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
                    DST_SUBSAMP_Y, benchmark_width_, _Opt, +, 0, 1, SRC_DEPTH, \
                    TILE_WIDTH, TILE_HEIGHT)                                  \
  TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,            \
                    SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
                    DST_SUBSAMP_Y, benchmark_width_, _NullY, +, 0, 0,         \
                    SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT)
// TESTBPTOBP: renamed counterpart of TESTBIPLANARTOBP ("BP" = biplanar).
// Expands TESTBPTOBPI into the five standard variants: _Any (odd width),
// _Unaligned (source offset by 2), _Invert (negative height), _Opt
// (aligned fast path), _NullY (dst_y passed as NULL).
#define TESTBPTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,            \
                   SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
                   DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT)        \
  TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
              FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,      \
              benchmark_width_ + 1, _Any, +, 0, 1, SRC_DEPTH, TILE_WIDTH,    \
              TILE_HEIGHT)                                                   \
  TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
              FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,      \
              benchmark_width_, _Unaligned, +, 2, 1, SRC_DEPTH, TILE_WIDTH,  \
              TILE_HEIGHT)                                                   \
  TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
              FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,      \
              benchmark_width_, _Invert, -, 0, 1, SRC_DEPTH, TILE_WIDTH,     \
              TILE_HEIGHT)                                                   \
  TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
              FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,      \
              benchmark_width_, _Opt, +, 0, 1, SRC_DEPTH, TILE_WIDTH,        \
              TILE_HEIGHT)                                                   \
  TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
              FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,      \
              benchmark_width_, _NullY, +, 0, 0, SRC_DEPTH, TILE_WIDTH,      \
              TILE_HEIGHT)
TESTBIPLANARTOBP(NV21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 1, 1)
TESTBIPLANARTOBP(NV12, uint8_t, 1, 2, 2, NV12Mirror, uint8_t, 1, 2, 2, 8, 1, 1)
TESTBIPLANARTOBP(NV12, uint8_t, 1, 2, 2, NV24, uint8_t, 1, 1, 1, 8, 1, 1)
TESTBIPLANARTOBP(NV16, uint8_t, 1, 2, 1, NV24, uint8_t, 1, 1, 1, 8, 1, 1)
TESTBIPLANARTOBP(P010, uint16_t, 2, 2, 2, P410, uint16_t, 2, 1, 1, 10, 1, 1)
TESTBIPLANARTOBP(P210, uint16_t, 2, 2, 1, P410, uint16_t, 2, 1, 1, 10, 1, 1)
TESTBIPLANARTOBP(P012, uint16_t, 2, 2, 2, P412, uint16_t, 2, 1, 1, 10, 1, 1)
TESTBIPLANARTOBP(P212, uint16_t, 2, 2, 1, P412, uint16_t, 2, 1, 1, 12, 1, 1)
TESTBIPLANARTOBP(P016, uint16_t, 2, 2, 2, P416, uint16_t, 2, 1, 1, 12, 1, 1)
TESTBIPLANARTOBP(P216, uint16_t, 2, 2, 1, P416, uint16_t, 2, 1, 1, 12, 1, 1)
TESTBIPLANARTOBP(MM21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 16, 32)
TESTBPTOBP(NV21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 1, 1)
TESTBPTOBP(NV12, uint8_t, 1, 2, 2, NV12Mirror, uint8_t, 1, 2, 2, 8, 1, 1)
TESTBPTOBP(NV12, uint8_t, 1, 2, 2, NV24, uint8_t, 1, 1, 1, 8, 1, 1)
TESTBPTOBP(NV16, uint8_t, 1, 2, 1, NV24, uint8_t, 1, 1, 1, 8, 1, 1)
TESTBPTOBP(P010, uint16_t, 2, 2, 2, P410, uint16_t, 2, 1, 1, 10, 1, 1)
TESTBPTOBP(P210, uint16_t, 2, 2, 1, P410, uint16_t, 2, 1, 1, 10, 1, 1)
TESTBPTOBP(P012, uint16_t, 2, 2, 2, P412, uint16_t, 2, 1, 1, 10, 1, 1)
TESTBPTOBP(P212, uint16_t, 2, 2, 1, P412, uint16_t, 2, 1, 1, 12, 1, 1)
TESTBPTOBP(P016, uint16_t, 2, 2, 2, P416, uint16_t, 2, 1, 1, 12, 1, 1)
TESTBPTOBP(P216, uint16_t, 2, 2, 1, P416, uint16_t, 2, 1, 1, 12, 1, 1)
TESTBPTOBP(MM21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 16, 32)
TESTBPTOBP(MT2T, uint8_t, 10 / 8, 2, 2, P010, uint16_t, 2, 2, 2, 10, 16, 32)
#define TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \
DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF, \
SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
#define TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
DST_SUBSAMP_Y, W1280, N, NEG, OFF, SRC_DEPTH, TILE_WIDTH, \
TILE_HEIGHT) \
TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported"); \
static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \
@ -621,30 +628,30 @@ TESTBIPLANARTOBP(MM21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 16, 32)
free_aligned_buffer_page_end(src_uv); \
}
// TESTBIPLANARTOP: expands TESTBIPLANARTOPI (biplanar-to-planar) into the
// four standard variants: _Any (odd width), _Unaligned (source offset by
// 2), _Invert (negative height), _Opt (aligned fast path).
#define TESTBIPLANARTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,        \
                        SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC,            \
                        DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH,  \
                        TILE_HEIGHT)                                          \
  TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,             \
                   SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X,  \
                   DST_SUBSAMP_Y, benchmark_width_ + 1, _Any, +, 0, SRC_DEPTH, \
                   TILE_WIDTH, TILE_HEIGHT)                                   \
  TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,             \
                   SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X,  \
                   DST_SUBSAMP_Y, benchmark_width_, _Unaligned, +, 2,         \
                   SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT)                        \
  TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,             \
                   SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X,  \
                   DST_SUBSAMP_Y, benchmark_width_, _Invert, -, 0, SRC_DEPTH, \
                   TILE_WIDTH, TILE_HEIGHT)                                   \
  TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,             \
                   SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X,  \
                   DST_SUBSAMP_Y, benchmark_width_, _Opt, +, 0, SRC_DEPTH,    \
                   TILE_WIDTH, TILE_HEIGHT)
// TESTBPTOP: renamed counterpart of TESTBIPLANARTOP ("BP" = biplanar).
// Expands TESTBPTOPI into the four standard variants: _Any, _Unaligned,
// _Invert, _Opt.
#define TESTBPTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,            \
                  SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
                  DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT)        \
  TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
             FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,      \
             benchmark_width_ + 1, _Any, +, 0, SRC_DEPTH, TILE_WIDTH,       \
             TILE_HEIGHT)                                                   \
  TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
             FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,      \
             benchmark_width_, _Unaligned, +, 2, SRC_DEPTH, TILE_WIDTH,     \
             TILE_HEIGHT)                                                   \
  TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
             FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,      \
             benchmark_width_, _Invert, -, 0, SRC_DEPTH, TILE_WIDTH,        \
             TILE_HEIGHT)                                                   \
  TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
             FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,      \
             benchmark_width_, _Opt, +, 0, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT)
TESTBIPLANARTOP(NV12, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1)
TESTBIPLANARTOP(NV21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1)
TESTBIPLANARTOP(MM21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 16, 32)
TESTBPTOP(NV12, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1)
TESTBPTOP(NV21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1)
TESTBPTOP(MM21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 16, 32)
TESTBPTOP(P010, uint16_t, 2, 2, 2, I010, uint16_t, 2, 2, 2, 10, 1, 1)
TESTBPTOP(P012, uint16_t, 2, 2, 2, I012, uint16_t, 2, 2, 2, 12, 1, 1)
// Provide matrix wrappers for full range bt.709
#define F420ToABGR(a, b, c, d, e, f, g, h, i, j) \
@ -1069,8 +1076,8 @@ TESTQPLANARTOB(I420Alpha, 2, 2, ARGBFilter, 4, 4, 1)
TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1)
#endif
#define TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, \
BPP_B, W1280, N, NEG, OFF) \
#define TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
W1280, N, NEG, OFF) \
TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \
const int kWidth = W1280; \
const int kHeight = benchmark_height_; \
@ -1123,15 +1130,15 @@ TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1)
free_aligned_buffer_page_end(dst_argb32_opt); \
}
// TESTBIPLANARTOB: expands TESTBIPLANARTOBI (biplanar-to-RGB) into the
// four standard variants: _Any, _Unaligned, _Invert, _Opt.
#define TESTBIPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B) \
  TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B,     \
                   benchmark_width_ + 1, _Any, +, 0)                          \
  TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B,     \
                   benchmark_width_, _Unaligned, +, 2)                        \
  TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B,     \
                   benchmark_width_, _Invert, -, 0)                           \
  TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B,     \
                   benchmark_width_, _Opt, +, 0)

// TESTBPTOB: renamed counterpart of TESTBIPLANARTOB ("BP" = biplanar).
#define TESTBPTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B) \
  TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B,      \
             benchmark_width_ + 1, _Any, +, 0)                           \
  TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B,      \
             benchmark_width_, _Unaligned, +, 2)                         \
  TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B,      \
             benchmark_width_, _Invert, -, 0)                            \
  TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B,      \
             benchmark_width_, _Opt, +, 0)
#define JNV12ToARGB(a, b, c, d, e, f, g, h) \
NV12ToARGBMatrix(a, b, c, d, e, f, &kYuvJPEGConstants, g, h)
@ -1152,29 +1159,29 @@ TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1)
#define JNV12ToRGB565(a, b, c, d, e, f, g, h) \
NV12ToRGB565Matrix(a, b, c, d, e, f, &kYuvJPEGConstants, g, h)
TESTBIPLANARTOB(JNV12, 2, 2, ARGB, ARGB, 4)
TESTBIPLANARTOB(JNV21, 2, 2, ARGB, ARGB, 4)
TESTBIPLANARTOB(JNV12, 2, 2, ABGR, ABGR, 4)
TESTBIPLANARTOB(JNV21, 2, 2, ABGR, ABGR, 4)
TESTBIPLANARTOB(JNV12, 2, 2, RGB24, RGB24, 3)
TESTBIPLANARTOB(JNV21, 2, 2, RGB24, RGB24, 3)
TESTBIPLANARTOB(JNV12, 2, 2, RAW, RAW, 3)
TESTBIPLANARTOB(JNV21, 2, 2, RAW, RAW, 3)
TESTBPTOB(JNV12, 2, 2, ARGB, ARGB, 4)
TESTBPTOB(JNV21, 2, 2, ARGB, ARGB, 4)
TESTBPTOB(JNV12, 2, 2, ABGR, ABGR, 4)
TESTBPTOB(JNV21, 2, 2, ABGR, ABGR, 4)
TESTBPTOB(JNV12, 2, 2, RGB24, RGB24, 3)
TESTBPTOB(JNV21, 2, 2, RGB24, RGB24, 3)
TESTBPTOB(JNV12, 2, 2, RAW, RAW, 3)
TESTBPTOB(JNV21, 2, 2, RAW, RAW, 3)
#ifdef LITTLE_ENDIAN_ONLY_TEST
TESTBIPLANARTOB(JNV12, 2, 2, RGB565, RGB565, 2)
TESTBPTOB(JNV12, 2, 2, RGB565, RGB565, 2)
#endif
TESTBIPLANARTOB(NV12, 2, 2, ARGB, ARGB, 4)
TESTBIPLANARTOB(NV21, 2, 2, ARGB, ARGB, 4)
TESTBIPLANARTOB(NV12, 2, 2, ABGR, ABGR, 4)
TESTBIPLANARTOB(NV21, 2, 2, ABGR, ABGR, 4)
TESTBIPLANARTOB(NV12, 2, 2, RGB24, RGB24, 3)
TESTBIPLANARTOB(NV21, 2, 2, RGB24, RGB24, 3)
TESTBIPLANARTOB(NV12, 2, 2, RAW, RAW, 3)
TESTBIPLANARTOB(NV21, 2, 2, RAW, RAW, 3)
TESTBIPLANARTOB(NV21, 2, 2, YUV24, RAW, 3)
TESTBPTOB(NV12, 2, 2, ARGB, ARGB, 4)
TESTBPTOB(NV21, 2, 2, ARGB, ARGB, 4)
TESTBPTOB(NV12, 2, 2, ABGR, ABGR, 4)
TESTBPTOB(NV21, 2, 2, ABGR, ABGR, 4)
TESTBPTOB(NV12, 2, 2, RGB24, RGB24, 3)
TESTBPTOB(NV21, 2, 2, RGB24, RGB24, 3)
TESTBPTOB(NV12, 2, 2, RAW, RAW, 3)
TESTBPTOB(NV21, 2, 2, RAW, RAW, 3)
TESTBPTOB(NV21, 2, 2, YUV24, RAW, 3)
#ifdef LITTLE_ENDIAN_ONLY_TEST
TESTBIPLANARTOB(NV12, 2, 2, RGB565, RGB565, 2)
TESTBPTOB(NV12, 2, 2, RGB565, RGB565, 2)
#endif
#define TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
@ -1269,8 +1276,8 @@ TESTATOPLANAR(UYVY, 2, 1, I422, 2, 1)
TESTATOPLANAR(YUY2, 2, 1, I420, 2, 2)
TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1)
#define TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, \
SUBSAMP_Y, W1280, N, NEG, OFF) \
#define TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
W1280, N, NEG, OFF) \
TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \
const int kWidth = W1280; \
const int kHeight = benchmark_height_; \
@ -1316,25 +1323,25 @@ TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1)
free_aligned_buffer_page_end(src_argb); \
}
// TESTATOBIPLANAR: expands TESTATOBIPLANARI (packed/ARGB-style source to
// biplanar destination) into the four standard variants: _Any, _Unaligned,
// _Invert, _Opt.
#define TESTATOBIPLANAR(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
  TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,     \
                   benchmark_width_ + 1, _Any, +, 0)                          \
  TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,     \
                   benchmark_width_, _Unaligned, +, 2)                        \
  TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,     \
                   benchmark_width_, _Invert, -, 0)                           \
  TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,     \
                   benchmark_width_, _Opt, +, 0)

// TESTATOBP: renamed counterpart of TESTATOBIPLANAR ("BP" = biplanar).
#define TESTATOBP(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
  TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,      \
             benchmark_width_ + 1, _Any, +, 0)                           \
  TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,      \
             benchmark_width_, _Unaligned, +, 2)                         \
  TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,      \
             benchmark_width_, _Invert, -, 0)                            \
  TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,      \
             benchmark_width_, _Opt, +, 0)
TESTATOBIPLANAR(ARGB, 1, 4, NV12, 2, 2)
TESTATOBIPLANAR(ARGB, 1, 4, NV21, 2, 2)
TESTATOBIPLANAR(ABGR, 1, 4, NV12, 2, 2)
TESTATOBIPLANAR(ABGR, 1, 4, NV21, 2, 2)
TESTATOBIPLANAR(RAW, 1, 3, JNV21, 2, 2)
TESTATOBIPLANAR(YUY2, 2, 4, NV12, 2, 2)
TESTATOBIPLANAR(UYVY, 2, 4, NV12, 2, 2)
TESTATOBIPLANAR(AYUV, 1, 4, NV12, 2, 2)
TESTATOBIPLANAR(AYUV, 1, 4, NV21, 2, 2)
TESTATOBP(ARGB, 1, 4, NV12, 2, 2)
TESTATOBP(ARGB, 1, 4, NV21, 2, 2)
TESTATOBP(ABGR, 1, 4, NV12, 2, 2)
TESTATOBP(ABGR, 1, 4, NV21, 2, 2)
TESTATOBP(RAW, 1, 3, JNV21, 2, 2)
TESTATOBP(YUY2, 2, 4, NV12, 2, 2)
TESTATOBP(UYVY, 2, 4, NV12, 2, 2)
TESTATOBP(AYUV, 1, 4, NV12, 2, 2)
TESTATOBP(AYUV, 1, 4, NV21, 2, 2)
#define TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, \
EPP_B, STRIDE_B, HEIGHT_B, W1280, N, NEG, OFF) \
@ -3915,8 +3922,8 @@ TESTQPLANAR16TOB(I010Alpha, 2, 2, ARGBFilter, 4, 4, 1, 10)
TESTQPLANAR16TOB(I210Alpha, 2, 1, ARGBFilter, 4, 4, 1, 10)
#endif // DISABLE_SLOW_TESTS
#define TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
ALIGN, YALIGN, W1280, N, NEG, SOFF, DOFF, S_DEPTH) \
#define TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
YALIGN, W1280, N, NEG, SOFF, DOFF, S_DEPTH) \
TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \
const int kWidth = W1280; \
const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
@ -3959,16 +3966,16 @@ TESTQPLANAR16TOB(I210Alpha, 2, 1, ARGBFilter, 4, 4, 1, 10)
free_aligned_buffer_page_end(dst_argb_opt); \
}
// TESTBIPLANAR16TOB: expands TESTBIPLANAR16TOBI (16-bit biplanar source,
// e.g. P010/P210, to RGB) into the four standard variants. The _Unaligned
// variant offsets both source and destination by 4 bytes (SOFF/DOFF).
#define TESTBIPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B,     \
                          ALIGN, YALIGN, S_DEPTH)                             \
  TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,   \
                     YALIGN, benchmark_width_ + 1, _Any, +, 0, 0, S_DEPTH)    \
  TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,   \
                     YALIGN, benchmark_width_, _Unaligned, +, 4, 4, S_DEPTH)  \
  TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,   \
                     YALIGN, benchmark_width_, _Invert, -, 0, 0, S_DEPTH)     \
  TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,   \
                     YALIGN, benchmark_width_, _Opt, +, 0, 0, S_DEPTH)

// TESTBP16TOB: renamed counterpart of TESTBIPLANAR16TOB ("BP" = biplanar).
#define TESTBP16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,    \
                    YALIGN, S_DEPTH)                                          \
  TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \
               benchmark_width_ + 1, _Any, +, 0, 0, S_DEPTH)                  \
  TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \
               benchmark_width_, _Unaligned, +, 4, 4, S_DEPTH)                \
  TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \
               benchmark_width_, _Invert, -, 0, 0, S_DEPTH)                   \
  TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \
               benchmark_width_, _Opt, +, 0, 0, S_DEPTH)
#define P010ToARGB(a, b, c, d, e, f, g, h) \
P010ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
@ -4011,23 +4018,23 @@ TESTQPLANAR16TOB(I210Alpha, 2, 1, ARGBFilter, 4, 4, 1, 10)
kFilterBilinear)
#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__)
TESTBIPLANAR16TOB(P010, 2, 2, ARGB, 4, 4, 1, 10)
TESTBIPLANAR16TOB(P210, 2, 1, ARGB, 4, 4, 1, 10)
TESTBIPLANAR16TOB(P012, 2, 2, ARGB, 4, 4, 1, 12)
TESTBIPLANAR16TOB(P212, 2, 1, ARGB, 4, 4, 1, 12)
TESTBIPLANAR16TOB(P016, 2, 2, ARGB, 4, 4, 1, 16)
TESTBIPLANAR16TOB(P216, 2, 1, ARGB, 4, 4, 1, 16)
TESTBIPLANAR16TOB(P010, 2, 2, ARGBFilter, 4, 4, 1, 10)
TESTBIPLANAR16TOB(P210, 2, 1, ARGBFilter, 4, 4, 1, 10)
TESTBP16TOB(P010, 2, 2, ARGB, 4, 4, 1, 10)
TESTBP16TOB(P210, 2, 1, ARGB, 4, 4, 1, 10)
TESTBP16TOB(P012, 2, 2, ARGB, 4, 4, 1, 12)
TESTBP16TOB(P212, 2, 1, ARGB, 4, 4, 1, 12)
TESTBP16TOB(P016, 2, 2, ARGB, 4, 4, 1, 16)
TESTBP16TOB(P216, 2, 1, ARGB, 4, 4, 1, 16)
TESTBP16TOB(P010, 2, 2, ARGBFilter, 4, 4, 1, 10)
TESTBP16TOB(P210, 2, 1, ARGBFilter, 4, 4, 1, 10)
#ifdef LITTLE_ENDIAN_ONLY_TEST
TESTBIPLANAR16TOB(P010, 2, 2, AR30, 4, 4, 1, 10)
TESTBIPLANAR16TOB(P210, 2, 1, AR30, 4, 4, 1, 10)
TESTBIPLANAR16TOB(P012, 2, 2, AR30, 4, 4, 1, 12)
TESTBIPLANAR16TOB(P212, 2, 1, AR30, 4, 4, 1, 12)
TESTBIPLANAR16TOB(P016, 2, 2, AR30, 4, 4, 1, 16)
TESTBIPLANAR16TOB(P216, 2, 1, AR30, 4, 4, 1, 16)
TESTBIPLANAR16TOB(P010, 2, 2, AR30Filter, 4, 4, 1, 10)
TESTBIPLANAR16TOB(P210, 2, 1, AR30Filter, 4, 4, 1, 10)
TESTBP16TOB(P010, 2, 2, AR30, 4, 4, 1, 10)
TESTBP16TOB(P210, 2, 1, AR30, 4, 4, 1, 10)
TESTBP16TOB(P012, 2, 2, AR30, 4, 4, 1, 12)
TESTBP16TOB(P212, 2, 1, AR30, 4, 4, 1, 12)
TESTBP16TOB(P016, 2, 2, AR30, 4, 4, 1, 16)
TESTBP16TOB(P216, 2, 1, AR30, 4, 4, 1, 16)
TESTBP16TOB(P010, 2, 2, AR30Filter, 4, 4, 1, 10)
TESTBP16TOB(P210, 2, 1, AR30Filter, 4, 4, 1, 10)
#endif // LITTLE_ENDIAN_ONLY_TEST
#endif // DISABLE_SLOW_TESTS

View File

@ -225,4 +225,110 @@ TEST_F(LibYUVRotateTest, RotatePlane90_TestStride) {
free_aligned_buffer_page_end(src_argb);
}
// Rotates a single 16-bit plane with RotatePlane_16 via both the C
// reference path (CPU features masked off) and the optimized path, and
// verifies the two outputs match bit-for-bit.
// For kRotate90/kRotate270 callers pass swapped dst_width/dst_height.
static void TestRotatePlane_16(int src_width,
                               int src_height,
                               int dst_width,
                               int dst_height,
                               libyuv::RotationMode mode,
                               int benchmark_iterations,
                               int disable_cpu_flags,
                               int benchmark_cpu_info) {
  // Clamp degenerate dimensions so buffer sizes stay positive.
  if (src_width < 1) {
    src_width = 1;
  }
  if (src_height < 1) {
    src_height = 1;
  }
  if (dst_width < 1) {
    dst_width = 1;
  }
  if (dst_height < 1) {
    dst_height = 1;
  }
  int src_stride = src_width;
  int src_plane_size = src_stride * abs(src_height);
  align_buffer_page_end_16(src, src_plane_size);
  // Random 8-bit values in a 16-bit plane (mirrors the 8-bit RotatePlane
  // test's fill pattern).
  for (int i = 0; i < src_plane_size; ++i) {
    src[i] = fastrand() & 0xff;
  }
  int dst_stride = dst_width;
  int dst_plane_size = dst_stride * dst_height;
  align_buffer_page_end_16(dst_c, dst_plane_size);
  align_buffer_page_end_16(dst_opt, dst_plane_size);
  // Elements are uint16_t, so the byte count is dst_plane_size * 2.
  // Previously only half of each buffer was initialized; use the full
  // byte size, matching the memset convention in I010TestRotate.
  memset(dst_c, 2, dst_plane_size * 2);
  memset(dst_opt, 3, dst_plane_size * 2);
  MaskCpuFlags(disable_cpu_flags);  // Disable all CPU optimization.
  RotatePlane_16(src, src_stride, dst_c, dst_stride, src_width, src_height,
                 mode);
  MaskCpuFlags(benchmark_cpu_info);  // Enable all CPU optimization.
  for (int i = 0; i < benchmark_iterations; ++i) {
    RotatePlane_16(src, src_stride, dst_opt, dst_stride, src_width, src_height,
                   mode);
  }
  // Rotation should be exact: C and optimized paths must agree.
  for (int i = 0; i < dst_plane_size; ++i) {
    EXPECT_EQ(dst_c[i], dst_opt[i]);
  }
  free_aligned_buffer_page_end_16(dst_c);
  free_aligned_buffer_page_end_16(dst_opt);
  free_aligned_buffer_page_end_16(src);
}
// Benchmark-sized 16-bit plane rotation through each mode; 90/270 swap the
// destination width and height.
TEST_F(LibYUVRotateTest, RotatePlane0_16_Opt) {
  TestRotatePlane_16(benchmark_width_, benchmark_height_, benchmark_width_,
                     benchmark_height_, kRotate0, benchmark_iterations_,
                     disable_cpu_flags_, benchmark_cpu_info_);
}

TEST_F(LibYUVRotateTest, RotatePlane90_16_Opt) {
  TestRotatePlane_16(benchmark_width_, benchmark_height_, benchmark_height_,
                     benchmark_width_, kRotate90, benchmark_iterations_,
                     disable_cpu_flags_, benchmark_cpu_info_);
}

TEST_F(LibYUVRotateTest, RotatePlane180_16_Opt) {
  TestRotatePlane_16(benchmark_width_, benchmark_height_, benchmark_width_,
                     benchmark_height_, kRotate180, benchmark_iterations_,
                     disable_cpu_flags_, benchmark_cpu_info_);
}

TEST_F(LibYUVRotateTest, RotatePlane270_16_Opt) {
  TestRotatePlane_16(benchmark_width_, benchmark_height_, benchmark_height_,
                     benchmark_width_, kRotate270, benchmark_iterations_,
                     disable_cpu_flags_, benchmark_cpu_info_);
}

// Same four rotations with odd (benchmark + 1) dimensions to exercise
// edge handling for widths/heights that are not SIMD-friendly multiples.
TEST_F(LibYUVRotateTest, RotatePlane0_16_Odd) {
  TestRotatePlane_16(benchmark_width_ + 1, benchmark_height_ + 1,
                     benchmark_width_ + 1, benchmark_height_ + 1, kRotate0,
                     benchmark_iterations_, disable_cpu_flags_,
                     benchmark_cpu_info_);
}

TEST_F(LibYUVRotateTest, RotatePlane90_16_Odd) {
  TestRotatePlane_16(benchmark_width_ + 1, benchmark_height_ + 1,
                     benchmark_height_ + 1, benchmark_width_ + 1, kRotate90,
                     benchmark_iterations_, disable_cpu_flags_,
                     benchmark_cpu_info_);
}

TEST_F(LibYUVRotateTest, RotatePlane180_16_Odd) {
  TestRotatePlane_16(benchmark_width_ + 1, benchmark_height_ + 1,
                     benchmark_width_ + 1, benchmark_height_ + 1, kRotate180,
                     benchmark_iterations_, disable_cpu_flags_,
                     benchmark_cpu_info_);
}

TEST_F(LibYUVRotateTest, RotatePlane270_16_Odd) {
  TestRotatePlane_16(benchmark_width_ + 1, benchmark_height_ + 1,
                     benchmark_height_ + 1, benchmark_width_ + 1, kRotate270,
                     benchmark_iterations_, disable_cpu_flags_,
                     benchmark_cpu_info_);
}
} // namespace libyuv

View File

@ -596,4 +596,266 @@ TESTAPLANARTOP(Android420, NV21, 2, 1, 0, 2, 2, I420, 2, 2)
#undef TESTAPLANARTOP
#undef TESTAPLANARTOPI
static void I010TestRotate(int src_width,
int src_height,
int dst_width,
int dst_height,
libyuv::RotationMode mode,
int benchmark_iterations,
int disable_cpu_flags,
int benchmark_cpu_info) {
if (src_width < 1) {
src_width = 1;
}
if (src_height == 0) {
src_height = 1;
}
if (dst_width < 1) {
dst_width = 1;
}
if (dst_height < 1) {
dst_height = 1;
}
int src_i010_y_size = src_width * Abs(src_height);
int src_i010_uv_size = ((src_width + 1) / 2) * ((Abs(src_height) + 1) / 2);
int src_i010_size = src_i010_y_size + src_i010_uv_size * 2;
align_buffer_page_end_16(src_i010, src_i010_size);
for (int i = 0; i < src_i010_size; ++i) {
src_i010[i] = fastrand() & 0x3ff;
}
int dst_i010_y_size = dst_width * dst_height;
int dst_i010_uv_size = ((dst_width + 1) / 2) * ((dst_height + 1) / 2);
int dst_i010_size = dst_i010_y_size + dst_i010_uv_size * 2;
align_buffer_page_end_16(dst_i010_c, dst_i010_size);
align_buffer_page_end_16(dst_i010_opt, dst_i010_size);
memset(dst_i010_c, 2, dst_i010_size * 2);
memset(dst_i010_opt, 3, dst_i010_size * 2);
MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
I010Rotate(src_i010, src_width, src_i010 + src_i010_y_size,
(src_width + 1) / 2, src_i010 + src_i010_y_size + src_i010_uv_size,
(src_width + 1) / 2, dst_i010_c, dst_width,
dst_i010_c + dst_i010_y_size, (dst_width + 1) / 2,
dst_i010_c + dst_i010_y_size + dst_i010_uv_size,
(dst_width + 1) / 2, src_width, src_height, mode);
MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
for (int i = 0; i < benchmark_iterations; ++i) {
I010Rotate(
src_i010, src_width, src_i010 + src_i010_y_size, (src_width + 1) / 2,
src_i010 + src_i010_y_size + src_i010_uv_size, (src_width + 1) / 2,
dst_i010_opt, dst_width, dst_i010_opt + dst_i010_y_size,
(dst_width + 1) / 2, dst_i010_opt + dst_i010_y_size + dst_i010_uv_size,
(dst_width + 1) / 2, src_width, src_height, mode);
}
// Rotation should be exact.
for (int i = 0; i < dst_i010_size; ++i) {
EXPECT_EQ(dst_i010_c[i], dst_i010_opt[i]);
}
free_aligned_buffer_page_end_16(dst_i010_c);
free_aligned_buffer_page_end_16(dst_i010_opt);
free_aligned_buffer_page_end_16(src_i010);
}
// I010 rotation through each mode at benchmark size; 90/270 swap the
// destination width and height.
TEST_F(LibYUVRotateTest, I010Rotate0_Opt) {
  I010TestRotate(benchmark_width_, benchmark_height_, benchmark_width_,
                 benchmark_height_, kRotate0, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_);
}

TEST_F(LibYUVRotateTest, I010Rotate90_Opt) {
  I010TestRotate(benchmark_width_, benchmark_height_, benchmark_height_,
                 benchmark_width_, kRotate90, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_);
}

TEST_F(LibYUVRotateTest, I010Rotate180_Opt) {
  I010TestRotate(benchmark_width_, benchmark_height_, benchmark_width_,
                 benchmark_height_, kRotate180, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_);
}

TEST_F(LibYUVRotateTest, I010Rotate270_Opt) {
  I010TestRotate(benchmark_width_, benchmark_height_, benchmark_height_,
                 benchmark_width_, kRotate270, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_);
}
static void I210TestRotate(int src_width,
int src_height,
int dst_width,
int dst_height,
libyuv::RotationMode mode,
int benchmark_iterations,
int disable_cpu_flags,
int benchmark_cpu_info) {
if (src_width < 1) {
src_width = 1;
}
if (src_height == 0) {
src_height = 1;
}
if (dst_width < 1) {
dst_width = 1;
}
if (dst_height < 1) {
dst_height = 1;
}
int src_i210_y_size = src_width * Abs(src_height);
int src_i210_uv_size = ((src_width + 1) / 2) * Abs(src_height);
int src_i210_size = src_i210_y_size + src_i210_uv_size * 2;
align_buffer_page_end_16(src_i210, src_i210_size);
for (int i = 0; i < src_i210_size; ++i) {
src_i210[i] = fastrand() & 0x3ff;
}
int dst_i210_y_size = dst_width * dst_height;
int dst_i210_uv_size = ((dst_width + 1) / 2) * dst_height;
int dst_i210_size = dst_i210_y_size + dst_i210_uv_size * 2;
align_buffer_page_end_16(dst_i210_c, dst_i210_size);
align_buffer_page_end_16(dst_i210_opt, dst_i210_size);
memset(dst_i210_c, 2, dst_i210_size * 2);
memset(dst_i210_opt, 3, dst_i210_size * 2);
MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
I210Rotate(src_i210, src_width, src_i210 + src_i210_y_size,
(src_width + 1) / 2, src_i210 + src_i210_y_size + src_i210_uv_size,
(src_width + 1) / 2, dst_i210_c, dst_width,
dst_i210_c + dst_i210_y_size, (dst_width + 1) / 2,
dst_i210_c + dst_i210_y_size + dst_i210_uv_size,
(dst_width + 1) / 2, src_width, src_height, mode);
MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
for (int i = 0; i < benchmark_iterations; ++i) {
I210Rotate(
src_i210, src_width, src_i210 + src_i210_y_size, (src_width + 1) / 2,
src_i210 + src_i210_y_size + src_i210_uv_size, (src_width + 1) / 2,
dst_i210_opt, dst_width, dst_i210_opt + dst_i210_y_size,
(dst_width + 1) / 2, dst_i210_opt + dst_i210_y_size + dst_i210_uv_size,
(dst_width + 1) / 2, src_width, src_height, mode);
}
// Rotation should be exact.
for (int i = 0; i < dst_i210_size; ++i) {
EXPECT_EQ(dst_i210_c[i], dst_i210_opt[i]);
}
free_aligned_buffer_page_end_16(dst_i210_c);
free_aligned_buffer_page_end_16(dst_i210_opt);
free_aligned_buffer_page_end_16(src_i210);
}
// I210 rotation through each mode at benchmark size; 90/270 swap the
// destination width and height.
TEST_F(LibYUVRotateTest, I210Rotate0_Opt) {
  I210TestRotate(benchmark_width_, benchmark_height_, benchmark_width_,
                 benchmark_height_, kRotate0, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_);
}

TEST_F(LibYUVRotateTest, I210Rotate90_Opt) {
  I210TestRotate(benchmark_width_, benchmark_height_, benchmark_height_,
                 benchmark_width_, kRotate90, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_);
}

TEST_F(LibYUVRotateTest, I210Rotate180_Opt) {
  I210TestRotate(benchmark_width_, benchmark_height_, benchmark_width_,
                 benchmark_height_, kRotate180, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_);
}

TEST_F(LibYUVRotateTest, I210Rotate270_Opt) {
  I210TestRotate(benchmark_width_, benchmark_height_, benchmark_height_,
                 benchmark_width_, kRotate270, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_);
}
// Verifies that the optimized I410Rotate path produces output identical to
// the unoptimized C path for the given rotation mode and geometry.
// I410 here is stored as three equally sized planes of 16-bit samples with
// 10 significant bits (values are masked with 0x3ff below), so the UV planes
// have the same dimensions as the Y plane.
static void I410TestRotate(int src_width,
                           int src_height,
                           int dst_width,
                           int dst_height,
                           libyuv::RotationMode mode,
                           int benchmark_iterations,
                           int disable_cpu_flags,
                           int benchmark_cpu_info) {
  // Clamp degenerate sizes so buffer math below stays positive.
  // NOTE(review): src_height only guards against 0 (negative height is a
  // valid "inverted image" convention, hence the Abs() below).
  if (src_width < 1) {
    src_width = 1;
  }
  if (src_height == 0) {
    src_height = 1;
  }
  if (dst_width < 1) {
    dst_width = 1;
  }
  if (dst_height < 1) {
    dst_height = 1;
  }
  // Plane sizes in elements (16-bit each). UV planes match the Y plane size.
  int src_i410_y_size = src_width * Abs(src_height);
  int src_i410_uv_size = src_width * Abs(src_height);
  int src_i410_size = src_i410_y_size + src_i410_uv_size * 2;
  align_buffer_page_end_16(src_i410, src_i410_size);
  // Fill the source with random 10-bit sample values.
  for (int i = 0; i < src_i410_size; ++i) {
    src_i410[i] = fastrand() & 0x3ff;
  }
  int dst_i410_y_size = dst_width * dst_height;
  int dst_i410_uv_size = dst_width * dst_height;
  int dst_i410_size = dst_i410_y_size + dst_i410_uv_size * 2;
  align_buffer_page_end_16(dst_i410_c, dst_i410_size);
  align_buffer_page_end_16(dst_i410_opt, dst_i410_size);
  // Seed the two destinations with different byte patterns so an untouched
  // region would be caught by the comparison. Sizes are doubled because the
  // buffers hold 16-bit elements but memset counts bytes.
  memset(dst_i410_c, 2, dst_i410_size * 2);
  memset(dst_i410_opt, 3, dst_i410_size * 2);
  // Reference pass with SIMD disabled.
  MaskCpuFlags(disable_cpu_flags);  // Disable all CPU optimization.
  I410Rotate(src_i410, src_width, src_i410 + src_i410_y_size, src_width,
             src_i410 + src_i410_y_size + src_i410_uv_size, src_width,
             dst_i410_c, dst_width, dst_i410_c + dst_i410_y_size, dst_width,
             dst_i410_c + dst_i410_y_size + dst_i410_uv_size, dst_width,
             src_width, src_height, mode);
  // Optimized pass, repeated for benchmarking.
  MaskCpuFlags(benchmark_cpu_info);  // Enable all CPU optimization.
  for (int i = 0; i < benchmark_iterations; ++i) {
    I410Rotate(src_i410, src_width, src_i410 + src_i410_y_size, src_width,
               src_i410 + src_i410_y_size + src_i410_uv_size, src_width,
               dst_i410_opt, dst_width, dst_i410_opt + dst_i410_y_size,
               dst_width, dst_i410_opt + dst_i410_y_size + dst_i410_uv_size,
               dst_width, src_width, src_height, mode);
  }
  // Rotation should be exact.
  for (int i = 0; i < dst_i410_size; ++i) {
    EXPECT_EQ(dst_i410_c[i], dst_i410_opt[i]);
  }
  free_aligned_buffer_page_end_16(dst_i410_c);
  free_aligned_buffer_page_end_16(dst_i410_opt);
  free_aligned_buffer_page_end_16(src_i410);
}
TEST_F(LibYUVRotateTest, I410Rotate0_Opt) {
  // Rotation by 0 degrees keeps the destination dimensions unchanged.
  const int src_w = benchmark_width_;
  const int src_h = benchmark_height_;
  I410TestRotate(src_w, src_h, /*dst_width=*/src_w, /*dst_height=*/src_h,
                 kRotate0, benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
TEST_F(LibYUVRotateTest, I410Rotate90_Opt) {
  // A 90 degree rotation swaps width and height in the destination.
  const int src_w = benchmark_width_;
  const int src_h = benchmark_height_;
  I410TestRotate(src_w, src_h, /*dst_width=*/src_h, /*dst_height=*/src_w,
                 kRotate90, benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
TEST_F(LibYUVRotateTest, I410Rotate180_Opt) {
  // Rotation by 180 degrees keeps the destination dimensions unchanged.
  const int src_w = benchmark_width_;
  const int src_h = benchmark_height_;
  I410TestRotate(src_w, src_h, /*dst_width=*/src_w, /*dst_height=*/src_h,
                 kRotate180, benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
TEST_F(LibYUVRotateTest, I410Rotate270_Opt) {
  // A 270 degree rotation swaps width and height in the destination.
  const int src_w = benchmark_width_;
  const int src_h = benchmark_height_;
  I410TestRotate(src_w, src_h, /*dst_width=*/src_h, /*dst_height=*/src_w,
                 kRotate270, benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
} // namespace libyuv

View File

@ -11,6 +11,7 @@
#ifndef UNIT_TEST_UNIT_TEST_H_ // NOLINT
#define UNIT_TEST_UNIT_TEST_H_
#include <stddef.h> // For NULL
#ifdef _WIN32
#include <windows.h>
#else
@ -76,7 +77,18 @@ static inline bool SizeValid(int src_width,
#define free_aligned_buffer_page_end(var) \
free(var##_mem); \
var = 0
var = NULL
#define align_buffer_page_end_16(var, size) \
uint8_t* var##_mem = \
reinterpret_cast<uint8_t*>(malloc(((size)*2 + 4095 + 63) & ~4095)); \
uint16_t* var = reinterpret_cast<uint16_t*>( \
(intptr_t)(var##_mem + (((size)*2 + 4095 + 63) & ~4095) - (size)*2) & \
~63)
#define free_aligned_buffer_page_end_16(var) \
free(var##_mem); \
var = NULL
#ifdef WIN32
static inline double get_time() {

View File

@ -43,9 +43,10 @@
// #define BR (-VR * 128 + YB)
int main(int argc, const char* argv[]) {
if (argc < 2) {
printf("yuvconstants Kr Kb\n");
printf(" MC BT KR = 0.2126; KB = 0.0722\n");
if (argc < 3) {
printf("yuvconstants [KR] [KB]\n");
printf(" e.g. yuvconstants 0.2126 0.0722\n");
printf(" MC BT KR KB\n");
printf(" 1 BT.709 KR = 0.2126; KB = 0.0722\n");
printf(" 4 FCC KR = 0.30; KB = 0.11\n");
printf(" 6 BT.601 KR = 0.299; KB = 0.114\n");

View File

@ -23,7 +23,7 @@ origin:
# Revision to pull in
# Must be a long or short commit SHA (long preferred)
revision: ea26d7adb1da4c1bd80e99b9d2f6e9ca0b9cde0e
revision: b2528b0be934de1918e20c85fc170d809eeb49ab
# The package's license, where possible using the mnemonic from
# https://spdx.org/licenses/