Bug 1811322 - P0 - vendor libyuv to b2528b0be934;r=webrtc-reviewers,mjf

Differential Revision: https://phabricator.services.mozilla.com/D167858
This commit is contained in:
Nico Grunbaum 2023-01-25 22:33:52 +00:00
parent 8d1005d303
commit c1bc9bcd59
36 changed files with 3177 additions and 1054 deletions

View File

@ -48,7 +48,7 @@ TARGET_LINK_LIBRARIES ( yuvconstants ${ly_lib_static} )
find_package ( JPEG )
if (JPEG_FOUND)
include_directories( ${JPEG_INCLUDE_DIR} )
target_link_libraries( yuvconvert ${JPEG_LIBRARY} )
target_link_libraries( ${ly_lib_shared} ${JPEG_LIBRARY} )
add_definitions( -DHAVE_JPEG )
endif()

View File

@ -5,7 +5,7 @@ gclient_gn_args = [
vars = {
'chromium_git': 'https://chromium.googlesource.com',
'chromium_revision': '1c174f8519b2926ff3e621467b6aa282b4934f4a',
'chromium_revision': '504c0697552240028c5412dafd2a7306a7cd4be7',
'gn_version': 'git_revision:6f13aaac55a977e1948910942675c69f2b4f7a94',
# ninja CIPD package version.
# https://chrome-infra-packages.appspot.com/p/infra/3pp/tools/ninja
@ -15,17 +15,29 @@ vars = {
# Keep the Chromium default of generating location tags.
'generate_location_tags': True,
# By default, download the fuchsia sdk from the public sdk directory.
'fuchsia_sdk_cipd_prefix': 'fuchsia/sdk/gn/',
'fuchsia_version': 'version:10.20221110.2.1',
# By default, download the fuchsia images from the fuchsia GCS bucket.
'fuchsia_images_bucket': 'fuchsia',
'checkout_fuchsia': False,
# Since the images are hundreds of MB, default to only downloading the image
# most commonly useful for developers. Bots and developers that need to use
# other images can override this with additional images.
'checkout_fuchsia_boot_images': "terminal.qemu-x64",
'checkout_fuchsia_product_bundles': '"{checkout_fuchsia_boot_images}" != ""',
}
deps = {
'src/build':
Var('chromium_git') + '/chromium/src/build' + '@' + '18e9d3c3adadf2489507e4e62afffafa46717d26',
Var('chromium_git') + '/chromium/src/build' + '@' + 'fe1231e1da1e95acb006f53d06caaad16756a376',
'src/buildtools':
Var('chromium_git') + '/chromium/src/buildtools' + '@' + '33b52eafd539278600d34cd9ba23550d28c933d2',
Var('chromium_git') + '/chromium/src/buildtools' + '@' + '3c8fef071edb88facb7508060e978c5fb8608dd5',
'src/testing':
Var('chromium_git') + '/chromium/src/testing' + '@' + 'aedf4723b9fcaf5a76164085f4a8e9797eee4bee',
Var('chromium_git') + '/chromium/src/testing' + '@' + 'b4dc828e84ae95e1f5bf855f040c065287dac335',
'src/third_party':
Var('chromium_git') + '/chromium/src/third_party' + '@' + 'd6591989fa347099fd4c7d47ba8bf6ce900b4f8e',
Var('chromium_git') + '/chromium/src/third_party' + '@' + '73f7282fa28ca1fbe8401e391207fb6ccf34767f',
'src/buildtools/linux64': {
'packages': [
@ -71,30 +83,30 @@ deps = {
'src/buildtools/clang_format/script':
Var('chromium_git') + '/external/github.com/llvm/llvm-project/clang/tools/clang-format.git' + '@' + '8b525d2747f2584fc35d8c7e612e66f377858df7',
'src/buildtools/third_party/libc++/trunk':
Var('chromium_git') + '/external/github.com/llvm/llvm-project/libcxx.git' + '@' + 'fc6bbc5eb039769b5ed2de84444a3c6f9b45a598',
Var('chromium_git') + '/external/github.com/llvm/llvm-project/libcxx.git' + '@' + 'cd0a05047451dfbdef5ba85f97ac4888e432a377',
'src/buildtools/third_party/libc++abi/trunk':
Var('chromium_git') + '/external/github.com/llvm/llvm-project/libcxxabi.git' + '@' + '8dd405113a4f3694e910b79785dd7fb7535a888a',
Var('chromium_git') + '/external/github.com/llvm/llvm-project/libcxxabi.git' + '@' + '1a32724f721e1c3b6c590a07fe4a954344f15e48',
'src/buildtools/third_party/libunwind/trunk':
Var('chromium_git') + '/external/github.com/llvm/llvm-project/libunwind.git' + '@' + 'aabcd8753678f1536e15eb6385a948470debdae4',
Var('chromium_git') + '/external/github.com/llvm/llvm-project/libunwind.git' + '@' + '5870472fdd17f33d923b02e3e0acb9cbb18dbc9a',
'src/third_party/catapult':
Var('chromium_git') + '/catapult.git' + '@' + '3ffa6b222803f54188a7b249383b2f092a24d19a',
Var('chromium_git') + '/catapult.git' + '@' + '4efb51be8574f2969273012958eaae85d01ede0b',
'src/third_party/colorama/src':
Var('chromium_git') + '/external/colorama.git' + '@' + '799604a1041e9b3bc5d2789ecbd7e8db2e18e6b8',
'src/third_party/depot_tools':
Var('chromium_git') + '/chromium/tools/depot_tools.git' + '@' + 'b52683fa2e74087464d32a1a9c76bf1b5275e4fe',
Var('chromium_git') + '/chromium/tools/depot_tools.git' + '@' + '2fc7e1ffd58b00601b47a5126201e5162911e244',
'src/third_party/freetype/src':
Var('chromium_git') + '/chromium/src/third_party/freetype2.git' + '@' + 'dea2e6358b2f963008d447d27564dd79890b61f0',
Var('chromium_git') + '/chromium/src/third_party/freetype2.git' + '@' + '1c44de209cb465d175279dc30cd95f9857f703dd',
'src/third_party/googletest/src':
Var('chromium_git') + '/external/github.com/google/googletest.git' + '@' + 'af29db7ec28d6df1c7f0f745186884091e602e07',
'src/third_party/harfbuzz-ng/src':
Var('chromium_git') + '/external/github.com/harfbuzz/harfbuzz.git' + '@' + '56c467093598ec559a7148b61e112e9de52b7076',
Var('chromium_git') + '/external/github.com/harfbuzz/harfbuzz.git' + '@' + '2822b589bc837fae6f66233e2cf2eef0f6ce8470',
'src/third_party/libjpeg_turbo':
Var('chromium_git') + '/chromium/deps/libjpeg_turbo.git' + '@' + 'ed683925e4897a84b3bffc5c1414c85b97a129a3',
'src/third_party/nasm':
Var('chromium_git') + '/chromium/deps/nasm.git' + '@' + '0873b2bae6a5388a1c55deac8456e3c60a47ca08',
'src/tools':
Var('chromium_git') + '/chromium/src/tools' + '@' + 'a185bbc6c077438a59a89a97c6c6ae30895e976c',
Var('chromium_git') + '/chromium/src/tools' + '@' + 'a20d904d021175f221bf58921a5a67fd48420ed9',
# libyuv-only dependencies (not present in Chromium).
'src/third_party/gtest-parallel':
@ -116,14 +128,10 @@ deps = {
'condition': 'checkout_android',
'dep_type': 'cipd',
},
'src/third_party/auto/src': {
'url': Var('chromium_git') + '/external/github.com/google/auto.git' + '@' + '3659a0e6436d3acfeda04e0bd1df3603f1e7ffac',
'condition': 'checkout_android',
},
'src/third_party/boringssl/src':
'https://boringssl.googlesource.com/boringssl.git' + '@' + '1ee71185a2322dc354bee5e5a0abfb1810a27dc6',
'https://boringssl.googlesource.com/boringssl.git' + '@' + 'f0518d45119dd4dd322a884669daf8247bc3c992',
'src/base': {
'url': Var('chromium_git') + '/chromium/src/base' + '@' + '077682171b88d0aa0cb77a8e1cd4d959f58a20a3',
'url': Var('chromium_git') + '/chromium/src/base' + '@' + 'f80120ff3265ba9bcb27416cc489343cfdc8bc61',
'condition': 'checkout_android',
},
'src/third_party/bazel': {
@ -288,7 +296,7 @@ deps = {
},
'src/third_party/icu': {
'url': Var('chromium_git') + '/chromium/deps/icu.git' + '@' + 'da07448619763d1cde255b361324242646f5b268',
'url': Var('chromium_git') + '/chromium/deps/icu.git' + '@' + '1b7d391f0528fb3a4976b7541b387ee04f915f83',
},
'src/third_party/icu4j': {
'packages': [
@ -329,7 +337,7 @@ deps = {
'condition': 'checkout_android',
},
'src/third_party/junit/src': {
'url': Var('chromium_git') + '/external/junit.git' + '@' + '64155f8a9babcfcf4263cf4d08253a1556e75481',
'url': Var('chromium_git') + '/external/junit.git' + '@' + '05fe2a64f59127c02135be22f416e91260d6ede6',
'condition': 'checkout_android',
},
'src/third_party/libunwindstack': {
@ -443,7 +451,7 @@ deps = {
# iOS deps:
'src/ios': {
'url': Var('chromium_git') + '/chromium/src/ios' + '@' + '211070da56a62cf7d2f7c7a81be29b57294c4343',
'url': Var('chromium_git') + '/chromium/src/ios' + '@' + '866ec86ecb27dad8a3ac7957590df7765a13834f',
'condition': 'checkout_ios'
},
@ -2245,29 +2253,74 @@ hooks = [
'condition': 'checkout_mac',
},
{
'name': 'msan_chained_origins',
'name': 'msan_chained_origins_focal',
'pattern': '.',
'condition': 'checkout_instrumented_libraries',
'action': [ 'python3',
'src/third_party/depot_tools/download_from_google_storage.py',
"--no_resume",
"--no_auth",
"--bucket", "chromium-instrumented-libraries",
"-s", "src/third_party/instrumented_libraries/binaries/msan-chained-origins.tgz.sha1",
'--no_resume',
'--no_auth',
'--bucket', 'chromium-instrumented-libraries',
'-s', 'src/third_party/instrumented_libraries/binaries/msan-chained-origins-focal.tgz.sha1',
],
},
{
'name': 'msan_no_origins',
'name': 'msan_no_origins_focal',
'pattern': '.',
'condition': 'checkout_instrumented_libraries',
'action': [ 'python3',
'src/third_party/depot_tools/download_from_google_storage.py',
"--no_resume",
"--no_auth",
"--bucket", "chromium-instrumented-libraries",
"-s", "src/third_party/instrumented_libraries/binaries/msan-no-origins.tgz.sha1",
'--no_resume',
'--no_auth',
'--bucket', 'chromium-instrumented-libraries',
'-s', 'src/third_party/instrumented_libraries/binaries/msan-no-origins-focal.tgz.sha1',
],
},
{
'name': 'msan_chained_origins_xenial',
'pattern': '.',
'condition': 'checkout_instrumented_libraries',
'action': [ 'python3',
'src/third_party/depot_tools/download_from_google_storage.py',
'--no_resume',
'--no_auth',
'--bucket', 'chromium-instrumented-libraries',
'-s', 'src/third_party/instrumented_libraries/binaries/msan-chained-origins-xenial.tgz.sha1',
],
},
{
'name': 'msan_no_origins_xenial',
'pattern': '.',
'condition': 'checkout_instrumented_libraries',
'action': [ 'python3',
'src/third_party/depot_tools/download_from_google_storage.py',
'--no_resume',
'--no_auth',
'--bucket', 'chromium-instrumented-libraries',
'-s', 'src/third_party/instrumented_libraries/binaries/msan-no-origins-xenial.tgz.sha1',
],
},
{
'name': 'Download Fuchsia SDK from GCS',
'pattern': '.',
'condition': 'checkout_fuchsia',
'action': [
'python3',
'src/build/fuchsia/update_sdk.py',
'--cipd-prefix={fuchsia_sdk_cipd_prefix}',
'--version={fuchsia_version}',
],
},
{
'name': 'Download Fuchsia system images',
'pattern': '.',
'condition': 'checkout_fuchsia and checkout_fuchsia_product_bundles',
'action': [
'python3',
'src/build/fuchsia/update_product_bundles.py',
'{checkout_fuchsia_boot_images}',
],
},
{
# Pull clang if needed or requested via GYP_DEFINES.
# Note: On Win, this should run after win_toolchain, as it may use it.

View File

@ -1,6 +1,6 @@
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 1850
Version: 1857
License: BSD
License File: LICENSE

View File

@ -162,6 +162,22 @@ int MM21ToYUY2(const uint8_t* src_y,
int width,
int height);
// Convert MT2T to P010
// Note that src_y and src_uv point to packed 10-bit values, so the Y plane will
// be 10 / 8 times the dimensions of the image. Also for this reason,
// src_stride_y and src_stride_uv are given in bytes.
LIBYUV_API
int MT2TToP010(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_uv,
int src_stride_uv,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_uv,
int dst_stride_uv,
int width,
int height);
// Convert I422 to NV21.
LIBYUV_API
int I422ToNV21(const uint8_t* src_y,
@ -283,6 +299,23 @@ int I210ToI422(const uint16_t* src_y,
int width,
int height);
#define H410ToH420 I410ToI420
LIBYUV_API
int I410ToI420(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
#define H410ToH444 I410ToI444
LIBYUV_API
int I410ToI444(const uint16_t* src_y,
@ -571,6 +604,36 @@ int NV16ToNV24(const uint8_t* src_y,
int width,
int height);
// Convert P010 to I010.
LIBYUV_API
int P010ToI010(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_uv,
int src_stride_uv,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert P012 to I012.
LIBYUV_API
int P012ToI012(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_uv,
int src_stride_uv,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert P010 to P410.
LIBYUV_API
int P010ToP410(const uint16_t* src_y,

View File

@ -392,6 +392,24 @@ int I210Copy(const uint16_t* src_y,
int width,
int height);
// Copy I410 to I410.
#define I410ToI410 I410Copy
LIBYUV_API
int I410Copy(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int width,
int height);
// Copy NV12. Supports inverting.
LIBYUV_API
int NV12Copy(const uint8_t* src_y,

View File

@ -85,6 +85,60 @@ int I444Rotate(const uint8_t* src_y,
int height,
enum RotationMode mode);
// Rotate I010 frame.
LIBYUV_API
int I010Rotate(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int width,
int height,
enum RotationMode mode);
// Rotate I210 frame.
LIBYUV_API
int I210Rotate(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int width,
int height,
enum RotationMode mode);
// Rotate I410 frame.
LIBYUV_API
int I410Rotate(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int width,
int height,
enum RotationMode mode);
// Rotate NV12 input and store in I420.
LIBYUV_API
int NV12ToI420Rotate(const uint8_t* src_y,
@ -156,6 +210,16 @@ void RotatePlane270(const uint8_t* src,
int width,
int height);
// Rotate a plane by 0, 90, 180, or 270.
LIBYUV_API
int RotatePlane_16(const uint16_t* src,
int src_stride,
uint16_t* dst,
int dst_stride,
int width,
int height,
enum RotationMode mode);
// Rotations for when U and V are interleaved.
// These functions take one UV input pointer and
// split the data into two buffers while

View File

@ -215,7 +215,23 @@ void TransposeUVWx16_Any_LSX(const uint8_t* src,
uint8_t* dst_b,
int dst_stride_b,
int width);
void TransposeWxH_16_C(const uint16_t* src,
int src_stride,
uint16_t* dst,
int dst_stride,
int width,
int height);
void TransposeWx8_16_C(const uint16_t* src,
int src_stride,
uint16_t* dst,
int dst_stride,
int width);
void TransposeWx1_16_C(const uint16_t* src,
int src_stride,
uint16_t* dst,
int dst_stride,
int width);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv

View File

@ -11,7 +11,8 @@
#ifndef INCLUDE_LIBYUV_ROW_H_
#define INCLUDE_LIBYUV_ROW_H_
#include <stdlib.h> // For malloc.
#include <stddef.h> // For NULL
#include <stdlib.h> // For malloc
#include "libyuv/basic_types.h"
@ -176,9 +177,8 @@ extern "C" {
// The following functions fail on gcc/clang 32 bit with fpic and framepointer.
// caveat: clangcl uses row_win.cc which works.
#if !defined(MOZ_PROFILING) && \
(defined(__x86_64__) || !defined(__pic__) || defined(__clang__) || \
defined(_MSC_VER))
#if defined(__x86_64__) || !defined(__pic__) || defined(__clang__) || \
defined(_MSC_VER)
// TODO(fbarchard): fix build error on android_full_debug=1
// https://code.google.com/p/libyuv/issues/detail?id=517
#define HAS_I422ALPHATOARGBROW_SSSE3
@ -247,9 +247,8 @@ extern "C" {
#define HAS_ARGBATTENUATEROW_AVX2
#endif
#if !defined(MOZ_PROFILING) && \
(defined(__x86_64__) || !defined(__pic__) || defined(__clang__) || \
defined(_MSC_VER))
#if defined(__x86_64__) || !defined(__pic__) || defined(__clang__) || \
defined(_MSC_VER)
// TODO(fbarchard): fix build error on android_full_debug=1
// https://code.google.com/p/libyuv/issues/detail?id=517
#define HAS_I422ALPHATOARGBROW_AVX2
@ -457,6 +456,7 @@ extern "C" {
#define HAS_DETILEROW_NEON
#define HAS_DETILESPLITUVROW_NEON
#define HAS_DETILETOYUY2_NEON
#define HAS_UNPACKMT2T_NEON
#define HAS_DIVIDEROW_16_NEON
#define HAS_HALFFLOATROW_NEON
#define HAS_HALFMERGEUVROW_NEON
@ -686,6 +686,11 @@ extern "C" {
#define HAS_SPLITUVROW_LSX
#define HAS_UYVYTOARGBROW_LSX
#define HAS_YUY2TOARGBROW_LSX
#define HAS_ARGBTOYROW_LSX
#define HAS_ABGRTOYJROW_LSX
#define HAS_RGBATOYJROW_LSX
#define HAS_RGB24TOYJROW_LSX
#define HAS_RAWTOYJROW_LSX
#endif
#if !defined(LIBYUV_DISABLE_LASX) && defined(__loongarch_asx)
@ -713,6 +718,8 @@ extern "C" {
#define HAS_ARGBTOUVROW_LASX
#define HAS_ARGBTOYJROW_LASX
#define HAS_ARGBTOYROW_LASX
#define HAS_ABGRTOYJROW_LASX
#define HAS_ABGRTOYROW_LASX
#define HAS_I422ALPHATOARGBROW_LASX
#define HAS_I422TOARGB1555ROW_LASX
#define HAS_I422TOARGB4444ROW_LASX
@ -742,6 +749,11 @@ extern "C" {
#define HAS_YUY2TOUV422ROW_LASX
#define HAS_YUY2TOUVROW_LASX
#define HAS_YUY2TOYROW_LASX
#define HAS_RGBATOYROW_LASX
#define HAS_RGBATOYJROW_LASX
#define HAS_BGRATOYROW_LASX
#define HAS_RGB24TOYJROW_LASX
#define HAS_RAWTOYJROW_LASX
#endif
#if defined(_MSC_VER) && !defined(__CLR_VER) && !defined(__clang__)
@ -830,13 +842,21 @@ struct YuvConstants {
#define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a)-1)))
#define align_buffer_64(var, size) \
uint8_t* var##_mem = (uint8_t*)(malloc((size) + 63)); /* NOLINT */ \
uint8_t* var = (uint8_t*)(((intptr_t)(var##_mem) + 63) & ~63) /* NOLINT */
#define align_buffer_64(var, size) \
void* var##_mem = malloc((size) + 63); /* NOLINT */ \
uint8_t* var = (uint8_t*)(((intptr_t)var##_mem + 63) & ~63) /* NOLINT */
#define free_aligned_buffer_64(var) \
free(var##_mem); \
var = 0
var = NULL
#define align_buffer_64_16(var, size) \
void* var##_mem = malloc((size)*2 + 63); /* NOLINT */ \
uint16_t* var = (uint16_t*)(((intptr_t)var##_mem + 63) & ~63) /* NOLINT */
#define free_aligned_buffer_64_16(var) \
free(var##_mem); \
var = NULL
#if defined(__APPLE__) || defined(__x86_64__) || defined(__llvm__)
#define OMITFP
@ -1193,9 +1213,14 @@ void ABGRToYJRow_NEON(const uint8_t* src_abgr, uint8_t* dst_yj, int width);
void RGBAToYJRow_NEON(const uint8_t* src_rgba, uint8_t* dst_yj, int width);
void ARGBToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width);
void ARGBToYJRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width);
void ARGBToYRow_LSX(const uint8_t* src_argb0, uint8_t* dst_y, int width);
void ARGBToYRow_LASX(const uint8_t* src_argb0, uint8_t* dst_y, int width);
void ARGBToYJRow_LSX(const uint8_t* src_argb0, uint8_t* dst_y, int width);
void ABGRToYJRow_LSX(const uint8_t* src_abgr, uint8_t* dst_yj, int width);
void RGBAToYJRow_LSX(const uint8_t* src_rgba, uint8_t* dst_yj, int width);
void ARGBToYJRow_LASX(const uint8_t* src_argb0, uint8_t* dst_y, int width);
void ABGRToYJRow_LASX(const uint8_t* src_abgr, uint8_t* dst_yj, int width);
void RGBAToYJRow_LASX(const uint8_t* src_rgba, uint8_t* dst_yj, int width);
void ARGBToUV444Row_NEON(const uint8_t* src_argb,
uint8_t* dst_u,
uint8_t* dst_v,
@ -1419,6 +1444,8 @@ void BGRAToYRow_LSX(const uint8_t* src_bgra, uint8_t* dst_y, int width);
void ABGRToYRow_LSX(const uint8_t* src_abgr, uint8_t* dst_y, int width);
void RGBAToYRow_LSX(const uint8_t* src_rgba, uint8_t* dst_y, int width);
void ARGB1555ToYRow_LSX(const uint8_t* src_argb1555, uint8_t* dst_y, int width);
void RGB24ToYJRow_LSX(const uint8_t* src_rgb24, uint8_t* dst_yj, int width);
void ABGRToYRow_LASX(const uint8_t* src_abgr, uint8_t* dst_y, int width);
void ARGB1555ToYRow_LASX(const uint8_t* src_argb1555,
uint8_t* dst_y,
int width);
@ -1428,6 +1455,11 @@ void RGB24ToYRow_LSX(const uint8_t* src_rgb24, uint8_t* dst_y, int width);
void RGB24ToYRow_LASX(const uint8_t* src_rgb24, uint8_t* dst_y, int width);
void RAWToYRow_LSX(const uint8_t* src_raw, uint8_t* dst_y, int width);
void RAWToYRow_LASX(const uint8_t* src_raw, uint8_t* dst_y, int width);
void RGBAToYRow_LASX(const uint8_t* src_rgba, uint8_t* dst_y, int width);
void BGRAToYRow_LASX(const uint8_t* src_bgra, uint8_t* dst_y, int width);
void RGB24ToYJRow_LASX(const uint8_t* src_rgb24, uint8_t* dst_yj, int width);
void RAWToYJRow_LSX(const uint8_t* src_raw, uint8_t* dst_yj, int width);
void RAWToYJRow_LASX(const uint8_t* src_raw, uint8_t* dst_yj, int width);
void ARGBToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width);
void ARGBToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width);
@ -1491,10 +1523,15 @@ void ARGB1555ToYRow_Any_MSA(const uint8_t* src_ptr,
void BGRAToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ABGRToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGBAToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ARGBToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ARGBToYJRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGB24ToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGB565ToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ABGRToYJRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RAWToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGBAToYJRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGB24ToYJRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RAWToYJRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ARGB1555ToYRow_Any_LSX(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
@ -1503,7 +1540,14 @@ void RGB565ToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGB24ToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ARGBToYJRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ARGBToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ABGRToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ABGRToYJRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RAWToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGBAToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGBAToYJRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void BGRAToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGB24ToYJRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RAWToYJRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ARGB1555ToYRow_Any_LASX(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
@ -1935,6 +1979,8 @@ void MirrorSplitUVRow_C(const uint8_t* src_uv,
uint8_t* dst_v,
int width);
void MirrorRow_16_C(const uint16_t* src, uint16_t* dst, int width);
void ARGBMirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width);
void ARGBMirrorRow_SSE2(const uint8_t* src, uint8_t* dst, int width);
void ARGBMirrorRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width);
@ -2124,6 +2170,8 @@ void DetileToYUY2_Any_NEON(const uint8_t* src_y,
ptrdiff_t src_uv_tile_stride,
uint8_t* dst_yuy2,
int width);
void UnpackMT2T_C(const uint8_t* src, uint16_t* dst, size_t size);
void UnpackMT2T_NEON(const uint8_t* src, uint16_t* dst, size_t size);
void MergeUVRow_C(const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* dst_uv,

View File

@ -214,6 +214,17 @@ void ScalePlaneVertical_16To8(int src_height,
int scale,
enum FilterMode filtering);
void ScalePlaneDown2_16To8(int src_width,
int src_height,
int dst_width,
int dst_height,
int src_stride,
int dst_stride,
const uint16_t* src_ptr,
uint8_t* dst_ptr,
int scale,
enum FilterMode filtering);
// Simplify the filtering based on scale factors.
enum FilterMode ScaleFilterReduce(int src_width,
int src_height,
@ -259,6 +270,16 @@ void ScaleRowDown2_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst,
int dst_width);
void ScaleRowDown2_16To8_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width,
int scale);
void ScaleRowDown2_16To8_Odd_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width,
int scale);
void ScaleRowDown2Linear_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
@ -267,6 +288,16 @@ void ScaleRowDown2Linear_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst,
int dst_width);
void ScaleRowDown2Linear_16To8_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width,
int scale);
void ScaleRowDown2Linear_16To8_Odd_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width,
int scale);
void ScaleRowDown2Box_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
@ -279,6 +310,16 @@ void ScaleRowDown2Box_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst,
int dst_width);
void ScaleRowDown2Box_16To8_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width,
int scale);
void ScaleRowDown2Box_16To8_Odd_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width,
int scale);
void ScaleRowDown4_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,

View File

@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1850
#define LIBYUV_VERSION 1857
#endif // INCLUDE_LIBYUV_VERSION_H_

View File

@ -52,7 +52,7 @@
'optimize': 'max', # enable O2 and ltcg.
},
# Allows libyuv.a redistributable library without external dependencies.
# 'standalone_static_library': 1,
'standalone_static_library': 1,
'conditions': [
# Disable -Wunused-parameter
['clang == 1', {
@ -70,11 +70,6 @@
'-mfpu=vfpv3-d16',
# '-mthumb', # arm32 not thumb
],
'cflags_mozilla!': [
'-mfpu=vfp',
'-mfpu=vfpv3',
'-mfpu=vfpv3-d16',
],
'conditions': [
# Disable LTO in libyuv_neon target due to gcc 4.9 compiler bug.
['clang == 0 and use_lto == 1', {
@ -89,9 +84,6 @@
'-mfpu=neon',
# '-marm', # arm32 not thumb
],
'cflags_mozilla': [
'-mfpu=neon',
],
}],
],
}],
@ -100,15 +92,7 @@
'LIBYUV_MSA',
],
}],
['build_with_mozilla == 1', {
'defines': [
'HAVE_JPEG'
],
'cflags_mozilla': [
'$(MOZ_JPEG_CFLAGS)',
],
}],
['OS != "ios" and libyuv_disable_jpeg != 1 and build_with_mozilla != 1', {
['OS != "ios" and libyuv_disable_jpeg != 1', {
'defines': [
'HAVE_JPEG'
],

View File

@ -45,7 +45,7 @@ uint32_t HashDjb2(const uint8_t* src, uint64_t count, uint32_t seed) {
}
#endif
while (count >= (uint64_t)(kBlockSize)) {
while (count >= (uint64_t)kBlockSize) {
seed = HashDjb2_SSE(src, kBlockSize, seed);
src += kBlockSize;
count -= kBlockSize;
@ -359,10 +359,10 @@ static double Ssim8x8_C(const uint8_t* src_a,
(sum_a_sq + sum_b_sq + c1) *
(count * sum_sq_a - sum_a_sq + count * sum_sq_b - sum_b_sq + c2);
if (ssim_d == 0.0) {
if (ssim_d == 0) {
return DBL_MAX;
}
return ssim_n * 1.0 / ssim_d;
return (double)ssim_n / (double)ssim_d;
}
}

View File

@ -67,7 +67,7 @@ uint32_t HammingDistance_SSE42(const uint8_t* src_a,
:
: "memory", "cc", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10");
return static_cast<uint32_t>(diff);
return (uint32_t)(diff);
}
#else
uint32_t HammingDistance_SSE42(const uint8_t* src_a,

View File

@ -24,6 +24,10 @@ namespace libyuv {
extern "C" {
#endif
// Subsample amount uses a shift.
// v is value
// a is amount to add to round up
// s is shift to subsample down
#define SUBSAMPLE(v, a, s) (v < 0) ? (-((-v + a) >> s)) : ((v + a) >> s)
static __inline int Abs(int v) {
return v >= 0 ? v : -v;
@ -291,6 +295,52 @@ int I210ToI422(const uint16_t* src_y,
0, 10);
}
// Convert 10-bit 4:4:4 (I410) to 8-bit 4:2:0 (I420).
// Narrows all three planes from 10 bits to 8 and downsamples both chroma
// planes by 2x2 with bilinear filtering. A negative height mirrors the
// image vertically. Returns 0 on success, -1 on invalid width/height.
LIBYUV_API
int I410ToI420(const uint16_t* src_y,
               int src_stride_y,
               const uint16_t* src_u,
               int src_stride_u,
               const uint16_t* src_v,
               int src_stride_v,
               uint8_t* dst_y,
               int dst_stride_y,
               uint8_t* dst_u,
               int dst_stride_u,
               uint8_t* dst_v,
               int dst_stride_v,
               int width,
               int height) {
  const int depth = 10;
  // Fixed-point multiplier consumed by Convert16To8Plane: 1 << (24 - depth).
  const int scale = 1 << (24 - depth);

  if (width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_y = src_y + (height - 1) * src_stride_y;
    src_u = src_u + (height - 1) * src_stride_u;
    src_v = src_v + (height - 1) * src_stride_v;
    src_stride_y = -src_stride_y;
    src_stride_u = -src_stride_u;
    src_stride_v = -src_stride_v;
  }

  {
    // Chroma dimensions round up for odd widths/heights (SUBSAMPLE adds
    // 1 before shifting right by 1).
    const int uv_width = SUBSAMPLE(width, 1, 1);
    const int uv_height = SUBSAMPLE(height, 1, 1);

    // Y: narrow 10-bit samples down to 8 bits, full resolution.
    Convert16To8Plane(src_y, src_stride_y, dst_y, dst_stride_y, scale, width,
                      height);
    // U/V: downscale 2x in each dimension and narrow to 8 bits in one pass.
    ScalePlaneDown2_16To8(width, height, uv_width, uv_height, src_stride_u,
                          dst_stride_u, src_u, dst_u, scale, kFilterBilinear);
    ScalePlaneDown2_16To8(width, height, uv_width, uv_height, src_stride_v,
                          dst_stride_v, src_v, dst_v, scale, kFilterBilinear);
  }
  return 0;
}
LIBYUV_API
int I410ToI444(const uint16_t* src_y,
int src_stride_y,
@ -732,6 +782,92 @@ int MM21ToYUY2(const uint8_t* src_y,
return 0;
}
// Convert MT2T into P010. See tinyurl.com/mtk-10bit-video-format for format
// documentation.
// TODO(greenjustin): Add an MT2T to I420 conversion.
//
// src_y/dst_y may be NULL to skip the luma plane; src_uv and dst_uv are
// required. A negative height mirrors the output vertically. Returns 0 on
// success, -1 on invalid arguments.
LIBYUV_API
int MT2TToP010(const uint8_t* src_y,
               int src_stride_y,
               const uint8_t* src_uv,
               int src_stride_uv,
               uint16_t* dst_y,
               int dst_stride_y,
               uint16_t* dst_uv,
               int dst_stride_uv,
               int width,
               int height) {
  if (width <= 0 || !height || !src_uv || !dst_uv) {
    return -1;
  }

  {
    int u_width = (width + 1) / 2;
    int uv_width = 2 * u_width;
    int y = 0;
    // BUG FIX: was `int uv_height = uv_height = (height + 1) / 2;` — a
    // redundant self-assignment that reads the variable before it is
    // initialized.
    int uv_height = (height + 1) / 2;
    const int tile_width = 16;
    const int y_tile_height = 32;
    const int uv_tile_height = 16;
    // Round width up to a whole number of 16-pixel-wide tiles.
    int padded_width = (width + tile_width - 1) & ~(tile_width - 1);
    // Packed 10-bit samples occupy 10/8 bytes each.
    int y_tile_row_size = padded_width * y_tile_height * 10 / 8;
    int uv_tile_row_size = padded_width * uv_tile_height * 10 / 8;
    size_t row_buf_size = padded_width * y_tile_height * sizeof(uint16_t);
    void (*UnpackMT2T)(const uint8_t* src, uint16_t* dst, size_t size) =
        UnpackMT2T_C;
    align_buffer_64(row_buf, row_buf_size);

#if defined(HAS_UNPACKMT2T_NEON)
    if (TestCpuFlag(kCpuHasNEON)) {
      UnpackMT2T = UnpackMT2T_NEON;
    }
#endif

    // Negative height means invert the image.
    if (height < 0) {
      height = -height;
      uv_height = (height + 1) / 2;
      if (dst_y) {
        dst_y = dst_y + (height - 1) * dst_stride_y;
        dst_stride_y = -dst_stride_y;
      }
      dst_uv = dst_uv + (uv_height - 1) * dst_stride_uv;
      dst_stride_uv = -dst_stride_uv;
    }

    // Unpack and detile Y in rows of tiles.
    if (src_y && dst_y) {
      for (y = 0; y < (height & ~(y_tile_height - 1)); y += y_tile_height) {
        UnpackMT2T(src_y, (uint16_t*)row_buf, y_tile_row_size);
        DetilePlane_16((uint16_t*)row_buf, padded_width, dst_y, dst_stride_y,
                       width, y_tile_height, y_tile_height);
        src_y += src_stride_y * y_tile_height;
        dst_y += dst_stride_y * y_tile_height;
      }
      // Final partial row of Y tiles, if height is not a tile multiple.
      if (height & (y_tile_height - 1)) {
        UnpackMT2T(src_y, (uint16_t*)row_buf, y_tile_row_size);
        DetilePlane_16((uint16_t*)row_buf, padded_width, dst_y, dst_stride_y,
                       width, height & (y_tile_height - 1), y_tile_height);
      }
    }

    // Unpack and detile the interleaved UV plane.
    for (y = 0; y < (uv_height & ~(uv_tile_height - 1)); y += uv_tile_height) {
      UnpackMT2T(src_uv, (uint16_t*)row_buf, uv_tile_row_size);
      DetilePlane_16((uint16_t*)row_buf, padded_width, dst_uv, dst_stride_uv,
                     uv_width, uv_tile_height, uv_tile_height);
      src_uv += src_stride_uv * uv_tile_height;
      dst_uv += dst_stride_uv * uv_tile_height;
    }
    // Final partial row of UV tiles.
    if (uv_height & (uv_tile_height - 1)) {
      UnpackMT2T(src_uv, (uint16_t*)row_buf, uv_tile_row_size);
      DetilePlane_16((uint16_t*)row_buf, padded_width, dst_uv, dst_stride_uv,
                     uv_width, uv_height & (uv_tile_height - 1),
                     uv_tile_height);
    }
    free_aligned_buffer_64(row_buf);
  }
  return 0;
}
#ifdef I422TONV21_ROW_VERSION
// Unittest fails for this version.
// 422 chroma is 1/2 width, 1x height
@ -753,7 +889,7 @@ int I422ToNV21(const uint8_t* src_y,
int y;
void (*MergeUVRow)(const uint8_t* src_u, const uint8_t* src_v,
uint8_t* dst_uv, int width) = MergeUVRow_C;
void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
void (*InterpolateRow)(uint8_t* dst_ptr, const uint8_t* src_ptr,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_C;
int halfwidth = (width + 1) >> 1;
@ -1137,6 +1273,70 @@ int NV16ToNV24(const uint8_t* src_y,
return 0;
}
// Shared helper: convert any biplanar P[420]1[02] layout into the matching
// triplanar I[420]1[02] layout. Narrows the MSB-aligned Y samples to LSB
// alignment and de-interleaves the packed UV plane into separate U and V
// planes. Negative height (mirroring) is handled by the plane helpers.
// Returns 0 on success, -1 on invalid width/height.
static int PxxxToIxxx(const uint16_t* src_y,
                      int src_stride_y,
                      const uint16_t* src_uv,
                      int src_stride_uv,
                      uint16_t* dst_y,
                      int dst_stride_y,
                      uint16_t* dst_u,
                      int dst_stride_u,
                      uint16_t* dst_v,
                      int dst_stride_v,
                      int width,
                      int height,
                      int subsample_x,
                      int subsample_y,
                      int depth) {
  if (width <= 0 || height == 0) {
    return -1;  // Nothing to convert.
  }
  {
    // Chroma plane dimensions, rounded up per the subsampling factors.
    const int chroma_width = SUBSAMPLE(width, subsample_x, subsample_x);
    const int chroma_height = SUBSAMPLE(height, subsample_y, subsample_y);
    // Shift Y samples from the high bits of 16 down to the low bits.
    ConvertToLSBPlane_16(src_y, src_stride_y, dst_y, dst_stride_y, width,
                         height, depth);
    // Split interleaved UV into planar U and V, also LSB-aligned.
    SplitUVPlane_16(src_uv, src_stride_uv, dst_u, dst_stride_u, dst_v,
                    dst_stride_v, chroma_width, chroma_height, depth);
  }
  return 0;
}
// Convert P010 (10-bit biplanar Y + interleaved UV, 420 subsampling) to
// I010 (10-bit triplanar, LSB-aligned samples).
// Thin wrapper over PxxxToIxxx with 420 geometry (subsample 1,1) and
// depth 10. Returns 0 on success, -1 on invalid dimensions.
LIBYUV_API
int P010ToI010(const uint16_t* src_y,
               int src_stride_y,
               const uint16_t* src_uv,
               int src_stride_uv,
               uint16_t* dst_y,
               int dst_stride_y,
               uint16_t* dst_u,
               int dst_stride_u,
               uint16_t* dst_v,
               int dst_stride_v,
               int width,
               int height) {
  return PxxxToIxxx(src_y, src_stride_y, src_uv, src_stride_uv, dst_y,
                    dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v,
                    width, height, 1, 1, 10);
}
// Convert P012 (12-bit biplanar Y + interleaved UV, 420 subsampling) to
// I012 (12-bit triplanar, LSB-aligned samples).
// Identical to P010ToI010 except for the 12-bit depth argument.
LIBYUV_API
int P012ToI012(const uint16_t* src_y,
               int src_stride_y,
               const uint16_t* src_uv,
               int src_stride_uv,
               uint16_t* dst_y,
               int dst_stride_y,
               uint16_t* dst_u,
               int dst_stride_u,
               uint16_t* dst_v,
               int dst_stride_v,
               int width,
               int height) {
  return PxxxToIxxx(src_y, src_stride_y, src_uv, src_stride_uv, dst_y,
                    dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v,
                    width, height, 1, 1, 12);
}
LIBYUV_API
int P010ToP410(const uint16_t* src_y,
int src_stride_y,
@ -1593,6 +1793,14 @@ int ARGBToI420(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
ARGBToYRow = ARGBToYRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_LSX;
}
}
#endif
#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToYRow = ARGBToYRow_Any_LASX;
@ -1707,13 +1915,21 @@ int BGRAToI420(const uint8_t* src_bgra,
}
}
#endif
#if defined(HAS_BGRATOYROW_LASX) && defined(HAS_BGRATOUVROW_LASX)
#if defined(HAS_BGRATOYROW_LSX) && defined(HAS_BGRATOUVROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
BGRAToYRow = BGRAToYRow_Any_LSX;
BGRAToUVRow = BGRAToUVRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
BGRAToYRow = BGRAToYRow_LSX;
BGRAToUVRow = BGRAToUVRow_LSX;
}
}
#endif
#if defined(HAS_BGRATOYROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
BGRAToYRow = BGRAToYRow_Any_LASX;
BGRAToUVRow = BGRAToUVRow_Any_LASX;
if (IS_ALIGNED(width, 32)) {
BGRAToYRow = BGRAToYRow_LASX;
BGRAToUVRow = BGRAToUVRow_LASX;
}
}
#endif
@ -1829,6 +2045,14 @@ int ABGRToI420(const uint8_t* src_abgr,
}
}
#endif
#if defined(HAS_ABGRTOYROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ABGRToYRow = ABGRToYRow_Any_LASX;
if (IS_ALIGNED(width, 32)) {
ABGRToYRow = ABGRToYRow_LASX;
}
}
#endif
for (y = 0; y < height - 1; y += 2) {
ABGRToUVRow(src_abgr, src_stride_abgr, dst_u, dst_v, width);
@ -1925,6 +2149,14 @@ int RGBAToI420(const uint8_t* src_rgba,
}
}
#endif
#if defined(HAS_RGBATOYROW_LASX)
  // BUG FIX: the LASX row function was gated on the NEON CPU flag
  // (kCpuHasNEON), which can never be set on a LoongArch target, so the
  // LASX path was dead code — and would be wrong if both were ever set.
  // Gate it on kCpuHasLASX like every other LASX dispatch in this file.
  if (TestCpuFlag(kCpuHasLASX)) {
    RGBAToYRow = RGBAToYRow_Any_LASX;
    if (IS_ALIGNED(width, 32)) {
      RGBAToYRow = RGBAToYRow_LASX;
    }
  }
#endif
for (y = 0; y < height - 1; y += 2) {
RGBAToUVRow(src_rgba, src_stride_rgba, dst_u, dst_v, width);
@ -2183,6 +2415,22 @@ int RGB24ToJ420(const uint8_t* src_rgb24,
}
}
#endif
#if defined(HAS_RGB24TOYJROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
RGB24ToYJRow = RGB24ToYJRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
RGB24ToYJRow = RGB24ToYJRow_LSX;
}
}
#endif
#if defined(HAS_RGB24TOYJROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
RGB24ToYJRow = RGB24ToYJRow_Any_LASX;
if (IS_ALIGNED(width, 32)) {
RGB24ToYJRow = RGB24ToYJRow_LASX;
}
}
#endif
// Other platforms do intermediate conversion from RGB24 to ARGB.
#else // HAS_RGB24TOYJROW
@ -2511,6 +2759,22 @@ int RAWToJ420(const uint8_t* src_raw,
}
}
#endif
#if defined(HAS_RAWTOYJROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
RAWToYJRow = RAWToYJRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
RAWToYJRow = RAWToYJRow_LSX;
}
}
#endif
#if defined(HAS_RAWTOYJROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
RAWToYJRow = RAWToYJRow_Any_LASX;
if (IS_ALIGNED(width, 32)) {
RAWToYJRow = RAWToYJRow_LASX;
}
}
#endif
// Other platforms do intermediate conversion from RAW to ARGB.
#else // HAS_RAWTOYJROW
@ -3098,6 +3362,14 @@ int ARGB4444ToI420(const uint8_t* src_argb4444,
}
}
#endif
#if defined(HAS_ARGBTOYROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
ARGBToYRow = ARGBToYRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_LSX;
}
}
#endif
#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToYRow = ARGBToYRow_Any_LASX;
@ -3210,6 +3482,22 @@ int RGB24ToJ400(const uint8_t* src_rgb24,
}
}
#endif
#if defined(HAS_RGB24TOYJROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
RGB24ToYJRow = RGB24ToYJRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
RGB24ToYJRow = RGB24ToYJRow_LSX;
}
}
#endif
#if defined(HAS_RGB24TOYJROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
RGB24ToYJRow = RGB24ToYJRow_Any_LASX;
if (IS_ALIGNED(width, 32)) {
RGB24ToYJRow = RGB24ToYJRow_LASX;
}
}
#endif
for (y = 0; y < height; ++y) {
RGB24ToYJRow(src_rgb24, dst_yj, width);
@ -3278,6 +3566,22 @@ int RAWToJ400(const uint8_t* src_raw,
}
}
#endif
#if defined(HAS_RAWTOYJROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
RAWToYJRow = RAWToYJRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
RAWToYJRow = RAWToYJRow_LSX;
}
}
#endif
#if defined(HAS_RAWTOYJROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
RAWToYJRow = RAWToYJRow_Any_LASX;
if (IS_ALIGNED(width, 32)) {
RAWToYJRow = RAWToYJRow_LASX;
}
}
#endif
for (y = 0; y < height; ++y) {
RAWToYJRow(src_raw, dst_yj, width);

View File

@ -116,6 +116,14 @@ int ARGBToI444(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
ARGBToYRow = ARGBToYRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_LSX;
}
}
#endif
#if defined(HAS_ARGBTOYROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToYRow = ARGBToYRow_Any_LASX;
@ -230,7 +238,14 @@ int ARGBToI422(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
ARGBToYRow = ARGBToYRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_LSX;
}
}
#endif
#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToYRow = ARGBToYRow_Any_LASX;
@ -340,6 +355,14 @@ int ARGBToNV12(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
ARGBToYRow = ARGBToYRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_LSX;
}
}
#endif
#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToYRow = ARGBToYRow_Any_LASX;
@ -502,6 +525,14 @@ int ARGBToNV21(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
ARGBToYRow = ARGBToYRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_LSX;
}
}
#endif
#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToYRow = ARGBToYRow_Any_LASX;
@ -663,6 +694,22 @@ int ABGRToNV12(const uint8_t* src_abgr,
}
}
#endif
#if defined(HAS_ABGRTOYROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
ABGRToYRow = ABGRToYRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
ABGRToYRow = ABGRToYRow_LSX;
}
}
#endif
#if defined(HAS_ABGRTOYROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ABGRToYRow = ABGRToYRow_Any_LASX;
if (IS_ALIGNED(width, 32)) {
ABGRToYRow = ABGRToYRow_LASX;
}
}
#endif
#if defined(HAS_MERGEUVROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
MergeUVRow_ = MergeUVRow_Any_SSE2;
@ -815,6 +862,22 @@ int ABGRToNV21(const uint8_t* src_abgr,
}
}
#endif
#if defined(HAS_ABGRTOYROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
ABGRToYRow = ABGRToYRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
ABGRToYRow = ABGRToYRow_LSX;
}
}
#endif
#if defined(HAS_ABGRTOYROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ABGRToYRow = ABGRToYRow_Any_LASX;
if (IS_ALIGNED(width, 32)) {
ABGRToYRow = ABGRToYRow_LASX;
}
}
#endif
#if defined(HAS_MERGEUVROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
MergeUVRow_ = MergeUVRow_Any_SSE2;
@ -972,6 +1035,14 @@ int ARGBToYUY2(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
ARGBToYRow = ARGBToYRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_LSX;
}
}
#endif
#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToYRow = ARGBToYRow_Any_LASX;
@ -1135,6 +1206,14 @@ int ARGBToUYVY(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
ARGBToYRow = ARGBToYRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_LSX;
}
}
#endif
#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToYRow = ARGBToYRow_Any_LASX;
@ -1262,6 +1341,14 @@ int ARGBToI400(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
ARGBToYRow = ARGBToYRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_LSX;
}
}
#endif
#if defined(HAS_ARGBTOYROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToYRow = ARGBToYRow_Any_LASX;
@ -1939,6 +2026,16 @@ int ARGBToJ420(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYJROW_LSX) && defined(HAS_ARGBTOUVJROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
ARGBToYJRow = ARGBToYJRow_Any_LSX;
ARGBToUVJRow = ARGBToUVJRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
ARGBToYJRow = ARGBToYJRow_LSX;
ARGBToUVJRow = ARGBToUVJRow_LSX;
}
}
#endif
#if defined(HAS_ARGBTOYJROW_LASX) && defined(HAS_ARGBTOUVJROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToYJRow = ARGBToYJRow_Any_LASX;
@ -2215,6 +2312,22 @@ int RGBAToJ400(const uint8_t* src_rgba,
}
}
#endif
#if defined(HAS_RGBATOYJROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
RGBAToYJRow = RGBAToYJRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
RGBAToYJRow = RGBAToYJRow_LSX;
}
}
#endif
#if defined(HAS_RGBATOYJROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
RGBAToYJRow = RGBAToYJRow_Any_LASX;
if (IS_ALIGNED(width, 32)) {
RGBAToYJRow = RGBAToYJRow_LASX;
}
}
#endif
for (y = 0; y < height; ++y) {
RGBAToYJRow(src_rgba, dst_yj, width);
@ -2309,13 +2422,19 @@ int ABGRToJ420(const uint8_t* src_abgr,
}
}
#endif
#if defined(HAS_ABGRTOYJROW_LSX) && defined(HAS_ABGRTOUVJROW_LSX)
#if defined(HAS_ABGRTOYJROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
ABGRToYJRow = ABGRToYJRow_Any_LSX;
ABGRToUVJRow = ABGRToUVJRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
ABGRToYJRow = ABGRToYJRow_LSX;
ABGRToUVJRow = ABGRToUVJRow_LSX;
}
}
#endif
#if defined(HAS_ABGRTOYJROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ABGRToYJRow = ABGRToYJRow_Any_LASX;
if (IS_ALIGNED(width, 32)) {
ABGRToYJRow = ABGRToYJRow_LASX;
}
}
#endif
@ -2430,23 +2549,19 @@ int ABGRToJ422(const uint8_t* src_abgr,
}
}
#endif
#if defined(HAS_ABGRTOYJROW_LSX) && defined(HAS_ABGRTOUVJROW_LSX)
#if defined(HAS_ABGRTOYJROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
ABGRToYJRow = ABGRToYJRow_Any_LSX;
ABGRToUVJRow = ABGRToUVJRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
ABGRToYJRow = ABGRToYJRow_LSX;
ABGRToUVJRow = ABGRToUVJRow_LSX;
}
}
#endif
#if defined(HAS_ABGRTOYJROW_LASX) && defined(HAS_ABGRTOUVJROW_LASX)
#if defined(HAS_ABGRTOYJROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ABGRToYJRow = ABGRToYJRow_Any_LASX;
ABGRToUVJRow = ABGRToUVJRow_Any_LASX;
if (IS_ALIGNED(width, 32)) {
ABGRToYJRow = ABGRToYJRow_LASX;
ABGRToUVJRow = ABGRToUVJRow_LASX;
}
}
#endif
@ -2519,6 +2634,22 @@ int ABGRToJ400(const uint8_t* src_abgr,
}
}
#endif
#if defined(HAS_ABGRTOYJROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
ABGRToYJRow = ABGRToYJRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
ABGRToYJRow = ABGRToYJRow_LSX;
}
}
#endif
#if defined(HAS_ABGRTOYJROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ABGRToYJRow = ABGRToYJRow_Any_LASX;
if (IS_ALIGNED(width, 32)) {
ABGRToYJRow = ABGRToYJRow_LASX;
}
}
#endif
for (y = 0; y < height; ++y) {
ABGRToYJRow(src_abgr, dst_yj, width);
@ -2713,6 +2844,22 @@ int RAWToJNV21(const uint8_t* src_raw,
}
}
#endif
#if defined(HAS_RAWTOYJROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
RAWToYJRow = RAWToYJRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
RAWToYJRow = RAWToYJRow_LSX;
}
}
#endif
#if defined(HAS_RAWTOYJROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
RAWToYJRow = RAWToYJRow_Any_LASX;
if (IS_ALIGNED(width, 32)) {
RAWToYJRow = RAWToYJRow_LASX;
}
}
#endif
// Other platforms do intermediate conversion from RAW to ARGB.
#else // HAS_RAWTOYJROW

View File

@ -79,9 +79,7 @@ MJpegDecoder::MJpegDecoder()
decompress_struct_->err = jpeg_std_error(&error_mgr_->base);
// Override standard exit()-based error handler.
error_mgr_->base.error_exit = &ErrorHandler;
#ifndef DEBUG_MJPEG
error_mgr_->base.output_message = &OutputHandler;
#endif
#endif
decompress_struct_->client_data = NULL;
source_mgr_->init_source = &init_source;
@ -111,7 +109,7 @@ LIBYUV_BOOL MJpegDecoder::LoadFrame(const uint8_t* src, size_t src_len) {
}
buf_.data = src;
buf_.len = static_cast<int>(src_len);
buf_.len = (int)src_len;
buf_vec_.pos = 0;
decompress_struct_->client_data = &buf_vec_;
#ifdef HAVE_SETJMP
@ -430,7 +428,7 @@ boolean fill_input_buffer(j_decompress_ptr cinfo) {
void skip_input_data(j_decompress_ptr cinfo, long num_bytes) { // NOLINT
jpeg_source_mgr* src = cinfo->src;
size_t bytes = static_cast<size_t>(num_bytes);
size_t bytes = (size_t)num_bytes;
if (bytes > src->bytes_in_buffer) {
src->next_input_byte = nullptr;
src->bytes_in_buffer = 0;
@ -465,12 +463,11 @@ void ErrorHandler(j_common_ptr cinfo) {
longjmp(mgr->setjmp_buffer, 1);
}
#ifndef DEBUG_MJPEG
// Suppress fprintf warnings.
void OutputHandler(j_common_ptr cinfo) {
(void)cinfo;
}
#endif
#endif // HAVE_SETJMP
void MJpegDecoder::AllocOutputBuffers(int num_outbufs) {

View File

@ -333,6 +333,45 @@ int I210Copy(const uint16_t* src_y,
return 0;
}
// Copy I410.
// Copy an I410 frame (10-bit 444: Y, U and V planes are all
// width x height). The Y plane copy is optional: passing dst_y == NULL
// skips it, but a NULL src_y with a non-NULL dst_y is an error.
// Negative height flips the image vertically. Returns 0 on success,
// -1 on bad arguments.
LIBYUV_API
int I410Copy(const uint16_t* src_y,
             int src_stride_y,
             const uint16_t* src_u,
             int src_stride_u,
             const uint16_t* src_v,
             int src_stride_v,
             uint16_t* dst_y,
             int dst_stride_y,
             uint16_t* dst_u,
             int dst_stride_u,
             uint16_t* dst_v,
             int dst_stride_v,
             int width,
             int height) {
  // Chroma pointers are mandatory; src_y is only needed when dst_y is set.
  if (!src_u || !src_v || !dst_u || !dst_v || (!src_y && dst_y) ||
      width <= 0 || height == 0) {
    return -1;
  }
  if (height < 0) {
    // Negative height: start at the bottom row and read upward.
    height = -height;
    src_y += (height - 1) * src_stride_y;
    src_u += (height - 1) * src_stride_u;
    src_v += (height - 1) * src_stride_v;
    src_stride_y = -src_stride_y;
    src_stride_u = -src_stride_u;
    src_stride_v = -src_stride_v;
  }
  if (dst_y) {
    CopyPlane_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
  }
  // 444: chroma planes are full resolution.
  CopyPlane_16(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
  CopyPlane_16(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
  return 0;
}
// Copy I400.
LIBYUV_API
int I400ToI400(const uint8_t* src_y,
@ -3196,6 +3235,7 @@ int RAWToRGB24(const uint8_t* src_raw,
return 0;
}
// TODO(fbarchard): Consider uint8_t value
LIBYUV_API
void SetPlane(uint8_t* dst_y,
int dst_stride_y,
@ -3203,7 +3243,7 @@ void SetPlane(uint8_t* dst_y,
int height,
uint32_t value) {
int y;
void (*SetRow)(uint8_t * dst, uint8_t value, int width) = SetRow_C;
void (*SetRow)(uint8_t* dst, uint8_t value, int width) = SetRow_C;
if (width <= 0 || height == 0) {
return;
@ -3256,7 +3296,7 @@ void SetPlane(uint8_t* dst_y,
// Set plane
for (y = 0; y < height; ++y) {
SetRow(dst_y, value, width);
SetRow(dst_y, (uint8_t)value, width);
dst_y += dst_stride_y;
}
}
@ -3304,7 +3344,7 @@ int ARGBRect(uint8_t* dst_argb,
int height,
uint32_t value) {
int y;
void (*ARGBSetRow)(uint8_t * dst_argb, uint32_t value, int width) =
void (*ARGBSetRow)(uint8_t* dst_argb, uint32_t value, int width) =
ARGBSetRow_C;
if (!dst_argb || width <= 0 || height == 0 || dst_x < 0 || dst_y < 0) {
return -1;
@ -3609,7 +3649,7 @@ int ARGBSepia(uint8_t* dst_argb,
int width,
int height) {
int y;
void (*ARGBSepiaRow)(uint8_t * dst_argb, int width) = ARGBSepiaRow_C;
void (*ARGBSepiaRow)(uint8_t* dst_argb, int width) = ARGBSepiaRow_C;
uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
return -1;
@ -3752,7 +3792,7 @@ int ARGBColorTable(uint8_t* dst_argb,
int width,
int height) {
int y;
void (*ARGBColorTableRow)(uint8_t * dst_argb, const uint8_t* table_argb,
void (*ARGBColorTableRow)(uint8_t* dst_argb, const uint8_t* table_argb,
int width) = ARGBColorTableRow_C;
uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
if (!dst_argb || !table_argb || width <= 0 || height <= 0 || dst_x < 0 ||
@ -3788,7 +3828,7 @@ int RGBColorTable(uint8_t* dst_argb,
int width,
int height) {
int y;
void (*RGBColorTableRow)(uint8_t * dst_argb, const uint8_t* table_argb,
void (*RGBColorTableRow)(uint8_t* dst_argb, const uint8_t* table_argb,
int width) = RGBColorTableRow_C;
uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
if (!dst_argb || !table_argb || width <= 0 || height <= 0 || dst_x < 0 ||
@ -3833,7 +3873,7 @@ int ARGBQuantize(uint8_t* dst_argb,
int width,
int height) {
int y;
void (*ARGBQuantizeRow)(uint8_t * dst_argb, int scale, int interval_size,
void (*ARGBQuantizeRow)(uint8_t* dst_argb, int scale, int interval_size,
int interval_offset, int width) = ARGBQuantizeRow_C;
uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0 ||
@ -4086,7 +4126,7 @@ int InterpolatePlane(const uint8_t* src0,
int height,
int interpolation) {
int y;
void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
void (*InterpolateRow)(uint8_t* dst_ptr, const uint8_t* src_ptr,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_C;
if (!src0 || !src1 || !dst || width <= 0 || height == 0) {
@ -4166,7 +4206,7 @@ int InterpolatePlane_16(const uint16_t* src0,
int height,
int interpolation) {
int y;
void (*InterpolateRow_16)(uint16_t * dst_ptr, const uint16_t* src_ptr,
void (*InterpolateRow_16)(uint16_t* dst_ptr, const uint16_t* src_ptr,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_16_C;
if (!src0 || !src1 || !dst || width <= 0 || height == 0) {
@ -5281,7 +5321,7 @@ int UYVYToNV12(const uint8_t* src_uyvy,
int halfwidth = (width + 1) >> 1;
void (*SplitUVRow)(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v,
int width) = SplitUVRow_C;
void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
void (*InterpolateRow)(uint8_t* dst_ptr, const uint8_t* src_ptr,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_C;

View File

@ -138,7 +138,7 @@ void RotatePlane180(const uint8_t* src,
int dst_stride,
int width,
int height) {
// Swap first and last row and mirror the content. Uses a temporary row.
// Swap top and bottom row and mirror the content. Uses a temporary row.
align_buffer_64(row, width);
const uint8_t* src_bot = src + src_stride * (height - 1);
uint8_t* dst_bot = dst + dst_stride * (height - 1);
@ -209,9 +209,9 @@ void RotatePlane180(const uint8_t* src,
// Odd height will harmlessly mirror the middle row twice.
for (y = 0; y < half_height; ++y) {
CopyRow(src, row, width); // Copy first row into buffer
MirrorRow(src_bot, dst, width); // Mirror last row into first row
MirrorRow(row, dst_bot, width); // Mirror buffer into last row
CopyRow(src, row, width); // Copy top row into buffer
MirrorRow(src_bot, dst, width); // Mirror bottom row into top row
MirrorRow(row, dst_bot, width); // Mirror buffer into bottom row
src += src_stride;
dst += dst_stride;
src_bot -= src_stride;
@ -476,6 +476,120 @@ int RotatePlane(const uint8_t* src,
return -1;
}
// Transpose a 16-bit plane: dst(x, y) = src(y, x).
// The source is consumed in bands of 8 rows, each of which becomes 8
// destination columns; any leftover rows (< 8) go through the
// general-size fallback.
LIBYUV_API
void TransposePlane_16(const uint16_t* src,
                       int src_stride,
                       uint16_t* dst,
                       int dst_stride,
                       int width,
                       int height) {
  int rows_left;
  for (rows_left = height; rows_left >= 8; rows_left -= 8) {
    TransposeWx8_16_C(src, src_stride, dst, dst_stride, width);
    src += 8 * src_stride;  // Advance 8 source rows.
    dst += 8;               // Advance 8 destination columns.
  }
  if (rows_left > 0) {
    TransposeWxH_16_C(src, src_stride, dst, dst_stride, width, rows_left);
  }
}
static void RotatePlane90_16(const uint16_t* src,
int src_stride,
uint16_t* dst,
int dst_stride,
int width,
int height) {
// Rotate by 90 is a transpose with the source read
// from bottom to top. So set the source pointer to the end
// of the buffer and flip the sign of the source stride.
src += src_stride * (height - 1);
src_stride = -src_stride;
TransposePlane_16(src, src_stride, dst, dst_stride, width, height);
}
static void RotatePlane270_16(const uint16_t* src,
int src_stride,
uint16_t* dst,
int dst_stride,
int width,
int height) {
// Rotate by 270 is a transpose with the destination written
// from bottom to top. So set the destination pointer to the end
// of the buffer and flip the sign of the destination stride.
dst += dst_stride * (width - 1);
dst_stride = -dst_stride;
TransposePlane_16(src, src_stride, dst, dst_stride, width, height);
}
// Rotate a 16-bit plane by 180 degrees: vertical flip combined with a
// horizontal mirror. Rows are swapped from the outside in, using one
// temporary row as scratch so the operation works in place per row pair.
static void RotatePlane180_16(const uint16_t* src,
                              int src_stride,
                              uint16_t* dst,
                              int dst_stride,
                              int width,
                              int height) {
  const uint16_t* src_bot = src + src_stride * (height - 1);
  uint16_t* dst_bot = dst + dst_stride * (height - 1);
  // Odd heights mirror the middle row twice, which is harmless.
  const int half_height = (height + 1) >> 1;
  int i;
  align_buffer_64_16(row, width);
  for (i = 0; i < half_height; ++i) {
    CopyRow_16_C(src, row, width);        // Stash the top row.
    MirrorRow_16_C(src_bot, dst, width);  // Bottom row -> mirrored top.
    MirrorRow_16_C(row, dst_bot, width);  // Stashed top -> mirrored bottom.
    src += src_stride;
    dst += dst_stride;
    src_bot -= src_stride;
    dst_bot -= dst_stride;
  }
  free_aligned_buffer_64_16(row);
}
// Rotate a single 16-bit plane by 0, 90, 180 or 270 degrees.
// Negative height flips the source vertically before rotating.
// Returns 0 on success, -1 on bad arguments or unsupported mode.
LIBYUV_API
int RotatePlane_16(const uint16_t* src,
                   int src_stride,
                   uint16_t* dst,
                   int dst_stride,
                   int width,
                   int height,
                   enum RotationMode mode) {
  if (!src || !dst || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image: start at the bottom row and
  // read upward.
  if (height < 0) {
    height = -height;
    src += (height - 1) * src_stride;
    src_stride = -src_stride;
  }
  if (mode == kRotate0) {
    // No rotation: straight plane copy.
    CopyPlane_16(src, src_stride, dst, dst_stride, width, height);
    return 0;
  }
  if (mode == kRotate90) {
    RotatePlane90_16(src, src_stride, dst, dst_stride, width, height);
    return 0;
  }
  if (mode == kRotate270) {
    RotatePlane270_16(src, src_stride, dst, dst_stride, width, height);
    return 0;
  }
  if (mode == kRotate180) {
    RotatePlane180_16(src, src_stride, dst, dst_stride, width, height);
    return 0;
  }
  return -1;  // Unsupported rotation mode.
}
LIBYUV_API
int I420Rotate(const uint8_t* src_y,
int src_stride_y,
@ -544,6 +658,8 @@ int I420Rotate(const uint8_t* src_y,
return -1;
}
// I422 has half width x full height UV planes, so rotate by 90 and 270
// require scaling to maintain 422 subsampling.
LIBYUV_API
int I422Rotate(const uint8_t* src_y,
int src_stride_y,
@ -579,31 +695,42 @@ int I422Rotate(const uint8_t* src_y,
switch (mode) {
case kRotate0:
// copy frame
// Copy frame
CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, height);
CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, height);
return 0;
// Note on temporary Y plane for UV.
// Rotation of UV first fits within the Y destination plane rows.
// Y plane is width x height
// Y plane rotated is height x width
// UV plane is (width / 2) x height
// UV plane rotated is height x (width / 2)
// UV plane rotated+scaled is (height / 2) x width.
// UV plane rotated is a temporary that fits within the Y plane rotated.
case kRotate90:
// We need to rotate and rescale, we use plane Y as temporal storage.
RotatePlane90(src_u, src_stride_u, dst_y, height, halfwidth, height);
ScalePlane(dst_y, height, height, halfwidth, dst_u, halfheight,
RotatePlane90(src_u, src_stride_u, dst_y, dst_stride_y, halfwidth,
height);
ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_u, dst_stride_u,
halfheight, width, kFilterBilinear);
RotatePlane90(src_v, src_stride_v, dst_y, height, halfwidth, height);
ScalePlane(dst_y, height, height, halfwidth, dst_v, halfheight,
RotatePlane90(src_v, src_stride_v, dst_y, dst_stride_y, halfwidth,
height);
ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_v, dst_stride_v,
halfheight, width, kFilterLinear);
RotatePlane90(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
return 0;
case kRotate270:
// We need to rotate and rescale, we use plane Y as temporal storage.
RotatePlane270(src_u, src_stride_u, dst_y, height, halfwidth, height);
ScalePlane(dst_y, height, height, halfwidth, dst_u, halfheight,
RotatePlane270(src_u, src_stride_u, dst_y, dst_stride_y, halfwidth,
height);
ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_u, dst_stride_u,
halfheight, width, kFilterBilinear);
RotatePlane270(src_v, src_stride_v, dst_y, height, halfwidth, height);
ScalePlane(dst_y, height, height, halfwidth, dst_v, halfheight,
RotatePlane270(src_v, src_stride_v, dst_y, dst_stride_y, halfwidth,
height);
ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_v, dst_stride_v,
halfheight, width, kFilterLinear);
RotatePlane270(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
return 0;
case kRotate180:
RotatePlane180(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
@ -828,6 +955,228 @@ int Android420ToI420Rotate(const uint8_t* src_y,
return -1;
}
// Rotate an I010 frame (10-bit 420: chroma planes are half width x half
// height). All three planes rotate independently since 420 subsampling is
// preserved under every rotation. Negative height flips the source
// vertically. Returns 0 on success, -1 on bad arguments or mode.
LIBYUV_API
int I010Rotate(const uint16_t* src_y,
               int src_stride_y,
               const uint16_t* src_u,
               int src_stride_u,
               const uint16_t* src_v,
               int src_stride_v,
               uint16_t* dst_y,
               int dst_stride_y,
               uint16_t* dst_u,
               int dst_stride_u,
               uint16_t* dst_v,
               int dst_stride_v,
               int width,
               int height,
               enum RotationMode mode) {
  int halfwidth = (width + 1) >> 1;    // 420 chroma width.
  int halfheight = (height + 1) >> 1;  // 420 chroma height.
  if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || !dst_y ||
      !dst_u || !dst_v || dst_stride_y < 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_y += (height - 1) * src_stride_y;
    src_u += (height - 1) * src_stride_u;
    src_v += (height - 1) * src_stride_v;
    src_stride_y = -src_stride_y;
    src_stride_u = -src_stride_u;
    src_stride_v = -src_stride_v;
  }
  if (mode == kRotate0) {
    // No rotation: plain frame copy.
    return I010Copy(src_y, src_stride_y, src_u, src_stride_u, src_v,
                    src_stride_v, dst_y, dst_stride_y, dst_u, dst_stride_u,
                    dst_v, dst_stride_v, width, height);
  }
  if (mode == kRotate90) {
    RotatePlane90_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
    RotatePlane90_16(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth,
                     halfheight);
    RotatePlane90_16(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth,
                     halfheight);
    return 0;
  }
  if (mode == kRotate270) {
    RotatePlane270_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
    RotatePlane270_16(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth,
                      halfheight);
    RotatePlane270_16(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth,
                      halfheight);
    return 0;
  }
  if (mode == kRotate180) {
    RotatePlane180_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
    RotatePlane180_16(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth,
                      halfheight);
    RotatePlane180_16(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth,
                      halfheight);
    return 0;
  }
  return -1;  // Unsupported rotation mode.
}
// I210 has half width x full height UV planes, so rotate by 90 and 270
// require scaling to maintain 422 subsampling.
// Returns 0 on success, -1 on bad arguments or unsupported mode.
// Statement order in the 90/270 cases is load-bearing: dst_y is used as
// scratch for the rotated chroma, so Y must be rotated last.
LIBYUV_API
int I210Rotate(const uint16_t* src_y,
               int src_stride_y,
               const uint16_t* src_u,
               int src_stride_u,
               const uint16_t* src_v,
               int src_stride_v,
               uint16_t* dst_y,
               int dst_stride_y,
               uint16_t* dst_u,
               int dst_stride_u,
               uint16_t* dst_v,
               int dst_stride_v,
               int width,
               int height,
               enum RotationMode mode) {
  int halfwidth = (width + 1) >> 1;    // 422 chroma width.
  int halfheight = (height + 1) >> 1;  // Chroma width after 90/270 rotation.
  if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || !dst_y ||
      !dst_u || !dst_v) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_y = src_y + (height - 1) * src_stride_y;
    src_u = src_u + (height - 1) * src_stride_u;
    src_v = src_v + (height - 1) * src_stride_v;
    src_stride_y = -src_stride_y;
    src_stride_u = -src_stride_u;
    src_stride_v = -src_stride_v;
  }
  switch (mode) {
    case kRotate0:
      // Copy frame
      CopyPlane_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
      CopyPlane_16(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, height);
      CopyPlane_16(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, height);
      return 0;
      // Note on temporary Y plane for UV.
      // Rotation of UV first fits within the Y destination plane rows.
      // Y plane is width x height
      // Y plane rotated is height x width
      // UV plane is (width / 2) x height
      // UV plane rotated is height x (width / 2)
      // UV plane rotated+scaled is (height / 2) x width.
      // UV plane rotated is a temporary that fits within the Y plane rotated.
    case kRotate90:
      // Rotate U into the Y destination (scratch), then rescale
      // height x halfwidth -> halfheight x width to restore 422.
      RotatePlane90_16(src_u, src_stride_u, dst_y, dst_stride_y, halfwidth,
                       height);
      ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_u, dst_stride_u,
                    halfheight, width, kFilterBilinear);
      // NOTE(review): U is scaled with kFilterBilinear but V with
      // kFilterLinear — looks like an unintended asymmetry; confirm.
      RotatePlane90_16(src_v, src_stride_v, dst_y, dst_stride_y, halfwidth,
                       height);
      ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_v, dst_stride_v,
                    halfheight, width, kFilterLinear);
      // Y last: dst_y was used as chroma scratch above.
      RotatePlane90_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
      return 0;
    case kRotate270:
      // Same scratch scheme as kRotate90, with 270-degree rotation.
      RotatePlane270_16(src_u, src_stride_u, dst_y, dst_stride_y, halfwidth,
                        height);
      ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_u, dst_stride_u,
                    halfheight, width, kFilterBilinear);
      RotatePlane270_16(src_v, src_stride_v, dst_y, dst_stride_y, halfwidth,
                        height);
      ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_v, dst_stride_v,
                    halfheight, width, kFilterLinear);
      RotatePlane270_16(src_y, src_stride_y, dst_y, dst_stride_y, width,
                        height);
      return 0;
    case kRotate180:
      // 180 preserves plane dimensions; no rescale needed.
      RotatePlane180_16(src_y, src_stride_y, dst_y, dst_stride_y, width,
                        height);
      RotatePlane180_16(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth,
                        height);
      RotatePlane180_16(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth,
                        height);
      return 0;
    default:
      break;
  }
  return -1;
}
// Rotate an I410 frame (10-bit 444: all planes full width x height).
// Because chroma is full resolution, every plane rotates identically and
// no rescaling is needed. Negative height flips the source vertically.
// Returns 0 on success, -1 on bad arguments or unsupported mode.
LIBYUV_API
int I410Rotate(const uint16_t* src_y,
               int src_stride_y,
               const uint16_t* src_u,
               int src_stride_u,
               const uint16_t* src_v,
               int src_stride_v,
               uint16_t* dst_y,
               int dst_stride_y,
               uint16_t* dst_u,
               int dst_stride_u,
               uint16_t* dst_v,
               int dst_stride_v,
               int width,
               int height,
               enum RotationMode mode) {
  if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || !dst_y ||
      !dst_u || !dst_v || dst_stride_y < 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_y += (height - 1) * src_stride_y;
    src_u += (height - 1) * src_stride_u;
    src_v += (height - 1) * src_stride_v;
    src_stride_y = -src_stride_y;
    src_stride_u = -src_stride_u;
    src_stride_v = -src_stride_v;
  }
  if (mode == kRotate0) {
    // No rotation: plain plane copies.
    CopyPlane_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
    CopyPlane_16(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
    CopyPlane_16(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
    return 0;
  }
  if (mode == kRotate90) {
    RotatePlane90_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
    RotatePlane90_16(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
    RotatePlane90_16(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
    return 0;
  }
  if (mode == kRotate270) {
    RotatePlane270_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
    RotatePlane270_16(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
    RotatePlane270_16(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
    return 0;
  }
  if (mode == kRotate180) {
    RotatePlane180_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
    RotatePlane180_16(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
    RotatePlane180_16(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
    return 0;
  }
  return -1;  // Unsupported rotation mode.
}
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv

View File

@ -94,8 +94,74 @@ void TransposeUVWxH_C(const uint8_t* src,
for (i = 0; i < width * 2; i += 2) {
int j;
for (j = 0; j < height; ++j) {
dst_a[j + ((i >> 1) * dst_stride_a)] = src[i + (j * src_stride)];
dst_b[j + ((i >> 1) * dst_stride_b)] = src[i + (j * src_stride) + 1];
dst_a[((i >> 1) * dst_stride_a) + j] = src[i + (j * src_stride)];
dst_b[((i >> 1) * dst_stride_b) + j] = src[i + (j * src_stride) + 1];
}
}
}
// Transpose an 8-row band of 16-bit samples: for each of `width` source
// columns, write its 8 samples as one destination row of 8 values.
// dst(col, row) = src(row, col) for row in [0, 8).
void TransposeWx8_16_C(const uint16_t* src,
                       int src_stride,
                       uint16_t* dst,
                       int dst_stride,
                       int width) {
  int col;
  for (col = 0; col < width; ++col) {
    int row;
    for (row = 0; row < 8; ++row) {
      dst[row] = src[row * src_stride];
    }
    ++src;              // Next source column.
    dst += dst_stride;  // Next destination row.
  }
}
// Transpose an 8-row by `width`-column block of interleaved 16-bit value
// pairs, splitting them: the first value of each pair goes to dst_a, the
// second to dst_b.
void TransposeUVWx8_16_C(const uint16_t* src,
                         int src_stride,
                         uint16_t* dst_a,
                         int dst_stride_a,
                         uint16_t* dst_b,
                         int dst_stride_b,
                         int width) {
  int x;
  for (x = 0; x < width; ++x) {
    int row;
    // De-interleave one source column pair across the 8 rows.
    for (row = 0; row < 8; ++row) {
      dst_a[row] = src[row * src_stride + 0];
      dst_b[row] = src[row * src_stride + 1];
    }
    src += 2;              // Advance one interleaved pair.
    dst_a += dst_stride_a; // Next row of the first plane.
    dst_b += dst_stride_b; // Next row of the second plane.
  }
}
// Transpose an arbitrary width x height block of 16-bit pixels:
// dst[x][y] = src[y][x] for every column x and row y.
void TransposeWxH_16_C(const uint16_t* src,
                       int src_stride,
                       uint16_t* dst,
                       int dst_stride,
                       int width,
                       int height) {
  int x;
  for (x = 0; x < width; ++x) {
    const uint16_t* s = src + x;          // Walk down source column x.
    uint16_t* d = dst + x * dst_stride;   // Fill destination row x.
    int y;
    for (y = 0; y < height; ++y) {
      d[y] = *s;
      s += src_stride;
    }
  }
}

View File

@ -201,13 +201,13 @@ void TransposeWx8_NEON(const uint8_t* src,
"4: \n"
: "=&r"(src_temp), // %0
"+r"(src), // %1
"+r"(dst), // %2
"+r"(width) // %3
: "r"(&kVTbl4x4Transpose), // %4
"r"(static_cast<ptrdiff_t>(src_stride)), // %5
"r"(static_cast<ptrdiff_t>(dst_stride)) // %6
: "=&r"(src_temp), // %0
"+r"(src), // %1
"+r"(dst), // %2
"+r"(width) // %3
: "r"(&kVTbl4x4Transpose), // %4
"r"((ptrdiff_t)src_stride), // %5
"r"((ptrdiff_t)dst_stride) // %6
: "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16",
"v17", "v18", "v19", "v20", "v21", "v22", "v23");
}
@ -423,15 +423,15 @@ void TransposeUVWx8_NEON(const uint8_t* src,
"4: \n"
: "=&r"(src_temp), // %0
"+r"(src), // %1
"+r"(dst_a), // %2
"+r"(dst_b), // %3
"+r"(width) // %4
: "r"(static_cast<ptrdiff_t>(src_stride)), // %5
"r"(static_cast<ptrdiff_t>(dst_stride_a)), // %6
"r"(static_cast<ptrdiff_t>(dst_stride_b)), // %7
"r"(&kVTbl4x4TransposeDi) // %8
: "=&r"(src_temp), // %0
"+r"(src), // %1
"+r"(dst_a), // %2
"+r"(dst_b), // %3
"+r"(width) // %4
: "r"((ptrdiff_t)src_stride), // %5
"r"((ptrdiff_t)dst_stride_a), // %6
"r"((ptrdiff_t)dst_stride_b), // %7
"r"(&kVTbl4x4TransposeDi) // %8
: "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16",
"v17", "v18", "v19", "v20", "v21", "v22", "v23", "v30", "v31");
}

View File

@ -1036,6 +1036,9 @@ ANY11(ARGBToYRow_Any_NEON, ARGBToYRow_NEON, 0, 4, 1, 15)
#ifdef HAS_ARGBTOYROW_MSA
ANY11(ARGBToYRow_Any_MSA, ARGBToYRow_MSA, 0, 4, 1, 15)
#endif
#ifdef HAS_ARGBTOYROW_LSX
ANY11(ARGBToYRow_Any_LSX, ARGBToYRow_LSX, 0, 4, 1, 15)
#endif
#ifdef HAS_ARGBTOYROW_LASX
ANY11(ARGBToYRow_Any_LASX, ARGBToYRow_LASX, 0, 4, 1, 31)
#endif
@ -1054,9 +1057,21 @@ ANY11(ARGBToYJRow_Any_MSA, ARGBToYJRow_MSA, 0, 4, 1, 15)
#ifdef HAS_ARGBTOYJROW_LSX
ANY11(ARGBToYJRow_Any_LSX, ARGBToYJRow_LSX, 0, 4, 1, 15)
#endif
#ifdef HAS_RGBATOYJROW_LSX
ANY11(RGBAToYJRow_Any_LSX, RGBAToYJRow_LSX, 0, 4, 1, 15)
#endif
#ifdef HAS_ABGRTOYJROW_LSX
ANY11(ABGRToYJRow_Any_LSX, ABGRToYJRow_LSX, 0, 4, 1, 15)
#endif
#ifdef HAS_RGBATOYJROW_LASX
ANY11(RGBAToYJRow_Any_LASX, RGBAToYJRow_LASX, 0, 4, 1, 31)
#endif
#ifdef HAS_ARGBTOYJROW_LASX
ANY11(ARGBToYJRow_Any_LASX, ARGBToYJRow_LASX, 0, 4, 1, 31)
#endif
#ifdef HAS_ABGRTOYJROW_LASX
ANY11(ABGRToYJRow_Any_LASX, ABGRToYJRow_LASX, 0, 4, 1, 31)
#endif
#ifdef HAS_BGRATOYROW_NEON
ANY11(BGRAToYRow_Any_NEON, BGRAToYRow_NEON, 0, 4, 1, 15)
#endif
@ -1066,6 +1081,9 @@ ANY11(BGRAToYRow_Any_MSA, BGRAToYRow_MSA, 0, 4, 1, 15)
#ifdef HAS_BGRATOYROW_LSX
ANY11(BGRAToYRow_Any_LSX, BGRAToYRow_LSX, 0, 4, 1, 15)
#endif
#ifdef HAS_BGRATOYROW_LASX
ANY11(BGRAToYRow_Any_LASX, BGRAToYRow_LASX, 0, 4, 1, 31)
#endif
#ifdef HAS_ABGRTOYROW_NEON
ANY11(ABGRToYRow_Any_NEON, ABGRToYRow_NEON, 0, 4, 1, 15)
#endif
@ -1075,6 +1093,9 @@ ANY11(ABGRToYRow_Any_MSA, ABGRToYRow_MSA, 0, 4, 1, 7)
#ifdef HAS_ABGRTOYROW_LSX
ANY11(ABGRToYRow_Any_LSX, ABGRToYRow_LSX, 0, 4, 1, 15)
#endif
#ifdef HAS_ABGRTOYROW_LASX
ANY11(ABGRToYRow_Any_LASX, ABGRToYRow_LASX, 0, 4, 1, 31)
#endif
#ifdef HAS_RGBATOYROW_NEON
ANY11(RGBAToYRow_Any_NEON, RGBAToYRow_NEON, 0, 4, 1, 15)
#endif
@ -1084,6 +1105,9 @@ ANY11(RGBAToYRow_Any_MSA, RGBAToYRow_MSA, 0, 4, 1, 15)
#ifdef HAS_RGBATOYROW_LSX
ANY11(RGBAToYRow_Any_LSX, RGBAToYRow_LSX, 0, 4, 1, 15)
#endif
#ifdef HAS_RGBATOYROW_LASX
ANY11(RGBAToYRow_Any_LASX, RGBAToYRow_LASX, 0, 4, 1, 31)
#endif
#ifdef HAS_RGB24TOYROW_NEON
ANY11(RGB24ToYRow_Any_NEON, RGB24ToYRow_NEON, 0, 3, 1, 15)
#endif
@ -1102,6 +1126,12 @@ ANY11(RGB24ToYRow_Any_MSA, RGB24ToYRow_MSA, 0, 3, 1, 15)
#ifdef HAS_RGB24TOYROW_LSX
ANY11(RGB24ToYRow_Any_LSX, RGB24ToYRow_LSX, 0, 3, 1, 15)
#endif
#ifdef HAS_RGB24TOYJROW_LSX
ANY11(RGB24ToYJRow_Any_LSX, RGB24ToYJRow_LSX, 0, 3, 1, 15)
#endif
#ifdef HAS_RGB24TOYJROW_LASX
ANY11(RGB24ToYJRow_Any_LASX, RGB24ToYJRow_LASX, 0, 3, 1, 31)
#endif
#ifdef HAS_RGB24TOYROW_LASX
ANY11(RGB24ToYRow_Any_LASX, RGB24ToYRow_LASX, 0, 3, 1, 31)
#endif
@ -1126,6 +1156,12 @@ ANY11(RAWToYRow_Any_LSX, RAWToYRow_LSX, 0, 3, 1, 15)
#ifdef HAS_RAWTOYROW_LASX
ANY11(RAWToYRow_Any_LASX, RAWToYRow_LASX, 0, 3, 1, 31)
#endif
#ifdef HAS_RAWTOYJROW_LSX
ANY11(RAWToYJRow_Any_LSX, RAWToYJRow_LSX, 0, 3, 1, 15)
#endif
#ifdef HAS_RAWTOYJROW_LASX
ANY11(RAWToYJRow_Any_LASX, RAWToYJRow_LASX, 0, 3, 1, 31)
#endif
#ifdef HAS_RGB565TOYROW_NEON
ANY11(RGB565ToYRow_Any_NEON, RGB565ToYRow_NEON, 0, 2, 1, 7)
#endif

File diff suppressed because it is too large Load Diff

View File

@ -775,40 +775,6 @@ void UYVYToUV422Row_LASX(const uint8_t* src_uyvy,
}
}
// Convert 32 ARGB pixels per iteration to 8-bit luma with LASX intrinsics.
// The per-channel weights 0x19/0x81/0x42 (25/129/66) plus bias 0x1080 are
// libyuv's BT.601 ("I601") Y constants. Only full 32-pixel groups are
// processed; any width % 32 remainder is left to the caller — presumably
// the Any-row wrapper. TODO confirm.
void ARGBToYRow_LASX(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
  int x;
  int len = width / 32;  // Number of full 32-pixel (128-byte) groups.
  __m256i src0, src1, src2, src3, vec0, vec1, vec2, vec3;
  __m256i tmp0, tmp1, dst0;
  __m256i const_19 = __lasx_xvldi(0x19);
  __m256i const_42 = __lasx_xvldi(0x42);
  __m256i const_81 = __lasx_xvldi(0x81);
  __m256i const_1080 = {0x1080108010801080, 0x1080108010801080,
                        0x1080108010801080, 0x1080108010801080};
  // Word permutation applied after the narrowing step to restore pixel order.
  __m256i control = {0x0000000400000000, 0x0000000500000001, 0x0000000600000002,
                     0x0000000700000003};
  for (x = 0; x < len; x++) {
    // Load 128 bytes = 32 ARGB pixels.
    DUP4_ARG2(__lasx_xvld, src_argb0, 0, src_argb0, 32, src_argb0, 64,
              src_argb0, 96, src0, src1, src2, src3);
    vec0 = __lasx_xvpickev_b(src1, src0);  // even-indexed bytes
    vec1 = __lasx_xvpickev_b(src3, src2);
    vec2 = __lasx_xvpickod_b(src1, src0);  // odd-indexed bytes
    vec3 = __lasx_xvpickod_b(src3, src2);
    // Accumulate bias + weighted channels into 16-bit lanes.
    tmp0 = __lasx_xvmaddwev_h_bu(const_1080, vec0, const_19);
    tmp1 = __lasx_xvmaddwev_h_bu(const_1080, vec1, const_19);
    tmp0 = __lasx_xvmaddwev_h_bu(tmp0, vec2, const_81);
    tmp1 = __lasx_xvmaddwev_h_bu(tmp1, vec3, const_81);
    tmp0 = __lasx_xvmaddwod_h_bu(tmp0, vec0, const_42);
    tmp1 = __lasx_xvmaddwod_h_bu(tmp1, vec1, const_42);
    // Saturating narrow, taking the high byte of each 16-bit sum.
    dst0 = __lasx_xvssrani_b_h(tmp1, tmp0, 8);
    dst0 = __lasx_xvperm_w(dst0, control);
    __lasx_xvst(dst0, dst_y, 0);
    src_argb0 += 128;
    dst_y += 32;
  }
}
void ARGBToUVRow_LASX(const uint8_t* src_argb0,
int src_stride_argb,
uint8_t* dst_u,
@ -1811,48 +1777,6 @@ void RGB565ToUVRow_LASX(const uint8_t* src_rgb565,
}
}
// Convert 32 packed 24-bit RGB pixels per iteration to 8-bit luma (LASX).
// const_br holds paired weights 0x42/0x19 (66/25) and const_129 the G
// weight, with bias 0x1080 — libyuv's BT.601 constants. Only full
// 32-pixel (96-byte) groups are processed.
void RGB24ToYRow_LASX(const uint8_t* src_rgb24, uint8_t* dst_y, int width) {
  int x;
  int len = width / 32;
  __m256i src0, src1, src2;
  __m256i tmp0, tmp1, tmp2, tmp3;
  __m256i reg0, reg1, reg2, dst0;
  __m256i const_129 = __lasx_xvldi(129);
  __m256i const_br = {0x4219421942194219, 0x4219421942194219,
                      0x4219421942194219, 0x4219421942194219};
  __m256i const_1080 = {0x1080108010801080, 0x1080108010801080,
                        0x1080108010801080, 0x1080108010801080};
  // Byte shuffles that regroup the 3-byte pixels into channel-separable lanes.
  __m256i shuff0 = {0x0B09080605030200, 0x17151412110F0E0C, 0x0B09080605030200,
                    0x17151412110F0E0C};
  __m256i shuff1 = {0x0301001E1D1B1A18, 0x0F0D0C0A09070604, 0x0301001E1D1B1A18,
                    0x0F0D0C0A09070604};
  __m256i shuff2 = {0x000A000700040001, 0x001600130010000D, 0x000A000700040001,
                    0x001600130010000D};
  __m256i shuff3 = {0x0002001F001C0019, 0x000E000B00080005, 0x0002001F001C0019,
                    0x000E000B00080005};
  for (x = 0; x < len; x++) {
    // Load 96 bytes = 32 RGB24 pixels, then realign the 128-bit halves.
    reg0 = __lasx_xvld(src_rgb24, 0);
    reg1 = __lasx_xvld(src_rgb24, 32);
    reg2 = __lasx_xvld(src_rgb24, 64);
    src0 = __lasx_xvpermi_q(reg1, reg0, 0x30);
    src1 = __lasx_xvpermi_q(reg2, reg0, 0x21);
    src2 = __lasx_xvpermi_q(reg2, reg1, 0x30);
    tmp0 = __lasx_xvshuf_b(src1, src0, shuff0);
    tmp1 = __lasx_xvshuf_b(src1, src2, shuff1);
    tmp2 = __lasx_xvshuf_b(src1, src0, shuff2);
    tmp3 = __lasx_xvshuf_b(src1, src2, shuff3);
    // bias + G*129, then dot-product-accumulate the paired B/R weights.
    reg0 = __lasx_xvmaddwev_h_bu(const_1080, tmp2, const_129);
    reg1 = __lasx_xvmaddwev_h_bu(const_1080, tmp3, const_129);
    reg0 = __lasx_xvdp2add_h_bu(reg0, const_br, tmp0);
    reg1 = __lasx_xvdp2add_h_bu(reg1, const_br, tmp1);
    dst0 = __lasx_xvpickod_b(reg1, reg0);  // take high byte of each sum
    __lasx_xvst(dst0, dst_y, 0);
    dst_y += 32;
    src_rgb24 += 96;
  }
}
void RGB24ToUVRow_LASX(const uint8_t* src_rgb24,
int src_stride_rgb24,
uint8_t* dst_u,
@ -1916,48 +1840,6 @@ void RGB24ToUVRow_LASX(const uint8_t* src_rgb24,
}
}
// Convert 32 packed RAW (byte-reversed RGB24) pixels per iteration to 8-bit
// luma (LASX). Identical structure to RGB24ToYRow_LASX; only const_br
// differs (0x19/0x42 swapped), matching the reversed B/R byte order.
void RAWToYRow_LASX(const uint8_t* src_raw, uint8_t* dst_y, int width) {
  int x;
  int len = width / 32;
  __m256i src0, src1, src2;
  __m256i tmp0, tmp1, tmp2, tmp3;
  __m256i reg0, reg1, reg2, dst0;
  __m256i const_129 = __lasx_xvldi(129);
  __m256i const_br = {0x1942194219421942, 0x1942194219421942,
                      0x1942194219421942, 0x1942194219421942};
  __m256i const_1080 = {0x1080108010801080, 0x1080108010801080,
                        0x1080108010801080, 0x1080108010801080};
  // Byte shuffles that regroup 3-byte pixels into channel-separable lanes.
  __m256i shuff0 = {0x0B09080605030200, 0x17151412110F0E0C, 0x0B09080605030200,
                    0x17151412110F0E0C};
  __m256i shuff1 = {0x0301001E1D1B1A18, 0x0F0D0C0A09070604, 0x0301001E1D1B1A18,
                    0x0F0D0C0A09070604};
  __m256i shuff2 = {0x000A000700040001, 0x001600130010000D, 0x000A000700040001,
                    0x001600130010000D};
  __m256i shuff3 = {0x0002001F001C0019, 0x000E000B00080005, 0x0002001F001C0019,
                    0x000E000B00080005};
  for (x = 0; x < len; x++) {
    // Load 96 bytes = 32 RAW pixels, then realign the 128-bit halves.
    reg0 = __lasx_xvld(src_raw, 0);
    reg1 = __lasx_xvld(src_raw, 32);
    reg2 = __lasx_xvld(src_raw, 64);
    src0 = __lasx_xvpermi_q(reg1, reg0, 0x30);
    src1 = __lasx_xvpermi_q(reg2, reg0, 0x21);
    src2 = __lasx_xvpermi_q(reg2, reg1, 0x30);
    tmp0 = __lasx_xvshuf_b(src1, src0, shuff0);
    tmp1 = __lasx_xvshuf_b(src1, src2, shuff1);
    tmp2 = __lasx_xvshuf_b(src1, src0, shuff2);
    tmp3 = __lasx_xvshuf_b(src1, src2, shuff3);
    reg0 = __lasx_xvmaddwev_h_bu(const_1080, tmp2, const_129);
    reg1 = __lasx_xvmaddwev_h_bu(const_1080, tmp3, const_129);
    reg0 = __lasx_xvdp2add_h_bu(reg0, const_br, tmp0);
    reg1 = __lasx_xvdp2add_h_bu(reg1, const_br, tmp1);
    dst0 = __lasx_xvpickod_b(reg1, reg0);  // take high byte of each sum
    __lasx_xvst(dst0, dst_y, 0);
    dst_y += 32;
    src_raw += 96;
  }
}
void RAWToUVRow_LASX(const uint8_t* src_raw,
int src_stride_raw,
uint8_t* dst_u,
@ -2118,36 +2000,226 @@ void NV21ToARGBRow_LASX(const uint8_t* src_y,
}
}
void ARGBToYJRow_LASX(const uint8_t* src_argb, uint8_t* dst_y, int width) {
int x;
int len = width / 32;
__m256i src0, src1, src2, src3, dst0;
__m256i tmp0, tmp1, tmp2, tmp3;
__m256i reg0, reg1;
__m256i const_128 = __lasx_xvldi(0x480);
__m256i const_150 = __lasx_xvldi(0x96);
__m256i const_br = {0x4D1D4D1D4D1D4D1D, 0x4D1D4D1D4D1D4D1D,
0x4D1D4D1D4D1D4D1D, 0x4D1D4D1D4D1D4D1D};
__m256i shuff = {0x0000000400000000, 0x0000000500000001, 0x0000000600000002,
0x0000000700000003};
struct RgbConstants {
uint8_t kRGBToY[4];
uint16_t kAddY;
uint16_t pad;
};
for (x = 0; x < len; x++) {
DUP4_ARG2(__lasx_xvld, src_argb, 0, src_argb, 32, src_argb, 64, src_argb,
96, src0, src1, src2, src3);
tmp0 = __lasx_xvpickev_b(src1, src0);
tmp1 = __lasx_xvpickod_b(src1, src0);
tmp2 = __lasx_xvpickev_b(src3, src2);
tmp3 = __lasx_xvpickod_b(src3, src2);
reg0 = __lasx_xvmaddwev_h_bu(const_128, tmp1, const_150);
reg1 = __lasx_xvmaddwev_h_bu(const_128, tmp3, const_150);
reg0 = __lasx_xvdp2add_h_bu(reg0, const_br, tmp0);
reg1 = __lasx_xvdp2add_h_bu(reg1, const_br, tmp2);
dst0 = __lasx_xvpickod_b(reg1, reg0);
dst0 = __lasx_xvperm_w(dst0, shuff);
__lasx_xvst(dst0, dst_y, 0);
dst_y += 32;
src_argb += 128;
}
// RGB to JPeg coefficients
// B * 0.1140 coefficient = 29
// G * 0.5870 coefficient = 150
// R * 0.2990 coefficient = 77
// Add 0.5 = 0x80
static const struct RgbConstants kRgb24JPEGConstants = {{29, 150, 77, 0},
128,
0};
static const struct RgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, 128, 0};
// RGB to BT.601 coefficients
// B * 0.1016 coefficient = 25
// G * 0.5078 coefficient = 129
// R * 0.2578 coefficient = 66
// Add 16.5 = 0x1080
static const struct RgbConstants kRgb24I601Constants = {{25, 129, 66, 0},
0x1080,
0};
static const struct RgbConstants kRawI601Constants = {{66, 129, 25, 0},
0x1080,
0};
// ARGB expects first 3 values to contain RGB and 4th value is ignored.
// Shared LASX kernel: converts 32 4-byte pixels per loop to luma using the
// byte weights and 16-bit bias from *rgbconstants.
// NOTE(review): the loop decrements width by 32 and branches on non-zero, so
// width must be a positive multiple of 32 — confirm callers guarantee this.
// NOTE(review): $xr0-$xr20 are written but only "memory" is in the clobber
// list — verify the vector registers need not be declared clobbered here.
static void ARGBToYMatrixRow_LASX(const uint8_t* src_argb,
                                  uint8_t* dst_y,
                                  int width,
                                  const struct RgbConstants* rgbconstants) {
  // Word-permute indices used to restore pixel order after narrowing.
  int32_t shuff[8] = {0, 4, 1, 5, 2, 6, 3, 7};
  asm volatile(
      "xvldrepl.b $xr0, %3, 0 \n\t" // load rgbconstants
      "xvldrepl.b $xr1, %3, 1 \n\t" // load rgbconstants
      "xvldrepl.b $xr2, %3, 2 \n\t" // load rgbconstants
      "xvldrepl.h $xr3, %3, 4 \n\t" // load rgbconstants
      "xvld $xr20, %4, 0 \n\t" // load shuff
      "1: \n\t"
      "xvld $xr4, %0, 0 \n\t"
      "xvld $xr5, %0, 32 \n\t"
      "xvld $xr6, %0, 64 \n\t"
      "xvld $xr7, %0, 96 \n\t" // load 32 pixels of ARGB
      "xvor.v $xr12, $xr3, $xr3 \n\t"
      "xvor.v $xr13, $xr3, $xr3 \n\t"
      "addi.d %2, %2, -32 \n\t" // 32 processed per loop.
      "xvpickev.b $xr8, $xr5, $xr4 \n\t" //BR
      "xvpickev.b $xr10, $xr7, $xr6 \n\t"
      "xvpickod.b $xr9, $xr5, $xr4 \n\t" //GA
      "xvpickod.b $xr11, $xr7, $xr6 \n\t"
      "xvmaddwev.h.bu $xr12, $xr8, $xr0 \n\t" //B
      "xvmaddwev.h.bu $xr13, $xr10, $xr0 \n\t"
      "xvmaddwev.h.bu $xr12, $xr9, $xr1 \n\t" //G
      "xvmaddwev.h.bu $xr13, $xr11, $xr1 \n\t"
      "xvmaddwod.h.bu $xr12, $xr8, $xr2 \n\t" //R
      "xvmaddwod.h.bu $xr13, $xr10, $xr2 \n\t"
      "addi.d %0, %0, 128 \n\t"
      "xvpickod.b $xr10, $xr13, $xr12 \n\t"
      "xvperm.w $xr11, $xr10, $xr20 \n\t"
      "xvst $xr11, %1, 0 \n\t"
      "addi.d %1, %1, 32 \n\t"
      "bnez %2, 1b \n\t"
      : "+&r"(src_argb), // %0
        "+&r"(dst_y), // %1
        "+&r"(width) // %2
      : "r"(rgbconstants),
        "r"(shuff)
      : "memory"
  );
}
// Thin entry points: select a coefficient table and delegate to the shared
// LASX matrix kernel. ABGR reuses the ARGB kernel by passing the table with
// the B and R weights swapped ("Raw" constants).
void ARGBToYRow_LASX(const uint8_t* src_argb, uint8_t* dst_y, int width) {
  ARGBToYMatrixRow_LASX(src_argb, dst_y, width, &kRgb24I601Constants);
}
// JPEG (full-range) luma variant.
void ARGBToYJRow_LASX(const uint8_t* src_argb, uint8_t* dst_yj, int width) {
  ARGBToYMatrixRow_LASX(src_argb, dst_yj, width, &kRgb24JPEGConstants);
}
void ABGRToYRow_LASX(const uint8_t* src_abgr, uint8_t* dst_y, int width) {
  ARGBToYMatrixRow_LASX(src_abgr, dst_y, width, &kRawI601Constants);
}
void ABGRToYJRow_LASX(const uint8_t* src_abgr, uint8_t* dst_yj, int width) {
  ARGBToYMatrixRow_LASX(src_abgr, dst_yj, width, &kRawJPEGConstants);
}
// RGBA expects first value to be A and ignored, then 3 values to contain RGB.
// Same code as ARGB, except the LD4
// Shared LASX kernel for A-first 4-byte layouts: the even/odd byte picks are
// swapped relative to ARGBToYMatrixRow_LASX so the weights land on R/G/B.
// NOTE(review): width must be a positive multiple of 32 (decrement + bnez),
// and the used vector registers are not in the clobber list — confirm both.
static void RGBAToYMatrixRow_LASX(const uint8_t* src_rgba,
                                  uint8_t* dst_y,
                                  int width,
                                  const struct RgbConstants* rgbconstants) {
  // Word-permute indices used to restore pixel order after narrowing.
  int32_t shuff[8] = {0, 4, 1, 5, 2, 6, 3, 7};
  asm volatile(
      "xvldrepl.b $xr0, %3, 0 \n\t" // load rgbconstants
      "xvldrepl.b $xr1, %3, 1 \n\t" // load rgbconstants
      "xvldrepl.b $xr2, %3, 2 \n\t" // load rgbconstants
      "xvldrepl.h $xr3, %3, 4 \n\t" // load rgbconstants
      "xvld $xr20, %4, 0 \n\t" // load shuff
      "1: \n\t"
      "xvld $xr4, %0, 0 \n\t"
      "xvld $xr5, %0, 32 \n\t"
      "xvld $xr6, %0, 64 \n\t"
      "xvld $xr7, %0, 96 \n\t" // load 32 pixels of RGBA
      "xvor.v $xr12, $xr3, $xr3 \n\t"
      "xvor.v $xr13, $xr3, $xr3 \n\t"
      "addi.d %2, %2, -32 \n\t" // 32 processed per loop.
      "xvpickev.b $xr8, $xr5, $xr4 \n\t" //AG
      "xvpickev.b $xr10, $xr7, $xr6 \n\t"
      "xvpickod.b $xr9, $xr5, $xr4 \n\t" //BR
      "xvpickod.b $xr11, $xr7, $xr6 \n\t"
      "xvmaddwev.h.bu $xr12, $xr9, $xr0 \n\t" //B
      "xvmaddwev.h.bu $xr13, $xr11, $xr0 \n\t"
      "xvmaddwod.h.bu $xr12, $xr8, $xr1 \n\t" //G
      "xvmaddwod.h.bu $xr13, $xr10, $xr1 \n\t"
      "xvmaddwod.h.bu $xr12, $xr9, $xr2 \n\t" //R
      "xvmaddwod.h.bu $xr13, $xr11, $xr2 \n\t"
      "addi.d %0, %0, 128 \n\t"
      "xvpickod.b $xr10, $xr13, $xr12 \n\t"
      "xvperm.w $xr11, $xr10, $xr20 \n\t"
      "xvst $xr11, %1, 0 \n\t"
      "addi.d %1, %1, 32 \n\t"
      "bnez %2, 1b \n\t"
      : "+&r"(src_rgba), // %0
        "+&r"(dst_y), // %1
        "+&r"(width) // %2
      : "r"(rgbconstants),
        "r"(shuff)
      : "memory"
  );
}
// Thin entry points for A-first layouts; BGRA reuses the RGBA kernel with
// the B/R-swapped ("Raw") coefficient table.
void RGBAToYRow_LASX(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
  RGBAToYMatrixRow_LASX(src_rgba, dst_y, width, &kRgb24I601Constants);
}
// JPEG (full-range) luma variant.
void RGBAToYJRow_LASX(const uint8_t* src_rgba, uint8_t* dst_yj, int width) {
  RGBAToYMatrixRow_LASX(src_rgba, dst_yj, width, &kRgb24JPEGConstants);
}
void BGRAToYRow_LASX(const uint8_t* src_bgra, uint8_t* dst_y, int width) {
  RGBAToYMatrixRow_LASX(src_bgra, dst_y, width, &kRawI601Constants);
}
// Shared LASX kernel for packed 3-byte layouts (RGB24/RAW): shuffle tables
// split the 3-byte pixels into channel lanes before applying *rgbconstants.
// NOTE(review): width must be a positive multiple of 32 (decrement + bnez),
// and the used vector registers are not in the clobber list — confirm both.
static void RGBToYMatrixRow_LASX(const uint8_t* src_rgba,
                                 uint8_t* dst_y,
                                 int width,
                                 const struct RgbConstants* rgbconstants) {
  // Four 32-byte shuffle tables loaded into $xr4-$xr7 below.
  int8_t shuff[128] = {0, 2, 3, 5, 6, 8, 9, 11, 12, 14, 15, 17, 18, 20, 21, 23,
                       0, 2, 3, 5, 6, 8, 9, 11, 12, 14, 15, 17, 18, 20, 21, 23,
                       24, 26, 27, 29, 30, 0, 1, 3, 4, 6, 7, 9, 10, 12, 13, 15,
                       24, 26, 27, 29, 30, 0, 1, 3, 4, 6, 7, 9, 10, 12, 13, 15,
                       1, 0, 4, 0, 7, 0, 10, 0, 13, 0, 16, 0, 19, 0, 22, 0,
                       1, 0, 4, 0, 7, 0, 10, 0, 13, 0, 16, 0, 19, 0, 22, 0,
                       25, 0, 28, 0, 31, 0, 2, 0, 5, 0, 8, 0, 11, 0, 14, 0,
                       25, 0, 28, 0, 31, 0, 2, 0, 5, 0, 8, 0, 11, 0, 14, 0};
  asm volatile(
      "xvldrepl.b $xr0, %3, 0 \n\t" // load rgbconstants
      "xvldrepl.b $xr1, %3, 1 \n\t" // load rgbconstants
      "xvldrepl.b $xr2, %3, 2 \n\t" // load rgbconstants
      "xvldrepl.h $xr3, %3, 4 \n\t" // load rgbconstants
      "xvld $xr4, %4, 0 \n\t" // load shuff
      "xvld $xr5, %4, 32 \n\t"
      "xvld $xr6, %4, 64 \n\t"
      "xvld $xr7, %4, 96 \n\t"
      "1: \n\t"
      "xvld $xr8, %0, 0 \n\t"
      "xvld $xr9, %0, 32 \n\t"
      "xvld $xr10, %0, 64 \n\t" // load 32 pixels of RGB
      "xvor.v $xr12, $xr3, $xr3 \n\t"
      "xvor.v $xr13, $xr3, $xr3 \n\t"
      "xvor.v $xr11, $xr9, $xr9 \n\t"
      "addi.d %2, %2, -32 \n\t" // 32 processed per loop.
      "xvpermi.q $xr9, $xr8, 0x30 \n\t" //src0
      "xvpermi.q $xr8, $xr10, 0x03 \n\t" //src1
      "xvpermi.q $xr10, $xr11, 0x30 \n\t" //src2
      "xvshuf.b $xr14, $xr8, $xr9, $xr4 \n\t"
      "xvshuf.b $xr15, $xr8, $xr10, $xr5 \n\t"
      "xvshuf.b $xr16, $xr8, $xr9, $xr6 \n\t"
      "xvshuf.b $xr17, $xr8, $xr10, $xr7 \n\t"
      "xvmaddwev.h.bu $xr12, $xr16, $xr1 \n\t" //G
      "xvmaddwev.h.bu $xr13, $xr17, $xr1 \n\t"
      "xvmaddwev.h.bu $xr12, $xr14, $xr0 \n\t" //B
      "xvmaddwev.h.bu $xr13, $xr15, $xr0 \n\t"
      "xvmaddwod.h.bu $xr12, $xr14, $xr2 \n\t" //R
      "xvmaddwod.h.bu $xr13, $xr15, $xr2 \n\t"
      "addi.d %0, %0, 96 \n\t"
      "xvpickod.b $xr10, $xr13, $xr12 \n\t"
      "xvst $xr10, %1, 0 \n\t"
      "addi.d %1, %1, 32 \n\t"
      "bnez %2, 1b \n\t"
      : "+&r"(src_rgba), // %0
        "+&r"(dst_y), // %1
        "+&r"(width) // %2
      : "r"(rgbconstants), // %3
        "r"(shuff) // %4
      : "memory"
  );
}
// Thin entry points for packed 3-byte layouts; RAW reuses the RGB24 kernel
// with the B/R-swapped ("Raw") coefficient tables.
void RGB24ToYJRow_LASX(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) {
  RGBToYMatrixRow_LASX(src_rgb24, dst_yj, width, &kRgb24JPEGConstants);
}
void RAWToYJRow_LASX(const uint8_t* src_raw, uint8_t* dst_yj, int width) {
  RGBToYMatrixRow_LASX(src_raw, dst_yj, width, &kRawJPEGConstants);
}
// BT.601 (limited-range) luma variants.
void RGB24ToYRow_LASX(const uint8_t* src_rgb24, uint8_t* dst_y, int width) {
  RGBToYMatrixRow_LASX(src_rgb24, dst_y, width, &kRgb24I601Constants);
}
void RAWToYRow_LASX(const uint8_t* src_raw, uint8_t* dst_y, int width) {
  RGBToYMatrixRow_LASX(src_raw, dst_y, width, &kRawI601Constants);
}
void ARGBToUVJRow_LASX(const uint8_t* src_argb,

View File

@ -561,39 +561,6 @@ void RGB565ToUVRow_LSX(const uint8_t* src_rgb565,
}
}
// Convert 16 packed RGB24 pixels per iteration to 8-bit luma (LSX, 128-bit).
// Weights: const_br pairs 0x42/0x19 (66/25), const_129 for G, bias 0x1080 —
// libyuv's BT.601 constants. Only full 16-pixel (48-byte) groups processed.
void RGB24ToYRow_LSX(const uint8_t* src_rgb24, uint8_t* dst_y, int width) {
  int x;
  int len = width / 16;
  __m128i src0, src1, src2;
  __m128i tmp0, tmp1, tmp2, tmp3;
  __m128i reg0, reg1, dst0;
  __m128i const_129 = __lsx_vldi(129);
  __m128i const_br = {0x4219421942194219, 0x4219421942194219};
  __m128i const_1080 = {0x1080108010801080, 0x1080108010801080};
  // Byte shuffles that regroup 3-byte pixels into channel-separable lanes.
  __m128i shuff0 = {0x0B09080605030200, 0x17151412110F0E0C};
  __m128i shuff1 = {0x0301001E1D1B1A18, 0x0F0D0C0A09070604};
  __m128i shuff2 = {0x000A000700040001, 0x001600130010000D};
  __m128i shuff3 = {0x0002001F001C0019, 0x000E000B00080005};
  for (x = 0; x < len; x++) {
    src0 = __lsx_vld(src_rgb24, 0);
    src1 = __lsx_vld(src_rgb24, 16);
    src2 = __lsx_vld(src_rgb24, 32);
    tmp0 = __lsx_vshuf_b(src1, src0, shuff0);
    tmp1 = __lsx_vshuf_b(src1, src2, shuff1);
    tmp2 = __lsx_vshuf_b(src1, src0, shuff2);
    tmp3 = __lsx_vshuf_b(src1, src2, shuff3);
    // bias + G*129, then dot-product-accumulate the paired B/R weights.
    reg0 = __lsx_vmaddwev_h_bu(const_1080, tmp2, const_129);
    reg1 = __lsx_vmaddwev_h_bu(const_1080, tmp3, const_129);
    reg0 = __lsx_vdp2add_h_bu(reg0, const_br, tmp0);
    reg1 = __lsx_vdp2add_h_bu(reg1, const_br, tmp1);
    dst0 = __lsx_vpickod_b(reg1, reg0);  // take high byte of each sum
    __lsx_vst(dst0, dst_y, 0);
    dst_y += 16;
    src_rgb24 += 48;
  }
}
void RGB24ToUVRow_LSX(const uint8_t* src_rgb24,
int src_stride_rgb24,
uint8_t* dst_u,
@ -647,39 +614,6 @@ void RGB24ToUVRow_LSX(const uint8_t* src_rgb24,
}
}
// Convert 16 packed RAW (byte-reversed RGB24) pixels per iteration to 8-bit
// luma (LSX). Same structure as RGB24ToYRow_LSX with const_br swapped
// (0x19/0x42) for the reversed B/R order; the final narrow here uses
// vsrlni (logical shift) rather than vpickod.
void RAWToYRow_LSX(const uint8_t* src_raw, uint8_t* dst_y, int width) {
  int x;
  int len = width / 16;
  __m128i src0, src1, src2;
  __m128i tmp0, tmp1, tmp2, tmp3;
  __m128i reg0, reg1, dst0;
  __m128i const_129 = __lsx_vldi(129);
  __m128i const_br = {0x1942194219421942, 0x1942194219421942};
  __m128i const_1080 = {0x1080108010801080, 0x1080108010801080};
  // Byte shuffles that regroup 3-byte pixels into channel-separable lanes.
  __m128i shuff0 = {0x0B09080605030200, 0x17151412110F0E0C};
  __m128i shuff1 = {0x0301001E1D1B1A18, 0x0F0D0C0A09070604};
  __m128i shuff2 = {0x000A000700040001, 0x001600130010000D};
  __m128i shuff3 = {0x0002001F001C0019, 0x000E000B00080005};
  for (x = 0; x < len; x++) {
    src0 = __lsx_vld(src_raw, 0);
    src1 = __lsx_vld(src_raw, 16);
    src2 = __lsx_vld(src_raw, 32);
    tmp0 = __lsx_vshuf_b(src1, src0, shuff0);
    tmp1 = __lsx_vshuf_b(src1, src2, shuff1);
    tmp2 = __lsx_vshuf_b(src1, src0, shuff2);
    tmp3 = __lsx_vshuf_b(src1, src2, shuff3);
    reg0 = __lsx_vmaddwev_h_bu(const_1080, tmp2, const_129);
    reg1 = __lsx_vmaddwev_h_bu(const_1080, tmp3, const_129);
    reg0 = __lsx_vdp2add_h_bu(reg0, const_br, tmp0);
    reg1 = __lsx_vdp2add_h_bu(reg1, const_br, tmp1);
    dst0 = __lsx_vsrlni_b_h(reg1, reg0, 8);  // narrow: take high byte
    __lsx_vst(dst0, dst_y, 0);
    dst_y += 16;
    src_raw += 48;
  }
}
void RAWToUVRow_LSX(const uint8_t* src_raw,
int src_stride_raw,
uint8_t* dst_u,
@ -914,62 +848,6 @@ void SobelXYRow_LSX(const uint8_t* src_sobelx,
}
}
// Convert 16 ARGB pixels per iteration to JPEG (full-range) luma (LSX).
// Weights: const_br pairs 0x4D/0x1D (77/29), const_150 for G, bias 0x80
// (vldi 0x480) — libyuv's JPEG constants; no 16.5 offset.
void ARGBToYJRow_LSX(const uint8_t* src_argb, uint8_t* dst_y, int width) {
  int x;
  int len = width / 16;
  __m128i src0, src1, src2, src3, dst0;
  __m128i tmp0, tmp1, tmp2, tmp3;
  __m128i reg0, reg1;
  __m128i const_128 = __lsx_vldi(0x480);
  __m128i const_150 = __lsx_vldi(0x96);
  __m128i const_br = {0x4D1D4D1D4D1D4D1D, 0x4D1D4D1D4D1D4D1D};
  for (x = 0; x < len; x++) {
    // Load 64 bytes = 16 ARGB pixels.
    DUP4_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src_argb, 32, src_argb, 48,
              src0, src1, src2, src3);
    tmp0 = __lsx_vpickev_b(src1, src0);  // even-indexed bytes
    tmp1 = __lsx_vpickod_b(src1, src0);  // odd-indexed bytes
    tmp2 = __lsx_vpickev_b(src3, src2);
    tmp3 = __lsx_vpickod_b(src3, src2);
    reg0 = __lsx_vmaddwev_h_bu(const_128, tmp1, const_150);
    reg1 = __lsx_vmaddwev_h_bu(const_128, tmp3, const_150);
    reg0 = __lsx_vdp2add_h_bu(reg0, const_br, tmp0);
    reg1 = __lsx_vdp2add_h_bu(reg1, const_br, tmp2);
    dst0 = __lsx_vpickod_b(reg1, reg0);  // take high byte of each sum
    __lsx_vst(dst0, dst_y, 0);
    dst_y += 16;
    src_argb += 64;
  }
}
// Convert 16 BGRA pixels per iteration to BT.601 luma (LSX).
// Weights 0x19/0x42 (25/66) paired in const_br, 129 for G, bias 0x1080.
// The odd/even picks differ from the ARGB variant to match BGRA byte order.
void BGRAToYRow_LSX(const uint8_t* src_bgra, uint8_t* dst_y, int width) {
  int x;
  int len = width / 16;
  __m128i src0, src1, src2, src3, dst0;
  __m128i tmp0, tmp1, tmp2, tmp3;
  __m128i reg0, reg1;
  __m128i const_129 = __lsx_vldi(0x81);
  __m128i const_br = {0x1942194219421942, 0x1942194219421942};
  __m128i const_1080 = {0x1080108010801080, 0x1080108010801080};
  for (x = 0; x < len; x++) {
    // Load 64 bytes = 16 BGRA pixels.
    DUP4_ARG2(__lsx_vld, src_bgra, 0, src_bgra, 16, src_bgra, 32, src_bgra, 48,
              src0, src1, src2, src3);
    tmp0 = __lsx_vpickod_b(src1, src0);  // odd-indexed bytes
    tmp1 = __lsx_vpickev_b(src1, src0);  // even-indexed bytes
    tmp2 = __lsx_vpickod_b(src3, src2);
    tmp3 = __lsx_vpickev_b(src3, src2);
    reg0 = __lsx_vmaddwod_h_bu(const_1080, tmp1, const_129);
    reg1 = __lsx_vmaddwod_h_bu(const_1080, tmp3, const_129);
    reg0 = __lsx_vdp2add_h_bu(reg0, const_br, tmp0);
    reg1 = __lsx_vdp2add_h_bu(reg1, const_br, tmp2);
    dst0 = __lsx_vsrlni_b_h(reg1, reg0, 8);  // narrow: take high byte
    __lsx_vst(dst0, dst_y, 0);
    dst_y += 16;
    src_bgra += 64;
  }
}
void BGRAToUVRow_LSX(const uint8_t* src_bgra,
int src_stride_bgra,
uint8_t* dst_u,
@ -1018,34 +896,6 @@ void BGRAToUVRow_LSX(const uint8_t* src_bgra,
}
}
// Convert 16 ABGR pixels per iteration to BT.601 luma (LSX).
// Same weights as BGRA (25/66 paired, 129 for G, bias 0x1080) but with the
// even/odd byte picks arranged for the ABGR byte order.
void ABGRToYRow_LSX(const uint8_t* src_abgr, uint8_t* dst_y, int width) {
  int x;
  int len = width / 16;
  __m128i src0, src1, src2, src3, dst0;
  __m128i tmp0, tmp1, tmp2, tmp3;
  __m128i reg0, reg1;
  __m128i const_129 = __lsx_vldi(0x81);
  __m128i const_br = {0x1942194219421942, 0x1942194219421942};
  __m128i const_1080 = {0x1080108010801080, 0x1080108010801080};
  for (x = 0; x < len; x++) {
    // Load 64 bytes = 16 ABGR pixels.
    DUP4_ARG2(__lsx_vld, src_abgr, 0, src_abgr, 16, src_abgr, 32, src_abgr, 48,
              src0, src1, src2, src3);
    tmp0 = __lsx_vpickev_b(src1, src0);  // even-indexed bytes
    tmp1 = __lsx_vpickod_b(src1, src0);  // odd-indexed bytes
    tmp2 = __lsx_vpickev_b(src3, src2);
    tmp3 = __lsx_vpickod_b(src3, src2);
    reg0 = __lsx_vmaddwev_h_bu(const_1080, tmp1, const_129);
    reg1 = __lsx_vmaddwev_h_bu(const_1080, tmp3, const_129);
    reg0 = __lsx_vdp2add_h_bu(reg0, const_br, tmp0);
    reg1 = __lsx_vdp2add_h_bu(reg1, const_br, tmp2);
    dst0 = __lsx_vsrlni_b_h(reg1, reg0, 8);  // narrow: take high byte
    __lsx_vst(dst0, dst_y, 0);
    dst_y += 16;
    src_abgr += 64;
  }
}
void ABGRToUVRow_LSX(const uint8_t* src_abgr,
int src_stride_abgr,
uint8_t* dst_u,
@ -1094,34 +944,6 @@ void ABGRToUVRow_LSX(const uint8_t* src_abgr,
}
}
// Convert 16 RGBA pixels per iteration to BT.601 luma (LSX).
// Weights 0x42/0x19 (66/25) paired in const_br, 129 for G, bias 0x1080;
// odd/even picks skip the leading A byte of each RGBA pixel.
void RGBAToYRow_LSX(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
  int x;
  int len = width / 16;
  __m128i src0, src1, src2, src3, dst0;
  __m128i tmp0, tmp1, tmp2, tmp3;
  __m128i reg0, reg1;
  __m128i const_129 = __lsx_vldi(0x81);
  __m128i const_br = {0x4219421942194219, 0x4219421942194219};
  __m128i const_1080 = {0x1080108010801080, 0x1080108010801080};
  for (x = 0; x < len; x++) {
    // Load 64 bytes = 16 RGBA pixels.
    DUP4_ARG2(__lsx_vld, src_rgba, 0, src_rgba, 16, src_rgba, 32, src_rgba, 48,
              src0, src1, src2, src3);
    tmp0 = __lsx_vpickod_b(src1, src0);  // odd-indexed bytes
    tmp1 = __lsx_vpickev_b(src1, src0);  // even-indexed bytes
    tmp2 = __lsx_vpickod_b(src3, src2);
    tmp3 = __lsx_vpickev_b(src3, src2);
    reg0 = __lsx_vmaddwod_h_bu(const_1080, tmp1, const_129);
    reg1 = __lsx_vmaddwod_h_bu(const_1080, tmp3, const_129);
    reg0 = __lsx_vdp2add_h_bu(reg0, const_br, tmp0);
    reg1 = __lsx_vdp2add_h_bu(reg1, const_br, tmp2);
    dst0 = __lsx_vsrlni_b_h(reg1, reg0, 8);  // narrow: take high byte
    __lsx_vst(dst0, dst_y, 0);
    dst_y += 16;
    src_rgba += 64;
  }
}
void RGBAToUVRow_LSX(const uint8_t* src_rgba,
int src_stride_rgba,
uint8_t* dst_u,
@ -1821,6 +1643,212 @@ void HalfFloatRow_LSX(const uint16_t* src,
}
}
// Per-format luma coefficient set consumed by the *ToYMatrixRow kernels.
struct RgbConstants {
  uint8_t kRGBToY[4];  // byte weights, in memory order of the pixel format
  uint16_t kAddY;      // 16-bit rounding/range bias added before narrowing
  uint16_t pad;        // keeps the struct size/alignment stable
};

// RGB to JPeg coefficients
// B * 0.1140 coefficient = 29
// G * 0.5870 coefficient = 150
// R * 0.2990 coefficient = 77
// Add 0.5 = 0x80
static const struct RgbConstants kRgb24JPEGConstants = {{29, 150, 77, 0},
                                                        128,
                                                        0};

// Same JPEG weights with B and R swapped for reversed byte orders.
static const struct RgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, 128, 0};

// RGB to BT.601 coefficients
// B * 0.1016 coefficient = 25
// G * 0.5078 coefficient = 129
// R * 0.2578 coefficient = 66
// Add 16.5 = 0x1080
static const struct RgbConstants kRgb24I601Constants = {{25, 129, 66, 0},
                                                        0x1080,
                                                        0};

// Same BT.601 weights with B and R swapped for reversed byte orders.
static const struct RgbConstants kRawI601Constants = {{66, 129, 25, 0},
                                                      0x1080,
                                                      0};
// ARGB expects first 3 values to contain RGB and 4th value is ignored.
// Shared LSX kernel: converts 16 4-byte pixels per loop to luma using the
// byte weights and 16-bit bias from *rgbconstants.
// NOTE(review): the loop decrements width by 16 and branches on non-zero, so
// width must be a positive multiple of 16 — confirm callers guarantee this.
// NOTE(review): $vr0-$vr13 are written but only "memory" is in the clobber
// list — verify the vector registers need not be declared clobbered here.
static void ARGBToYMatrixRow_LSX(const uint8_t* src_argb,
                                 uint8_t* dst_y,
                                 int width,
                                 const struct RgbConstants* rgbconstants) {
  asm volatile(
      "vldrepl.b $vr0, %3, 0 \n\t" // load rgbconstants
      "vldrepl.b $vr1, %3, 1 \n\t" // load rgbconstants
      "vldrepl.b $vr2, %3, 2 \n\t" // load rgbconstants
      "vldrepl.h $vr3, %3, 4 \n\t" // load rgbconstants
      "1: \n\t"
      "vld $vr4, %0, 0 \n\t"
      "vld $vr5, %0, 16 \n\t"
      "vld $vr6, %0, 32 \n\t"
      "vld $vr7, %0, 48 \n\t" // load 16 pixels of ARGB
      "vor.v $vr12, $vr3, $vr3 \n\t"
      "vor.v $vr13, $vr3, $vr3 \n\t"
      "addi.d %2, %2, -16 \n\t" // 16 processed per loop.
      "vpickev.b $vr8, $vr5, $vr4 \n\t" //BR
      "vpickev.b $vr10, $vr7, $vr6 \n\t"
      "vpickod.b $vr9, $vr5, $vr4 \n\t" //GA
      "vpickod.b $vr11, $vr7, $vr6 \n\t"
      "vmaddwev.h.bu $vr12, $vr8, $vr0 \n\t" //B
      "vmaddwev.h.bu $vr13, $vr10, $vr0 \n\t"
      "vmaddwev.h.bu $vr12, $vr9, $vr1 \n\t" //G
      "vmaddwev.h.bu $vr13, $vr11, $vr1 \n\t"
      "vmaddwod.h.bu $vr12, $vr8, $vr2 \n\t" //R
      "vmaddwod.h.bu $vr13, $vr10, $vr2 \n\t"
      "addi.d %0, %0, 64 \n\t"
      "vpickod.b $vr10, $vr13, $vr12 \n\t"
      "vst $vr10, %1, 0 \n\t"
      "addi.d %1, %1, 16 \n\t"
      "bnez %2, 1b \n\t"
      : "+&r"(src_argb), // %0
        "+&r"(dst_y), // %1
        "+&r"(width) // %2
      : "r"(rgbconstants)
      : "memory"
  );
}
// Thin entry points: select a coefficient table and delegate to the shared
// LSX matrix kernel. ABGR reuses the ARGB kernel by passing the table with
// the B and R weights swapped ("Raw" constants).
void ARGBToYRow_LSX(const uint8_t* src_argb, uint8_t* dst_y, int width) {
  ARGBToYMatrixRow_LSX(src_argb, dst_y, width, &kRgb24I601Constants);
}
// JPEG (full-range) luma variant.
void ARGBToYJRow_LSX(const uint8_t* src_argb, uint8_t* dst_yj, int width) {
  ARGBToYMatrixRow_LSX(src_argb, dst_yj, width, &kRgb24JPEGConstants);
}
void ABGRToYRow_LSX(const uint8_t* src_abgr, uint8_t* dst_y, int width) {
  ARGBToYMatrixRow_LSX(src_abgr, dst_y, width, &kRawI601Constants);
}
void ABGRToYJRow_LSX(const uint8_t* src_abgr, uint8_t* dst_yj, int width) {
  ARGBToYMatrixRow_LSX(src_abgr, dst_yj, width, &kRawJPEGConstants);
}
// RGBA expects first value to be A and ignored, then 3 values to contain RGB.
// Same code as ARGB, except the LD4
// Shared LSX kernel for A-first 4-byte layouts: even/odd byte picks are
// swapped relative to ARGBToYMatrixRow_LSX so the weights land on R/G/B.
// NOTE(review): width must be a positive multiple of 16 (decrement + bnez),
// and the used vector registers are not in the clobber list — confirm both.
static void RGBAToYMatrixRow_LSX(const uint8_t* src_rgba,
                                 uint8_t* dst_y,
                                 int width,
                                 const struct RgbConstants* rgbconstants) {
  asm volatile(
      "vldrepl.b $vr0, %3, 0 \n\t" // load rgbconstants
      "vldrepl.b $vr1, %3, 1 \n\t" // load rgbconstants
      "vldrepl.b $vr2, %3, 2 \n\t" // load rgbconstants
      "vldrepl.h $vr3, %3, 4 \n\t" // load rgbconstants
      "1: \n\t"
      "vld $vr4, %0, 0 \n\t"
      "vld $vr5, %0, 16 \n\t"
      "vld $vr6, %0, 32 \n\t"
      "vld $vr7, %0, 48 \n\t" // load 16 pixels of RGBA
      "vor.v $vr12, $vr3, $vr3 \n\t"
      "vor.v $vr13, $vr3, $vr3 \n\t"
      "addi.d %2, %2, -16 \n\t" // 16 processed per loop.
      "vpickev.b $vr8, $vr5, $vr4 \n\t" //AG
      "vpickev.b $vr10, $vr7, $vr6 \n\t"
      "vpickod.b $vr9, $vr5, $vr4 \n\t" //BR
      "vpickod.b $vr11, $vr7, $vr6 \n\t"
      "vmaddwev.h.bu $vr12, $vr9, $vr0 \n\t" //B
      "vmaddwev.h.bu $vr13, $vr11, $vr0 \n\t"
      "vmaddwod.h.bu $vr12, $vr8, $vr1 \n\t" //G
      "vmaddwod.h.bu $vr13, $vr10, $vr1 \n\t"
      "vmaddwod.h.bu $vr12, $vr9, $vr2 \n\t" //R
      "vmaddwod.h.bu $vr13, $vr11, $vr2 \n\t"
      "addi.d %0, %0, 64 \n\t"
      "vpickod.b $vr10, $vr13, $vr12 \n\t"
      "vst $vr10, %1, 0 \n\t"
      "addi.d %1, %1, 16 \n\t"
      "bnez %2, 1b \n\t"
      : "+&r"(src_rgba), // %0
        "+&r"(dst_y), // %1
        "+&r"(width) // %2
      : "r"(rgbconstants)
      : "memory"
  );
}
// Thin entry points for A-first layouts; BGRA reuses the RGBA kernel with
// the B/R-swapped ("Raw") coefficient table.
void RGBAToYRow_LSX(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
  RGBAToYMatrixRow_LSX(src_rgba, dst_y, width, &kRgb24I601Constants);
}
// JPEG (full-range) luma variant.
void RGBAToYJRow_LSX(const uint8_t* src_rgba, uint8_t* dst_yj, int width) {
  RGBAToYMatrixRow_LSX(src_rgba, dst_yj, width, &kRgb24JPEGConstants);
}
void BGRAToYRow_LSX(const uint8_t* src_bgra, uint8_t* dst_y, int width) {
  RGBAToYMatrixRow_LSX(src_bgra, dst_y, width, &kRawI601Constants);
}
// Shared LSX kernel for packed 3-byte layouts (RGB24/RAW): shuffle tables
// split the 3-byte pixels into channel lanes before applying *rgbconstants.
// NOTE(review): width must be a positive multiple of 16 (decrement + bnez),
// and the used vector registers are not in the clobber list — confirm both.
static void RGBToYMatrixRow_LSX(const uint8_t* src_rgba,
                                uint8_t* dst_y,
                                int width,
                                const struct RgbConstants* rgbconstants) {
  // Four 16-byte shuffle tables loaded into $vr4-$vr7 below.
  int8_t shuff[64] = {0, 2, 3, 5, 6, 8, 9, 11, 12, 14, 15, 17, 18, 20, 21, 23,
                      24, 26, 27, 29, 30, 0, 1, 3, 4, 6, 7, 9, 10, 12, 13, 15,
                      1, 0, 4, 0, 7, 0, 10, 0, 13, 0, 16, 0, 19, 0, 22, 0,
                      25, 0, 28, 0, 31, 0, 2, 0, 5, 0, 8, 0, 11, 0, 14, 0};
  asm volatile(
      "vldrepl.b $vr0, %3, 0 \n\t" // load rgbconstants
      "vldrepl.b $vr1, %3, 1 \n\t" // load rgbconstants
      "vldrepl.b $vr2, %3, 2 \n\t" // load rgbconstants
      "vldrepl.h $vr3, %3, 4 \n\t" // load rgbconstants
      "vld $vr4, %4, 0 \n\t" // load shuff
      "vld $vr5, %4, 16 \n\t"
      "vld $vr6, %4, 32 \n\t"
      "vld $vr7, %4, 48 \n\t"
      "1: \n\t"
      "vld $vr8, %0, 0 \n\t"
      "vld $vr9, %0, 16 \n\t"
      "vld $vr10, %0, 32 \n\t" // load 16 pixels of RGB
      "vor.v $vr12, $vr3, $vr3 \n\t"
      "vor.v $vr13, $vr3, $vr3 \n\t"
      "addi.d %2, %2, -16 \n\t" // 16 processed per loop.
      "vshuf.b $vr14, $vr9, $vr8, $vr4 \n\t"
      "vshuf.b $vr15, $vr9, $vr10, $vr5 \n\t"
      "vshuf.b $vr16, $vr9, $vr8, $vr6 \n\t"
      "vshuf.b $vr17, $vr9, $vr10, $vr7 \n\t"
      "vmaddwev.h.bu $vr12, $vr16, $vr1 \n\t" //G
      "vmaddwev.h.bu $vr13, $vr17, $vr1 \n\t"
      "vmaddwev.h.bu $vr12, $vr14, $vr0 \n\t" //B
      "vmaddwev.h.bu $vr13, $vr15, $vr0 \n\t"
      "vmaddwod.h.bu $vr12, $vr14, $vr2 \n\t" //R
      "vmaddwod.h.bu $vr13, $vr15, $vr2 \n\t"
      "addi.d %0, %0, 48 \n\t"
      "vpickod.b $vr10, $vr13, $vr12 \n\t"
      "vst $vr10, %1, 0 \n\t"
      "addi.d %1, %1, 16 \n\t"
      "bnez %2, 1b \n\t"
      : "+&r"(src_rgba), // %0
        "+&r"(dst_y), // %1
        "+&r"(width) // %2
      : "r"(rgbconstants), // %3
        "r"(shuff) // %4
      : "memory"
  );
}
// Convert a row of RGB24 pixels to full-range (JPEG) luma.
void RGB24ToYJRow_LSX(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) {
  RGBToYMatrixRow_LSX(src_rgb24, dst_yj, width, &kRgb24JPEGConstants);
}
// Convert a row of RAW pixels to full-range (JPEG) luma.
void RAWToYJRow_LSX(const uint8_t* src_raw, uint8_t* dst_yj, int width) {
  RGBToYMatrixRow_LSX(src_raw, dst_yj, width, &kRawJPEGConstants);
}
// Convert a row of RGB24 pixels to BT.601 ("I601") luma.
void RGB24ToYRow_LSX(const uint8_t* src_rgb24, uint8_t* dst_y, int width) {
  RGBToYMatrixRow_LSX(src_rgb24, dst_y, width, &kRgb24I601Constants);
}
// Convert a row of RAW pixels to BT.601 ("I601") luma.
void RAWToYRow_LSX(const uint8_t* src_raw, uint8_t* dst_y, int width) {
  RGBToYMatrixRow_LSX(src_raw, dst_y, width, &kRawI601Constants);
}
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv

View File

@ -720,6 +720,60 @@ void DetileToYUY2_NEON(const uint8_t* src_y,
}
#endif
// Unpack MT2T into tiled P010, 64 pixels at a time (this mirrors the
// AArch64 implementation of the same routine; see
// tinyurl.com/mtk-10bit-video-format noted there for format documentation).
// Each 80-byte block holds 16 bytes of packed 2-bit lower bits followed by
// 64 bytes of 8-bit upper bits; one loop iteration consumes a whole block
// and emits 64 16-bit pixels.
// NOTE(review): |size| is reduced by 80 per iteration, so it is presumably a
// multiple of 80 -- confirm with callers.
void UnpackMT2T_NEON(const uint8_t* src, uint16_t* dst, size_t size) {
  const uint8_t* src_lower_bits = src;
  const uint8_t* src_upper_bits = src + 16;
  asm volatile(
      "1: \n"
      // First 32 pixels of the block.
      "vld4.8 {d1, d3, d5, d7}, [%1]! \n"  // Load 32 bytes of upper
                                           // bits.
      "vld1.8 {d6}, [%0]! \n"  // Load 8 bytes of lower
                               // bits.
      "vshl.u8 d4, d6, #2 \n"  // Align lower bits.
      "vshl.u8 d2, d6, #4 \n"
      "vshl.u8 d0, d6, #6 \n"
      "vzip.u8 d0, d1 \n"  // Zip lower and upper
                           // bits together.
      "vzip.u8 d2, d3 \n"
      "vzip.u8 d4, d5 \n"
      "vzip.u8 d6, d7 \n"
      "vsri.u16 q0, q0, #10 \n"  // Copy upper 6 bits into
                                 // lower 6 bits for better
                                 // accuracy in
                                 // conversions.
      "vsri.u16 q1, q1, #10 \n"
      "vsri.u16 q2, q2, #10 \n"
      "vsri.u16 q3, q3, #10 \n"
      "vst4.16 {d0, d2, d4, d6}, [%2]! \n"  // Store 32 pixels
      "vst4.16 {d1, d3, d5, d7}, [%2]! \n"
      "vld4.8 {d1, d3, d5, d7}, [%1]! \n"  // Process last 32 pixels
                                           // in the block
      "vld1.8 {d6}, [%0]! \n"
      "vshl.u8 d4, d6, #2 \n"
      "vshl.u8 d2, d6, #4 \n"
      "vshl.u8 d0, d6, #6 \n"
      "vzip.u8 d0, d1 \n"
      "vzip.u8 d2, d3 \n"
      "vzip.u8 d4, d5 \n"
      "vzip.u8 d6, d7 \n"
      "vsri.u16 q0, q0, #10 \n"
      "vsri.u16 q1, q1, #10 \n"
      "vsri.u16 q2, q2, #10 \n"
      "vsri.u16 q3, q3, #10 \n"
      "vst4.16 {d0, d2, d4, d6}, [%2]! \n"
      "vst4.16 {d1, d3, d5, d7}, [%2]! \n"
      // Advance to the next 80-byte block: lower bits start where the upper
      // bits ended; upper bits follow 16 bytes later.
      "mov %0, %1 \n"
      "add %1, %0, #16 \n"
      "subs %3, %3, #80 \n"
      "bgt 1b \n"
      : "+r"(src_lower_bits),  // %0
        "+r"(src_upper_bits),  // %1
        "+r"(dst),             // %2
        "+r"(size)             // %3
      :
      : "cc", "memory", "q0", "q1", "q2", "q3");
}
// Reads 16 U's and V's and writes out 16 pairs of UV.
void MergeUVRow_NEON(const uint8_t* src_u,
const uint8_t* src_v,
@ -3857,31 +3911,25 @@ void DivideRow_16_NEON(const uint16_t* src_y,
int scale,
int width) {
asm volatile(
"vdup.16 q0, %3 \n"
"vdup.16 d8, %3 \n"
"1: \n"
"vld1.16 {q1}, [%0]! \n"
"vld1.16 {q2}, [%0]! \n"
"vmovl.u16 q3, d2 \n"
"vmovl.u16 q1, d3 \n"
"vmovl.u16 q4, d4 \n"
"vmovl.u16 q2, d5 \n"
"vshl.u32 q3, q3, q0 \n"
"vshl.u32 q4, q4, q0 \n"
"vshl.u32 q1, q1, q0 \n"
"vshl.u32 q2, q2, q0 \n"
"vmovn.u32 d2, q3 \n"
"vmovn.u32 d3, q1 \n"
"vmovn.u32 d4, q4 \n"
"vmovn.u32 d5, q2 \n"
"vst1.16 {q1}, [%1]! \n"
"vst1.16 {q2}, [%1]! \n"
"vld1.16 {q2, q3}, [%0]! \n"
"vmull.u16 q0, d4, d8 \n"
"vmull.u16 q1, d5, d8 \n"
"vmull.u16 q2, d6, d8 \n"
"vmull.u16 q3, d7, d8 \n"
"vshrn.u32 d0, q0, #16 \n"
"vshrn.u32 d1, q1, #16 \n"
"vshrn.u32 d2, q2, #16 \n"
"vshrn.u32 d3, q3, #16 \n"
"vst1.16 {q0, q1}, [%1]! \n" // store 16 pixels
"subs %2, %2, #16 \n" // 16 src pixels per loop
"bgt 1b \n"
: "+r"(src_y), // %0
"+r"(dst_y), // %1
"+r"(width) // %2
: "r"(scale) // %3
: "cc", "memory", "q0", "q1", "q2", "q3", "q4");
: "cc", "memory", "q0", "q1", "q2", "q3", "d8");
}
// Use scale to convert lsb formats to msb, depending how many bits there are:

View File

@ -749,6 +749,54 @@ void DetileToYUY2_NEON(const uint8_t* src_y,
}
#endif
// Unpack MT2T into tiled P010 64 pixels at a time. See
// tinyurl.com/mtk-10bit-video-format for format documentation.
// Each 80-byte block holds 16 bytes of packed 2-bit lower bits followed by
// 64 bytes of 8-bit upper bits; one loop iteration consumes a whole block
// and emits 64 16-bit pixels.
// NOTE(review): |size| is reduced by 80 per iteration, so it is presumably a
// multiple of 80 -- confirm with callers. The clobber list also names w0 and
// v8-v12, which this code does not appear to touch; the extra clobbers are
// harmless.
void UnpackMT2T_NEON(const uint8_t* src, uint16_t* dst, size_t size) {
  const uint8_t* src_lower_bits = src;
  const uint8_t* src_upper_bits = src + 16;
  asm volatile(
      "1: \n"
      // First 32 pixels: 32 upper-bit bytes and 8 lower-bit bytes.
      "ld4 {v0.8b, v1.8b, v2.8b, v3.8b}, [%1], #32 \n"
      "ld1 {v7.8b}, [%0], #8 \n"
      "shl v6.8b, v7.8b, #2 \n"  // Align the packed 2-bit fields.
      "shl v5.8b, v7.8b, #4 \n"
      "shl v4.8b, v7.8b, #6 \n"
      "zip1 v0.16b, v4.16b, v0.16b \n"  // Interleave lower/upper bits.
      "zip1 v1.16b, v5.16b, v1.16b \n"
      "zip1 v2.16b, v6.16b, v2.16b \n"
      "zip1 v3.16b, v7.16b, v3.16b \n"
      "sri v0.8h, v0.8h, #10 \n"  // Replicate the top 6 bits into the
      "sri v1.8h, v1.8h, #10 \n"  // low bits for conversion accuracy.
      "sri v2.8h, v2.8h, #10 \n"
      "sri v3.8h, v3.8h, #10 \n"
      "st4 {v0.8h, v1.8h, v2.8h, v3.8h}, [%2], #64 \n"
      // Last 32 pixels of the block.
      "ld4 {v0.8b, v1.8b, v2.8b, v3.8b}, [%1], #32 \n"
      "ld1 {v7.8b}, [%0], #8 \n"
      "shl v6.8b, v7.8b, #2 \n"
      "shl v5.8b, v7.8b, #4 \n"
      "shl v4.8b, v7.8b, #6 \n"
      "zip1 v0.16b, v4.16b, v0.16b \n"
      "zip1 v1.16b, v5.16b, v1.16b \n"
      "zip1 v2.16b, v6.16b, v2.16b \n"
      "zip1 v3.16b, v7.16b, v3.16b \n"
      "sri v0.8h, v0.8h, #10 \n"
      "sri v1.8h, v1.8h, #10 \n"
      "sri v2.8h, v2.8h, #10 \n"
      "sri v3.8h, v3.8h, #10 \n"
      "st4 {v0.8h, v1.8h, v2.8h, v3.8h}, [%2], #64 \n"
      // Advance to the next 80-byte block.
      "mov %0, %1 \n"
      "add %1, %0, #16 \n"
      "subs %3, %3, #80 \n"
      "b.gt 1b \n"
      : "+r"(src_lower_bits),  // %0
        "+r"(src_upper_bits),  // %1
        "+r"(dst),             // %2
        "+r"(size)             // %3
      :
      : "cc", "memory", "w0", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
        "v8", "v9", "v10", "v11", "v12");
}
#if LIBYUV_USE_ST2
// Reads 16 U's and V's and writes out 16 pairs of UV.
void MergeUVRow_NEON(const uint8_t* src_u,
@ -4413,23 +4461,19 @@ void DivideRow_16_NEON(const uint16_t* src_y,
int scale,
int width) {
asm volatile(
"dup v0.8h, %w3 \n"
"dup v4.8h, %w3 \n"
"1: \n"
"ldp q1, q2, [%0], #32 \n"
"ushll v3.4s, v1.4h, #0 \n"
"ushll v4.4s, v2.4h, #0 \n"
"ldp q2, q3, [%0], #32 \n"
"umull v0.4s, v2.4h, v4.4h \n"
"umull2 v1.4s, v2.8h, v4.8h \n"
"umull v2.4s, v3.4h, v4.4h \n"
"umull2 v3.4s, v3.8h, v4.8h \n"
"prfm pldl1keep, [%0, 448] \n"
"ushll2 v1.4s, v1.8h, #0 \n"
"ushll2 v2.4s, v2.8h, #0 \n"
"mul v3.4s, v0.4s, v3.4s \n"
"mul v4.4s, v0.4s, v4.4s \n"
"mul v1.4s, v0.4s, v1.4s \n"
"mul v2.4s, v0.4s, v2.4s \n"
"shrn v3.4h, v3.4s, #16 \n"
"shrn v4.4h, v4.4s, #16 \n"
"shrn2 v3.8h, v1.4s, #16 \n"
"shrn2 v4.8h, v2.4s, #16 \n"
"stp q3, q3, [%1], #32 \n" // store 16 pixels
"shrn v0.4h, v0.4s, #16 \n"
"shrn2 v0.8h, v1.4s, #16 \n"
"shrn v1.4h, v2.4s, #16 \n"
"shrn2 v1.8h, v3.4s, #16 \n"
"stp q0, q1, [%1], #32 \n" // store 16 pixels
"subs %w2, %w2, #16 \n" // 16 src pixels per loop
"b.gt 1b \n"
: "+r"(src_y), // %0

View File

@ -198,6 +198,51 @@ static void ScalePlaneDown2_16(int src_width,
}
}
// Scale a 16-bit plane down by 2 in each dimension while narrowing samples
// to 8 bits via the fixed-point |scale| factor (see C16TO8).
// |filtering| selects no filter, horizontal linear, or a 2x2 box filter;
// odd source widths use the _Odd row kernels.
void ScalePlaneDown2_16To8(int src_width,
                           int src_height,
                           int dst_width,
                           int dst_height,
                           int src_stride,
                           int dst_stride,
                           const uint16_t* src_ptr,
                           uint8_t* dst_ptr,
                           int scale,
                           enum FilterMode filtering) {
  // Row kernel: chosen by filter mode, with an _Odd variant when the source
  // width is odd.
  void (*ScaleRowDown2)(const uint16_t* src_ptr, ptrdiff_t src_stride,
                        uint8_t* dst_ptr, int dst_width, int scale);
  int j;
  int input_row_step = src_stride * 2;  // Two source rows per output row.
  if (src_width & 1) {
    if (filtering == kFilterNone) {
      ScaleRowDown2 = ScaleRowDown2_16To8_Odd_C;
    } else if (filtering == kFilterLinear) {
      ScaleRowDown2 = ScaleRowDown2Linear_16To8_Odd_C;
    } else {
      ScaleRowDown2 = ScaleRowDown2Box_16To8_Odd_C;
    }
  } else {
    if (filtering == kFilterNone) {
      ScaleRowDown2 = ScaleRowDown2_16To8_C;
    } else if (filtering == kFilterLinear) {
      ScaleRowDown2 = ScaleRowDown2Linear_16To8_C;
    } else {
      ScaleRowDown2 = ScaleRowDown2Box_16To8_C;
    }
  }
  (void)dst_height;
  if (!filtering) {
    src_ptr += src_stride;  // Point to odd rows.
    src_stride = 0;
  }
  if (filtering == kFilterLinear) {
    src_stride = 0;
  }
  for (j = 0; j < src_height / 2; ++j) {
    ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width, scale);
    src_ptr += input_row_step;
    dst_ptr += dst_stride;
  }
  if (src_height & 1) {
    if (!filtering) {
      // src_stride was zeroed above, so this is a no-op kept to mirror the
      // structure of the other ScalePlaneDown2 variants.
      src_ptr -= src_stride;  // Point to last row.
    }
    ScaleRowDown2(src_ptr, 0, dst_ptr, dst_width, scale);
  }
}
// Scale plane, 1/4
// This is an optimized version for scaling down a plane to 1/4 of
// its original size.
@ -775,9 +820,9 @@ static void ScaleAddCols2_C(int dst_width,
int ix = x >> 16;
x += dx;
boxwidth = MIN1((x >> 16) - ix);
*dst_ptr++ =
SumPixels(boxwidth, src_ptr + ix) * scaletbl[boxwidth - minboxwidth] >>
16;
*dst_ptr++ = (uint8_t)(SumPixels(boxwidth, src_ptr + ix) *
scaletbl[boxwidth - minboxwidth] >>
16);
}
}
@ -814,7 +859,7 @@ static void ScaleAddCols0_C(int dst_width,
(void)dx;
src_ptr += (x >> 16);
for (i = 0; i < dst_width; ++i) {
*dst_ptr++ = src_ptr[i] * scaleval >> 16;
*dst_ptr++ = (uint8_t)(src_ptr[i] * scaleval >> 16);
}
}
@ -829,7 +874,7 @@ static void ScaleAddCols1_C(int dst_width,
int i;
x >>= 16;
for (i = 0; i < dst_width; ++i) {
*dst_ptr++ = SumPixels(boxwidth, src_ptr + x) * scaleval >> 16;
*dst_ptr++ = (uint8_t)(SumPixels(boxwidth, src_ptr + x) * scaleval >> 16);
x += boxwidth;
}
}

View File

@ -58,9 +58,9 @@ static void ScaleARGBDown2(int src_width,
assert((dy & 0x1ffff) == 0); // Test vertical scale is multiple of 2.
// Advance to odd row, even column.
if (filtering == kFilterBilinear) {
src_argb += (y >> 16) * (int64_t)src_stride + (x >> 16) * 4;
src_argb += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4;
} else {
src_argb += (y >> 16) * (int64_t)src_stride + ((x >> 16) - 1) * 4;
src_argb += (y >> 16) * (intptr_t)src_stride + ((x >> 16) - 1) * 4;
}
#if defined(HAS_SCALEARGBROWDOWN2_SSE2)
@ -162,7 +162,7 @@ static void ScaleARGBDown4Box(int src_width,
uint8_t* dst_argb, int dst_width) =
ScaleARGBRowDown2Box_C;
// Advance to odd row, even column.
src_argb += (y >> 16) * (int64_t)src_stride + (x >> 16) * 4;
src_argb += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4;
(void)src_width;
(void)src_height;
(void)dx;
@ -214,7 +214,7 @@ static void ScaleARGBDownEven(int src_width,
enum FilterMode filtering) {
int j;
int col_step = dx >> 16;
int row_stride = (dy >> 16) * (int64_t)src_stride;
ptrdiff_t row_stride = (ptrdiff_t)((dy >> 16) * (intptr_t)src_stride);
void (*ScaleARGBRowDownEven)(const uint8_t* src_argb, ptrdiff_t src_stride,
int src_step, uint8_t* dst_argb, int dst_width) =
filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C;
@ -222,7 +222,7 @@ static void ScaleARGBDownEven(int src_width,
(void)src_height;
assert(IS_ALIGNED(src_width, 2));
assert(IS_ALIGNED(src_height, 2));
src_argb += (y >> 16) * (int64_t)src_stride + (x >> 16) * 4;
src_argb += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4;
#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_SSE2
@ -289,10 +289,10 @@ static void ScaleARGBBilinearDown(int src_width,
int dy,
enum FilterMode filtering) {
int j;
void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb,
void (*InterpolateRow)(uint8_t* dst_argb, const uint8_t* src_argb,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_C;
void (*ScaleARGBFilterCols)(uint8_t * dst_argb, const uint8_t* src_argb,
void (*ScaleARGBFilterCols)(uint8_t* dst_argb, const uint8_t* src_argb,
int dst_width, int x, int dx) =
(src_width >= 32768) ? ScaleARGBFilterCols64_C : ScaleARGBFilterCols_C;
int64_t xlast = x + (int64_t)(dst_width - 1) * dx;
@ -388,7 +388,7 @@ static void ScaleARGBBilinearDown(int src_width,
}
for (j = 0; j < dst_height; ++j) {
int yi = y >> 16;
const uint8_t* src = src_argb + yi * (int64_t)src_stride;
const uint8_t* src = src_argb + yi * (intptr_t)src_stride;
if (filtering == kFilterLinear) {
ScaleARGBFilterCols(dst_argb, src, dst_width, x, dx);
} else {
@ -421,10 +421,10 @@ static void ScaleARGBBilinearUp(int src_width,
int dy,
enum FilterMode filtering) {
int j;
void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb,
void (*InterpolateRow)(uint8_t* dst_argb, const uint8_t* src_argb,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_C;
void (*ScaleARGBFilterCols)(uint8_t * dst_argb, const uint8_t* src_argb,
void (*ScaleARGBFilterCols)(uint8_t* dst_argb, const uint8_t* src_argb,
int dst_width, int x, int dx) =
filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
const int max_y = (src_height - 1) << 16;
@ -545,7 +545,7 @@ static void ScaleARGBBilinearUp(int src_width,
{
int yi = y >> 16;
const uint8_t* src = src_argb + yi * (int64_t)src_stride;
const uint8_t* src = src_argb + yi * (intptr_t)src_stride;
// Allocate 2 rows of ARGB.
const int row_size = (dst_width * 4 + 31) & ~31;
@ -570,7 +570,7 @@ static void ScaleARGBBilinearUp(int src_width,
if (y > max_y) {
y = max_y;
yi = y >> 16;
src = src_argb + yi * (int64_t)src_stride;
src = src_argb + yi * (intptr_t)src_stride;
}
if (yi != lasty) {
ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
@ -668,7 +668,7 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
}
#endif
void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb,
void (*InterpolateRow)(uint8_t* dst_argb, const uint8_t* src_argb,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_C;
#if defined(HAS_INTERPOLATEROW_SSSE3)
@ -712,7 +712,7 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
}
#endif
void (*ScaleARGBFilterCols)(uint8_t * dst_argb, const uint8_t* src_argb,
void (*ScaleARGBFilterCols)(uint8_t* dst_argb, const uint8_t* src_argb,
int dst_width, int x, int dx) =
filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
if (src_width >= 32768) {
@ -793,9 +793,9 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
const int kYShift = 1; // Shift Y by 1 to convert Y plane to UV coordinate.
int yi = y >> 16;
int uv_yi = yi >> kYShift;
const uint8_t* src_row_y = src_y + yi * (int64_t)src_stride_y;
const uint8_t* src_row_u = src_u + uv_yi * (int64_t)src_stride_u;
const uint8_t* src_row_v = src_v + uv_yi * (int64_t)src_stride_v;
const uint8_t* src_row_y = src_y + yi * (intptr_t)src_stride_y;
const uint8_t* src_row_u = src_u + uv_yi * (intptr_t)src_stride_u;
const uint8_t* src_row_v = src_v + uv_yi * (intptr_t)src_stride_v;
// Allocate 2 rows of ARGB.
const int row_size = (dst_width * 4 + 31) & ~31;
@ -833,9 +833,9 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
y = max_y;
yi = y >> 16;
uv_yi = yi >> kYShift;
src_row_y = src_y + yi * (int64_t)src_stride_y;
src_row_u = src_u + uv_yi * (int64_t)src_stride_u;
src_row_v = src_v + uv_yi * (int64_t)src_stride_v;
src_row_y = src_y + yi * (intptr_t)src_stride_y;
src_row_u = src_u + uv_yi * (intptr_t)src_stride_u;
src_row_v = src_v + uv_yi * (intptr_t)src_stride_v;
}
if (yi != lasty) {
// TODO(fbarchard): Convert the clipped region of row.
@ -883,7 +883,7 @@ static void ScaleARGBSimple(int src_width,
int y,
int dy) {
int j;
void (*ScaleARGBCols)(uint8_t * dst_argb, const uint8_t* src_argb,
void (*ScaleARGBCols)(uint8_t* dst_argb, const uint8_t* src_argb,
int dst_width, int x, int dx) =
(src_width >= 32768) ? ScaleARGBCols64_C : ScaleARGBCols_C;
(void)src_height;
@ -926,7 +926,7 @@ static void ScaleARGBSimple(int src_width,
}
for (j = 0; j < dst_height; ++j) {
ScaleARGBCols(dst_argb, src_argb + (y >> 16) * (int64_t)src_stride,
ScaleARGBCols(dst_argb, src_argb + (y >> 16) * (intptr_t)src_stride,
dst_width, x, dx);
dst_argb += dst_stride;
y += dy;
@ -962,7 +962,7 @@ static void ScaleARGB(const uint8_t* src,
// Negative src_height means invert the image.
if (src_height < 0) {
src_height = -src_height;
src = src + (src_height - 1) * (int64_t)src_stride;
src = src + (src_height - 1) * (intptr_t)src_stride;
src_stride = -src_stride;
}
ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
@ -977,7 +977,7 @@ static void ScaleARGB(const uint8_t* src,
if (clip_y) {
int64_t clipf = (int64_t)(clip_y)*dy;
y += (clipf & 0xffff);
src += (clipf >> 16) * (int64_t)src_stride;
src += (clipf >> 16) * (intptr_t)src_stride;
dst += clip_y * dst_stride;
}
@ -1011,7 +1011,7 @@ static void ScaleARGB(const uint8_t* src,
filtering = kFilterNone;
if (dx == 0x10000 && dy == 0x10000) {
// Straight copy.
ARGBCopy(src + (y >> 16) * (int64_t)src_stride + (x >> 16) * 4,
ARGBCopy(src + (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4,
src_stride, dst, dst_stride, clip_width, clip_height);
return;
}

View File

@ -23,6 +23,25 @@ namespace libyuv {
extern "C" {
#endif
// STATIC_CAST(type, expr): portable narrowing cast. Uses C++ static_cast
// when compiled as C++ (these row files build under both languages); falls
// back to a plain C cast otherwise.
#ifdef __cplusplus
#define STATIC_CAST(type, expr) static_cast<type>(expr)
#else
#define STATIC_CAST(type, expr) (type)(expr)
#endif
// Clamp a value to the byte range [0, 255]: inputs >= 255 saturate to 255;
// anything below (including negatives) is truncated to its low 8 bits,
// exactly as the original branchless form behaved.
// TODO(fbarchard): make clamp255 preserve negative values.
static __inline int32_t clamp255(int32_t v) {
  return (v >= 255) ? 255 : (v & 255);
}
// Use scale to convert lsb formats to msb, depending how many bits there are:
// 32768 = 9 bits
// 16384 = 10 bits
// 4096 = 12 bits
// 256 = 16 bits
// TODO(fbarchard): change scale to bits
// C16TO8(v, scale): fixed-point narrowing of a 16-bit sample to 8 bits,
// computed as (v * scale) >> 16 and clamped to [0, 255] by clamp255().
#define C16TO8(v, scale) clamp255(((v) * (scale)) >> 16)
// Absolute value of an int (Abs(INT_MIN) overflows, same as the original).
static __inline int Abs(int v) {
  return (v < 0) ? -v : v;
}
@ -62,6 +81,50 @@ void ScaleRowDown2_16_C(const uint16_t* src_ptr,
}
}
// Subsample a row by 2 horizontally, taking the odd-indexed source sample of
// each pair, and narrow 16-bit samples to 8 bits via the fixed-point |scale|
// factor (see C16TO8). |src_stride| is unused (single-row kernel).
void ScaleRowDown2_16To8_C(const uint16_t* src_ptr,
                           ptrdiff_t src_stride,
                           uint8_t* dst,
                           int dst_width,
                           int scale) {
  int i;
  (void)src_stride;
  assert(scale >= 256);
  assert(scale <= 32768);
  for (i = 0; i + 2 <= dst_width; i += 2) {
    dst[i] = STATIC_CAST(uint8_t, C16TO8(src_ptr[2 * i + 1], scale));
    dst[i + 1] = STATIC_CAST(uint8_t, C16TO8(src_ptr[2 * i + 3], scale));
  }
  if (dst_width & 1) {
    dst[i] = STATIC_CAST(uint8_t, C16TO8(src_ptr[2 * i + 1], scale));
  }
}
// Variant of ScaleRowDown2_16To8_C for odd source widths: the final
// destination pixel is produced from the single trailing (unpaired) source
// sample instead of an odd-indexed one.
void ScaleRowDown2_16To8_Odd_C(const uint16_t* src_ptr,
                               ptrdiff_t src_stride,
                               uint8_t* dst,
                               int dst_width,
                               int scale) {
  int i;
  const int body_width = dst_width - 1;  // Pixels produced from full pairs.
  (void)src_stride;
  assert(scale >= 256);
  assert(scale <= 32768);
  for (i = 0; i + 2 <= body_width; i += 2) {
    dst[i] = STATIC_CAST(uint8_t, C16TO8(src_ptr[2 * i + 1], scale));
    dst[i + 1] = STATIC_CAST(uint8_t, C16TO8(src_ptr[2 * i + 3], scale));
  }
  if (body_width & 1) {
    dst[body_width - 1] =
        STATIC_CAST(uint8_t, C16TO8(src_ptr[2 * body_width - 1], scale));
  }
  // Last pixel comes from the final source sample itself.
  dst[body_width] =
      STATIC_CAST(uint8_t, C16TO8(src_ptr[2 * body_width], scale));
}
void ScaleRowDown2Linear_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
@ -98,6 +161,52 @@ void ScaleRowDown2Linear_16_C(const uint16_t* src_ptr,
}
}
// Downsample a row by 2 with horizontal linear filtering: average each pair
// of 16-bit samples (with rounding), then narrow to 8 bits via the
// fixed-point |scale| factor. |src_stride| is unused (single-row kernel).
void ScaleRowDown2Linear_16To8_C(const uint16_t* src_ptr,
                                 ptrdiff_t src_stride,
                                 uint8_t* dst,
                                 int dst_width,
                                 int scale) {
  int i;
  (void)src_stride;
  assert(scale >= 256);
  assert(scale <= 32768);
  for (i = 0; i + 2 <= dst_width; i += 2) {
    dst[i] = STATIC_CAST(
        uint8_t,
        C16TO8((src_ptr[2 * i] + src_ptr[2 * i + 1] + 1) >> 1, scale));
    dst[i + 1] = STATIC_CAST(
        uint8_t,
        C16TO8((src_ptr[2 * i + 2] + src_ptr[2 * i + 3] + 1) >> 1, scale));
  }
  if (dst_width & 1) {
    dst[i] = STATIC_CAST(
        uint8_t,
        C16TO8((src_ptr[2 * i] + src_ptr[2 * i + 1] + 1) >> 1, scale));
  }
}
// Odd-width variant of ScaleRowDown2Linear_16To8_C: the final destination
// pixel comes from the single trailing source sample (no pair to average).
void ScaleRowDown2Linear_16To8_Odd_C(const uint16_t* src_ptr,
                                     ptrdiff_t src_stride,
                                     uint8_t* dst,
                                     int dst_width,
                                     int scale) {
  int i;
  const int body_width = dst_width - 1;  // Pixels produced from full pairs.
  (void)src_stride;
  assert(scale >= 256);
  assert(scale <= 32768);
  for (i = 0; i + 2 <= body_width; i += 2) {
    dst[i] = STATIC_CAST(
        uint8_t,
        C16TO8((src_ptr[2 * i] + src_ptr[2 * i + 1] + 1) >> 1, scale));
    dst[i + 1] = STATIC_CAST(
        uint8_t,
        C16TO8((src_ptr[2 * i + 2] + src_ptr[2 * i + 3] + 1) >> 1, scale));
  }
  if (body_width & 1) {
    dst[body_width - 1] = STATIC_CAST(
        uint8_t,
        C16TO8((src_ptr[2 * body_width - 2] + src_ptr[2 * body_width - 1] + 1) >>
                   1,
               scale));
  }
  // Last pixel comes from the final source sample itself.
  dst[body_width] =
      STATIC_CAST(uint8_t, C16TO8(src_ptr[2 * body_width], scale));
}
void ScaleRowDown2Box_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
@ -160,6 +269,61 @@ void ScaleRowDown2Box_16_C(const uint16_t* src_ptr,
}
}
// Downsample by 2 with a 2x2 box filter: average each 2x2 block of 16-bit
// samples (with rounding) across this row and the next (|src_stride| away),
// then narrow to 8 bits via the fixed-point |scale| factor.
void ScaleRowDown2Box_16To8_C(const uint16_t* src_ptr,
                              ptrdiff_t src_stride,
                              uint8_t* dst,
                              int dst_width,
                              int scale) {
  const uint16_t* next_row = src_ptr + src_stride;
  int i;
  assert(scale >= 256);
  assert(scale <= 32768);
  for (i = 0; i + 2 <= dst_width; i += 2) {
    dst[i] = STATIC_CAST(
        uint8_t,
        C16TO8((src_ptr[2 * i] + src_ptr[2 * i + 1] + next_row[2 * i] +
                next_row[2 * i + 1] + 2) >>
                   2,
               scale));
    dst[i + 1] = STATIC_CAST(
        uint8_t,
        C16TO8((src_ptr[2 * i + 2] + src_ptr[2 * i + 3] + next_row[2 * i + 2] +
                next_row[2 * i + 3] + 2) >>
                   2,
               scale));
  }
  if (dst_width & 1) {
    dst[i] = STATIC_CAST(
        uint8_t,
        C16TO8((src_ptr[2 * i] + src_ptr[2 * i + 1] + next_row[2 * i] +
                next_row[2 * i + 1] + 2) >>
                   2,
               scale));
  }
}
// Odd-width variant of ScaleRowDown2Box_16To8_C: the final destination pixel
// averages only the single trailing column of the two source rows.
void ScaleRowDown2Box_16To8_Odd_C(const uint16_t* src_ptr,
                                  ptrdiff_t src_stride,
                                  uint8_t* dst,
                                  int dst_width,
                                  int scale) {
  const uint16_t* next_row = src_ptr + src_stride;
  int i;
  const int body_width = dst_width - 1;  // Pixels produced from full 2x2 boxes.
  assert(scale >= 256);
  assert(scale <= 32768);
  for (i = 0; i + 2 <= body_width; i += 2) {
    dst[i] = STATIC_CAST(
        uint8_t,
        C16TO8((src_ptr[2 * i] + src_ptr[2 * i + 1] + next_row[2 * i] +
                next_row[2 * i + 1] + 2) >>
                   2,
               scale));
    dst[i + 1] = STATIC_CAST(
        uint8_t,
        C16TO8((src_ptr[2 * i + 2] + src_ptr[2 * i + 3] + next_row[2 * i + 2] +
                next_row[2 * i + 3] + 2) >>
                   2,
               scale));
  }
  if (body_width & 1) {
    dst[body_width - 1] = STATIC_CAST(
        uint8_t,
        C16TO8((src_ptr[2 * body_width - 2] + src_ptr[2 * body_width - 1] +
                next_row[2 * body_width - 2] + next_row[2 * body_width - 1] +
                2) >>
                   2,
               scale));
  }
  // Last pixel: vertical average of the trailing column only.
  dst[body_width] = STATIC_CAST(
      uint8_t,
      C16TO8((src_ptr[2 * body_width] + next_row[2 * body_width] + 1) >> 1,
             scale));
}
void ScaleRowDown4_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,

View File

@ -83,9 +83,9 @@ static void ScaleUVDown2(int src_width,
assert((dy & 0x1ffff) == 0); // Test vertical scale is multiple of 2.
// Advance to odd row, even column.
if (filtering == kFilterBilinear) {
src_uv += (y >> 16) * (int64_t)src_stride + (x >> 16) * 2;
src_uv += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2;
} else {
src_uv += (y >> 16) * (int64_t)src_stride + ((x >> 16) - 1) * 2;
src_uv += (y >> 16) * (intptr_t)src_stride + ((x >> 16) - 1) * 2;
}
#if defined(HAS_SCALEUVROWDOWN2BOX_SSSE3)
@ -200,7 +200,7 @@ static void ScaleUVDown4Box(int src_width,
uint8_t* dst_uv, int dst_width) =
ScaleUVRowDown2Box_C;
// Advance to odd row, even column.
src_uv += (y >> 16) * (int64_t)src_stride + (x >> 16) * 2;
src_uv += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2;
(void)src_width;
(void)src_height;
(void)dx;
@ -263,7 +263,7 @@ static void ScaleUVDownEven(int src_width,
enum FilterMode filtering) {
int j;
int col_step = dx >> 16;
int row_stride = (dy >> 16) * (int64_t)src_stride;
ptrdiff_t row_stride = (ptrdiff_t)((dy >> 16) * (intptr_t)src_stride);
void (*ScaleUVRowDownEven)(const uint8_t* src_uv, ptrdiff_t src_stride,
int src_step, uint8_t* dst_uv, int dst_width) =
filtering ? ScaleUVRowDownEvenBox_C : ScaleUVRowDownEven_C;
@ -271,7 +271,7 @@ static void ScaleUVDownEven(int src_width,
(void)src_height;
assert(IS_ALIGNED(src_width, 2));
assert(IS_ALIGNED(src_height, 2));
src_uv += (y >> 16) * (int64_t)src_stride + (x >> 16) * 2;
src_uv += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2;
#if defined(HAS_SCALEUVROWDOWNEVEN_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ScaleUVRowDownEven = filtering ? ScaleUVRowDownEvenBox_Any_SSSE3
@ -338,10 +338,10 @@ static void ScaleUVBilinearDown(int src_width,
int dy,
enum FilterMode filtering) {
int j;
void (*InterpolateRow)(uint8_t * dst_uv, const uint8_t* src_uv,
void (*InterpolateRow)(uint8_t* dst_uv, const uint8_t* src_uv,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_C;
void (*ScaleUVFilterCols)(uint8_t * dst_uv, const uint8_t* src_uv,
void (*ScaleUVFilterCols)(uint8_t* dst_uv, const uint8_t* src_uv,
int dst_width, int x, int dx) =
(src_width >= 32768) ? ScaleUVFilterCols64_C : ScaleUVFilterCols_C;
int64_t xlast = x + (int64_t)(dst_width - 1) * dx;
@ -429,7 +429,7 @@ static void ScaleUVBilinearDown(int src_width,
}
for (j = 0; j < dst_height; ++j) {
int yi = y >> 16;
const uint8_t* src = src_uv + yi * (int64_t)src_stride;
const uint8_t* src = src_uv + yi * (intptr_t)src_stride;
if (filtering == kFilterLinear) {
ScaleUVFilterCols(dst_uv, src, dst_width, x, dx);
} else {
@ -464,10 +464,10 @@ static void ScaleUVBilinearUp(int src_width,
int dy,
enum FilterMode filtering) {
int j;
void (*InterpolateRow)(uint8_t * dst_uv, const uint8_t* src_uv,
void (*InterpolateRow)(uint8_t* dst_uv, const uint8_t* src_uv,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_C;
void (*ScaleUVFilterCols)(uint8_t * dst_uv, const uint8_t* src_uv,
void (*ScaleUVFilterCols)(uint8_t* dst_uv, const uint8_t* src_uv,
int dst_width, int x, int dx) =
filtering ? ScaleUVFilterCols_C : ScaleUVCols_C;
const int max_y = (src_height - 1) << 16;
@ -571,7 +571,7 @@ static void ScaleUVBilinearUp(int src_width,
{
int yi = y >> 16;
const uint8_t* src = src_uv + yi * (int64_t)src_stride;
const uint8_t* src = src_uv + yi * (intptr_t)src_stride;
// Allocate 2 rows of UV.
const int row_size = (dst_width * 2 + 15) & ~15;
@ -596,7 +596,7 @@ static void ScaleUVBilinearUp(int src_width,
if (y > max_y) {
y = max_y;
yi = y >> 16;
src = src_uv + yi * (int64_t)src_stride;
src = src_uv + yi * (intptr_t)src_stride;
}
if (yi != lasty) {
ScaleUVFilterCols(rowptr, src, dst_width, x, dx);
@ -663,13 +663,13 @@ void ScaleUVLinearUp2(int src_width,
#endif
if (dst_height == 1) {
ScaleRowUp(src_uv + ((src_height - 1) / 2) * (int64_t)src_stride, dst_uv,
ScaleRowUp(src_uv + ((src_height - 1) / 2) * (intptr_t)src_stride, dst_uv,
dst_width);
} else {
dy = FixedDiv(src_height - 1, dst_height - 1);
y = (1 << 15) - 1;
for (i = 0; i < dst_height; ++i) {
ScaleRowUp(src_uv + (y >> 16) * (int64_t)src_stride, dst_uv, dst_width);
ScaleRowUp(src_uv + (y >> 16) * (intptr_t)src_stride, dst_uv, dst_width);
dst_uv += dst_stride;
y += dy;
}
@ -770,13 +770,13 @@ void ScaleUVLinearUp2_16(int src_width,
#endif
if (dst_height == 1) {
ScaleRowUp(src_uv + ((src_height - 1) / 2) * (int64_t)src_stride, dst_uv,
ScaleRowUp(src_uv + ((src_height - 1) / 2) * (intptr_t)src_stride, dst_uv,
dst_width);
} else {
dy = FixedDiv(src_height - 1, dst_height - 1);
y = (1 << 15) - 1;
for (i = 0; i < dst_height; ++i) {
ScaleRowUp(src_uv + (y >> 16) * (int64_t)src_stride, dst_uv, dst_width);
ScaleRowUp(src_uv + (y >> 16) * (intptr_t)src_stride, dst_uv, dst_width);
dst_uv += dst_stride;
y += dy;
}
@ -854,7 +854,7 @@ static void ScaleUVSimple(int src_width,
int y,
int dy) {
int j;
void (*ScaleUVCols)(uint8_t * dst_uv, const uint8_t* src_uv, int dst_width,
void (*ScaleUVCols)(uint8_t* dst_uv, const uint8_t* src_uv, int dst_width,
int x, int dx) =
(src_width >= 32768) ? ScaleUVCols64_C : ScaleUVCols_C;
(void)src_height;
@ -889,7 +889,7 @@ static void ScaleUVSimple(int src_width,
}
for (j = 0; j < dst_height; ++j) {
ScaleUVCols(dst_uv, src_uv + (y >> 16) * (int64_t)src_stride, dst_width, x,
ScaleUVCols(dst_uv, src_uv + (y >> 16) * (intptr_t)src_stride, dst_width, x,
dx);
dst_uv += dst_stride;
y += dy;
@ -910,7 +910,7 @@ static int UVCopy(const uint8_t* src_uv,
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_uv = src_uv + (height - 1) * (int64_t)src_stride_uv;
src_uv = src_uv + (height - 1) * (intptr_t)src_stride_uv;
src_stride_uv = -src_stride_uv;
}
@ -930,7 +930,7 @@ static int UVCopy_16(const uint16_t* src_uv,
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_uv = src_uv + (height - 1) * (int64_t)src_stride_uv;
src_uv = src_uv + (height - 1) * (intptr_t)src_stride_uv;
src_stride_uv = -src_stride_uv;
}
@ -968,7 +968,7 @@ static void ScaleUV(const uint8_t* src,
// Negative src_height means invert the image.
if (src_height < 0) {
src_height = -src_height;
src = src + (src_height - 1) * (int64_t)src_stride;
src = src + (src_height - 1) * (intptr_t)src_stride;
src_stride = -src_stride;
}
ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
@ -983,7 +983,7 @@ static void ScaleUV(const uint8_t* src,
if (clip_y) {
int64_t clipf = (int64_t)(clip_y)*dy;
y += (clipf & 0xffff);
src += (clipf >> 16) * (int64_t)src_stride;
src += (clipf >> 16) * (intptr_t)src_stride;
dst += clip_y * dst_stride;
}
@ -1024,7 +1024,7 @@ static void ScaleUV(const uint8_t* src,
#ifdef HAS_UVCOPY
if (dx == 0x10000 && dy == 0x10000) {
// Straight copy.
UVCopy(src + (y >> 16) * (int64_t)src_stride + (x >> 16) * 2,
UVCopy(src + (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2,
src_stride, dst, dst_stride, clip_width, clip_height);
return;
}
@ -1118,7 +1118,7 @@ int UVScale_16(const uint16_t* src_uv,
// Negative src_height means invert the image.
if (src_height < 0) {
src_height = -src_height;
src_uv = src_uv + (src_height - 1) * (int64_t)src_stride_uv;
src_uv = src_uv + (src_height - 1) * (intptr_t)src_stride_uv;
src_stride_uv = -src_stride_uv;
}
src_width = Abs(src_width);
@ -1126,13 +1126,13 @@ int UVScale_16(const uint16_t* src_uv,
#ifdef HAS_UVCOPY
if (!filtering && src_width == dst_width && (src_height % dst_height == 0)) {
if (dst_height == 1) {
UVCopy_16(src_uv + ((src_height - 1) / 2) * (int64_t)src_stride_uv,
UVCopy_16(src_uv + ((src_height - 1) / 2) * (intptr_t)src_stride_uv,
src_stride_uv, dst_uv, dst_stride_uv, dst_width, dst_height);
} else {
dy = src_height / dst_height;
UVCopy_16(src_uv + ((dy - 1) / 2) * (int64_t)src_stride_uv,
dy * (int64_t)src_stride_uv, dst_uv, dst_stride_uv, dst_width,
dst_height);
UVCopy_16(src_uv + ((dy - 1) / 2) * (intptr_t)src_stride_uv,
(int)(dy * (intptr_t)src_stride_uv), dst_uv, dst_stride_uv,
dst_width, dst_height);
}
return 0;

View File

@ -48,6 +48,7 @@ namespace libyuv {
#define AR30ToAR30 ARGBCopy
#define ABGRToABGR ARGBCopy
// subsample amount uses a divide.
#define SUBSAMPLE(v, a) ((((v) + (a)-1)) / (a))
// Planar test
@ -180,6 +181,7 @@ TESTPLANARTOP(I212, uint16_t, 2, 2, 1, I012, uint16_t, 2, 2, 2, 12)
TESTPLANARTOP(I010, uint16_t, 2, 2, 2, I420, uint8_t, 1, 2, 2, 10)
TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I420, uint8_t, 1, 2, 2, 10)
TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I422, uint8_t, 1, 2, 1, 10)
TESTPLANARTOP(I410, uint16_t, 2, 1, 1, I420, uint8_t, 1, 2, 2, 10)
TESTPLANARTOP(I410, uint16_t, 2, 1, 1, I444, uint8_t, 1, 1, 1, 10)
TESTPLANARTOP(I012, uint16_t, 2, 2, 2, I420, uint8_t, 1, 2, 2, 12)
TESTPLANARTOP(I212, uint16_t, 2, 2, 1, I422, uint8_t, 1, 2, 1, 12)
@ -417,131 +419,136 @@ TESTPLANARTOBP(I210, uint16_t, 2, 2, 1, P210, uint16_t, 2, 2, 1, 10)
TESTPLANARTOBP(I012, uint16_t, 2, 2, 2, P012, uint16_t, 2, 2, 2, 12)
TESTPLANARTOBP(I212, uint16_t, 2, 2, 1, P212, uint16_t, 2, 2, 1, 12)
#define TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \
DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF, \
DOY, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported"); \
static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \
static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2, \
"SRC_SUBSAMP_X unsupported"); \
static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2, \
"SRC_SUBSAMP_Y unsupported"); \
static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2, \
"DST_SUBSAMP_X unsupported"); \
static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2, \
"DST_SUBSAMP_Y unsupported"); \
const int kWidth = W1280; \
const int kHeight = benchmark_height_; \
const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X); \
const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \
const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y); \
const int kPaddedWidth = (kWidth + (TILE_WIDTH - 1)) & ~(TILE_WIDTH - 1); \
const int kPaddedHeight = \
(kHeight + (TILE_HEIGHT - 1)) & ~(TILE_HEIGHT - 1); \
const int kSrcHalfPaddedWidth = SUBSAMPLE(kPaddedWidth, SRC_SUBSAMP_X); \
const int kSrcHalfPaddedHeight = SUBSAMPLE(kPaddedHeight, SRC_SUBSAMP_Y); \
align_buffer_page_end(src_y, kPaddedWidth* kPaddedHeight* SRC_BPC + OFF); \
align_buffer_page_end( \
src_uv, \
2 * kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * SRC_BPC + OFF); \
align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \
align_buffer_page_end(dst_uv_c, \
2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \
align_buffer_page_end(dst_uv_opt, \
2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF); \
SRC_T* src_uv_p = reinterpret_cast<SRC_T*>(src_uv + OFF); \
for (int i = 0; i < kPaddedWidth * kPaddedHeight; ++i) { \
src_y_p[i] = \
(fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \
} \
for (int i = 0; i < kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * 2; ++i) { \
src_uv_p[i] = \
(fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \
} \
memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \
memset(dst_uv_c, 2, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \
memset(dst_uv_opt, 102, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
MaskCpuFlags(disable_cpu_flags_); \
SRC_FMT_PLANAR##To##FMT_PLANAR( \
src_y_p, kWidth, src_uv_p, 2 * kSrcHalfWidth, \
DOY ? reinterpret_cast<DST_T*>(dst_y_c) : NULL, kWidth, \
reinterpret_cast<DST_T*>(dst_uv_c), 2 * kDstHalfWidth, kWidth, \
NEG kHeight); \
MaskCpuFlags(benchmark_cpu_info_); \
for (int i = 0; i < benchmark_iterations_; ++i) { \
SRC_FMT_PLANAR##To##FMT_PLANAR( \
src_y_p, kWidth, src_uv_p, 2 * kSrcHalfWidth, \
DOY ? reinterpret_cast<DST_T*>(dst_y_opt) : NULL, kWidth, \
reinterpret_cast<DST_T*>(dst_uv_opt), 2 * kDstHalfWidth, kWidth, \
NEG kHeight); \
} \
if (DOY) { \
for (int i = 0; i < kHeight; ++i) { \
for (int j = 0; j < kWidth; ++j) { \
EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \
} \
} \
} \
for (int i = 0; i < kDstHalfHeight; ++i) { \
for (int j = 0; j < 2 * kDstHalfWidth; ++j) { \
EXPECT_EQ(dst_uv_c[i * 2 * kDstHalfWidth + j], \
dst_uv_opt[i * 2 * kDstHalfWidth + j]); \
} \
} \
free_aligned_buffer_page_end(dst_y_c); \
free_aligned_buffer_page_end(dst_uv_c); \
free_aligned_buffer_page_end(dst_y_opt); \
free_aligned_buffer_page_end(dst_uv_opt); \
free_aligned_buffer_page_end(src_y); \
free_aligned_buffer_page_end(src_uv); \
// TESTBPTOBPI: defines one gtest case, SRC##To##DST##N, converting a
// biplanar (Y plane + interleaved UV plane) format to another biplanar
// format.
// - Source planes are padded up to TILE_WIDTH/TILE_HEIGHT multiples and
//   filled with random samples masked to SRC_DEPTH significant bits.
// - The C path runs once with CPU features disabled, then the optimized
//   path runs benchmark_iterations_ times; outputs are EXPECT_EQ-compared.
// - W1280 is the test width; NEG (+/-) optionally inverts the height; OFF
//   mis-aligns the source pointers; DOY == 0 passes NULL for dst_y.
// - Element counts and strides are scaled by SRC_BPC / sizeof(SRC_T) so
//   formats with fractional bytes per component (e.g. MT2T, SRC_BPC 10/8)
//   are sized correctly.
#define TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,            \
                    SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
                    DST_SUBSAMP_Y, W1280, N, NEG, OFF, DOY, SRC_DEPTH,        \
                    TILE_WIDTH, TILE_HEIGHT)                                  \
  TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) {              \
    static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported");       \
    static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2,                   \
                  "SRC_SUBSAMP_X unsupported");                               \
    static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2,                   \
                  "SRC_SUBSAMP_Y unsupported");                               \
    static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2,                   \
                  "DST_SUBSAMP_X unsupported");                               \
    static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2,                   \
                  "DST_SUBSAMP_Y unsupported");                               \
    const int kWidth = W1280;                                                 \
    const int kHeight = benchmark_height_;                                    \
    const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X);               \
    const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X);               \
    const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y);             \
    const int kPaddedWidth = (kWidth + (TILE_WIDTH - 1)) & ~(TILE_WIDTH - 1); \
    const int kPaddedHeight =                                                 \
        (kHeight + (TILE_HEIGHT - 1)) & ~(TILE_HEIGHT - 1);                   \
    const int kSrcHalfPaddedWidth = SUBSAMPLE(kPaddedWidth, SRC_SUBSAMP_X);   \
    const int kSrcHalfPaddedHeight = SUBSAMPLE(kPaddedHeight, SRC_SUBSAMP_Y); \
    align_buffer_page_end(src_y, kPaddedWidth* kPaddedHeight* SRC_BPC + OFF); \
    align_buffer_page_end(                                                    \
        src_uv,                                                               \
        2 * kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * SRC_BPC + OFF);      \
    align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC);                 \
    align_buffer_page_end(dst_uv_c,                                          \
                          2 * kDstHalfWidth * kDstHalfHeight * DST_BPC);      \
    align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC);               \
    align_buffer_page_end(dst_uv_opt,                                         \
                          2 * kDstHalfWidth * kDstHalfHeight * DST_BPC);      \
    SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF);                   \
    SRC_T* src_uv_p = reinterpret_cast<SRC_T*>(src_uv + OFF);                 \
    for (int i = 0;                                                           \
         i < kPaddedWidth * kPaddedHeight * SRC_BPC / (int)sizeof(SRC_T);     \
         ++i) {                                                               \
      src_y_p[i] =                                                            \
          (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH)));      \
    }                                                                         \
    for (int i = 0; i < kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * 2 *      \
                            SRC_BPC / (int)sizeof(SRC_T);                     \
         ++i) {                                                               \
      src_uv_p[i] =                                                           \
          (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH)));      \
    }                                                                         \
    memset(dst_y_c, 1, kWidth* kHeight* DST_BPC);                             \
    memset(dst_uv_c, 2, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC);        \
    memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC);                         \
    memset(dst_uv_opt, 102, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC);    \
    MaskCpuFlags(disable_cpu_flags_);                                         \
    SRC_FMT_PLANAR##To##FMT_PLANAR(                                           \
        src_y_p, kWidth* SRC_BPC / (int)sizeof(SRC_T), src_uv_p,              \
        2 * kSrcHalfWidth * SRC_BPC / (int)sizeof(SRC_T),                     \
        DOY ? reinterpret_cast<DST_T*>(dst_y_c) : NULL, kWidth,               \
        reinterpret_cast<DST_T*>(dst_uv_c), 2 * kDstHalfWidth, kWidth,        \
        NEG kHeight);                                                         \
    MaskCpuFlags(benchmark_cpu_info_);                                        \
    for (int i = 0; i < benchmark_iterations_; ++i) {                         \
      SRC_FMT_PLANAR##To##FMT_PLANAR(                                         \
          src_y_p, kWidth* SRC_BPC / (int)sizeof(SRC_T), src_uv_p,            \
          2 * kSrcHalfWidth * SRC_BPC / (int)sizeof(SRC_T),                   \
          DOY ? reinterpret_cast<DST_T*>(dst_y_opt) : NULL, kWidth,           \
          reinterpret_cast<DST_T*>(dst_uv_opt), 2 * kDstHalfWidth, kWidth,    \
          NEG kHeight);                                                       \
    }                                                                         \
    if (DOY) {                                                                \
      for (int i = 0; i < kHeight; ++i) {                                     \
        for (int j = 0; j < kWidth; ++j) {                                    \
          EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]);      \
        }                                                                     \
      }                                                                       \
    }                                                                         \
    for (int i = 0; i < kDstHalfHeight; ++i) {                                \
      for (int j = 0; j < 2 * kDstHalfWidth; ++j) {                           \
        EXPECT_EQ(dst_uv_c[i * 2 * kDstHalfWidth + j],                        \
                  dst_uv_opt[i * 2 * kDstHalfWidth + j]);                     \
      }                                                                       \
    }                                                                         \
    free_aligned_buffer_page_end(dst_y_c);                                    \
    free_aligned_buffer_page_end(dst_uv_c);                                   \
    free_aligned_buffer_page_end(dst_y_opt);                                  \
    free_aligned_buffer_page_end(dst_uv_opt);                                 \
    free_aligned_buffer_page_end(src_y);                                      \
    free_aligned_buffer_page_end(src_uv);                                     \
  }
// TESTBIPLANARTOBP: expands TESTBIPLANARTOBPI into the five standard test
// variants for one biplanar-to-biplanar conversion:
//   _Any (odd width), _Unaligned (source offset by 2), _Invert (negative
//   height), _Opt (aligned fast path), _NullY (dst_y passed as NULL).
#define TESTBIPLANARTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,       \
                         SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC,           \
                         DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, \
                         TILE_HEIGHT)                                         \
  TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,            \
                    SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
                    DST_SUBSAMP_Y, benchmark_width_ + 1, _Any, +, 0, 1,       \
                    SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT)                       \
  TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,            \
                    SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
                    DST_SUBSAMP_Y, benchmark_width_, _Unaligned, +, 2, 1,     \
                    SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT)                       \
  TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,            \
                    SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
                    DST_SUBSAMP_Y, benchmark_width_, _Invert, -, 0, 1,        \
                    SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT)                       \
  TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,            \
                    SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
                    DST_SUBSAMP_Y, benchmark_width_, _Opt, +, 0, 1, SRC_DEPTH, \
                    TILE_WIDTH, TILE_HEIGHT)                                  \
  TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,            \
                    SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
                    DST_SUBSAMP_Y, benchmark_width_, _NullY, +, 0, 0,         \
                    SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT)
// TESTBPTOBP: renamed counterpart of TESTBIPLANARTOBP ("BP" = biplanar).
// Expands TESTBPTOBPI into the five standard variants: _Any (odd width),
// _Unaligned (source offset by 2), _Invert (negative height), _Opt
// (aligned fast path), _NullY (dst_y passed as NULL).
#define TESTBPTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,            \
                   SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
                   DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT)        \
  TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
              FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,      \
              benchmark_width_ + 1, _Any, +, 0, 1, SRC_DEPTH, TILE_WIDTH,    \
              TILE_HEIGHT)                                                   \
  TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
              FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,      \
              benchmark_width_, _Unaligned, +, 2, 1, SRC_DEPTH, TILE_WIDTH,  \
              TILE_HEIGHT)                                                   \
  TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
              FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,      \
              benchmark_width_, _Invert, -, 0, 1, SRC_DEPTH, TILE_WIDTH,     \
              TILE_HEIGHT)                                                   \
  TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
              FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,      \
              benchmark_width_, _Opt, +, 0, 1, SRC_DEPTH, TILE_WIDTH,        \
              TILE_HEIGHT)                                                   \
  TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
              FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,      \
              benchmark_width_, _NullY, +, 0, 0, SRC_DEPTH, TILE_WIDTH,      \
              TILE_HEIGHT)
TESTBIPLANARTOBP(NV21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 1, 1)
TESTBIPLANARTOBP(NV12, uint8_t, 1, 2, 2, NV12Mirror, uint8_t, 1, 2, 2, 8, 1, 1)
TESTBIPLANARTOBP(NV12, uint8_t, 1, 2, 2, NV24, uint8_t, 1, 1, 1, 8, 1, 1)
TESTBIPLANARTOBP(NV16, uint8_t, 1, 2, 1, NV24, uint8_t, 1, 1, 1, 8, 1, 1)
TESTBIPLANARTOBP(P010, uint16_t, 2, 2, 2, P410, uint16_t, 2, 1, 1, 10, 1, 1)
TESTBIPLANARTOBP(P210, uint16_t, 2, 2, 1, P410, uint16_t, 2, 1, 1, 10, 1, 1)
TESTBIPLANARTOBP(P012, uint16_t, 2, 2, 2, P412, uint16_t, 2, 1, 1, 10, 1, 1)
TESTBIPLANARTOBP(P212, uint16_t, 2, 2, 1, P412, uint16_t, 2, 1, 1, 12, 1, 1)
TESTBIPLANARTOBP(P016, uint16_t, 2, 2, 2, P416, uint16_t, 2, 1, 1, 12, 1, 1)
TESTBIPLANARTOBP(P216, uint16_t, 2, 2, 1, P416, uint16_t, 2, 1, 1, 12, 1, 1)
TESTBIPLANARTOBP(MM21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 16, 32)
TESTBPTOBP(NV21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 1, 1)
TESTBPTOBP(NV12, uint8_t, 1, 2, 2, NV12Mirror, uint8_t, 1, 2, 2, 8, 1, 1)
TESTBPTOBP(NV12, uint8_t, 1, 2, 2, NV24, uint8_t, 1, 1, 1, 8, 1, 1)
TESTBPTOBP(NV16, uint8_t, 1, 2, 1, NV24, uint8_t, 1, 1, 1, 8, 1, 1)
TESTBPTOBP(P010, uint16_t, 2, 2, 2, P410, uint16_t, 2, 1, 1, 10, 1, 1)
TESTBPTOBP(P210, uint16_t, 2, 2, 1, P410, uint16_t, 2, 1, 1, 10, 1, 1)
TESTBPTOBP(P012, uint16_t, 2, 2, 2, P412, uint16_t, 2, 1, 1, 10, 1, 1)
TESTBPTOBP(P212, uint16_t, 2, 2, 1, P412, uint16_t, 2, 1, 1, 12, 1, 1)
TESTBPTOBP(P016, uint16_t, 2, 2, 2, P416, uint16_t, 2, 1, 1, 12, 1, 1)
TESTBPTOBP(P216, uint16_t, 2, 2, 1, P416, uint16_t, 2, 1, 1, 12, 1, 1)
TESTBPTOBP(MM21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 16, 32)
TESTBPTOBP(MT2T, uint8_t, 10 / 8, 2, 2, P010, uint16_t, 2, 2, 2, 10, 16, 32)
#define TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \
DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF, \
SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
#define TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
DST_SUBSAMP_Y, W1280, N, NEG, OFF, SRC_DEPTH, TILE_WIDTH, \
TILE_HEIGHT) \
TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported"); \
static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \
@ -621,30 +628,30 @@ TESTBIPLANARTOBP(MM21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 16, 32)
free_aligned_buffer_page_end(src_uv); \
}
// TESTBIPLANARTOP: expands TESTBIPLANARTOPI (biplanar-to-planar) into the
// four standard variants: _Any (odd width), _Unaligned (source offset by
// 2), _Invert (negative height), _Opt (aligned fast path).
#define TESTBIPLANARTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,        \
                        SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC,            \
                        DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH,  \
                        TILE_HEIGHT)                                          \
  TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,             \
                   SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X,  \
                   DST_SUBSAMP_Y, benchmark_width_ + 1, _Any, +, 0, SRC_DEPTH, \
                   TILE_WIDTH, TILE_HEIGHT)                                   \
  TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,             \
                   SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X,  \
                   DST_SUBSAMP_Y, benchmark_width_, _Unaligned, +, 2,         \
                   SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT)                        \
  TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,             \
                   SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X,  \
                   DST_SUBSAMP_Y, benchmark_width_, _Invert, -, 0, SRC_DEPTH, \
                   TILE_WIDTH, TILE_HEIGHT)                                   \
  TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,             \
                   SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X,  \
                   DST_SUBSAMP_Y, benchmark_width_, _Opt, +, 0, SRC_DEPTH,    \
                   TILE_WIDTH, TILE_HEIGHT)
// TESTBPTOP: renamed counterpart of TESTBIPLANARTOP ("BP" = biplanar).
// Expands TESTBPTOPI into the four standard variants: _Any, _Unaligned,
// _Invert, _Opt.
#define TESTBPTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,            \
                  SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
                  DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT)        \
  TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
             FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,      \
             benchmark_width_ + 1, _Any, +, 0, SRC_DEPTH, TILE_WIDTH,       \
             TILE_HEIGHT)                                                   \
  TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
             FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,      \
             benchmark_width_, _Unaligned, +, 2, SRC_DEPTH, TILE_WIDTH,     \
             TILE_HEIGHT)                                                   \
  TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
             FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,      \
             benchmark_width_, _Invert, -, 0, SRC_DEPTH, TILE_WIDTH,        \
             TILE_HEIGHT)                                                   \
  TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
             FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,      \
             benchmark_width_, _Opt, +, 0, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT)
TESTBIPLANARTOP(NV12, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1)
TESTBIPLANARTOP(NV21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1)
TESTBIPLANARTOP(MM21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 16, 32)
TESTBPTOP(NV12, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1)
TESTBPTOP(NV21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1)
TESTBPTOP(MM21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 16, 32)
TESTBPTOP(P010, uint16_t, 2, 2, 2, I010, uint16_t, 2, 2, 2, 10, 1, 1)
TESTBPTOP(P012, uint16_t, 2, 2, 2, I012, uint16_t, 2, 2, 2, 12, 1, 1)
// Provide matrix wrappers for full range bt.709
#define F420ToABGR(a, b, c, d, e, f, g, h, i, j) \
@ -1069,8 +1076,8 @@ TESTQPLANARTOB(I420Alpha, 2, 2, ARGBFilter, 4, 4, 1)
TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1)
#endif
#define TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, \
BPP_B, W1280, N, NEG, OFF) \
#define TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
W1280, N, NEG, OFF) \
TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \
const int kWidth = W1280; \
const int kHeight = benchmark_height_; \
@ -1123,15 +1130,15 @@ TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1)
free_aligned_buffer_page_end(dst_argb32_opt); \
}
// TESTBIPLANARTOB: expands TESTBIPLANARTOBI (biplanar-to-RGB) into the
// four standard variants: _Any, _Unaligned, _Invert, _Opt.
#define TESTBIPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B) \
  TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B,     \
                   benchmark_width_ + 1, _Any, +, 0)                          \
  TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B,     \
                   benchmark_width_, _Unaligned, +, 2)                        \
  TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B,     \
                   benchmark_width_, _Invert, -, 0)                           \
  TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B,     \
                   benchmark_width_, _Opt, +, 0)

// TESTBPTOB: renamed counterpart of TESTBIPLANARTOB ("BP" = biplanar).
#define TESTBPTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B) \
  TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B,      \
             benchmark_width_ + 1, _Any, +, 0)                           \
  TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B,      \
             benchmark_width_, _Unaligned, +, 2)                         \
  TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B,      \
             benchmark_width_, _Invert, -, 0)                            \
  TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B,      \
             benchmark_width_, _Opt, +, 0)
#define JNV12ToARGB(a, b, c, d, e, f, g, h) \
NV12ToARGBMatrix(a, b, c, d, e, f, &kYuvJPEGConstants, g, h)
@ -1152,29 +1159,29 @@ TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1)
#define JNV12ToRGB565(a, b, c, d, e, f, g, h) \
NV12ToRGB565Matrix(a, b, c, d, e, f, &kYuvJPEGConstants, g, h)
TESTBIPLANARTOB(JNV12, 2, 2, ARGB, ARGB, 4)
TESTBIPLANARTOB(JNV21, 2, 2, ARGB, ARGB, 4)
TESTBIPLANARTOB(JNV12, 2, 2, ABGR, ABGR, 4)
TESTBIPLANARTOB(JNV21, 2, 2, ABGR, ABGR, 4)
TESTBIPLANARTOB(JNV12, 2, 2, RGB24, RGB24, 3)
TESTBIPLANARTOB(JNV21, 2, 2, RGB24, RGB24, 3)
TESTBIPLANARTOB(JNV12, 2, 2, RAW, RAW, 3)
TESTBIPLANARTOB(JNV21, 2, 2, RAW, RAW, 3)
TESTBPTOB(JNV12, 2, 2, ARGB, ARGB, 4)
TESTBPTOB(JNV21, 2, 2, ARGB, ARGB, 4)
TESTBPTOB(JNV12, 2, 2, ABGR, ABGR, 4)
TESTBPTOB(JNV21, 2, 2, ABGR, ABGR, 4)
TESTBPTOB(JNV12, 2, 2, RGB24, RGB24, 3)
TESTBPTOB(JNV21, 2, 2, RGB24, RGB24, 3)
TESTBPTOB(JNV12, 2, 2, RAW, RAW, 3)
TESTBPTOB(JNV21, 2, 2, RAW, RAW, 3)
#ifdef LITTLE_ENDIAN_ONLY_TEST
TESTBIPLANARTOB(JNV12, 2, 2, RGB565, RGB565, 2)
TESTBPTOB(JNV12, 2, 2, RGB565, RGB565, 2)
#endif
TESTBIPLANARTOB(NV12, 2, 2, ARGB, ARGB, 4)
TESTBIPLANARTOB(NV21, 2, 2, ARGB, ARGB, 4)
TESTBIPLANARTOB(NV12, 2, 2, ABGR, ABGR, 4)
TESTBIPLANARTOB(NV21, 2, 2, ABGR, ABGR, 4)
TESTBIPLANARTOB(NV12, 2, 2, RGB24, RGB24, 3)
TESTBIPLANARTOB(NV21, 2, 2, RGB24, RGB24, 3)
TESTBIPLANARTOB(NV12, 2, 2, RAW, RAW, 3)
TESTBIPLANARTOB(NV21, 2, 2, RAW, RAW, 3)
TESTBIPLANARTOB(NV21, 2, 2, YUV24, RAW, 3)
TESTBPTOB(NV12, 2, 2, ARGB, ARGB, 4)
TESTBPTOB(NV21, 2, 2, ARGB, ARGB, 4)
TESTBPTOB(NV12, 2, 2, ABGR, ABGR, 4)
TESTBPTOB(NV21, 2, 2, ABGR, ABGR, 4)
TESTBPTOB(NV12, 2, 2, RGB24, RGB24, 3)
TESTBPTOB(NV21, 2, 2, RGB24, RGB24, 3)
TESTBPTOB(NV12, 2, 2, RAW, RAW, 3)
TESTBPTOB(NV21, 2, 2, RAW, RAW, 3)
TESTBPTOB(NV21, 2, 2, YUV24, RAW, 3)
#ifdef LITTLE_ENDIAN_ONLY_TEST
TESTBIPLANARTOB(NV12, 2, 2, RGB565, RGB565, 2)
TESTBPTOB(NV12, 2, 2, RGB565, RGB565, 2)
#endif
#define TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
@ -1269,8 +1276,8 @@ TESTATOPLANAR(UYVY, 2, 1, I422, 2, 1)
TESTATOPLANAR(YUY2, 2, 1, I420, 2, 2)
TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1)
#define TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, \
SUBSAMP_Y, W1280, N, NEG, OFF) \
#define TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
W1280, N, NEG, OFF) \
TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \
const int kWidth = W1280; \
const int kHeight = benchmark_height_; \
@ -1316,25 +1323,25 @@ TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1)
free_aligned_buffer_page_end(src_argb); \
}
// TESTATOBIPLANAR: expands TESTATOBIPLANARI (packed/ARGB-style source to
// biplanar destination) into the four standard variants: _Any, _Unaligned,
// _Invert, _Opt.
#define TESTATOBIPLANAR(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
  TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,     \
                   benchmark_width_ + 1, _Any, +, 0)                          \
  TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,     \
                   benchmark_width_, _Unaligned, +, 2)                        \
  TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,     \
                   benchmark_width_, _Invert, -, 0)                           \
  TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,     \
                   benchmark_width_, _Opt, +, 0)

// TESTATOBP: renamed counterpart of TESTATOBIPLANAR ("BP" = biplanar).
#define TESTATOBP(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
  TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,      \
             benchmark_width_ + 1, _Any, +, 0)                           \
  TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,      \
             benchmark_width_, _Unaligned, +, 2)                         \
  TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,      \
             benchmark_width_, _Invert, -, 0)                            \
  TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,      \
             benchmark_width_, _Opt, +, 0)
TESTATOBIPLANAR(ARGB, 1, 4, NV12, 2, 2)
TESTATOBIPLANAR(ARGB, 1, 4, NV21, 2, 2)
TESTATOBIPLANAR(ABGR, 1, 4, NV12, 2, 2)
TESTATOBIPLANAR(ABGR, 1, 4, NV21, 2, 2)
TESTATOBIPLANAR(RAW, 1, 3, JNV21, 2, 2)
TESTATOBIPLANAR(YUY2, 2, 4, NV12, 2, 2)
TESTATOBIPLANAR(UYVY, 2, 4, NV12, 2, 2)
TESTATOBIPLANAR(AYUV, 1, 4, NV12, 2, 2)
TESTATOBIPLANAR(AYUV, 1, 4, NV21, 2, 2)
TESTATOBP(ARGB, 1, 4, NV12, 2, 2)
TESTATOBP(ARGB, 1, 4, NV21, 2, 2)
TESTATOBP(ABGR, 1, 4, NV12, 2, 2)
TESTATOBP(ABGR, 1, 4, NV21, 2, 2)
TESTATOBP(RAW, 1, 3, JNV21, 2, 2)
TESTATOBP(YUY2, 2, 4, NV12, 2, 2)
TESTATOBP(UYVY, 2, 4, NV12, 2, 2)
TESTATOBP(AYUV, 1, 4, NV12, 2, 2)
TESTATOBP(AYUV, 1, 4, NV21, 2, 2)
#define TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, \
EPP_B, STRIDE_B, HEIGHT_B, W1280, N, NEG, OFF) \
@ -3915,8 +3922,8 @@ TESTQPLANAR16TOB(I010Alpha, 2, 2, ARGBFilter, 4, 4, 1, 10)
TESTQPLANAR16TOB(I210Alpha, 2, 1, ARGBFilter, 4, 4, 1, 10)
#endif // DISABLE_SLOW_TESTS
#define TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
ALIGN, YALIGN, W1280, N, NEG, SOFF, DOFF, S_DEPTH) \
#define TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
YALIGN, W1280, N, NEG, SOFF, DOFF, S_DEPTH) \
TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \
const int kWidth = W1280; \
const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
@ -3959,16 +3966,16 @@ TESTQPLANAR16TOB(I210Alpha, 2, 1, ARGBFilter, 4, 4, 1, 10)
free_aligned_buffer_page_end(dst_argb_opt); \
}
// TESTBIPLANAR16TOB: expands TESTBIPLANAR16TOBI (16-bit biplanar source,
// e.g. P010/P210, to RGB) into the four standard variants. The _Unaligned
// variant offsets both source and destination by 4 bytes (SOFF/DOFF).
#define TESTBIPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B,     \
                          ALIGN, YALIGN, S_DEPTH)                             \
  TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,   \
                     YALIGN, benchmark_width_ + 1, _Any, +, 0, 0, S_DEPTH)    \
  TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,   \
                     YALIGN, benchmark_width_, _Unaligned, +, 4, 4, S_DEPTH)  \
  TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,   \
                     YALIGN, benchmark_width_, _Invert, -, 0, 0, S_DEPTH)     \
  TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,   \
                     YALIGN, benchmark_width_, _Opt, +, 0, 0, S_DEPTH)

// TESTBP16TOB: renamed counterpart of TESTBIPLANAR16TOB ("BP" = biplanar).
#define TESTBP16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,    \
                    YALIGN, S_DEPTH)                                          \
  TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \
               benchmark_width_ + 1, _Any, +, 0, 0, S_DEPTH)                  \
  TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \
               benchmark_width_, _Unaligned, +, 4, 4, S_DEPTH)                \
  TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \
               benchmark_width_, _Invert, -, 0, 0, S_DEPTH)                   \
  TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \
               benchmark_width_, _Opt, +, 0, 0, S_DEPTH)
#define P010ToARGB(a, b, c, d, e, f, g, h) \
P010ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
@ -4011,23 +4018,23 @@ TESTQPLANAR16TOB(I210Alpha, 2, 1, ARGBFilter, 4, 4, 1, 10)
kFilterBilinear)
#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__)
TESTBIPLANAR16TOB(P010, 2, 2, ARGB, 4, 4, 1, 10)
TESTBIPLANAR16TOB(P210, 2, 1, ARGB, 4, 4, 1, 10)
TESTBIPLANAR16TOB(P012, 2, 2, ARGB, 4, 4, 1, 12)
TESTBIPLANAR16TOB(P212, 2, 1, ARGB, 4, 4, 1, 12)
TESTBIPLANAR16TOB(P016, 2, 2, ARGB, 4, 4, 1, 16)
TESTBIPLANAR16TOB(P216, 2, 1, ARGB, 4, 4, 1, 16)
TESTBIPLANAR16TOB(P010, 2, 2, ARGBFilter, 4, 4, 1, 10)
TESTBIPLANAR16TOB(P210, 2, 1, ARGBFilter, 4, 4, 1, 10)
TESTBP16TOB(P010, 2, 2, ARGB, 4, 4, 1, 10)
TESTBP16TOB(P210, 2, 1, ARGB, 4, 4, 1, 10)
TESTBP16TOB(P012, 2, 2, ARGB, 4, 4, 1, 12)
TESTBP16TOB(P212, 2, 1, ARGB, 4, 4, 1, 12)
TESTBP16TOB(P016, 2, 2, ARGB, 4, 4, 1, 16)
TESTBP16TOB(P216, 2, 1, ARGB, 4, 4, 1, 16)
TESTBP16TOB(P010, 2, 2, ARGBFilter, 4, 4, 1, 10)
TESTBP16TOB(P210, 2, 1, ARGBFilter, 4, 4, 1, 10)
#ifdef LITTLE_ENDIAN_ONLY_TEST
TESTBIPLANAR16TOB(P010, 2, 2, AR30, 4, 4, 1, 10)
TESTBIPLANAR16TOB(P210, 2, 1, AR30, 4, 4, 1, 10)
TESTBIPLANAR16TOB(P012, 2, 2, AR30, 4, 4, 1, 12)
TESTBIPLANAR16TOB(P212, 2, 1, AR30, 4, 4, 1, 12)
TESTBIPLANAR16TOB(P016, 2, 2, AR30, 4, 4, 1, 16)
TESTBIPLANAR16TOB(P216, 2, 1, AR30, 4, 4, 1, 16)
TESTBIPLANAR16TOB(P010, 2, 2, AR30Filter, 4, 4, 1, 10)
TESTBIPLANAR16TOB(P210, 2, 1, AR30Filter, 4, 4, 1, 10)
TESTBP16TOB(P010, 2, 2, AR30, 4, 4, 1, 10)
TESTBP16TOB(P210, 2, 1, AR30, 4, 4, 1, 10)
TESTBP16TOB(P012, 2, 2, AR30, 4, 4, 1, 12)
TESTBP16TOB(P212, 2, 1, AR30, 4, 4, 1, 12)
TESTBP16TOB(P016, 2, 2, AR30, 4, 4, 1, 16)
TESTBP16TOB(P216, 2, 1, AR30, 4, 4, 1, 16)
TESTBP16TOB(P010, 2, 2, AR30Filter, 4, 4, 1, 10)
TESTBP16TOB(P210, 2, 1, AR30Filter, 4, 4, 1, 10)
#endif // LITTLE_ENDIAN_ONLY_TEST
#endif // DISABLE_SLOW_TESTS

View File

@ -225,4 +225,110 @@ TEST_F(LibYUVRotateTest, RotatePlane90_TestStride) {
free_aligned_buffer_page_end(src_argb);
}
// Rotates a single 16-bit plane with RotatePlane_16 via both the C
// reference path (CPU features masked off) and the optimized path, and
// verifies the two outputs match bit-for-bit.
// For kRotate90/kRotate270 callers pass swapped dst_width/dst_height.
static void TestRotatePlane_16(int src_width,
                               int src_height,
                               int dst_width,
                               int dst_height,
                               libyuv::RotationMode mode,
                               int benchmark_iterations,
                               int disable_cpu_flags,
                               int benchmark_cpu_info) {
  // Clamp degenerate dimensions so buffer sizes stay positive.
  if (src_width < 1) {
    src_width = 1;
  }
  if (src_height < 1) {
    src_height = 1;
  }
  if (dst_width < 1) {
    dst_width = 1;
  }
  if (dst_height < 1) {
    dst_height = 1;
  }
  int src_stride = src_width;
  int src_plane_size = src_stride * abs(src_height);
  align_buffer_page_end_16(src, src_plane_size);
  // Random 8-bit values in a 16-bit plane (mirrors the 8-bit RotatePlane
  // test's fill pattern).
  for (int i = 0; i < src_plane_size; ++i) {
    src[i] = fastrand() & 0xff;
  }
  int dst_stride = dst_width;
  int dst_plane_size = dst_stride * dst_height;
  align_buffer_page_end_16(dst_c, dst_plane_size);
  align_buffer_page_end_16(dst_opt, dst_plane_size);
  // Elements are uint16_t, so the byte count is dst_plane_size * 2.
  // Previously only half of each buffer was initialized; use the full
  // byte size, matching the memset convention in I010TestRotate.
  memset(dst_c, 2, dst_plane_size * 2);
  memset(dst_opt, 3, dst_plane_size * 2);
  MaskCpuFlags(disable_cpu_flags);  // Disable all CPU optimization.
  RotatePlane_16(src, src_stride, dst_c, dst_stride, src_width, src_height,
                 mode);
  MaskCpuFlags(benchmark_cpu_info);  // Enable all CPU optimization.
  for (int i = 0; i < benchmark_iterations; ++i) {
    RotatePlane_16(src, src_stride, dst_opt, dst_stride, src_width, src_height,
                   mode);
  }
  // Rotation should be exact: C and optimized paths must agree.
  for (int i = 0; i < dst_plane_size; ++i) {
    EXPECT_EQ(dst_c[i], dst_opt[i]);
  }
  free_aligned_buffer_page_end_16(dst_c);
  free_aligned_buffer_page_end_16(dst_opt);
  free_aligned_buffer_page_end_16(src);
}
// Benchmark-sized 16-bit plane rotation through each mode; 90/270 swap the
// destination width and height.
TEST_F(LibYUVRotateTest, RotatePlane0_16_Opt) {
  TestRotatePlane_16(benchmark_width_, benchmark_height_, benchmark_width_,
                     benchmark_height_, kRotate0, benchmark_iterations_,
                     disable_cpu_flags_, benchmark_cpu_info_);
}

TEST_F(LibYUVRotateTest, RotatePlane90_16_Opt) {
  TestRotatePlane_16(benchmark_width_, benchmark_height_, benchmark_height_,
                     benchmark_width_, kRotate90, benchmark_iterations_,
                     disable_cpu_flags_, benchmark_cpu_info_);
}

TEST_F(LibYUVRotateTest, RotatePlane180_16_Opt) {
  TestRotatePlane_16(benchmark_width_, benchmark_height_, benchmark_width_,
                     benchmark_height_, kRotate180, benchmark_iterations_,
                     disable_cpu_flags_, benchmark_cpu_info_);
}

TEST_F(LibYUVRotateTest, RotatePlane270_16_Opt) {
  TestRotatePlane_16(benchmark_width_, benchmark_height_, benchmark_height_,
                     benchmark_width_, kRotate270, benchmark_iterations_,
                     disable_cpu_flags_, benchmark_cpu_info_);
}

// Same four rotations with odd (benchmark + 1) dimensions to exercise
// edge handling for widths/heights that are not SIMD-friendly multiples.
TEST_F(LibYUVRotateTest, RotatePlane0_16_Odd) {
  TestRotatePlane_16(benchmark_width_ + 1, benchmark_height_ + 1,
                     benchmark_width_ + 1, benchmark_height_ + 1, kRotate0,
                     benchmark_iterations_, disable_cpu_flags_,
                     benchmark_cpu_info_);
}

TEST_F(LibYUVRotateTest, RotatePlane90_16_Odd) {
  TestRotatePlane_16(benchmark_width_ + 1, benchmark_height_ + 1,
                     benchmark_height_ + 1, benchmark_width_ + 1, kRotate90,
                     benchmark_iterations_, disable_cpu_flags_,
                     benchmark_cpu_info_);
}

TEST_F(LibYUVRotateTest, RotatePlane180_16_Odd) {
  TestRotatePlane_16(benchmark_width_ + 1, benchmark_height_ + 1,
                     benchmark_width_ + 1, benchmark_height_ + 1, kRotate180,
                     benchmark_iterations_, disable_cpu_flags_,
                     benchmark_cpu_info_);
}

TEST_F(LibYUVRotateTest, RotatePlane270_16_Odd) {
  TestRotatePlane_16(benchmark_width_ + 1, benchmark_height_ + 1,
                     benchmark_height_ + 1, benchmark_width_ + 1, kRotate270,
                     benchmark_iterations_, disable_cpu_flags_,
                     benchmark_cpu_info_);
}
} // namespace libyuv

View File

@ -596,4 +596,266 @@ TESTAPLANARTOP(Android420, NV21, 2, 1, 0, 2, 2, I420, 2, 2)
#undef TESTAPLANARTOP
#undef TESTAPLANARTOPI
static void I010TestRotate(int src_width,
int src_height,
int dst_width,
int dst_height,
libyuv::RotationMode mode,
int benchmark_iterations,
int disable_cpu_flags,
int benchmark_cpu_info) {
if (src_width < 1) {
src_width = 1;
}
if (src_height == 0) {
src_height = 1;
}
if (dst_width < 1) {
dst_width = 1;
}
if (dst_height < 1) {
dst_height = 1;
}
int src_i010_y_size = src_width * Abs(src_height);
int src_i010_uv_size = ((src_width + 1) / 2) * ((Abs(src_height) + 1) / 2);
int src_i010_size = src_i010_y_size + src_i010_uv_size * 2;
align_buffer_page_end_16(src_i010, src_i010_size);
for (int i = 0; i < src_i010_size; ++i) {
src_i010[i] = fastrand() & 0x3ff;
}
int dst_i010_y_size = dst_width * dst_height;
int dst_i010_uv_size = ((dst_width + 1) / 2) * ((dst_height + 1) / 2);
int dst_i010_size = dst_i010_y_size + dst_i010_uv_size * 2;
align_buffer_page_end_16(dst_i010_c, dst_i010_size);
align_buffer_page_end_16(dst_i010_opt, dst_i010_size);
memset(dst_i010_c, 2, dst_i010_size * 2);
memset(dst_i010_opt, 3, dst_i010_size * 2);
MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
I010Rotate(src_i010, src_width, src_i010 + src_i010_y_size,
(src_width + 1) / 2, src_i010 + src_i010_y_size + src_i010_uv_size,
(src_width + 1) / 2, dst_i010_c, dst_width,
dst_i010_c + dst_i010_y_size, (dst_width + 1) / 2,
dst_i010_c + dst_i010_y_size + dst_i010_uv_size,
(dst_width + 1) / 2, src_width, src_height, mode);
MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
for (int i = 0; i < benchmark_iterations; ++i) {
I010Rotate(
src_i010, src_width, src_i010 + src_i010_y_size, (src_width + 1) / 2,
src_i010 + src_i010_y_size + src_i010_uv_size, (src_width + 1) / 2,
dst_i010_opt, dst_width, dst_i010_opt + dst_i010_y_size,
(dst_width + 1) / 2, dst_i010_opt + dst_i010_y_size + dst_i010_uv_size,
(dst_width + 1) / 2, src_width, src_height, mode);
}
// Rotation should be exact.
for (int i = 0; i < dst_i010_size; ++i) {
EXPECT_EQ(dst_i010_c[i], dst_i010_opt[i]);
}
free_aligned_buffer_page_end_16(dst_i010_c);
free_aligned_buffer_page_end_16(dst_i010_opt);
free_aligned_buffer_page_end_16(src_i010);
}
// I010 rotation through each mode at benchmark size; 90/270 swap the
// destination width and height.
TEST_F(LibYUVRotateTest, I010Rotate0_Opt) {
  I010TestRotate(benchmark_width_, benchmark_height_, benchmark_width_,
                 benchmark_height_, kRotate0, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_);
}

TEST_F(LibYUVRotateTest, I010Rotate90_Opt) {
  I010TestRotate(benchmark_width_, benchmark_height_, benchmark_height_,
                 benchmark_width_, kRotate90, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_);
}

TEST_F(LibYUVRotateTest, I010Rotate180_Opt) {
  I010TestRotate(benchmark_width_, benchmark_height_, benchmark_width_,
                 benchmark_height_, kRotate180, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_);
}

TEST_F(LibYUVRotateTest, I010Rotate270_Opt) {
  I010TestRotate(benchmark_width_, benchmark_height_, benchmark_height_,
                 benchmark_width_, kRotate270, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_);
}
static void I210TestRotate(int src_width,
int src_height,
int dst_width,
int dst_height,
libyuv::RotationMode mode,
int benchmark_iterations,
int disable_cpu_flags,
int benchmark_cpu_info) {
if (src_width < 1) {
src_width = 1;
}
if (src_height == 0) {
src_height = 1;
}
if (dst_width < 1) {
dst_width = 1;
}
if (dst_height < 1) {
dst_height = 1;
}
int src_i210_y_size = src_width * Abs(src_height);
int src_i210_uv_size = ((src_width + 1) / 2) * Abs(src_height);
int src_i210_size = src_i210_y_size + src_i210_uv_size * 2;
align_buffer_page_end_16(src_i210, src_i210_size);
for (int i = 0; i < src_i210_size; ++i) {
src_i210[i] = fastrand() & 0x3ff;
}
int dst_i210_y_size = dst_width * dst_height;
int dst_i210_uv_size = ((dst_width + 1) / 2) * dst_height;
int dst_i210_size = dst_i210_y_size + dst_i210_uv_size * 2;
align_buffer_page_end_16(dst_i210_c, dst_i210_size);
align_buffer_page_end_16(dst_i210_opt, dst_i210_size);
memset(dst_i210_c, 2, dst_i210_size * 2);
memset(dst_i210_opt, 3, dst_i210_size * 2);
MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
I210Rotate(src_i210, src_width, src_i210 + src_i210_y_size,
(src_width + 1) / 2, src_i210 + src_i210_y_size + src_i210_uv_size,
(src_width + 1) / 2, dst_i210_c, dst_width,
dst_i210_c + dst_i210_y_size, (dst_width + 1) / 2,
dst_i210_c + dst_i210_y_size + dst_i210_uv_size,
(dst_width + 1) / 2, src_width, src_height, mode);
MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
for (int i = 0; i < benchmark_iterations; ++i) {
I210Rotate(
src_i210, src_width, src_i210 + src_i210_y_size, (src_width + 1) / 2,
src_i210 + src_i210_y_size + src_i210_uv_size, (src_width + 1) / 2,
dst_i210_opt, dst_width, dst_i210_opt + dst_i210_y_size,
(dst_width + 1) / 2, dst_i210_opt + dst_i210_y_size + dst_i210_uv_size,
(dst_width + 1) / 2, src_width, src_height, mode);
}
// Rotation should be exact.
for (int i = 0; i < dst_i210_size; ++i) {
EXPECT_EQ(dst_i210_c[i], dst_i210_opt[i]);
}
free_aligned_buffer_page_end_16(dst_i210_c);
free_aligned_buffer_page_end_16(dst_i210_opt);
free_aligned_buffer_page_end_16(src_i210);
}
// I210 rotation through each mode at benchmark size; 90/270 swap the
// destination width and height.
TEST_F(LibYUVRotateTest, I210Rotate0_Opt) {
  I210TestRotate(benchmark_width_, benchmark_height_, benchmark_width_,
                 benchmark_height_, kRotate0, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_);
}

TEST_F(LibYUVRotateTest, I210Rotate90_Opt) {
  I210TestRotate(benchmark_width_, benchmark_height_, benchmark_height_,
                 benchmark_width_, kRotate90, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_);
}

TEST_F(LibYUVRotateTest, I210Rotate180_Opt) {
  I210TestRotate(benchmark_width_, benchmark_height_, benchmark_width_,
                 benchmark_height_, kRotate180, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_);
}

TEST_F(LibYUVRotateTest, I210Rotate270_Opt) {
  I210TestRotate(benchmark_width_, benchmark_height_, benchmark_height_,
                 benchmark_width_, kRotate270, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_);
}
// Verifies that the optimized I410Rotate path produces output identical to
// the unoptimized C path for the given rotation mode and geometry.
// I410 here is stored as three equally sized planes of 16-bit samples with
// 10 significant bits (values are masked with 0x3ff below), so the UV planes
// have the same dimensions as the Y plane.
static void I410TestRotate(int src_width,
                           int src_height,
                           int dst_width,
                           int dst_height,
                           libyuv::RotationMode mode,
                           int benchmark_iterations,
                           int disable_cpu_flags,
                           int benchmark_cpu_info) {
  // Clamp degenerate sizes so buffer math below stays positive.
  // NOTE(review): src_height only guards against 0 (negative height is a
  // valid "inverted image" convention, hence the Abs() below).
  if (src_width < 1) {
    src_width = 1;
  }
  if (src_height == 0) {
    src_height = 1;
  }
  if (dst_width < 1) {
    dst_width = 1;
  }
  if (dst_height < 1) {
    dst_height = 1;
  }
  // Plane sizes in elements (16-bit each). UV planes match the Y plane size.
  int src_i410_y_size = src_width * Abs(src_height);
  int src_i410_uv_size = src_width * Abs(src_height);
  int src_i410_size = src_i410_y_size + src_i410_uv_size * 2;
  align_buffer_page_end_16(src_i410, src_i410_size);
  // Fill the source with random 10-bit sample values.
  for (int i = 0; i < src_i410_size; ++i) {
    src_i410[i] = fastrand() & 0x3ff;
  }
  int dst_i410_y_size = dst_width * dst_height;
  int dst_i410_uv_size = dst_width * dst_height;
  int dst_i410_size = dst_i410_y_size + dst_i410_uv_size * 2;
  align_buffer_page_end_16(dst_i410_c, dst_i410_size);
  align_buffer_page_end_16(dst_i410_opt, dst_i410_size);
  // Seed the two destinations with different byte patterns so an untouched
  // region would be caught by the comparison. Sizes are doubled because the
  // buffers hold 16-bit elements but memset counts bytes.
  memset(dst_i410_c, 2, dst_i410_size * 2);
  memset(dst_i410_opt, 3, dst_i410_size * 2);
  // Reference pass with SIMD disabled.
  MaskCpuFlags(disable_cpu_flags);  // Disable all CPU optimization.
  I410Rotate(src_i410, src_width, src_i410 + src_i410_y_size, src_width,
             src_i410 + src_i410_y_size + src_i410_uv_size, src_width,
             dst_i410_c, dst_width, dst_i410_c + dst_i410_y_size, dst_width,
             dst_i410_c + dst_i410_y_size + dst_i410_uv_size, dst_width,
             src_width, src_height, mode);
  // Optimized pass, repeated for benchmarking.
  MaskCpuFlags(benchmark_cpu_info);  // Enable all CPU optimization.
  for (int i = 0; i < benchmark_iterations; ++i) {
    I410Rotate(src_i410, src_width, src_i410 + src_i410_y_size, src_width,
               src_i410 + src_i410_y_size + src_i410_uv_size, src_width,
               dst_i410_opt, dst_width, dst_i410_opt + dst_i410_y_size,
               dst_width, dst_i410_opt + dst_i410_y_size + dst_i410_uv_size,
               dst_width, src_width, src_height, mode);
  }
  // Rotation should be exact.
  for (int i = 0; i < dst_i410_size; ++i) {
    EXPECT_EQ(dst_i410_c[i], dst_i410_opt[i]);
  }
  free_aligned_buffer_page_end_16(dst_i410_c);
  free_aligned_buffer_page_end_16(dst_i410_opt);
  free_aligned_buffer_page_end_16(src_i410);
}
TEST_F(LibYUVRotateTest, I410Rotate0_Opt) {
  // Rotation by 0 degrees keeps the destination dimensions unchanged.
  const int src_w = benchmark_width_;
  const int src_h = benchmark_height_;
  I410TestRotate(src_w, src_h, /*dst_width=*/src_w, /*dst_height=*/src_h,
                 kRotate0, benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
TEST_F(LibYUVRotateTest, I410Rotate90_Opt) {
  // A 90 degree rotation swaps width and height in the destination.
  const int src_w = benchmark_width_;
  const int src_h = benchmark_height_;
  I410TestRotate(src_w, src_h, /*dst_width=*/src_h, /*dst_height=*/src_w,
                 kRotate90, benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
TEST_F(LibYUVRotateTest, I410Rotate180_Opt) {
  // Rotation by 180 degrees keeps the destination dimensions unchanged.
  const int src_w = benchmark_width_;
  const int src_h = benchmark_height_;
  I410TestRotate(src_w, src_h, /*dst_width=*/src_w, /*dst_height=*/src_h,
                 kRotate180, benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
TEST_F(LibYUVRotateTest, I410Rotate270_Opt) {
  // A 270 degree rotation swaps width and height in the destination.
  const int src_w = benchmark_width_;
  const int src_h = benchmark_height_;
  I410TestRotate(src_w, src_h, /*dst_width=*/src_h, /*dst_height=*/src_w,
                 kRotate270, benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
} // namespace libyuv

View File

@ -11,6 +11,7 @@
#ifndef UNIT_TEST_UNIT_TEST_H_ // NOLINT
#define UNIT_TEST_UNIT_TEST_H_
#include <stddef.h> // For NULL
#ifdef _WIN32
#include <windows.h>
#else
@ -76,7 +77,18 @@ static inline bool SizeValid(int src_width,
#define free_aligned_buffer_page_end(var) \
free(var##_mem); \
var = 0
var = NULL
#define align_buffer_page_end_16(var, size) \
uint8_t* var##_mem = \
reinterpret_cast<uint8_t*>(malloc(((size)*2 + 4095 + 63) & ~4095)); \
uint16_t* var = reinterpret_cast<uint16_t*>( \
(intptr_t)(var##_mem + (((size)*2 + 4095 + 63) & ~4095) - (size)*2) & \
~63)
#define free_aligned_buffer_page_end_16(var) \
free(var##_mem); \
var = NULL
#ifdef WIN32
static inline double get_time() {

View File

@ -43,9 +43,10 @@
// #define BR (-VR * 128 + YB)
int main(int argc, const char* argv[]) {
if (argc < 2) {
printf("yuvconstants Kr Kb\n");
printf(" MC BT KR = 0.2126; KB = 0.0722\n");
if (argc < 3) {
printf("yuvconstants [KR] [KB]\n");
printf(" e.g. yuvconstants 0.2126 0.0722\n");
printf(" MC BT KR KB\n");
printf(" 1 BT.709 KR = 0.2126; KB = 0.0722\n");
printf(" 4 FCC KR = 0.30; KB = 0.11\n");
printf(" 6 BT.601 KR = 0.299; KB = 0.114\n");

View File

@ -23,7 +23,7 @@ origin:
# Revision to pull in
# Must be a long or short commit SHA (long preferred)
revision: ea26d7adb1da4c1bd80e99b9d2f6e9ca0b9cde0e
revision: b2528b0be934de1918e20c85fc170d809eeb49ab
# The package's license, where possible using the mnemonic from
# https://spdx.org/licenses/