mirror of
https://github.com/mozilla/gecko-dev.git
synced 2025-02-26 20:30:41 +00:00
Bug 1811322 - P0 - vendor libyuv to b2528b0be934;r=webrtc-reviewers,mjf
Differential Revision: https://phabricator.services.mozilla.com/D167858
This commit is contained in:
parent
8d1005d303
commit
c1bc9bcd59
@ -48,7 +48,7 @@ TARGET_LINK_LIBRARIES ( yuvconstants ${ly_lib_static} )
|
||||
find_package ( JPEG )
|
||||
if (JPEG_FOUND)
|
||||
include_directories( ${JPEG_INCLUDE_DIR} )
|
||||
target_link_libraries( yuvconvert ${JPEG_LIBRARY} )
|
||||
target_link_libraries( ${ly_lib_shared} ${JPEG_LIBRARY} )
|
||||
add_definitions( -DHAVE_JPEG )
|
||||
endif()
|
||||
|
||||
|
@ -5,7 +5,7 @@ gclient_gn_args = [
|
||||
|
||||
vars = {
|
||||
'chromium_git': 'https://chromium.googlesource.com',
|
||||
'chromium_revision': '1c174f8519b2926ff3e621467b6aa282b4934f4a',
|
||||
'chromium_revision': '504c0697552240028c5412dafd2a7306a7cd4be7',
|
||||
'gn_version': 'git_revision:6f13aaac55a977e1948910942675c69f2b4f7a94',
|
||||
# ninja CIPD package version.
|
||||
# https://chrome-infra-packages.appspot.com/p/infra/3pp/tools/ninja
|
||||
@ -15,17 +15,29 @@ vars = {
|
||||
|
||||
# Keep the Chromium default of generating location tags.
|
||||
'generate_location_tags': True,
|
||||
|
||||
# By default, download the fuchsia sdk from the public sdk directory.
|
||||
'fuchsia_sdk_cipd_prefix': 'fuchsia/sdk/gn/',
|
||||
'fuchsia_version': 'version:10.20221110.2.1',
|
||||
# By default, download the fuchsia images from the fuchsia GCS bucket.
|
||||
'fuchsia_images_bucket': 'fuchsia',
|
||||
'checkout_fuchsia': False,
|
||||
# Since the images are hundreds of MB, default to only downloading the image
|
||||
# most commonly useful for developers. Bots and developers that need to use
|
||||
# other images can override this with additional images.
|
||||
'checkout_fuchsia_boot_images': "terminal.qemu-x64",
|
||||
'checkout_fuchsia_product_bundles': '"{checkout_fuchsia_boot_images}" != ""',
|
||||
}
|
||||
|
||||
deps = {
|
||||
'src/build':
|
||||
Var('chromium_git') + '/chromium/src/build' + '@' + '18e9d3c3adadf2489507e4e62afffafa46717d26',
|
||||
Var('chromium_git') + '/chromium/src/build' + '@' + 'fe1231e1da1e95acb006f53d06caaad16756a376',
|
||||
'src/buildtools':
|
||||
Var('chromium_git') + '/chromium/src/buildtools' + '@' + '33b52eafd539278600d34cd9ba23550d28c933d2',
|
||||
Var('chromium_git') + '/chromium/src/buildtools' + '@' + '3c8fef071edb88facb7508060e978c5fb8608dd5',
|
||||
'src/testing':
|
||||
Var('chromium_git') + '/chromium/src/testing' + '@' + 'aedf4723b9fcaf5a76164085f4a8e9797eee4bee',
|
||||
Var('chromium_git') + '/chromium/src/testing' + '@' + 'b4dc828e84ae95e1f5bf855f040c065287dac335',
|
||||
'src/third_party':
|
||||
Var('chromium_git') + '/chromium/src/third_party' + '@' + 'd6591989fa347099fd4c7d47ba8bf6ce900b4f8e',
|
||||
Var('chromium_git') + '/chromium/src/third_party' + '@' + '73f7282fa28ca1fbe8401e391207fb6ccf34767f',
|
||||
|
||||
'src/buildtools/linux64': {
|
||||
'packages': [
|
||||
@ -71,30 +83,30 @@ deps = {
|
||||
'src/buildtools/clang_format/script':
|
||||
Var('chromium_git') + '/external/github.com/llvm/llvm-project/clang/tools/clang-format.git' + '@' + '8b525d2747f2584fc35d8c7e612e66f377858df7',
|
||||
'src/buildtools/third_party/libc++/trunk':
|
||||
Var('chromium_git') + '/external/github.com/llvm/llvm-project/libcxx.git' + '@' + 'fc6bbc5eb039769b5ed2de84444a3c6f9b45a598',
|
||||
Var('chromium_git') + '/external/github.com/llvm/llvm-project/libcxx.git' + '@' + 'cd0a05047451dfbdef5ba85f97ac4888e432a377',
|
||||
'src/buildtools/third_party/libc++abi/trunk':
|
||||
Var('chromium_git') + '/external/github.com/llvm/llvm-project/libcxxabi.git' + '@' + '8dd405113a4f3694e910b79785dd7fb7535a888a',
|
||||
Var('chromium_git') + '/external/github.com/llvm/llvm-project/libcxxabi.git' + '@' + '1a32724f721e1c3b6c590a07fe4a954344f15e48',
|
||||
'src/buildtools/third_party/libunwind/trunk':
|
||||
Var('chromium_git') + '/external/github.com/llvm/llvm-project/libunwind.git' + '@' + 'aabcd8753678f1536e15eb6385a948470debdae4',
|
||||
Var('chromium_git') + '/external/github.com/llvm/llvm-project/libunwind.git' + '@' + '5870472fdd17f33d923b02e3e0acb9cbb18dbc9a',
|
||||
|
||||
'src/third_party/catapult':
|
||||
Var('chromium_git') + '/catapult.git' + '@' + '3ffa6b222803f54188a7b249383b2f092a24d19a',
|
||||
Var('chromium_git') + '/catapult.git' + '@' + '4efb51be8574f2969273012958eaae85d01ede0b',
|
||||
'src/third_party/colorama/src':
|
||||
Var('chromium_git') + '/external/colorama.git' + '@' + '799604a1041e9b3bc5d2789ecbd7e8db2e18e6b8',
|
||||
'src/third_party/depot_tools':
|
||||
Var('chromium_git') + '/chromium/tools/depot_tools.git' + '@' + 'b52683fa2e74087464d32a1a9c76bf1b5275e4fe',
|
||||
Var('chromium_git') + '/chromium/tools/depot_tools.git' + '@' + '2fc7e1ffd58b00601b47a5126201e5162911e244',
|
||||
'src/third_party/freetype/src':
|
||||
Var('chromium_git') + '/chromium/src/third_party/freetype2.git' + '@' + 'dea2e6358b2f963008d447d27564dd79890b61f0',
|
||||
Var('chromium_git') + '/chromium/src/third_party/freetype2.git' + '@' + '1c44de209cb465d175279dc30cd95f9857f703dd',
|
||||
'src/third_party/googletest/src':
|
||||
Var('chromium_git') + '/external/github.com/google/googletest.git' + '@' + 'af29db7ec28d6df1c7f0f745186884091e602e07',
|
||||
'src/third_party/harfbuzz-ng/src':
|
||||
Var('chromium_git') + '/external/github.com/harfbuzz/harfbuzz.git' + '@' + '56c467093598ec559a7148b61e112e9de52b7076',
|
||||
Var('chromium_git') + '/external/github.com/harfbuzz/harfbuzz.git' + '@' + '2822b589bc837fae6f66233e2cf2eef0f6ce8470',
|
||||
'src/third_party/libjpeg_turbo':
|
||||
Var('chromium_git') + '/chromium/deps/libjpeg_turbo.git' + '@' + 'ed683925e4897a84b3bffc5c1414c85b97a129a3',
|
||||
'src/third_party/nasm':
|
||||
Var('chromium_git') + '/chromium/deps/nasm.git' + '@' + '0873b2bae6a5388a1c55deac8456e3c60a47ca08',
|
||||
'src/tools':
|
||||
Var('chromium_git') + '/chromium/src/tools' + '@' + 'a185bbc6c077438a59a89a97c6c6ae30895e976c',
|
||||
Var('chromium_git') + '/chromium/src/tools' + '@' + 'a20d904d021175f221bf58921a5a67fd48420ed9',
|
||||
|
||||
# libyuv-only dependencies (not present in Chromium).
|
||||
'src/third_party/gtest-parallel':
|
||||
@ -116,14 +128,10 @@ deps = {
|
||||
'condition': 'checkout_android',
|
||||
'dep_type': 'cipd',
|
||||
},
|
||||
'src/third_party/auto/src': {
|
||||
'url': Var('chromium_git') + '/external/github.com/google/auto.git' + '@' + '3659a0e6436d3acfeda04e0bd1df3603f1e7ffac',
|
||||
'condition': 'checkout_android',
|
||||
},
|
||||
'src/third_party/boringssl/src':
|
||||
'https://boringssl.googlesource.com/boringssl.git' + '@' + '1ee71185a2322dc354bee5e5a0abfb1810a27dc6',
|
||||
'https://boringssl.googlesource.com/boringssl.git' + '@' + 'f0518d45119dd4dd322a884669daf8247bc3c992',
|
||||
'src/base': {
|
||||
'url': Var('chromium_git') + '/chromium/src/base' + '@' + '077682171b88d0aa0cb77a8e1cd4d959f58a20a3',
|
||||
'url': Var('chromium_git') + '/chromium/src/base' + '@' + 'f80120ff3265ba9bcb27416cc489343cfdc8bc61',
|
||||
'condition': 'checkout_android',
|
||||
},
|
||||
'src/third_party/bazel': {
|
||||
@ -288,7 +296,7 @@ deps = {
|
||||
},
|
||||
|
||||
'src/third_party/icu': {
|
||||
'url': Var('chromium_git') + '/chromium/deps/icu.git' + '@' + 'da07448619763d1cde255b361324242646f5b268',
|
||||
'url': Var('chromium_git') + '/chromium/deps/icu.git' + '@' + '1b7d391f0528fb3a4976b7541b387ee04f915f83',
|
||||
},
|
||||
'src/third_party/icu4j': {
|
||||
'packages': [
|
||||
@ -329,7 +337,7 @@ deps = {
|
||||
'condition': 'checkout_android',
|
||||
},
|
||||
'src/third_party/junit/src': {
|
||||
'url': Var('chromium_git') + '/external/junit.git' + '@' + '64155f8a9babcfcf4263cf4d08253a1556e75481',
|
||||
'url': Var('chromium_git') + '/external/junit.git' + '@' + '05fe2a64f59127c02135be22f416e91260d6ede6',
|
||||
'condition': 'checkout_android',
|
||||
},
|
||||
'src/third_party/libunwindstack': {
|
||||
@ -443,7 +451,7 @@ deps = {
|
||||
|
||||
# iOS deps:
|
||||
'src/ios': {
|
||||
'url': Var('chromium_git') + '/chromium/src/ios' + '@' + '211070da56a62cf7d2f7c7a81be29b57294c4343',
|
||||
'url': Var('chromium_git') + '/chromium/src/ios' + '@' + '866ec86ecb27dad8a3ac7957590df7765a13834f',
|
||||
'condition': 'checkout_ios'
|
||||
},
|
||||
|
||||
@ -2245,29 +2253,74 @@ hooks = [
|
||||
'condition': 'checkout_mac',
|
||||
},
|
||||
{
|
||||
'name': 'msan_chained_origins',
|
||||
'name': 'msan_chained_origins_focal',
|
||||
'pattern': '.',
|
||||
'condition': 'checkout_instrumented_libraries',
|
||||
'action': [ 'python3',
|
||||
'src/third_party/depot_tools/download_from_google_storage.py',
|
||||
"--no_resume",
|
||||
"--no_auth",
|
||||
"--bucket", "chromium-instrumented-libraries",
|
||||
"-s", "src/third_party/instrumented_libraries/binaries/msan-chained-origins.tgz.sha1",
|
||||
'--no_resume',
|
||||
'--no_auth',
|
||||
'--bucket', 'chromium-instrumented-libraries',
|
||||
'-s', 'src/third_party/instrumented_libraries/binaries/msan-chained-origins-focal.tgz.sha1',
|
||||
],
|
||||
},
|
||||
{
|
||||
'name': 'msan_no_origins',
|
||||
'name': 'msan_no_origins_focal',
|
||||
'pattern': '.',
|
||||
'condition': 'checkout_instrumented_libraries',
|
||||
'action': [ 'python3',
|
||||
'src/third_party/depot_tools/download_from_google_storage.py',
|
||||
"--no_resume",
|
||||
"--no_auth",
|
||||
"--bucket", "chromium-instrumented-libraries",
|
||||
"-s", "src/third_party/instrumented_libraries/binaries/msan-no-origins.tgz.sha1",
|
||||
'--no_resume',
|
||||
'--no_auth',
|
||||
'--bucket', 'chromium-instrumented-libraries',
|
||||
'-s', 'src/third_party/instrumented_libraries/binaries/msan-no-origins-focal.tgz.sha1',
|
||||
],
|
||||
},
|
||||
{
|
||||
'name': 'msan_chained_origins_xenial',
|
||||
'pattern': '.',
|
||||
'condition': 'checkout_instrumented_libraries',
|
||||
'action': [ 'python3',
|
||||
'src/third_party/depot_tools/download_from_google_storage.py',
|
||||
'--no_resume',
|
||||
'--no_auth',
|
||||
'--bucket', 'chromium-instrumented-libraries',
|
||||
'-s', 'src/third_party/instrumented_libraries/binaries/msan-chained-origins-xenial.tgz.sha1',
|
||||
],
|
||||
},
|
||||
{
|
||||
'name': 'msan_no_origins_xenial',
|
||||
'pattern': '.',
|
||||
'condition': 'checkout_instrumented_libraries',
|
||||
'action': [ 'python3',
|
||||
'src/third_party/depot_tools/download_from_google_storage.py',
|
||||
'--no_resume',
|
||||
'--no_auth',
|
||||
'--bucket', 'chromium-instrumented-libraries',
|
||||
'-s', 'src/third_party/instrumented_libraries/binaries/msan-no-origins-xenial.tgz.sha1',
|
||||
],
|
||||
},
|
||||
{
|
||||
'name': 'Download Fuchsia SDK from GCS',
|
||||
'pattern': '.',
|
||||
'condition': 'checkout_fuchsia',
|
||||
'action': [
|
||||
'python3',
|
||||
'src/build/fuchsia/update_sdk.py',
|
||||
'--cipd-prefix={fuchsia_sdk_cipd_prefix}',
|
||||
'--version={fuchsia_version}',
|
||||
],
|
||||
},
|
||||
{
|
||||
'name': 'Download Fuchsia system images',
|
||||
'pattern': '.',
|
||||
'condition': 'checkout_fuchsia and checkout_fuchsia_product_bundles',
|
||||
'action': [
|
||||
'python3',
|
||||
'src/build/fuchsia/update_product_bundles.py',
|
||||
'{checkout_fuchsia_boot_images}',
|
||||
],
|
||||
},
|
||||
{
|
||||
# Pull clang if needed or requested via GYP_DEFINES.
|
||||
# Note: On Win, this should run after win_toolchain, as it may use it.
|
||||
|
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 1850
|
||||
Version: 1857
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
@ -162,6 +162,22 @@ int MM21ToYUY2(const uint8_t* src_y,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert MT2T to P010
|
||||
// Note that src_y and src_uv point to packed 10-bit values, so the Y plane will
|
||||
// be 10 / 8 times the dimensions of the image. Also for this reason,
|
||||
// src_stride_y and src_stride_uv are given in bytes.
|
||||
LIBYUV_API
|
||||
int MT2TToP010(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_uv,
|
||||
int src_stride_uv,
|
||||
uint16_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint16_t* dst_uv,
|
||||
int dst_stride_uv,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert I422 to NV21.
|
||||
LIBYUV_API
|
||||
int I422ToNV21(const uint8_t* src_y,
|
||||
@ -283,6 +299,23 @@ int I210ToI422(const uint16_t* src_y,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
#define H410ToH420 I410ToI420
|
||||
LIBYUV_API
|
||||
int I410ToI420(const uint16_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint16_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint16_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8_t* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8_t* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
#define H410ToH444 I410ToI444
|
||||
LIBYUV_API
|
||||
int I410ToI444(const uint16_t* src_y,
|
||||
@ -571,6 +604,36 @@ int NV16ToNV24(const uint8_t* src_y,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert P010 to I010.
|
||||
LIBYUV_API
|
||||
int P010ToI010(const uint16_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint16_t* src_uv,
|
||||
int src_stride_uv,
|
||||
uint16_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint16_t* dst_u,
|
||||
int dst_stride_u,
|
||||
uint16_t* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert P012 to I012.
|
||||
LIBYUV_API
|
||||
int P012ToI012(const uint16_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint16_t* src_uv,
|
||||
int src_stride_uv,
|
||||
uint16_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint16_t* dst_u,
|
||||
int dst_stride_u,
|
||||
uint16_t* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert P010 to P410.
|
||||
LIBYUV_API
|
||||
int P010ToP410(const uint16_t* src_y,
|
||||
|
@ -392,6 +392,24 @@ int I210Copy(const uint16_t* src_y,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Copy I410 to I410.
|
||||
#define I410ToI410 I410Copy
|
||||
LIBYUV_API
|
||||
int I410Copy(const uint16_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint16_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint16_t* src_v,
|
||||
int src_stride_v,
|
||||
uint16_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint16_t* dst_u,
|
||||
int dst_stride_u,
|
||||
uint16_t* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Copy NV12. Supports inverting.
|
||||
LIBYUV_API
|
||||
int NV12Copy(const uint8_t* src_y,
|
||||
|
@ -85,6 +85,60 @@ int I444Rotate(const uint8_t* src_y,
|
||||
int height,
|
||||
enum RotationMode mode);
|
||||
|
||||
// Rotate I010 frame.
|
||||
LIBYUV_API
|
||||
int I010Rotate(const uint16_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint16_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint16_t* src_v,
|
||||
int src_stride_v,
|
||||
uint16_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint16_t* dst_u,
|
||||
int dst_stride_u,
|
||||
uint16_t* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height,
|
||||
enum RotationMode mode);
|
||||
|
||||
// Rotate I210 frame.
|
||||
LIBYUV_API
|
||||
int I210Rotate(const uint16_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint16_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint16_t* src_v,
|
||||
int src_stride_v,
|
||||
uint16_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint16_t* dst_u,
|
||||
int dst_stride_u,
|
||||
uint16_t* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height,
|
||||
enum RotationMode mode);
|
||||
|
||||
// Rotate I410 frame.
|
||||
LIBYUV_API
|
||||
int I410Rotate(const uint16_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint16_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint16_t* src_v,
|
||||
int src_stride_v,
|
||||
uint16_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint16_t* dst_u,
|
||||
int dst_stride_u,
|
||||
uint16_t* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height,
|
||||
enum RotationMode mode);
|
||||
|
||||
// Rotate NV12 input and store in I420.
|
||||
LIBYUV_API
|
||||
int NV12ToI420Rotate(const uint8_t* src_y,
|
||||
@ -156,6 +210,16 @@ void RotatePlane270(const uint8_t* src,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Rotate a plane by 0, 90, 180, or 270.
|
||||
LIBYUV_API
|
||||
int RotatePlane_16(const uint16_t* src,
|
||||
int src_stride,
|
||||
uint16_t* dst,
|
||||
int dst_stride,
|
||||
int width,
|
||||
int height,
|
||||
enum RotationMode mode);
|
||||
|
||||
// Rotations for when U and V are interleaved.
|
||||
// These functions take one UV input pointer and
|
||||
// split the data into two buffers while
|
||||
|
@ -215,7 +215,23 @@ void TransposeUVWx16_Any_LSX(const uint8_t* src,
|
||||
uint8_t* dst_b,
|
||||
int dst_stride_b,
|
||||
int width);
|
||||
void TransposeWxH_16_C(const uint16_t* src,
|
||||
int src_stride,
|
||||
uint16_t* dst,
|
||||
int dst_stride,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
void TransposeWx8_16_C(const uint16_t* src,
|
||||
int src_stride,
|
||||
uint16_t* dst,
|
||||
int dst_stride,
|
||||
int width);
|
||||
void TransposeWx1_16_C(const uint16_t* src,
|
||||
int src_stride,
|
||||
uint16_t* dst,
|
||||
int dst_stride,
|
||||
int width);
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
} // namespace libyuv
|
||||
|
@ -11,7 +11,8 @@
|
||||
#ifndef INCLUDE_LIBYUV_ROW_H_
|
||||
#define INCLUDE_LIBYUV_ROW_H_
|
||||
|
||||
#include <stdlib.h> // For malloc.
|
||||
#include <stddef.h> // For NULL
|
||||
#include <stdlib.h> // For malloc
|
||||
|
||||
#include "libyuv/basic_types.h"
|
||||
|
||||
@ -176,9 +177,8 @@ extern "C" {
|
||||
|
||||
// The following functions fail on gcc/clang 32 bit with fpic and framepointer.
|
||||
// caveat: clangcl uses row_win.cc which works.
|
||||
#if !defined(MOZ_PROFILING) && \
|
||||
(defined(__x86_64__) || !defined(__pic__) || defined(__clang__) || \
|
||||
defined(_MSC_VER))
|
||||
#if defined(__x86_64__) || !defined(__pic__) || defined(__clang__) || \
|
||||
defined(_MSC_VER)
|
||||
// TODO(fbarchard): fix build error on android_full_debug=1
|
||||
// https://code.google.com/p/libyuv/issues/detail?id=517
|
||||
#define HAS_I422ALPHATOARGBROW_SSSE3
|
||||
@ -247,9 +247,8 @@ extern "C" {
|
||||
#define HAS_ARGBATTENUATEROW_AVX2
|
||||
#endif
|
||||
|
||||
#if !defined(MOZ_PROFILING) && \
|
||||
(defined(__x86_64__) || !defined(__pic__) || defined(__clang__) || \
|
||||
defined(_MSC_VER))
|
||||
#if defined(__x86_64__) || !defined(__pic__) || defined(__clang__) || \
|
||||
defined(_MSC_VER)
|
||||
// TODO(fbarchard): fix build error on android_full_debug=1
|
||||
// https://code.google.com/p/libyuv/issues/detail?id=517
|
||||
#define HAS_I422ALPHATOARGBROW_AVX2
|
||||
@ -457,6 +456,7 @@ extern "C" {
|
||||
#define HAS_DETILEROW_NEON
|
||||
#define HAS_DETILESPLITUVROW_NEON
|
||||
#define HAS_DETILETOYUY2_NEON
|
||||
#define HAS_UNPACKMT2T_NEON
|
||||
#define HAS_DIVIDEROW_16_NEON
|
||||
#define HAS_HALFFLOATROW_NEON
|
||||
#define HAS_HALFMERGEUVROW_NEON
|
||||
@ -686,6 +686,11 @@ extern "C" {
|
||||
#define HAS_SPLITUVROW_LSX
|
||||
#define HAS_UYVYTOARGBROW_LSX
|
||||
#define HAS_YUY2TOARGBROW_LSX
|
||||
#define HAS_ARGBTOYROW_LSX
|
||||
#define HAS_ABGRTOYJROW_LSX
|
||||
#define HAS_RGBATOYJROW_LSX
|
||||
#define HAS_RGB24TOYJROW_LSX
|
||||
#define HAS_RAWTOYJROW_LSX
|
||||
#endif
|
||||
|
||||
#if !defined(LIBYUV_DISABLE_LASX) && defined(__loongarch_asx)
|
||||
@ -713,6 +718,8 @@ extern "C" {
|
||||
#define HAS_ARGBTOUVROW_LASX
|
||||
#define HAS_ARGBTOYJROW_LASX
|
||||
#define HAS_ARGBTOYROW_LASX
|
||||
#define HAS_ABGRTOYJROW_LASX
|
||||
#define HAS_ABGRTOYROW_LASX
|
||||
#define HAS_I422ALPHATOARGBROW_LASX
|
||||
#define HAS_I422TOARGB1555ROW_LASX
|
||||
#define HAS_I422TOARGB4444ROW_LASX
|
||||
@ -742,6 +749,11 @@ extern "C" {
|
||||
#define HAS_YUY2TOUV422ROW_LASX
|
||||
#define HAS_YUY2TOUVROW_LASX
|
||||
#define HAS_YUY2TOYROW_LASX
|
||||
#define HAS_RGBATOYROW_LASX
|
||||
#define HAS_RGBATOYJROW_LASX
|
||||
#define HAS_BGRATOYROW_LASX
|
||||
#define HAS_RGB24TOYJROW_LASX
|
||||
#define HAS_RAWTOYJROW_LASX
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER) && !defined(__CLR_VER) && !defined(__clang__)
|
||||
@ -830,13 +842,21 @@ struct YuvConstants {
|
||||
|
||||
#define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a)-1)))
|
||||
|
||||
#define align_buffer_64(var, size) \
|
||||
uint8_t* var##_mem = (uint8_t*)(malloc((size) + 63)); /* NOLINT */ \
|
||||
uint8_t* var = (uint8_t*)(((intptr_t)(var##_mem) + 63) & ~63) /* NOLINT */
|
||||
#define align_buffer_64(var, size) \
|
||||
void* var##_mem = malloc((size) + 63); /* NOLINT */ \
|
||||
uint8_t* var = (uint8_t*)(((intptr_t)var##_mem + 63) & ~63) /* NOLINT */
|
||||
|
||||
#define free_aligned_buffer_64(var) \
|
||||
free(var##_mem); \
|
||||
var = 0
|
||||
var = NULL
|
||||
|
||||
#define align_buffer_64_16(var, size) \
|
||||
void* var##_mem = malloc((size)*2 + 63); /* NOLINT */ \
|
||||
uint16_t* var = (uint16_t*)(((intptr_t)var##_mem + 63) & ~63) /* NOLINT */
|
||||
|
||||
#define free_aligned_buffer_64_16(var) \
|
||||
free(var##_mem); \
|
||||
var = NULL
|
||||
|
||||
#if defined(__APPLE__) || defined(__x86_64__) || defined(__llvm__)
|
||||
#define OMITFP
|
||||
@ -1193,9 +1213,14 @@ void ABGRToYJRow_NEON(const uint8_t* src_abgr, uint8_t* dst_yj, int width);
|
||||
void RGBAToYJRow_NEON(const uint8_t* src_rgba, uint8_t* dst_yj, int width);
|
||||
void ARGBToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width);
|
||||
void ARGBToYJRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width);
|
||||
void ARGBToYRow_LSX(const uint8_t* src_argb0, uint8_t* dst_y, int width);
|
||||
void ARGBToYRow_LASX(const uint8_t* src_argb0, uint8_t* dst_y, int width);
|
||||
void ARGBToYJRow_LSX(const uint8_t* src_argb0, uint8_t* dst_y, int width);
|
||||
void ABGRToYJRow_LSX(const uint8_t* src_abgr, uint8_t* dst_yj, int width);
|
||||
void RGBAToYJRow_LSX(const uint8_t* src_rgba, uint8_t* dst_yj, int width);
|
||||
void ARGBToYJRow_LASX(const uint8_t* src_argb0, uint8_t* dst_y, int width);
|
||||
void ABGRToYJRow_LASX(const uint8_t* src_abgr, uint8_t* dst_yj, int width);
|
||||
void RGBAToYJRow_LASX(const uint8_t* src_rgba, uint8_t* dst_yj, int width);
|
||||
void ARGBToUV444Row_NEON(const uint8_t* src_argb,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
@ -1419,6 +1444,8 @@ void BGRAToYRow_LSX(const uint8_t* src_bgra, uint8_t* dst_y, int width);
|
||||
void ABGRToYRow_LSX(const uint8_t* src_abgr, uint8_t* dst_y, int width);
|
||||
void RGBAToYRow_LSX(const uint8_t* src_rgba, uint8_t* dst_y, int width);
|
||||
void ARGB1555ToYRow_LSX(const uint8_t* src_argb1555, uint8_t* dst_y, int width);
|
||||
void RGB24ToYJRow_LSX(const uint8_t* src_rgb24, uint8_t* dst_yj, int width);
|
||||
void ABGRToYRow_LASX(const uint8_t* src_abgr, uint8_t* dst_y, int width);
|
||||
void ARGB1555ToYRow_LASX(const uint8_t* src_argb1555,
|
||||
uint8_t* dst_y,
|
||||
int width);
|
||||
@ -1428,6 +1455,11 @@ void RGB24ToYRow_LSX(const uint8_t* src_rgb24, uint8_t* dst_y, int width);
|
||||
void RGB24ToYRow_LASX(const uint8_t* src_rgb24, uint8_t* dst_y, int width);
|
||||
void RAWToYRow_LSX(const uint8_t* src_raw, uint8_t* dst_y, int width);
|
||||
void RAWToYRow_LASX(const uint8_t* src_raw, uint8_t* dst_y, int width);
|
||||
void RGBAToYRow_LASX(const uint8_t* src_rgba, uint8_t* dst_y, int width);
|
||||
void BGRAToYRow_LASX(const uint8_t* src_bgra, uint8_t* dst_y, int width);
|
||||
void RGB24ToYJRow_LASX(const uint8_t* src_rgb24, uint8_t* dst_yj, int width);
|
||||
void RAWToYJRow_LSX(const uint8_t* src_raw, uint8_t* dst_yj, int width);
|
||||
void RAWToYJRow_LASX(const uint8_t* src_raw, uint8_t* dst_yj, int width);
|
||||
|
||||
void ARGBToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width);
|
||||
void ARGBToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width);
|
||||
@ -1491,10 +1523,15 @@ void ARGB1555ToYRow_Any_MSA(const uint8_t* src_ptr,
|
||||
void BGRAToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void ABGRToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void RGBAToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void ARGBToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void ARGBToYJRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void RGB24ToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void RGB565ToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void ABGRToYJRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void RAWToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void RGBAToYJRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void RGB24ToYJRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void RAWToYJRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void ARGB1555ToYRow_Any_LSX(const uint8_t* src_ptr,
|
||||
uint8_t* dst_ptr,
|
||||
int width);
|
||||
@ -1503,7 +1540,14 @@ void RGB565ToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void RGB24ToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void ARGBToYJRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void ARGBToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void ABGRToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void ABGRToYJRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void RAWToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void RGBAToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void RGBAToYJRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void BGRAToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void RGB24ToYJRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void RAWToYJRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void ARGB1555ToYRow_Any_LASX(const uint8_t* src_ptr,
|
||||
uint8_t* dst_ptr,
|
||||
int width);
|
||||
@ -1935,6 +1979,8 @@ void MirrorSplitUVRow_C(const uint8_t* src_uv,
|
||||
uint8_t* dst_v,
|
||||
int width);
|
||||
|
||||
void MirrorRow_16_C(const uint16_t* src, uint16_t* dst, int width);
|
||||
|
||||
void ARGBMirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width);
|
||||
void ARGBMirrorRow_SSE2(const uint8_t* src, uint8_t* dst, int width);
|
||||
void ARGBMirrorRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width);
|
||||
@ -2124,6 +2170,8 @@ void DetileToYUY2_Any_NEON(const uint8_t* src_y,
|
||||
ptrdiff_t src_uv_tile_stride,
|
||||
uint8_t* dst_yuy2,
|
||||
int width);
|
||||
void UnpackMT2T_C(const uint8_t* src, uint16_t* dst, size_t size);
|
||||
void UnpackMT2T_NEON(const uint8_t* src, uint16_t* dst, size_t size);
|
||||
void MergeUVRow_C(const uint8_t* src_u,
|
||||
const uint8_t* src_v,
|
||||
uint8_t* dst_uv,
|
||||
|
@ -214,6 +214,17 @@ void ScalePlaneVertical_16To8(int src_height,
|
||||
int scale,
|
||||
enum FilterMode filtering);
|
||||
|
||||
void ScalePlaneDown2_16To8(int src_width,
|
||||
int src_height,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
int src_stride,
|
||||
int dst_stride,
|
||||
const uint16_t* src_ptr,
|
||||
uint8_t* dst_ptr,
|
||||
int scale,
|
||||
enum FilterMode filtering);
|
||||
|
||||
// Simplify the filtering based on scale factors.
|
||||
enum FilterMode ScaleFilterReduce(int src_width,
|
||||
int src_height,
|
||||
@ -259,6 +270,16 @@ void ScaleRowDown2_16_C(const uint16_t* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint16_t* dst,
|
||||
int dst_width);
|
||||
void ScaleRowDown2_16To8_C(const uint16_t* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8_t* dst,
|
||||
int dst_width,
|
||||
int scale);
|
||||
void ScaleRowDown2_16To8_Odd_C(const uint16_t* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8_t* dst,
|
||||
int dst_width,
|
||||
int scale);
|
||||
void ScaleRowDown2Linear_C(const uint8_t* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8_t* dst,
|
||||
@ -267,6 +288,16 @@ void ScaleRowDown2Linear_16_C(const uint16_t* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint16_t* dst,
|
||||
int dst_width);
|
||||
void ScaleRowDown2Linear_16To8_C(const uint16_t* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8_t* dst,
|
||||
int dst_width,
|
||||
int scale);
|
||||
void ScaleRowDown2Linear_16To8_Odd_C(const uint16_t* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8_t* dst,
|
||||
int dst_width,
|
||||
int scale);
|
||||
void ScaleRowDown2Box_C(const uint8_t* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8_t* dst,
|
||||
@ -279,6 +310,16 @@ void ScaleRowDown2Box_16_C(const uint16_t* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint16_t* dst,
|
||||
int dst_width);
|
||||
void ScaleRowDown2Box_16To8_C(const uint16_t* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8_t* dst,
|
||||
int dst_width,
|
||||
int scale);
|
||||
void ScaleRowDown2Box_16To8_Odd_C(const uint16_t* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8_t* dst,
|
||||
int dst_width,
|
||||
int scale);
|
||||
void ScaleRowDown4_C(const uint8_t* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8_t* dst,
|
||||
|
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 1850
|
||||
#define LIBYUV_VERSION 1857
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_
|
||||
|
@ -52,7 +52,7 @@
|
||||
'optimize': 'max', # enable O2 and ltcg.
|
||||
},
|
||||
# Allows libyuv.a redistributable library without external dependencies.
|
||||
# 'standalone_static_library': 1,
|
||||
'standalone_static_library': 1,
|
||||
'conditions': [
|
||||
# Disable -Wunused-parameter
|
||||
['clang == 1', {
|
||||
@ -70,11 +70,6 @@
|
||||
'-mfpu=vfpv3-d16',
|
||||
# '-mthumb', # arm32 not thumb
|
||||
],
|
||||
'cflags_mozilla!': [
|
||||
'-mfpu=vfp',
|
||||
'-mfpu=vfpv3',
|
||||
'-mfpu=vfpv3-d16',
|
||||
],
|
||||
'conditions': [
|
||||
# Disable LTO in libyuv_neon target due to gcc 4.9 compiler bug.
|
||||
['clang == 0 and use_lto == 1', {
|
||||
@ -89,9 +84,6 @@
|
||||
'-mfpu=neon',
|
||||
# '-marm', # arm32 not thumb
|
||||
],
|
||||
'cflags_mozilla': [
|
||||
'-mfpu=neon',
|
||||
],
|
||||
}],
|
||||
],
|
||||
}],
|
||||
@ -100,15 +92,7 @@
|
||||
'LIBYUV_MSA',
|
||||
],
|
||||
}],
|
||||
['build_with_mozilla == 1', {
|
||||
'defines': [
|
||||
'HAVE_JPEG'
|
||||
],
|
||||
'cflags_mozilla': [
|
||||
'$(MOZ_JPEG_CFLAGS)',
|
||||
],
|
||||
}],
|
||||
['OS != "ios" and libyuv_disable_jpeg != 1 and build_with_mozilla != 1', {
|
||||
['OS != "ios" and libyuv_disable_jpeg != 1', {
|
||||
'defines': [
|
||||
'HAVE_JPEG'
|
||||
],
|
||||
|
@ -45,7 +45,7 @@ uint32_t HashDjb2(const uint8_t* src, uint64_t count, uint32_t seed) {
|
||||
}
|
||||
#endif
|
||||
|
||||
while (count >= (uint64_t)(kBlockSize)) {
|
||||
while (count >= (uint64_t)kBlockSize) {
|
||||
seed = HashDjb2_SSE(src, kBlockSize, seed);
|
||||
src += kBlockSize;
|
||||
count -= kBlockSize;
|
||||
@ -359,10 +359,10 @@ static double Ssim8x8_C(const uint8_t* src_a,
|
||||
(sum_a_sq + sum_b_sq + c1) *
|
||||
(count * sum_sq_a - sum_a_sq + count * sum_sq_b - sum_b_sq + c2);
|
||||
|
||||
if (ssim_d == 0.0) {
|
||||
if (ssim_d == 0) {
|
||||
return DBL_MAX;
|
||||
}
|
||||
return ssim_n * 1.0 / ssim_d;
|
||||
return (double)ssim_n / (double)ssim_d;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -67,7 +67,7 @@ uint32_t HammingDistance_SSE42(const uint8_t* src_a,
|
||||
:
|
||||
: "memory", "cc", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10");
|
||||
|
||||
return static_cast<uint32_t>(diff);
|
||||
return (uint32_t)(diff);
|
||||
}
|
||||
#else
|
||||
uint32_t HammingDistance_SSE42(const uint8_t* src_a,
|
||||
|
@ -24,6 +24,10 @@ namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Subsample amount uses a shift.
|
||||
// v is value
|
||||
// a is amount to add to round up
|
||||
// s is shift to subsample down
|
||||
#define SUBSAMPLE(v, a, s) (v < 0) ? (-((-v + a) >> s)) : ((v + a) >> s)
|
||||
static __inline int Abs(int v) {
|
||||
return v >= 0 ? v : -v;
|
||||
@ -291,6 +295,52 @@ int I210ToI422(const uint16_t* src_y,
|
||||
0, 10);
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
int I410ToI420(const uint16_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint16_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint16_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8_t* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8_t* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height) {
|
||||
const int depth = 10;
|
||||
const int scale = 1 << (24 - depth);
|
||||
|
||||
if (width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
src_y = src_y + (height - 1) * src_stride_y;
|
||||
src_u = src_u + (height - 1) * src_stride_u;
|
||||
src_v = src_v + (height - 1) * src_stride_v;
|
||||
src_stride_y = -src_stride_y;
|
||||
src_stride_u = -src_stride_u;
|
||||
src_stride_v = -src_stride_v;
|
||||
}
|
||||
|
||||
{
|
||||
const int uv_width = SUBSAMPLE(width, 1, 1);
|
||||
const int uv_height = SUBSAMPLE(height, 1, 1);
|
||||
|
||||
Convert16To8Plane(src_y, src_stride_y, dst_y, dst_stride_y, scale, width,
|
||||
height);
|
||||
ScalePlaneDown2_16To8(width, height, uv_width, uv_height, src_stride_u,
|
||||
dst_stride_u, src_u, dst_u, scale, kFilterBilinear);
|
||||
ScalePlaneDown2_16To8(width, height, uv_width, uv_height, src_stride_v,
|
||||
dst_stride_v, src_v, dst_v, scale, kFilterBilinear);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
int I410ToI444(const uint16_t* src_y,
|
||||
int src_stride_y,
|
||||
@ -732,6 +782,92 @@ int MM21ToYUY2(const uint8_t* src_y,
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Convert MT2T into P010. See tinyurl.com/mtk-10bit-video-format for format
|
||||
// documentation.
|
||||
// TODO(greenjustin): Add an MT2T to I420 conversion.
|
||||
LIBYUV_API
|
||||
int MT2TToP010(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_uv,
|
||||
int src_stride_uv,
|
||||
uint16_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint16_t* dst_uv,
|
||||
int dst_stride_uv,
|
||||
int width,
|
||||
int height) {
|
||||
if (width <= 0 || !height || !src_uv || !dst_uv) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
{
|
||||
int u_width = (width + 1) / 2;
|
||||
int uv_width = 2 * u_width;
|
||||
int y = 0;
|
||||
int uv_height = uv_height = (height + 1) / 2;
|
||||
const int tile_width = 16;
|
||||
const int y_tile_height = 32;
|
||||
const int uv_tile_height = 16;
|
||||
int padded_width = (width + tile_width - 1) & ~(tile_width - 1);
|
||||
int y_tile_row_size = padded_width * y_tile_height * 10 / 8;
|
||||
int uv_tile_row_size = padded_width * uv_tile_height * 10 / 8;
|
||||
size_t row_buf_size = padded_width * y_tile_height * sizeof(uint16_t);
|
||||
void (*UnpackMT2T)(const uint8_t* src, uint16_t* dst, size_t size) =
|
||||
UnpackMT2T_C;
|
||||
align_buffer_64(row_buf, row_buf_size);
|
||||
|
||||
#if defined(HAS_UNPACKMT2T_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
UnpackMT2T = UnpackMT2T_NEON;
|
||||
}
|
||||
#endif
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
uv_height = (height + 1) / 2;
|
||||
if (dst_y) {
|
||||
dst_y = dst_y + (height - 1) * dst_stride_y;
|
||||
dst_stride_y = -dst_stride_y;
|
||||
}
|
||||
dst_uv = dst_uv + (uv_height - 1) * dst_stride_uv;
|
||||
dst_stride_uv = -dst_stride_uv;
|
||||
}
|
||||
|
||||
// Unpack and detile Y in rows of tiles
|
||||
if (src_y && dst_y) {
|
||||
for (y = 0; y < (height & ~(y_tile_height - 1)); y += y_tile_height) {
|
||||
UnpackMT2T(src_y, (uint16_t*)row_buf, y_tile_row_size);
|
||||
DetilePlane_16((uint16_t*)row_buf, padded_width, dst_y, dst_stride_y,
|
||||
width, y_tile_height, y_tile_height);
|
||||
src_y += src_stride_y * y_tile_height;
|
||||
dst_y += dst_stride_y * y_tile_height;
|
||||
}
|
||||
if (height & (y_tile_height - 1)) {
|
||||
UnpackMT2T(src_y, (uint16_t*)row_buf, y_tile_row_size);
|
||||
DetilePlane_16((uint16_t*)row_buf, padded_width, dst_y, dst_stride_y,
|
||||
width, height & (y_tile_height - 1), y_tile_height);
|
||||
}
|
||||
}
|
||||
|
||||
// Unpack and detile UV plane
|
||||
for (y = 0; y < (uv_height & ~(uv_tile_height - 1)); y += uv_tile_height) {
|
||||
UnpackMT2T(src_uv, (uint16_t*)row_buf, uv_tile_row_size);
|
||||
DetilePlane_16((uint16_t*)row_buf, padded_width, dst_uv, dst_stride_uv,
|
||||
uv_width, uv_tile_height, uv_tile_height);
|
||||
src_uv += src_stride_uv * uv_tile_height;
|
||||
dst_uv += dst_stride_uv * uv_tile_height;
|
||||
}
|
||||
if (uv_height & (uv_tile_height - 1)) {
|
||||
UnpackMT2T(src_uv, (uint16_t*)row_buf, uv_tile_row_size);
|
||||
DetilePlane_16((uint16_t*)row_buf, padded_width, dst_uv, dst_stride_uv,
|
||||
uv_width, uv_height & (uv_tile_height - 1),
|
||||
uv_tile_height);
|
||||
}
|
||||
free_aligned_buffer_64(row_buf);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef I422TONV21_ROW_VERSION
|
||||
// Unittest fails for this version.
|
||||
// 422 chroma is 1/2 width, 1x height
|
||||
@ -753,7 +889,7 @@ int I422ToNV21(const uint8_t* src_y,
|
||||
int y;
|
||||
void (*MergeUVRow)(const uint8_t* src_u, const uint8_t* src_v,
|
||||
uint8_t* dst_uv, int width) = MergeUVRow_C;
|
||||
void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
|
||||
void (*InterpolateRow)(uint8_t* dst_ptr, const uint8_t* src_ptr,
|
||||
ptrdiff_t src_stride, int dst_width,
|
||||
int source_y_fraction) = InterpolateRow_C;
|
||||
int halfwidth = (width + 1) >> 1;
|
||||
@ -1137,6 +1273,70 @@ int NV16ToNV24(const uint8_t* src_y,
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Any P[420]1[02] to I[420]1[02] format with mirroring.
|
||||
static int PxxxToIxxx(const uint16_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint16_t* src_uv,
|
||||
int src_stride_uv,
|
||||
uint16_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint16_t* dst_u,
|
||||
int dst_stride_u,
|
||||
uint16_t* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height,
|
||||
int subsample_x,
|
||||
int subsample_y,
|
||||
int depth) {
|
||||
const int uv_width = SUBSAMPLE(width, subsample_x, subsample_x);
|
||||
const int uv_height = SUBSAMPLE(height, subsample_y, subsample_y);
|
||||
if (width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
ConvertToLSBPlane_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height,
|
||||
depth);
|
||||
SplitUVPlane_16(src_uv, src_stride_uv, dst_u, dst_stride_u, dst_v,
|
||||
dst_stride_v, uv_width, uv_height, depth);
|
||||
return 0;
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
int P010ToI010(const uint16_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint16_t* src_uv,
|
||||
int src_stride_uv,
|
||||
uint16_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint16_t* dst_u,
|
||||
int dst_stride_u,
|
||||
uint16_t* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height) {
|
||||
return PxxxToIxxx(src_y, src_stride_y, src_uv, src_stride_uv, dst_y,
|
||||
dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v,
|
||||
width, height, 1, 1, 10);
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
int P012ToI012(const uint16_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint16_t* src_uv,
|
||||
int src_stride_uv,
|
||||
uint16_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint16_t* dst_u,
|
||||
int dst_stride_u,
|
||||
uint16_t* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height) {
|
||||
return PxxxToIxxx(src_y, src_stride_y, src_uv, src_stride_uv, dst_y,
|
||||
dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v,
|
||||
width, height, 1, 1, 12);
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
int P010ToP410(const uint16_t* src_y,
|
||||
int src_stride_y,
|
||||
@ -1593,6 +1793,14 @@ int ARGBToI420(const uint8_t* src_argb,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYROW_LSX)
|
||||
if (TestCpuFlag(kCpuHasLSX)) {
|
||||
ARGBToYRow = ARGBToYRow_Any_LSX;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToYRow = ARGBToYRow_LSX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
ARGBToYRow = ARGBToYRow_Any_LASX;
|
||||
@ -1707,13 +1915,21 @@ int BGRAToI420(const uint8_t* src_bgra,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_BGRATOYROW_LASX) && defined(HAS_BGRATOUVROW_LASX)
|
||||
#if defined(HAS_BGRATOYROW_LSX) && defined(HAS_BGRATOUVROW_LSX)
|
||||
if (TestCpuFlag(kCpuHasLSX)) {
|
||||
BGRAToYRow = BGRAToYRow_Any_LSX;
|
||||
BGRAToUVRow = BGRAToUVRow_Any_LSX;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
BGRAToYRow = BGRAToYRow_LSX;
|
||||
BGRAToUVRow = BGRAToUVRow_LSX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_BGRATOYROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
BGRAToYRow = BGRAToYRow_Any_LASX;
|
||||
BGRAToUVRow = BGRAToUVRow_Any_LASX;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
BGRAToYRow = BGRAToYRow_LASX;
|
||||
BGRAToUVRow = BGRAToUVRow_LASX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@ -1829,6 +2045,14 @@ int ABGRToI420(const uint8_t* src_abgr,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ABGRTOYROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
ABGRToYRow = ABGRToYRow_Any_LASX;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
ABGRToYRow = ABGRToYRow_LASX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height - 1; y += 2) {
|
||||
ABGRToUVRow(src_abgr, src_stride_abgr, dst_u, dst_v, width);
|
||||
@ -1925,6 +2149,14 @@ int RGBAToI420(const uint8_t* src_rgba,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_RGBATOYROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
RGBAToYRow = RGBAToYRow_Any_LASX;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
RGBAToYRow = RGBAToYRow_LASX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height - 1; y += 2) {
|
||||
RGBAToUVRow(src_rgba, src_stride_rgba, dst_u, dst_v, width);
|
||||
@ -2183,6 +2415,22 @@ int RGB24ToJ420(const uint8_t* src_rgb24,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_RGB24TOYJROW_LSX)
|
||||
if (TestCpuFlag(kCpuHasLSX)) {
|
||||
RGB24ToYJRow = RGB24ToYJRow_Any_LSX;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
RGB24ToYJRow = RGB24ToYJRow_LSX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_RGB24TOYJROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
RGB24ToYJRow = RGB24ToYJRow_Any_LASX;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
RGB24ToYJRow = RGB24ToYJRow_LASX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// Other platforms do intermediate conversion from RGB24 to ARGB.
|
||||
#else // HAS_RGB24TOYJROW
|
||||
@ -2511,6 +2759,22 @@ int RAWToJ420(const uint8_t* src_raw,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_RAWTOYJROW_LSX)
|
||||
if (TestCpuFlag(kCpuHasLSX)) {
|
||||
RAWToYJRow = RAWToYJRow_Any_LSX;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
RAWToYJRow = RAWToYJRow_LSX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_RAWTOYJROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
RAWToYJRow = RAWToYJRow_Any_LASX;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
RAWToYJRow = RAWToYJRow_LASX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// Other platforms do intermediate conversion from RAW to ARGB.
|
||||
#else // HAS_RAWTOYJROW
|
||||
@ -3098,6 +3362,14 @@ int ARGB4444ToI420(const uint8_t* src_argb4444,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYROW_LSX)
|
||||
if (TestCpuFlag(kCpuHasLSX)) {
|
||||
ARGBToYRow = ARGBToYRow_Any_LSX;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToYRow = ARGBToYRow_LSX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
ARGBToYRow = ARGBToYRow_Any_LASX;
|
||||
@ -3210,6 +3482,22 @@ int RGB24ToJ400(const uint8_t* src_rgb24,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_RGB24TOYJROW_LSX)
|
||||
if (TestCpuFlag(kCpuHasLSX)) {
|
||||
RGB24ToYJRow = RGB24ToYJRow_Any_LSX;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
RGB24ToYJRow = RGB24ToYJRow_LSX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_RGB24TOYJROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
RGB24ToYJRow = RGB24ToYJRow_Any_LASX;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
RGB24ToYJRow = RGB24ToYJRow_LASX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
RGB24ToYJRow(src_rgb24, dst_yj, width);
|
||||
@ -3278,6 +3566,22 @@ int RAWToJ400(const uint8_t* src_raw,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_RAWTOYJROW_LSX)
|
||||
if (TestCpuFlag(kCpuHasLSX)) {
|
||||
RAWToYJRow = RAWToYJRow_Any_LSX;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
RAWToYJRow = RAWToYJRow_LSX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_RAWTOYJROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
RAWToYJRow = RAWToYJRow_Any_LASX;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
RAWToYJRow = RAWToYJRow_LASX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
RAWToYJRow(src_raw, dst_yj, width);
|
||||
|
@ -116,6 +116,14 @@ int ARGBToI444(const uint8_t* src_argb,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYROW_LSX)
|
||||
if (TestCpuFlag(kCpuHasLSX)) {
|
||||
ARGBToYRow = ARGBToYRow_Any_LSX;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToYRow = ARGBToYRow_LSX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
ARGBToYRow = ARGBToYRow_Any_LASX;
|
||||
@ -230,7 +238,14 @@ int ARGBToI422(const uint8_t* src_argb,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(HAS_ARGBTOYROW_LSX)
|
||||
if (TestCpuFlag(kCpuHasLSX)) {
|
||||
ARGBToYRow = ARGBToYRow_Any_LSX;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToYRow = ARGBToYRow_LSX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
ARGBToYRow = ARGBToYRow_Any_LASX;
|
||||
@ -340,6 +355,14 @@ int ARGBToNV12(const uint8_t* src_argb,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYROW_LSX)
|
||||
if (TestCpuFlag(kCpuHasLSX)) {
|
||||
ARGBToYRow = ARGBToYRow_Any_LSX;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToYRow = ARGBToYRow_LSX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
ARGBToYRow = ARGBToYRow_Any_LASX;
|
||||
@ -502,6 +525,14 @@ int ARGBToNV21(const uint8_t* src_argb,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYROW_LSX)
|
||||
if (TestCpuFlag(kCpuHasLSX)) {
|
||||
ARGBToYRow = ARGBToYRow_Any_LSX;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToYRow = ARGBToYRow_LSX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
ARGBToYRow = ARGBToYRow_Any_LASX;
|
||||
@ -663,6 +694,22 @@ int ABGRToNV12(const uint8_t* src_abgr,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ABGRTOYROW_LSX)
|
||||
if (TestCpuFlag(kCpuHasLSX)) {
|
||||
ABGRToYRow = ABGRToYRow_Any_LSX;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ABGRToYRow = ABGRToYRow_LSX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ABGRTOYROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
ABGRToYRow = ABGRToYRow_Any_LASX;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
ABGRToYRow = ABGRToYRow_LASX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_MERGEUVROW_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2)) {
|
||||
MergeUVRow_ = MergeUVRow_Any_SSE2;
|
||||
@ -815,6 +862,22 @@ int ABGRToNV21(const uint8_t* src_abgr,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ABGRTOYROW_LSX)
|
||||
if (TestCpuFlag(kCpuHasLSX)) {
|
||||
ABGRToYRow = ABGRToYRow_Any_LSX;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ABGRToYRow = ABGRToYRow_LSX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ABGRTOYROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
ABGRToYRow = ABGRToYRow_Any_LASX;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
ABGRToYRow = ABGRToYRow_LASX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_MERGEUVROW_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2)) {
|
||||
MergeUVRow_ = MergeUVRow_Any_SSE2;
|
||||
@ -972,6 +1035,14 @@ int ARGBToYUY2(const uint8_t* src_argb,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYROW_LSX)
|
||||
if (TestCpuFlag(kCpuHasLSX)) {
|
||||
ARGBToYRow = ARGBToYRow_Any_LSX;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToYRow = ARGBToYRow_LSX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
ARGBToYRow = ARGBToYRow_Any_LASX;
|
||||
@ -1135,6 +1206,14 @@ int ARGBToUYVY(const uint8_t* src_argb,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYROW_LSX)
|
||||
if (TestCpuFlag(kCpuHasLSX)) {
|
||||
ARGBToYRow = ARGBToYRow_Any_LSX;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToYRow = ARGBToYRow_LSX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
ARGBToYRow = ARGBToYRow_Any_LASX;
|
||||
@ -1262,6 +1341,14 @@ int ARGBToI400(const uint8_t* src_argb,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYROW_LSX)
|
||||
if (TestCpuFlag(kCpuHasLSX)) {
|
||||
ARGBToYRow = ARGBToYRow_Any_LSX;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToYRow = ARGBToYRow_LSX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
ARGBToYRow = ARGBToYRow_Any_LASX;
|
||||
@ -1939,6 +2026,16 @@ int ARGBToJ420(const uint8_t* src_argb,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYJROW_LSX) && defined(HAS_ARGBTOUVJROW_LSX)
|
||||
if (TestCpuFlag(kCpuHasLSX)) {
|
||||
ARGBToYJRow = ARGBToYJRow_Any_LSX;
|
||||
ARGBToUVJRow = ARGBToUVJRow_Any_LSX;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToYJRow = ARGBToYJRow_LSX;
|
||||
ARGBToUVJRow = ARGBToUVJRow_LSX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYJROW_LASX) && defined(HAS_ARGBTOUVJROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
ARGBToYJRow = ARGBToYJRow_Any_LASX;
|
||||
@ -2215,6 +2312,22 @@ int RGBAToJ400(const uint8_t* src_rgba,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_RGBATOYJROW_LSX)
|
||||
if (TestCpuFlag(kCpuHasLSX)) {
|
||||
RGBAToYJRow = RGBAToYJRow_Any_LSX;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
RGBAToYJRow = RGBAToYJRow_LSX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_RGBATOYJROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
RGBAToYJRow = RGBAToYJRow_Any_LASX;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
RGBAToYJRow = RGBAToYJRow_LASX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
RGBAToYJRow(src_rgba, dst_yj, width);
|
||||
@ -2309,13 +2422,19 @@ int ABGRToJ420(const uint8_t* src_abgr,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ABGRTOYJROW_LSX) && defined(HAS_ABGRTOUVJROW_LSX)
|
||||
#if defined(HAS_ABGRTOYJROW_LSX)
|
||||
if (TestCpuFlag(kCpuHasLSX)) {
|
||||
ABGRToYJRow = ABGRToYJRow_Any_LSX;
|
||||
ABGRToUVJRow = ABGRToUVJRow_Any_LSX;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ABGRToYJRow = ABGRToYJRow_LSX;
|
||||
ABGRToUVJRow = ABGRToUVJRow_LSX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ABGRTOYJROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
ABGRToYJRow = ABGRToYJRow_Any_LASX;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
ABGRToYJRow = ABGRToYJRow_LASX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@ -2430,23 +2549,19 @@ int ABGRToJ422(const uint8_t* src_abgr,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ABGRTOYJROW_LSX) && defined(HAS_ABGRTOUVJROW_LSX)
|
||||
#if defined(HAS_ABGRTOYJROW_LSX)
|
||||
if (TestCpuFlag(kCpuHasLSX)) {
|
||||
ABGRToYJRow = ABGRToYJRow_Any_LSX;
|
||||
ABGRToUVJRow = ABGRToUVJRow_Any_LSX;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ABGRToYJRow = ABGRToYJRow_LSX;
|
||||
ABGRToUVJRow = ABGRToUVJRow_LSX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ABGRTOYJROW_LASX) && defined(HAS_ABGRTOUVJROW_LASX)
|
||||
#if defined(HAS_ABGRTOYJROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
ABGRToYJRow = ABGRToYJRow_Any_LASX;
|
||||
ABGRToUVJRow = ABGRToUVJRow_Any_LASX;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
ABGRToYJRow = ABGRToYJRow_LASX;
|
||||
ABGRToUVJRow = ABGRToUVJRow_LASX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@ -2519,6 +2634,22 @@ int ABGRToJ400(const uint8_t* src_abgr,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ABGRTOYJROW_LSX)
|
||||
if (TestCpuFlag(kCpuHasLSX)) {
|
||||
ABGRToYJRow = ABGRToYJRow_Any_LSX;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ABGRToYJRow = ABGRToYJRow_LSX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ABGRTOYJROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
ABGRToYJRow = ABGRToYJRow_Any_LASX;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
ABGRToYJRow = ABGRToYJRow_LASX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
ABGRToYJRow(src_abgr, dst_yj, width);
|
||||
@ -2713,6 +2844,22 @@ int RAWToJNV21(const uint8_t* src_raw,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_RAWTOYJROW_LSX)
|
||||
if (TestCpuFlag(kCpuHasLSX)) {
|
||||
RAWToYJRow = RAWToYJRow_Any_LSX;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
RAWToYJRow = RAWToYJRow_LSX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_RAWTOYJROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
RAWToYJRow = RAWToYJRow_Any_LASX;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
RAWToYJRow = RAWToYJRow_LASX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// Other platforms do intermediate conversion from RAW to ARGB.
|
||||
#else // HAS_RAWTOYJROW
|
||||
|
@ -79,9 +79,7 @@ MJpegDecoder::MJpegDecoder()
|
||||
decompress_struct_->err = jpeg_std_error(&error_mgr_->base);
|
||||
// Override standard exit()-based error handler.
|
||||
error_mgr_->base.error_exit = &ErrorHandler;
|
||||
#ifndef DEBUG_MJPEG
|
||||
error_mgr_->base.output_message = &OutputHandler;
|
||||
#endif
|
||||
#endif
|
||||
decompress_struct_->client_data = NULL;
|
||||
source_mgr_->init_source = &init_source;
|
||||
@ -111,7 +109,7 @@ LIBYUV_BOOL MJpegDecoder::LoadFrame(const uint8_t* src, size_t src_len) {
|
||||
}
|
||||
|
||||
buf_.data = src;
|
||||
buf_.len = static_cast<int>(src_len);
|
||||
buf_.len = (int)src_len;
|
||||
buf_vec_.pos = 0;
|
||||
decompress_struct_->client_data = &buf_vec_;
|
||||
#ifdef HAVE_SETJMP
|
||||
@ -430,7 +428,7 @@ boolean fill_input_buffer(j_decompress_ptr cinfo) {
|
||||
|
||||
void skip_input_data(j_decompress_ptr cinfo, long num_bytes) { // NOLINT
|
||||
jpeg_source_mgr* src = cinfo->src;
|
||||
size_t bytes = static_cast<size_t>(num_bytes);
|
||||
size_t bytes = (size_t)num_bytes;
|
||||
if (bytes > src->bytes_in_buffer) {
|
||||
src->next_input_byte = nullptr;
|
||||
src->bytes_in_buffer = 0;
|
||||
@ -465,12 +463,11 @@ void ErrorHandler(j_common_ptr cinfo) {
|
||||
longjmp(mgr->setjmp_buffer, 1);
|
||||
}
|
||||
|
||||
#ifndef DEBUG_MJPEG
|
||||
// Suppress fprintf warnings.
|
||||
void OutputHandler(j_common_ptr cinfo) {
|
||||
(void)cinfo;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // HAVE_SETJMP
|
||||
|
||||
void MJpegDecoder::AllocOutputBuffers(int num_outbufs) {
|
||||
|
@ -333,6 +333,45 @@ int I210Copy(const uint16_t* src_y,
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Copy I410.
|
||||
LIBYUV_API
|
||||
int I410Copy(const uint16_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint16_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint16_t* src_v,
|
||||
int src_stride_v,
|
||||
uint16_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint16_t* dst_u,
|
||||
int dst_stride_u,
|
||||
uint16_t* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height) {
|
||||
if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 ||
|
||||
height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
src_y = src_y + (height - 1) * src_stride_y;
|
||||
src_u = src_u + (height - 1) * src_stride_u;
|
||||
src_v = src_v + (height - 1) * src_stride_v;
|
||||
src_stride_y = -src_stride_y;
|
||||
src_stride_u = -src_stride_u;
|
||||
src_stride_v = -src_stride_v;
|
||||
}
|
||||
|
||||
if (dst_y) {
|
||||
CopyPlane_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
|
||||
}
|
||||
CopyPlane_16(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
|
||||
CopyPlane_16(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Copy I400.
|
||||
LIBYUV_API
|
||||
int I400ToI400(const uint8_t* src_y,
|
||||
@ -3196,6 +3235,7 @@ int RAWToRGB24(const uint8_t* src_raw,
|
||||
return 0;
|
||||
}
|
||||
|
||||
// TODO(fbarchard): Consider uint8_t value
|
||||
LIBYUV_API
|
||||
void SetPlane(uint8_t* dst_y,
|
||||
int dst_stride_y,
|
||||
@ -3203,7 +3243,7 @@ void SetPlane(uint8_t* dst_y,
|
||||
int height,
|
||||
uint32_t value) {
|
||||
int y;
|
||||
void (*SetRow)(uint8_t * dst, uint8_t value, int width) = SetRow_C;
|
||||
void (*SetRow)(uint8_t* dst, uint8_t value, int width) = SetRow_C;
|
||||
|
||||
if (width <= 0 || height == 0) {
|
||||
return;
|
||||
@ -3256,7 +3296,7 @@ void SetPlane(uint8_t* dst_y,
|
||||
|
||||
// Set plane
|
||||
for (y = 0; y < height; ++y) {
|
||||
SetRow(dst_y, value, width);
|
||||
SetRow(dst_y, (uint8_t)value, width);
|
||||
dst_y += dst_stride_y;
|
||||
}
|
||||
}
|
||||
@ -3304,7 +3344,7 @@ int ARGBRect(uint8_t* dst_argb,
|
||||
int height,
|
||||
uint32_t value) {
|
||||
int y;
|
||||
void (*ARGBSetRow)(uint8_t * dst_argb, uint32_t value, int width) =
|
||||
void (*ARGBSetRow)(uint8_t* dst_argb, uint32_t value, int width) =
|
||||
ARGBSetRow_C;
|
||||
if (!dst_argb || width <= 0 || height == 0 || dst_x < 0 || dst_y < 0) {
|
||||
return -1;
|
||||
@ -3609,7 +3649,7 @@ int ARGBSepia(uint8_t* dst_argb,
|
||||
int width,
|
||||
int height) {
|
||||
int y;
|
||||
void (*ARGBSepiaRow)(uint8_t * dst_argb, int width) = ARGBSepiaRow_C;
|
||||
void (*ARGBSepiaRow)(uint8_t* dst_argb, int width) = ARGBSepiaRow_C;
|
||||
uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
|
||||
if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
|
||||
return -1;
|
||||
@ -3752,7 +3792,7 @@ int ARGBColorTable(uint8_t* dst_argb,
|
||||
int width,
|
||||
int height) {
|
||||
int y;
|
||||
void (*ARGBColorTableRow)(uint8_t * dst_argb, const uint8_t* table_argb,
|
||||
void (*ARGBColorTableRow)(uint8_t* dst_argb, const uint8_t* table_argb,
|
||||
int width) = ARGBColorTableRow_C;
|
||||
uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
|
||||
if (!dst_argb || !table_argb || width <= 0 || height <= 0 || dst_x < 0 ||
|
||||
@ -3788,7 +3828,7 @@ int RGBColorTable(uint8_t* dst_argb,
|
||||
int width,
|
||||
int height) {
|
||||
int y;
|
||||
void (*RGBColorTableRow)(uint8_t * dst_argb, const uint8_t* table_argb,
|
||||
void (*RGBColorTableRow)(uint8_t* dst_argb, const uint8_t* table_argb,
|
||||
int width) = RGBColorTableRow_C;
|
||||
uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
|
||||
if (!dst_argb || !table_argb || width <= 0 || height <= 0 || dst_x < 0 ||
|
||||
@ -3833,7 +3873,7 @@ int ARGBQuantize(uint8_t* dst_argb,
|
||||
int width,
|
||||
int height) {
|
||||
int y;
|
||||
void (*ARGBQuantizeRow)(uint8_t * dst_argb, int scale, int interval_size,
|
||||
void (*ARGBQuantizeRow)(uint8_t* dst_argb, int scale, int interval_size,
|
||||
int interval_offset, int width) = ARGBQuantizeRow_C;
|
||||
uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
|
||||
if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0 ||
|
||||
@ -4086,7 +4126,7 @@ int InterpolatePlane(const uint8_t* src0,
|
||||
int height,
|
||||
int interpolation) {
|
||||
int y;
|
||||
void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
|
||||
void (*InterpolateRow)(uint8_t* dst_ptr, const uint8_t* src_ptr,
|
||||
ptrdiff_t src_stride, int dst_width,
|
||||
int source_y_fraction) = InterpolateRow_C;
|
||||
if (!src0 || !src1 || !dst || width <= 0 || height == 0) {
|
||||
@ -4166,7 +4206,7 @@ int InterpolatePlane_16(const uint16_t* src0,
|
||||
int height,
|
||||
int interpolation) {
|
||||
int y;
|
||||
void (*InterpolateRow_16)(uint16_t * dst_ptr, const uint16_t* src_ptr,
|
||||
void (*InterpolateRow_16)(uint16_t* dst_ptr, const uint16_t* src_ptr,
|
||||
ptrdiff_t src_stride, int dst_width,
|
||||
int source_y_fraction) = InterpolateRow_16_C;
|
||||
if (!src0 || !src1 || !dst || width <= 0 || height == 0) {
|
||||
@ -5281,7 +5321,7 @@ int UYVYToNV12(const uint8_t* src_uyvy,
|
||||
int halfwidth = (width + 1) >> 1;
|
||||
void (*SplitUVRow)(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v,
|
||||
int width) = SplitUVRow_C;
|
||||
void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
|
||||
void (*InterpolateRow)(uint8_t* dst_ptr, const uint8_t* src_ptr,
|
||||
ptrdiff_t src_stride, int dst_width,
|
||||
int source_y_fraction) = InterpolateRow_C;
|
||||
|
||||
|
@ -138,7 +138,7 @@ void RotatePlane180(const uint8_t* src,
|
||||
int dst_stride,
|
||||
int width,
|
||||
int height) {
|
||||
// Swap first and last row and mirror the content. Uses a temporary row.
|
||||
// Swap top and bottom row and mirror the content. Uses a temporary row.
|
||||
align_buffer_64(row, width);
|
||||
const uint8_t* src_bot = src + src_stride * (height - 1);
|
||||
uint8_t* dst_bot = dst + dst_stride * (height - 1);
|
||||
@ -209,9 +209,9 @@ void RotatePlane180(const uint8_t* src,
|
||||
|
||||
// Odd height will harmlessly mirror the middle row twice.
|
||||
for (y = 0; y < half_height; ++y) {
|
||||
CopyRow(src, row, width); // Copy first row into buffer
|
||||
MirrorRow(src_bot, dst, width); // Mirror last row into first row
|
||||
MirrorRow(row, dst_bot, width); // Mirror buffer into last row
|
||||
CopyRow(src, row, width); // Copy top row into buffer
|
||||
MirrorRow(src_bot, dst, width); // Mirror bottom row into top row
|
||||
MirrorRow(row, dst_bot, width); // Mirror buffer into bottom row
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
src_bot -= src_stride;
|
||||
@ -476,6 +476,120 @@ int RotatePlane(const uint8_t* src,
|
||||
return -1;
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
void TransposePlane_16(const uint16_t* src,
|
||||
int src_stride,
|
||||
uint16_t* dst,
|
||||
int dst_stride,
|
||||
int width,
|
||||
int height) {
|
||||
int i = height;
|
||||
// Work across the source in 8x8 tiles
|
||||
while (i >= 8) {
|
||||
TransposeWx8_16_C(src, src_stride, dst, dst_stride, width);
|
||||
src += 8 * src_stride; // Go down 8 rows.
|
||||
dst += 8; // Move over 8 columns.
|
||||
i -= 8;
|
||||
}
|
||||
|
||||
if (i > 0) {
|
||||
TransposeWxH_16_C(src, src_stride, dst, dst_stride, width, i);
|
||||
}
|
||||
}
|
||||
|
||||
static void RotatePlane90_16(const uint16_t* src,
|
||||
int src_stride,
|
||||
uint16_t* dst,
|
||||
int dst_stride,
|
||||
int width,
|
||||
int height) {
|
||||
// Rotate by 90 is a transpose with the source read
|
||||
// from bottom to top. So set the source pointer to the end
|
||||
// of the buffer and flip the sign of the source stride.
|
||||
src += src_stride * (height - 1);
|
||||
src_stride = -src_stride;
|
||||
TransposePlane_16(src, src_stride, dst, dst_stride, width, height);
|
||||
}
|
||||
|
||||
static void RotatePlane270_16(const uint16_t* src,
|
||||
int src_stride,
|
||||
uint16_t* dst,
|
||||
int dst_stride,
|
||||
int width,
|
||||
int height) {
|
||||
// Rotate by 270 is a transpose with the destination written
|
||||
// from bottom to top. So set the destination pointer to the end
|
||||
// of the buffer and flip the sign of the destination stride.
|
||||
dst += dst_stride * (width - 1);
|
||||
dst_stride = -dst_stride;
|
||||
TransposePlane_16(src, src_stride, dst, dst_stride, width, height);
|
||||
}
|
||||
|
||||
static void RotatePlane180_16(const uint16_t* src,
|
||||
int src_stride,
|
||||
uint16_t* dst,
|
||||
int dst_stride,
|
||||
int width,
|
||||
int height) {
|
||||
// Swap top and bottom row and mirror the content. Uses a temporary row.
|
||||
align_buffer_64_16(row, width);
|
||||
const uint16_t* src_bot = src + src_stride * (height - 1);
|
||||
uint16_t* dst_bot = dst + dst_stride * (height - 1);
|
||||
int half_height = (height + 1) >> 1;
|
||||
int y;
|
||||
|
||||
// Odd height will harmlessly mirror the middle row twice.
|
||||
for (y = 0; y < half_height; ++y) {
|
||||
CopyRow_16_C(src, row, width); // Copy top row into buffer
|
||||
MirrorRow_16_C(src_bot, dst, width); // Mirror bottom row into top row
|
||||
MirrorRow_16_C(row, dst_bot, width); // Mirror buffer into bottom row
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
src_bot -= src_stride;
|
||||
dst_bot -= dst_stride;
|
||||
}
|
||||
free_aligned_buffer_64_16(row);
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
int RotatePlane_16(const uint16_t* src,
|
||||
int src_stride,
|
||||
uint16_t* dst,
|
||||
int dst_stride,
|
||||
int width,
|
||||
int height,
|
||||
enum RotationMode mode) {
|
||||
if (!src || width <= 0 || height == 0 || !dst) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
src = src + (height - 1) * src_stride;
|
||||
src_stride = -src_stride;
|
||||
}
|
||||
|
||||
switch (mode) {
|
||||
case kRotate0:
|
||||
// copy frame
|
||||
CopyPlane_16(src, src_stride, dst, dst_stride, width, height);
|
||||
return 0;
|
||||
case kRotate90:
|
||||
RotatePlane90_16(src, src_stride, dst, dst_stride, width, height);
|
||||
return 0;
|
||||
case kRotate270:
|
||||
RotatePlane270_16(src, src_stride, dst, dst_stride, width, height);
|
||||
return 0;
|
||||
case kRotate180:
|
||||
RotatePlane180_16(src, src_stride, dst, dst_stride, width, height);
|
||||
return 0;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
int I420Rotate(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
@ -544,6 +658,8 @@ int I420Rotate(const uint8_t* src_y,
|
||||
return -1;
|
||||
}
|
||||
|
||||
// I422 has half width x full height UV planes, so rotate by 90 and 270
|
||||
// require scaling to maintain 422 subsampling.
|
||||
LIBYUV_API
|
||||
int I422Rotate(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
@ -579,31 +695,42 @@ int I422Rotate(const uint8_t* src_y,
|
||||
|
||||
switch (mode) {
|
||||
case kRotate0:
|
||||
// copy frame
|
||||
// Copy frame
|
||||
CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
|
||||
CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, height);
|
||||
CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, height);
|
||||
return 0;
|
||||
|
||||
// Note on temporary Y plane for UV.
|
||||
// Rotation of UV first fits within the Y destination plane rows.
|
||||
// Y plane is width x height
|
||||
// Y plane rotated is height x width
|
||||
// UV plane is (width / 2) x height
|
||||
// UV plane rotated is height x (width / 2)
|
||||
// UV plane rotated+scaled is (height / 2) x width.
|
||||
// UV plane rotated is a temporary that fits within the Y plane rotated.
|
||||
|
||||
case kRotate90:
|
||||
// We need to rotate and rescale, we use plane Y as temporal storage.
|
||||
RotatePlane90(src_u, src_stride_u, dst_y, height, halfwidth, height);
|
||||
ScalePlane(dst_y, height, height, halfwidth, dst_u, halfheight,
|
||||
RotatePlane90(src_u, src_stride_u, dst_y, dst_stride_y, halfwidth,
|
||||
height);
|
||||
ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_u, dst_stride_u,
|
||||
halfheight, width, kFilterBilinear);
|
||||
RotatePlane90(src_v, src_stride_v, dst_y, height, halfwidth, height);
|
||||
ScalePlane(dst_y, height, height, halfwidth, dst_v, halfheight,
|
||||
RotatePlane90(src_v, src_stride_v, dst_y, dst_stride_y, halfwidth,
|
||||
height);
|
||||
ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_v, dst_stride_v,
|
||||
halfheight, width, kFilterLinear);
|
||||
RotatePlane90(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
|
||||
return 0;
|
||||
case kRotate270:
|
||||
// We need to rotate and rescale, we use plane Y as temporal storage.
|
||||
RotatePlane270(src_u, src_stride_u, dst_y, height, halfwidth, height);
|
||||
ScalePlane(dst_y, height, height, halfwidth, dst_u, halfheight,
|
||||
RotatePlane270(src_u, src_stride_u, dst_y, dst_stride_y, halfwidth,
|
||||
height);
|
||||
ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_u, dst_stride_u,
|
||||
halfheight, width, kFilterBilinear);
|
||||
RotatePlane270(src_v, src_stride_v, dst_y, height, halfwidth, height);
|
||||
ScalePlane(dst_y, height, height, halfwidth, dst_v, halfheight,
|
||||
RotatePlane270(src_v, src_stride_v, dst_y, dst_stride_y, halfwidth,
|
||||
height);
|
||||
ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_v, dst_stride_v,
|
||||
halfheight, width, kFilterLinear);
|
||||
RotatePlane270(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
|
||||
|
||||
return 0;
|
||||
case kRotate180:
|
||||
RotatePlane180(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
|
||||
@ -828,6 +955,228 @@ int Android420ToI420Rotate(const uint8_t* src_y,
|
||||
return -1;
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
int I010Rotate(const uint16_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint16_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint16_t* src_v,
|
||||
int src_stride_v,
|
||||
uint16_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint16_t* dst_u,
|
||||
int dst_stride_u,
|
||||
uint16_t* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height,
|
||||
enum RotationMode mode) {
|
||||
int halfwidth = (width + 1) >> 1;
|
||||
int halfheight = (height + 1) >> 1;
|
||||
if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || !dst_y ||
|
||||
!dst_u || !dst_v || dst_stride_y < 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
src_y = src_y + (height - 1) * src_stride_y;
|
||||
src_u = src_u + (height - 1) * src_stride_u;
|
||||
src_v = src_v + (height - 1) * src_stride_v;
|
||||
src_stride_y = -src_stride_y;
|
||||
src_stride_u = -src_stride_u;
|
||||
src_stride_v = -src_stride_v;
|
||||
}
|
||||
|
||||
switch (mode) {
|
||||
case kRotate0:
|
||||
// copy frame
|
||||
return I010Copy(src_y, src_stride_y, src_u, src_stride_u, src_v,
|
||||
src_stride_v, dst_y, dst_stride_y, dst_u, dst_stride_u,
|
||||
dst_v, dst_stride_v, width, height);
|
||||
case kRotate90:
|
||||
RotatePlane90_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
|
||||
RotatePlane90_16(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth,
|
||||
halfheight);
|
||||
RotatePlane90_16(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth,
|
||||
halfheight);
|
||||
return 0;
|
||||
case kRotate270:
|
||||
RotatePlane270_16(src_y, src_stride_y, dst_y, dst_stride_y, width,
|
||||
height);
|
||||
RotatePlane270_16(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth,
|
||||
halfheight);
|
||||
RotatePlane270_16(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth,
|
||||
halfheight);
|
||||
return 0;
|
||||
case kRotate180:
|
||||
RotatePlane180_16(src_y, src_stride_y, dst_y, dst_stride_y, width,
|
||||
height);
|
||||
RotatePlane180_16(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth,
|
||||
halfheight);
|
||||
RotatePlane180_16(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth,
|
||||
halfheight);
|
||||
return 0;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
// I210 has half width x full height UV planes, so rotate by 90 and 270
|
||||
// require scaling to maintain 422 subsampling.
|
||||
LIBYUV_API
|
||||
int I210Rotate(const uint16_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint16_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint16_t* src_v,
|
||||
int src_stride_v,
|
||||
uint16_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint16_t* dst_u,
|
||||
int dst_stride_u,
|
||||
uint16_t* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height,
|
||||
enum RotationMode mode) {
|
||||
int halfwidth = (width + 1) >> 1;
|
||||
int halfheight = (height + 1) >> 1;
|
||||
if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || !dst_y ||
|
||||
!dst_u || !dst_v) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
src_y = src_y + (height - 1) * src_stride_y;
|
||||
src_u = src_u + (height - 1) * src_stride_u;
|
||||
src_v = src_v + (height - 1) * src_stride_v;
|
||||
src_stride_y = -src_stride_y;
|
||||
src_stride_u = -src_stride_u;
|
||||
src_stride_v = -src_stride_v;
|
||||
}
|
||||
|
||||
switch (mode) {
|
||||
case kRotate0:
|
||||
// Copy frame
|
||||
CopyPlane_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
|
||||
CopyPlane_16(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, height);
|
||||
CopyPlane_16(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, height);
|
||||
return 0;
|
||||
|
||||
// Note on temporary Y plane for UV.
|
||||
// Rotation of UV first fits within the Y destination plane rows.
|
||||
// Y plane is width x height
|
||||
// Y plane rotated is height x width
|
||||
// UV plane is (width / 2) x height
|
||||
// UV plane rotated is height x (width / 2)
|
||||
// UV plane rotated+scaled is (height / 2) x width.
|
||||
// UV plane rotated is a temporary that fits within the Y plane rotated.
|
||||
|
||||
case kRotate90:
|
||||
RotatePlane90_16(src_u, src_stride_u, dst_y, dst_stride_y, halfwidth,
|
||||
height);
|
||||
ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_u, dst_stride_u,
|
||||
halfheight, width, kFilterBilinear);
|
||||
RotatePlane90_16(src_v, src_stride_v, dst_y, dst_stride_y, halfwidth,
|
||||
height);
|
||||
ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_v, dst_stride_v,
|
||||
halfheight, width, kFilterLinear);
|
||||
RotatePlane90_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
|
||||
return 0;
|
||||
case kRotate270:
|
||||
RotatePlane270_16(src_u, src_stride_u, dst_y, dst_stride_y, halfwidth,
|
||||
height);
|
||||
ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_u, dst_stride_u,
|
||||
halfheight, width, kFilterBilinear);
|
||||
RotatePlane270_16(src_v, src_stride_v, dst_y, dst_stride_y, halfwidth,
|
||||
height);
|
||||
ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_v, dst_stride_v,
|
||||
halfheight, width, kFilterLinear);
|
||||
RotatePlane270_16(src_y, src_stride_y, dst_y, dst_stride_y, width,
|
||||
height);
|
||||
return 0;
|
||||
case kRotate180:
|
||||
RotatePlane180_16(src_y, src_stride_y, dst_y, dst_stride_y, width,
|
||||
height);
|
||||
RotatePlane180_16(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth,
|
||||
height);
|
||||
RotatePlane180_16(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth,
|
||||
height);
|
||||
return 0;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
int I410Rotate(const uint16_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint16_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint16_t* src_v,
|
||||
int src_stride_v,
|
||||
uint16_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint16_t* dst_u,
|
||||
int dst_stride_u,
|
||||
uint16_t* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height,
|
||||
enum RotationMode mode) {
|
||||
if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || !dst_y ||
|
||||
!dst_u || !dst_v || dst_stride_y < 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
src_y = src_y + (height - 1) * src_stride_y;
|
||||
src_u = src_u + (height - 1) * src_stride_u;
|
||||
src_v = src_v + (height - 1) * src_stride_v;
|
||||
src_stride_y = -src_stride_y;
|
||||
src_stride_u = -src_stride_u;
|
||||
src_stride_v = -src_stride_v;
|
||||
}
|
||||
|
||||
switch (mode) {
|
||||
case kRotate0:
|
||||
// copy frame
|
||||
CopyPlane_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
|
||||
CopyPlane_16(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
|
||||
CopyPlane_16(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
|
||||
return 0;
|
||||
case kRotate90:
|
||||
RotatePlane90_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
|
||||
RotatePlane90_16(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
|
||||
RotatePlane90_16(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
|
||||
return 0;
|
||||
case kRotate270:
|
||||
RotatePlane270_16(src_y, src_stride_y, dst_y, dst_stride_y, width,
|
||||
height);
|
||||
RotatePlane270_16(src_u, src_stride_u, dst_u, dst_stride_u, width,
|
||||
height);
|
||||
RotatePlane270_16(src_v, src_stride_v, dst_v, dst_stride_v, width,
|
||||
height);
|
||||
return 0;
|
||||
case kRotate180:
|
||||
RotatePlane180_16(src_y, src_stride_y, dst_y, dst_stride_y, width,
|
||||
height);
|
||||
RotatePlane180_16(src_u, src_stride_u, dst_u, dst_stride_u, width,
|
||||
height);
|
||||
RotatePlane180_16(src_v, src_stride_v, dst_v, dst_stride_v, width,
|
||||
height);
|
||||
return 0;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
} // namespace libyuv
|
||||
|
@ -94,8 +94,74 @@ void TransposeUVWxH_C(const uint8_t* src,
|
||||
for (i = 0; i < width * 2; i += 2) {
|
||||
int j;
|
||||
for (j = 0; j < height; ++j) {
|
||||
dst_a[j + ((i >> 1) * dst_stride_a)] = src[i + (j * src_stride)];
|
||||
dst_b[j + ((i >> 1) * dst_stride_b)] = src[i + (j * src_stride) + 1];
|
||||
dst_a[((i >> 1) * dst_stride_a) + j] = src[i + (j * src_stride)];
|
||||
dst_b[((i >> 1) * dst_stride_b) + j] = src[i + (j * src_stride) + 1];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void TransposeWx8_16_C(const uint16_t* src,
|
||||
int src_stride,
|
||||
uint16_t* dst,
|
||||
int dst_stride,
|
||||
int width) {
|
||||
int i;
|
||||
for (i = 0; i < width; ++i) {
|
||||
dst[0] = src[0 * src_stride];
|
||||
dst[1] = src[1 * src_stride];
|
||||
dst[2] = src[2 * src_stride];
|
||||
dst[3] = src[3 * src_stride];
|
||||
dst[4] = src[4 * src_stride];
|
||||
dst[5] = src[5 * src_stride];
|
||||
dst[6] = src[6 * src_stride];
|
||||
dst[7] = src[7 * src_stride];
|
||||
++src;
|
||||
dst += dst_stride;
|
||||
}
|
||||
}
|
||||
|
||||
void TransposeUVWx8_16_C(const uint16_t* src,
|
||||
int src_stride,
|
||||
uint16_t* dst_a,
|
||||
int dst_stride_a,
|
||||
uint16_t* dst_b,
|
||||
int dst_stride_b,
|
||||
int width) {
|
||||
int i;
|
||||
for (i = 0; i < width; ++i) {
|
||||
dst_a[0] = src[0 * src_stride + 0];
|
||||
dst_b[0] = src[0 * src_stride + 1];
|
||||
dst_a[1] = src[1 * src_stride + 0];
|
||||
dst_b[1] = src[1 * src_stride + 1];
|
||||
dst_a[2] = src[2 * src_stride + 0];
|
||||
dst_b[2] = src[2 * src_stride + 1];
|
||||
dst_a[3] = src[3 * src_stride + 0];
|
||||
dst_b[3] = src[3 * src_stride + 1];
|
||||
dst_a[4] = src[4 * src_stride + 0];
|
||||
dst_b[4] = src[4 * src_stride + 1];
|
||||
dst_a[5] = src[5 * src_stride + 0];
|
||||
dst_b[5] = src[5 * src_stride + 1];
|
||||
dst_a[6] = src[6 * src_stride + 0];
|
||||
dst_b[6] = src[6 * src_stride + 1];
|
||||
dst_a[7] = src[7 * src_stride + 0];
|
||||
dst_b[7] = src[7 * src_stride + 1];
|
||||
src += 2;
|
||||
dst_a += dst_stride_a;
|
||||
dst_b += dst_stride_b;
|
||||
}
|
||||
}
|
||||
|
||||
void TransposeWxH_16_C(const uint16_t* src,
|
||||
int src_stride,
|
||||
uint16_t* dst,
|
||||
int dst_stride,
|
||||
int width,
|
||||
int height) {
|
||||
int i;
|
||||
for (i = 0; i < width; ++i) {
|
||||
int j;
|
||||
for (j = 0; j < height; ++j) {
|
||||
dst[i * dst_stride + j] = src[j * src_stride + i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -201,13 +201,13 @@ void TransposeWx8_NEON(const uint8_t* src,
|
||||
|
||||
"4: \n"
|
||||
|
||||
: "=&r"(src_temp), // %0
|
||||
"+r"(src), // %1
|
||||
"+r"(dst), // %2
|
||||
"+r"(width) // %3
|
||||
: "r"(&kVTbl4x4Transpose), // %4
|
||||
"r"(static_cast<ptrdiff_t>(src_stride)), // %5
|
||||
"r"(static_cast<ptrdiff_t>(dst_stride)) // %6
|
||||
: "=&r"(src_temp), // %0
|
||||
"+r"(src), // %1
|
||||
"+r"(dst), // %2
|
||||
"+r"(width) // %3
|
||||
: "r"(&kVTbl4x4Transpose), // %4
|
||||
"r"((ptrdiff_t)src_stride), // %5
|
||||
"r"((ptrdiff_t)dst_stride) // %6
|
||||
: "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16",
|
||||
"v17", "v18", "v19", "v20", "v21", "v22", "v23");
|
||||
}
|
||||
@ -423,15 +423,15 @@ void TransposeUVWx8_NEON(const uint8_t* src,
|
||||
|
||||
"4: \n"
|
||||
|
||||
: "=&r"(src_temp), // %0
|
||||
"+r"(src), // %1
|
||||
"+r"(dst_a), // %2
|
||||
"+r"(dst_b), // %3
|
||||
"+r"(width) // %4
|
||||
: "r"(static_cast<ptrdiff_t>(src_stride)), // %5
|
||||
"r"(static_cast<ptrdiff_t>(dst_stride_a)), // %6
|
||||
"r"(static_cast<ptrdiff_t>(dst_stride_b)), // %7
|
||||
"r"(&kVTbl4x4TransposeDi) // %8
|
||||
: "=&r"(src_temp), // %0
|
||||
"+r"(src), // %1
|
||||
"+r"(dst_a), // %2
|
||||
"+r"(dst_b), // %3
|
||||
"+r"(width) // %4
|
||||
: "r"((ptrdiff_t)src_stride), // %5
|
||||
"r"((ptrdiff_t)dst_stride_a), // %6
|
||||
"r"((ptrdiff_t)dst_stride_b), // %7
|
||||
"r"(&kVTbl4x4TransposeDi) // %8
|
||||
: "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16",
|
||||
"v17", "v18", "v19", "v20", "v21", "v22", "v23", "v30", "v31");
|
||||
}
|
||||
|
@ -1036,6 +1036,9 @@ ANY11(ARGBToYRow_Any_NEON, ARGBToYRow_NEON, 0, 4, 1, 15)
|
||||
#ifdef HAS_ARGBTOYROW_MSA
|
||||
ANY11(ARGBToYRow_Any_MSA, ARGBToYRow_MSA, 0, 4, 1, 15)
|
||||
#endif
|
||||
#ifdef HAS_ARGBTOYROW_LSX
|
||||
ANY11(ARGBToYRow_Any_LSX, ARGBToYRow_LSX, 0, 4, 1, 15)
|
||||
#endif
|
||||
#ifdef HAS_ARGBTOYROW_LASX
|
||||
ANY11(ARGBToYRow_Any_LASX, ARGBToYRow_LASX, 0, 4, 1, 31)
|
||||
#endif
|
||||
@ -1054,9 +1057,21 @@ ANY11(ARGBToYJRow_Any_MSA, ARGBToYJRow_MSA, 0, 4, 1, 15)
|
||||
#ifdef HAS_ARGBTOYJROW_LSX
|
||||
ANY11(ARGBToYJRow_Any_LSX, ARGBToYJRow_LSX, 0, 4, 1, 15)
|
||||
#endif
|
||||
#ifdef HAS_RGBATOYJROW_LSX
|
||||
ANY11(RGBAToYJRow_Any_LSX, RGBAToYJRow_LSX, 0, 4, 1, 15)
|
||||
#endif
|
||||
#ifdef HAS_ABGRTOYJROW_LSX
|
||||
ANY11(ABGRToYJRow_Any_LSX, ABGRToYJRow_LSX, 0, 4, 1, 15)
|
||||
#endif
|
||||
#ifdef HAS_RGBATOYJROW_LASX
|
||||
ANY11(RGBAToYJRow_Any_LASX, RGBAToYJRow_LASX, 0, 4, 1, 31)
|
||||
#endif
|
||||
#ifdef HAS_ARGBTOYJROW_LASX
|
||||
ANY11(ARGBToYJRow_Any_LASX, ARGBToYJRow_LASX, 0, 4, 1, 31)
|
||||
#endif
|
||||
#ifdef HAS_ABGRTOYJROW_LASX
|
||||
ANY11(ABGRToYJRow_Any_LASX, ABGRToYJRow_LASX, 0, 4, 1, 31)
|
||||
#endif
|
||||
#ifdef HAS_BGRATOYROW_NEON
|
||||
ANY11(BGRAToYRow_Any_NEON, BGRAToYRow_NEON, 0, 4, 1, 15)
|
||||
#endif
|
||||
@ -1066,6 +1081,9 @@ ANY11(BGRAToYRow_Any_MSA, BGRAToYRow_MSA, 0, 4, 1, 15)
|
||||
#ifdef HAS_BGRATOYROW_LSX
|
||||
ANY11(BGRAToYRow_Any_LSX, BGRAToYRow_LSX, 0, 4, 1, 15)
|
||||
#endif
|
||||
#ifdef HAS_BGRATOYROW_LASX
|
||||
ANY11(BGRAToYRow_Any_LASX, BGRAToYRow_LASX, 0, 4, 1, 31)
|
||||
#endif
|
||||
#ifdef HAS_ABGRTOYROW_NEON
|
||||
ANY11(ABGRToYRow_Any_NEON, ABGRToYRow_NEON, 0, 4, 1, 15)
|
||||
#endif
|
||||
@ -1075,6 +1093,9 @@ ANY11(ABGRToYRow_Any_MSA, ABGRToYRow_MSA, 0, 4, 1, 7)
|
||||
#ifdef HAS_ABGRTOYROW_LSX
|
||||
ANY11(ABGRToYRow_Any_LSX, ABGRToYRow_LSX, 0, 4, 1, 15)
|
||||
#endif
|
||||
#ifdef HAS_ABGRTOYROW_LASX
|
||||
ANY11(ABGRToYRow_Any_LASX, ABGRToYRow_LASX, 0, 4, 1, 31)
|
||||
#endif
|
||||
#ifdef HAS_RGBATOYROW_NEON
|
||||
ANY11(RGBAToYRow_Any_NEON, RGBAToYRow_NEON, 0, 4, 1, 15)
|
||||
#endif
|
||||
@ -1084,6 +1105,9 @@ ANY11(RGBAToYRow_Any_MSA, RGBAToYRow_MSA, 0, 4, 1, 15)
|
||||
#ifdef HAS_RGBATOYROW_LSX
|
||||
ANY11(RGBAToYRow_Any_LSX, RGBAToYRow_LSX, 0, 4, 1, 15)
|
||||
#endif
|
||||
#ifdef HAS_RGBATOYROW_LASX
|
||||
ANY11(RGBAToYRow_Any_LASX, RGBAToYRow_LASX, 0, 4, 1, 31)
|
||||
#endif
|
||||
#ifdef HAS_RGB24TOYROW_NEON
|
||||
ANY11(RGB24ToYRow_Any_NEON, RGB24ToYRow_NEON, 0, 3, 1, 15)
|
||||
#endif
|
||||
@ -1102,6 +1126,12 @@ ANY11(RGB24ToYRow_Any_MSA, RGB24ToYRow_MSA, 0, 3, 1, 15)
|
||||
#ifdef HAS_RGB24TOYROW_LSX
|
||||
ANY11(RGB24ToYRow_Any_LSX, RGB24ToYRow_LSX, 0, 3, 1, 15)
|
||||
#endif
|
||||
#ifdef HAS_RGB24TOYJROW_LSX
|
||||
ANY11(RGB24ToYJRow_Any_LSX, RGB24ToYJRow_LSX, 0, 3, 1, 15)
|
||||
#endif
|
||||
#ifdef HAS_RGB24TOYJROW_LASX
|
||||
ANY11(RGB24ToYJRow_Any_LASX, RGB24ToYJRow_LASX, 0, 3, 1, 31)
|
||||
#endif
|
||||
#ifdef HAS_RGB24TOYROW_LASX
|
||||
ANY11(RGB24ToYRow_Any_LASX, RGB24ToYRow_LASX, 0, 3, 1, 31)
|
||||
#endif
|
||||
@ -1126,6 +1156,12 @@ ANY11(RAWToYRow_Any_LSX, RAWToYRow_LSX, 0, 3, 1, 15)
|
||||
#ifdef HAS_RAWTOYROW_LASX
|
||||
ANY11(RAWToYRow_Any_LASX, RAWToYRow_LASX, 0, 3, 1, 31)
|
||||
#endif
|
||||
#ifdef HAS_RAWTOYJROW_LSX
|
||||
ANY11(RAWToYJRow_Any_LSX, RAWToYJRow_LSX, 0, 3, 1, 15)
|
||||
#endif
|
||||
#ifdef HAS_RAWTOYJROW_LASX
|
||||
ANY11(RAWToYJRow_Any_LASX, RAWToYJRow_LASX, 0, 3, 1, 31)
|
||||
#endif
|
||||
#ifdef HAS_RGB565TOYROW_NEON
|
||||
ANY11(RGB565ToYRow_Any_NEON, RGB565ToYRow_NEON, 0, 2, 1, 7)
|
||||
#endif
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -775,40 +775,6 @@ void UYVYToUV422Row_LASX(const uint8_t* src_uyvy,
|
||||
}
|
||||
}
|
||||
|
||||
void ARGBToYRow_LASX(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
|
||||
int x;
|
||||
int len = width / 32;
|
||||
__m256i src0, src1, src2, src3, vec0, vec1, vec2, vec3;
|
||||
__m256i tmp0, tmp1, dst0;
|
||||
__m256i const_19 = __lasx_xvldi(0x19);
|
||||
__m256i const_42 = __lasx_xvldi(0x42);
|
||||
__m256i const_81 = __lasx_xvldi(0x81);
|
||||
__m256i const_1080 = {0x1080108010801080, 0x1080108010801080,
|
||||
0x1080108010801080, 0x1080108010801080};
|
||||
__m256i control = {0x0000000400000000, 0x0000000500000001, 0x0000000600000002,
|
||||
0x0000000700000003};
|
||||
|
||||
for (x = 0; x < len; x++) {
|
||||
DUP4_ARG2(__lasx_xvld, src_argb0, 0, src_argb0, 32, src_argb0, 64,
|
||||
src_argb0, 96, src0, src1, src2, src3);
|
||||
vec0 = __lasx_xvpickev_b(src1, src0);
|
||||
vec1 = __lasx_xvpickev_b(src3, src2);
|
||||
vec2 = __lasx_xvpickod_b(src1, src0);
|
||||
vec3 = __lasx_xvpickod_b(src3, src2);
|
||||
tmp0 = __lasx_xvmaddwev_h_bu(const_1080, vec0, const_19);
|
||||
tmp1 = __lasx_xvmaddwev_h_bu(const_1080, vec1, const_19);
|
||||
tmp0 = __lasx_xvmaddwev_h_bu(tmp0, vec2, const_81);
|
||||
tmp1 = __lasx_xvmaddwev_h_bu(tmp1, vec3, const_81);
|
||||
tmp0 = __lasx_xvmaddwod_h_bu(tmp0, vec0, const_42);
|
||||
tmp1 = __lasx_xvmaddwod_h_bu(tmp1, vec1, const_42);
|
||||
dst0 = __lasx_xvssrani_b_h(tmp1, tmp0, 8);
|
||||
dst0 = __lasx_xvperm_w(dst0, control);
|
||||
__lasx_xvst(dst0, dst_y, 0);
|
||||
src_argb0 += 128;
|
||||
dst_y += 32;
|
||||
}
|
||||
}
|
||||
|
||||
void ARGBToUVRow_LASX(const uint8_t* src_argb0,
|
||||
int src_stride_argb,
|
||||
uint8_t* dst_u,
|
||||
@ -1811,48 +1777,6 @@ void RGB565ToUVRow_LASX(const uint8_t* src_rgb565,
|
||||
}
|
||||
}
|
||||
|
||||
void RGB24ToYRow_LASX(const uint8_t* src_rgb24, uint8_t* dst_y, int width) {
|
||||
int x;
|
||||
int len = width / 32;
|
||||
__m256i src0, src1, src2;
|
||||
__m256i tmp0, tmp1, tmp2, tmp3;
|
||||
__m256i reg0, reg1, reg2, dst0;
|
||||
__m256i const_129 = __lasx_xvldi(129);
|
||||
__m256i const_br = {0x4219421942194219, 0x4219421942194219,
|
||||
0x4219421942194219, 0x4219421942194219};
|
||||
__m256i const_1080 = {0x1080108010801080, 0x1080108010801080,
|
||||
0x1080108010801080, 0x1080108010801080};
|
||||
__m256i shuff0 = {0x0B09080605030200, 0x17151412110F0E0C, 0x0B09080605030200,
|
||||
0x17151412110F0E0C};
|
||||
__m256i shuff1 = {0x0301001E1D1B1A18, 0x0F0D0C0A09070604, 0x0301001E1D1B1A18,
|
||||
0x0F0D0C0A09070604};
|
||||
__m256i shuff2 = {0x000A000700040001, 0x001600130010000D, 0x000A000700040001,
|
||||
0x001600130010000D};
|
||||
__m256i shuff3 = {0x0002001F001C0019, 0x000E000B00080005, 0x0002001F001C0019,
|
||||
0x000E000B00080005};
|
||||
|
||||
for (x = 0; x < len; x++) {
|
||||
reg0 = __lasx_xvld(src_rgb24, 0);
|
||||
reg1 = __lasx_xvld(src_rgb24, 32);
|
||||
reg2 = __lasx_xvld(src_rgb24, 64);
|
||||
src0 = __lasx_xvpermi_q(reg1, reg0, 0x30);
|
||||
src1 = __lasx_xvpermi_q(reg2, reg0, 0x21);
|
||||
src2 = __lasx_xvpermi_q(reg2, reg1, 0x30);
|
||||
tmp0 = __lasx_xvshuf_b(src1, src0, shuff0);
|
||||
tmp1 = __lasx_xvshuf_b(src1, src2, shuff1);
|
||||
tmp2 = __lasx_xvshuf_b(src1, src0, shuff2);
|
||||
tmp3 = __lasx_xvshuf_b(src1, src2, shuff3);
|
||||
reg0 = __lasx_xvmaddwev_h_bu(const_1080, tmp2, const_129);
|
||||
reg1 = __lasx_xvmaddwev_h_bu(const_1080, tmp3, const_129);
|
||||
reg0 = __lasx_xvdp2add_h_bu(reg0, const_br, tmp0);
|
||||
reg1 = __lasx_xvdp2add_h_bu(reg1, const_br, tmp1);
|
||||
dst0 = __lasx_xvpickod_b(reg1, reg0);
|
||||
__lasx_xvst(dst0, dst_y, 0);
|
||||
dst_y += 32;
|
||||
src_rgb24 += 96;
|
||||
}
|
||||
}
|
||||
|
||||
void RGB24ToUVRow_LASX(const uint8_t* src_rgb24,
|
||||
int src_stride_rgb24,
|
||||
uint8_t* dst_u,
|
||||
@ -1916,48 +1840,6 @@ void RGB24ToUVRow_LASX(const uint8_t* src_rgb24,
|
||||
}
|
||||
}
|
||||
|
||||
void RAWToYRow_LASX(const uint8_t* src_raw, uint8_t* dst_y, int width) {
|
||||
int x;
|
||||
int len = width / 32;
|
||||
__m256i src0, src1, src2;
|
||||
__m256i tmp0, tmp1, tmp2, tmp3;
|
||||
__m256i reg0, reg1, reg2, dst0;
|
||||
__m256i const_129 = __lasx_xvldi(129);
|
||||
__m256i const_br = {0x1942194219421942, 0x1942194219421942,
|
||||
0x1942194219421942, 0x1942194219421942};
|
||||
__m256i const_1080 = {0x1080108010801080, 0x1080108010801080,
|
||||
0x1080108010801080, 0x1080108010801080};
|
||||
__m256i shuff0 = {0x0B09080605030200, 0x17151412110F0E0C, 0x0B09080605030200,
|
||||
0x17151412110F0E0C};
|
||||
__m256i shuff1 = {0x0301001E1D1B1A18, 0x0F0D0C0A09070604, 0x0301001E1D1B1A18,
|
||||
0x0F0D0C0A09070604};
|
||||
__m256i shuff2 = {0x000A000700040001, 0x001600130010000D, 0x000A000700040001,
|
||||
0x001600130010000D};
|
||||
__m256i shuff3 = {0x0002001F001C0019, 0x000E000B00080005, 0x0002001F001C0019,
|
||||
0x000E000B00080005};
|
||||
|
||||
for (x = 0; x < len; x++) {
|
||||
reg0 = __lasx_xvld(src_raw, 0);
|
||||
reg1 = __lasx_xvld(src_raw, 32);
|
||||
reg2 = __lasx_xvld(src_raw, 64);
|
||||
src0 = __lasx_xvpermi_q(reg1, reg0, 0x30);
|
||||
src1 = __lasx_xvpermi_q(reg2, reg0, 0x21);
|
||||
src2 = __lasx_xvpermi_q(reg2, reg1, 0x30);
|
||||
tmp0 = __lasx_xvshuf_b(src1, src0, shuff0);
|
||||
tmp1 = __lasx_xvshuf_b(src1, src2, shuff1);
|
||||
tmp2 = __lasx_xvshuf_b(src1, src0, shuff2);
|
||||
tmp3 = __lasx_xvshuf_b(src1, src2, shuff3);
|
||||
reg0 = __lasx_xvmaddwev_h_bu(const_1080, tmp2, const_129);
|
||||
reg1 = __lasx_xvmaddwev_h_bu(const_1080, tmp3, const_129);
|
||||
reg0 = __lasx_xvdp2add_h_bu(reg0, const_br, tmp0);
|
||||
reg1 = __lasx_xvdp2add_h_bu(reg1, const_br, tmp1);
|
||||
dst0 = __lasx_xvpickod_b(reg1, reg0);
|
||||
__lasx_xvst(dst0, dst_y, 0);
|
||||
dst_y += 32;
|
||||
src_raw += 96;
|
||||
}
|
||||
}
|
||||
|
||||
void RAWToUVRow_LASX(const uint8_t* src_raw,
|
||||
int src_stride_raw,
|
||||
uint8_t* dst_u,
|
||||
@ -2118,36 +2000,226 @@ void NV21ToARGBRow_LASX(const uint8_t* src_y,
|
||||
}
|
||||
}
|
||||
|
||||
void ARGBToYJRow_LASX(const uint8_t* src_argb, uint8_t* dst_y, int width) {
|
||||
int x;
|
||||
int len = width / 32;
|
||||
__m256i src0, src1, src2, src3, dst0;
|
||||
__m256i tmp0, tmp1, tmp2, tmp3;
|
||||
__m256i reg0, reg1;
|
||||
__m256i const_128 = __lasx_xvldi(0x480);
|
||||
__m256i const_150 = __lasx_xvldi(0x96);
|
||||
__m256i const_br = {0x4D1D4D1D4D1D4D1D, 0x4D1D4D1D4D1D4D1D,
|
||||
0x4D1D4D1D4D1D4D1D, 0x4D1D4D1D4D1D4D1D};
|
||||
__m256i shuff = {0x0000000400000000, 0x0000000500000001, 0x0000000600000002,
|
||||
0x0000000700000003};
|
||||
struct RgbConstants {
|
||||
uint8_t kRGBToY[4];
|
||||
uint16_t kAddY;
|
||||
uint16_t pad;
|
||||
};
|
||||
|
||||
for (x = 0; x < len; x++) {
|
||||
DUP4_ARG2(__lasx_xvld, src_argb, 0, src_argb, 32, src_argb, 64, src_argb,
|
||||
96, src0, src1, src2, src3);
|
||||
tmp0 = __lasx_xvpickev_b(src1, src0);
|
||||
tmp1 = __lasx_xvpickod_b(src1, src0);
|
||||
tmp2 = __lasx_xvpickev_b(src3, src2);
|
||||
tmp3 = __lasx_xvpickod_b(src3, src2);
|
||||
reg0 = __lasx_xvmaddwev_h_bu(const_128, tmp1, const_150);
|
||||
reg1 = __lasx_xvmaddwev_h_bu(const_128, tmp3, const_150);
|
||||
reg0 = __lasx_xvdp2add_h_bu(reg0, const_br, tmp0);
|
||||
reg1 = __lasx_xvdp2add_h_bu(reg1, const_br, tmp2);
|
||||
dst0 = __lasx_xvpickod_b(reg1, reg0);
|
||||
dst0 = __lasx_xvperm_w(dst0, shuff);
|
||||
__lasx_xvst(dst0, dst_y, 0);
|
||||
dst_y += 32;
|
||||
src_argb += 128;
|
||||
}
|
||||
// RGB to JPeg coefficients
|
||||
// B * 0.1140 coefficient = 29
|
||||
// G * 0.5870 coefficient = 150
|
||||
// R * 0.2990 coefficient = 77
|
||||
// Add 0.5 = 0x80
|
||||
static const struct RgbConstants kRgb24JPEGConstants = {{29, 150, 77, 0},
|
||||
128,
|
||||
0};
|
||||
|
||||
static const struct RgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, 128, 0};
|
||||
|
||||
// RGB to BT.601 coefficients
|
||||
// B * 0.1016 coefficient = 25
|
||||
// G * 0.5078 coefficient = 129
|
||||
// R * 0.2578 coefficient = 66
|
||||
// Add 16.5 = 0x1080
|
||||
|
||||
static const struct RgbConstants kRgb24I601Constants = {{25, 129, 66, 0},
|
||||
0x1080,
|
||||
0};
|
||||
|
||||
static const struct RgbConstants kRawI601Constants = {{66, 129, 25, 0},
|
||||
0x1080,
|
||||
0};
|
||||
|
||||
// ARGB expects first 3 values to contain RGB and 4th value is ignored.
|
||||
static void ARGBToYMatrixRow_LASX(const uint8_t* src_argb,
|
||||
uint8_t* dst_y,
|
||||
int width,
|
||||
const struct RgbConstants* rgbconstants) {
|
||||
int32_t shuff[8] = {0, 4, 1, 5, 2, 6, 3, 7};
|
||||
asm volatile(
|
||||
"xvldrepl.b $xr0, %3, 0 \n\t" // load rgbconstants
|
||||
"xvldrepl.b $xr1, %3, 1 \n\t" // load rgbconstants
|
||||
"xvldrepl.b $xr2, %3, 2 \n\t" // load rgbconstants
|
||||
"xvldrepl.h $xr3, %3, 4 \n\t" // load rgbconstants
|
||||
"xvld $xr20, %4, 0 \n\t" // load shuff
|
||||
"1: \n\t"
|
||||
"xvld $xr4, %0, 0 \n\t"
|
||||
"xvld $xr5, %0, 32 \n\t"
|
||||
"xvld $xr6, %0, 64 \n\t"
|
||||
"xvld $xr7, %0, 96 \n\t" // load 32 pixels of ARGB
|
||||
"xvor.v $xr12, $xr3, $xr3 \n\t"
|
||||
"xvor.v $xr13, $xr3, $xr3 \n\t"
|
||||
"addi.d %2, %2, -32 \n\t" // 32 processed per loop.
|
||||
"xvpickev.b $xr8, $xr5, $xr4 \n\t" //BR
|
||||
"xvpickev.b $xr10, $xr7, $xr6 \n\t"
|
||||
"xvpickod.b $xr9, $xr5, $xr4 \n\t" //GA
|
||||
"xvpickod.b $xr11, $xr7, $xr6 \n\t"
|
||||
"xvmaddwev.h.bu $xr12, $xr8, $xr0 \n\t" //B
|
||||
"xvmaddwev.h.bu $xr13, $xr10, $xr0 \n\t"
|
||||
"xvmaddwev.h.bu $xr12, $xr9, $xr1 \n\t" //G
|
||||
"xvmaddwev.h.bu $xr13, $xr11, $xr1 \n\t"
|
||||
"xvmaddwod.h.bu $xr12, $xr8, $xr2 \n\t" //R
|
||||
"xvmaddwod.h.bu $xr13, $xr10, $xr2 \n\t"
|
||||
"addi.d %0, %0, 128 \n\t"
|
||||
"xvpickod.b $xr10, $xr13, $xr12 \n\t"
|
||||
"xvperm.w $xr11, $xr10, $xr20 \n\t"
|
||||
"xvst $xr11, %1, 0 \n\t"
|
||||
"addi.d %1, %1, 32 \n\t"
|
||||
"bnez %2, 1b \n\t"
|
||||
: "+&r"(src_argb), // %0
|
||||
"+&r"(dst_y), // %1
|
||||
"+&r"(width) // %2
|
||||
: "r"(rgbconstants),
|
||||
"r"(shuff)
|
||||
: "memory"
|
||||
);
|
||||
}
|
||||
|
||||
void ARGBToYRow_LASX(const uint8_t* src_argb, uint8_t* dst_y, int width) {
|
||||
ARGBToYMatrixRow_LASX(src_argb, dst_y, width, &kRgb24I601Constants);
|
||||
}
|
||||
|
||||
void ARGBToYJRow_LASX(const uint8_t* src_argb, uint8_t* dst_yj, int width) {
|
||||
ARGBToYMatrixRow_LASX(src_argb, dst_yj, width, &kRgb24JPEGConstants);
|
||||
}
|
||||
|
||||
void ABGRToYRow_LASX(const uint8_t* src_abgr, uint8_t* dst_y, int width) {
|
||||
ARGBToYMatrixRow_LASX(src_abgr, dst_y, width, &kRawI601Constants);
|
||||
}
|
||||
|
||||
void ABGRToYJRow_LASX(const uint8_t* src_abgr, uint8_t* dst_yj, int width) {
|
||||
ARGBToYMatrixRow_LASX(src_abgr, dst_yj, width, &kRawJPEGConstants);
|
||||
}
|
||||
|
||||
// RGBA expects first value to be A and ignored, then 3 values to contain RGB.
|
||||
// Same code as ARGB, except the LD4
|
||||
static void RGBAToYMatrixRow_LASX(const uint8_t* src_rgba,
|
||||
uint8_t* dst_y,
|
||||
int width,
|
||||
const struct RgbConstants* rgbconstants) {
|
||||
int32_t shuff[8] = {0, 4, 1, 5, 2, 6, 3, 7};
|
||||
asm volatile(
|
||||
"xvldrepl.b $xr0, %3, 0 \n\t" // load rgbconstants
|
||||
"xvldrepl.b $xr1, %3, 1 \n\t" // load rgbconstants
|
||||
"xvldrepl.b $xr2, %3, 2 \n\t" // load rgbconstants
|
||||
"xvldrepl.h $xr3, %3, 4 \n\t" // load rgbconstants
|
||||
"xvld $xr20, %4, 0 \n\t" // load shuff
|
||||
"1: \n\t"
|
||||
"xvld $xr4, %0, 0 \n\t"
|
||||
"xvld $xr5, %0, 32 \n\t"
|
||||
"xvld $xr6, %0, 64 \n\t"
|
||||
"xvld $xr7, %0, 96 \n\t" // load 32 pixels of RGBA
|
||||
"xvor.v $xr12, $xr3, $xr3 \n\t"
|
||||
"xvor.v $xr13, $xr3, $xr3 \n\t"
|
||||
"addi.d %2, %2, -32 \n\t" // 32 processed per loop.
|
||||
"xvpickev.b $xr8, $xr5, $xr4 \n\t" //AG
|
||||
"xvpickev.b $xr10, $xr7, $xr6 \n\t"
|
||||
"xvpickod.b $xr9, $xr5, $xr4 \n\t" //BR
|
||||
"xvpickod.b $xr11, $xr7, $xr6 \n\t"
|
||||
"xvmaddwev.h.bu $xr12, $xr9, $xr0 \n\t" //B
|
||||
"xvmaddwev.h.bu $xr13, $xr11, $xr0 \n\t"
|
||||
"xvmaddwod.h.bu $xr12, $xr8, $xr1 \n\t" //G
|
||||
"xvmaddwod.h.bu $xr13, $xr10, $xr1 \n\t"
|
||||
"xvmaddwod.h.bu $xr12, $xr9, $xr2 \n\t" //R
|
||||
"xvmaddwod.h.bu $xr13, $xr11, $xr2 \n\t"
|
||||
"addi.d %0, %0, 128 \n\t"
|
||||
"xvpickod.b $xr10, $xr13, $xr12 \n\t"
|
||||
"xvperm.w $xr11, $xr10, $xr20 \n\t"
|
||||
"xvst $xr11, %1, 0 \n\t"
|
||||
"addi.d %1, %1, 32 \n\t"
|
||||
"bnez %2, 1b \n\t"
|
||||
: "+&r"(src_rgba), // %0
|
||||
"+&r"(dst_y), // %1
|
||||
"+&r"(width) // %2
|
||||
: "r"(rgbconstants),
|
||||
"r"(shuff)
|
||||
: "memory"
|
||||
);
|
||||
}
|
||||
|
||||
void RGBAToYRow_LASX(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
|
||||
RGBAToYMatrixRow_LASX(src_rgba, dst_y, width, &kRgb24I601Constants);
|
||||
}
|
||||
|
||||
void RGBAToYJRow_LASX(const uint8_t* src_rgba, uint8_t* dst_yj, int width) {
|
||||
RGBAToYMatrixRow_LASX(src_rgba, dst_yj, width, &kRgb24JPEGConstants);
|
||||
}
|
||||
|
||||
void BGRAToYRow_LASX(const uint8_t* src_bgra, uint8_t* dst_y, int width) {
|
||||
RGBAToYMatrixRow_LASX(src_bgra, dst_y, width, &kRawI601Constants);
|
||||
}
|
||||
|
||||
static void RGBToYMatrixRow_LASX(const uint8_t* src_rgba,
|
||||
uint8_t* dst_y,
|
||||
int width,
|
||||
const struct RgbConstants* rgbconstants) {
|
||||
int8_t shuff[128] = {0, 2, 3, 5, 6, 8, 9, 11, 12, 14, 15, 17, 18, 20, 21, 23,
|
||||
0, 2, 3, 5, 6, 8, 9, 11, 12, 14, 15, 17, 18, 20, 21, 23,
|
||||
24, 26, 27, 29, 30, 0, 1, 3, 4, 6, 7, 9, 10, 12, 13, 15,
|
||||
24, 26, 27, 29, 30, 0, 1, 3, 4, 6, 7, 9, 10, 12, 13, 15,
|
||||
1, 0, 4, 0, 7, 0, 10, 0, 13, 0, 16, 0, 19, 0, 22, 0,
|
||||
1, 0, 4, 0, 7, 0, 10, 0, 13, 0, 16, 0, 19, 0, 22, 0,
|
||||
25, 0, 28, 0, 31, 0, 2, 0, 5, 0, 8, 0, 11, 0, 14, 0,
|
||||
25, 0, 28, 0, 31, 0, 2, 0, 5, 0, 8, 0, 11, 0, 14, 0};
|
||||
asm volatile(
|
||||
"xvldrepl.b $xr0, %3, 0 \n\t" // load rgbconstants
|
||||
"xvldrepl.b $xr1, %3, 1 \n\t" // load rgbconstants
|
||||
"xvldrepl.b $xr2, %3, 2 \n\t" // load rgbconstants
|
||||
"xvldrepl.h $xr3, %3, 4 \n\t" // load rgbconstants
|
||||
"xvld $xr4, %4, 0 \n\t" // load shuff
|
||||
"xvld $xr5, %4, 32 \n\t"
|
||||
"xvld $xr6, %4, 64 \n\t"
|
||||
"xvld $xr7, %4, 96 \n\t"
|
||||
"1: \n\t"
|
||||
"xvld $xr8, %0, 0 \n\t"
|
||||
"xvld $xr9, %0, 32 \n\t"
|
||||
"xvld $xr10, %0, 64 \n\t" // load 32 pixels of RGB
|
||||
"xvor.v $xr12, $xr3, $xr3 \n\t"
|
||||
"xvor.v $xr13, $xr3, $xr3 \n\t"
|
||||
"xvor.v $xr11, $xr9, $xr9 \n\t"
|
||||
"addi.d %2, %2, -32 \n\t" // 32 processed per loop.
|
||||
"xvpermi.q $xr9, $xr8, 0x30 \n\t" //src0
|
||||
"xvpermi.q $xr8, $xr10, 0x03 \n\t" //src1
|
||||
"xvpermi.q $xr10, $xr11, 0x30 \n\t" //src2
|
||||
"xvshuf.b $xr14, $xr8, $xr9, $xr4 \n\t"
|
||||
"xvshuf.b $xr15, $xr8, $xr10, $xr5 \n\t"
|
||||
"xvshuf.b $xr16, $xr8, $xr9, $xr6 \n\t"
|
||||
"xvshuf.b $xr17, $xr8, $xr10, $xr7 \n\t"
|
||||
"xvmaddwev.h.bu $xr12, $xr16, $xr1 \n\t" //G
|
||||
"xvmaddwev.h.bu $xr13, $xr17, $xr1 \n\t"
|
||||
"xvmaddwev.h.bu $xr12, $xr14, $xr0 \n\t" //B
|
||||
"xvmaddwev.h.bu $xr13, $xr15, $xr0 \n\t"
|
||||
"xvmaddwod.h.bu $xr12, $xr14, $xr2 \n\t" //R
|
||||
"xvmaddwod.h.bu $xr13, $xr15, $xr2 \n\t"
|
||||
"addi.d %0, %0, 96 \n\t"
|
||||
"xvpickod.b $xr10, $xr13, $xr12 \n\t"
|
||||
"xvst $xr10, %1, 0 \n\t"
|
||||
"addi.d %1, %1, 32 \n\t"
|
||||
"bnez %2, 1b \n\t"
|
||||
: "+&r"(src_rgba), // %0
|
||||
"+&r"(dst_y), // %1
|
||||
"+&r"(width) // %2
|
||||
: "r"(rgbconstants), // %3
|
||||
"r"(shuff) // %4
|
||||
: "memory"
|
||||
);
|
||||
}
|
||||
|
||||
void RGB24ToYJRow_LASX(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) {
|
||||
RGBToYMatrixRow_LASX(src_rgb24, dst_yj, width, &kRgb24JPEGConstants);
|
||||
}
|
||||
|
||||
void RAWToYJRow_LASX(const uint8_t* src_raw, uint8_t* dst_yj, int width) {
|
||||
RGBToYMatrixRow_LASX(src_raw, dst_yj, width, &kRawJPEGConstants);
|
||||
}
|
||||
|
||||
void RGB24ToYRow_LASX(const uint8_t* src_rgb24, uint8_t* dst_y, int width) {
|
||||
RGBToYMatrixRow_LASX(src_rgb24, dst_y, width, &kRgb24I601Constants);
|
||||
}
|
||||
|
||||
void RAWToYRow_LASX(const uint8_t* src_raw, uint8_t* dst_y, int width) {
|
||||
RGBToYMatrixRow_LASX(src_raw, dst_y, width, &kRawI601Constants);
|
||||
}
|
||||
|
||||
void ARGBToUVJRow_LASX(const uint8_t* src_argb,
|
||||
|
@ -561,39 +561,6 @@ void RGB565ToUVRow_LSX(const uint8_t* src_rgb565,
|
||||
}
|
||||
}
|
||||
|
||||
void RGB24ToYRow_LSX(const uint8_t* src_rgb24, uint8_t* dst_y, int width) {
|
||||
int x;
|
||||
int len = width / 16;
|
||||
__m128i src0, src1, src2;
|
||||
__m128i tmp0, tmp1, tmp2, tmp3;
|
||||
__m128i reg0, reg1, dst0;
|
||||
__m128i const_129 = __lsx_vldi(129);
|
||||
__m128i const_br = {0x4219421942194219, 0x4219421942194219};
|
||||
__m128i const_1080 = {0x1080108010801080, 0x1080108010801080};
|
||||
__m128i shuff0 = {0x0B09080605030200, 0x17151412110F0E0C};
|
||||
__m128i shuff1 = {0x0301001E1D1B1A18, 0x0F0D0C0A09070604};
|
||||
__m128i shuff2 = {0x000A000700040001, 0x001600130010000D};
|
||||
__m128i shuff3 = {0x0002001F001C0019, 0x000E000B00080005};
|
||||
|
||||
for (x = 0; x < len; x++) {
|
||||
src0 = __lsx_vld(src_rgb24, 0);
|
||||
src1 = __lsx_vld(src_rgb24, 16);
|
||||
src2 = __lsx_vld(src_rgb24, 32);
|
||||
tmp0 = __lsx_vshuf_b(src1, src0, shuff0);
|
||||
tmp1 = __lsx_vshuf_b(src1, src2, shuff1);
|
||||
tmp2 = __lsx_vshuf_b(src1, src0, shuff2);
|
||||
tmp3 = __lsx_vshuf_b(src1, src2, shuff3);
|
||||
reg0 = __lsx_vmaddwev_h_bu(const_1080, tmp2, const_129);
|
||||
reg1 = __lsx_vmaddwev_h_bu(const_1080, tmp3, const_129);
|
||||
reg0 = __lsx_vdp2add_h_bu(reg0, const_br, tmp0);
|
||||
reg1 = __lsx_vdp2add_h_bu(reg1, const_br, tmp1);
|
||||
dst0 = __lsx_vpickod_b(reg1, reg0);
|
||||
__lsx_vst(dst0, dst_y, 0);
|
||||
dst_y += 16;
|
||||
src_rgb24 += 48;
|
||||
}
|
||||
}
|
||||
|
||||
void RGB24ToUVRow_LSX(const uint8_t* src_rgb24,
|
||||
int src_stride_rgb24,
|
||||
uint8_t* dst_u,
|
||||
@ -647,39 +614,6 @@ void RGB24ToUVRow_LSX(const uint8_t* src_rgb24,
|
||||
}
|
||||
}
|
||||
|
||||
void RAWToYRow_LSX(const uint8_t* src_raw, uint8_t* dst_y, int width) {
|
||||
int x;
|
||||
int len = width / 16;
|
||||
__m128i src0, src1, src2;
|
||||
__m128i tmp0, tmp1, tmp2, tmp3;
|
||||
__m128i reg0, reg1, dst0;
|
||||
__m128i const_129 = __lsx_vldi(129);
|
||||
__m128i const_br = {0x1942194219421942, 0x1942194219421942};
|
||||
__m128i const_1080 = {0x1080108010801080, 0x1080108010801080};
|
||||
__m128i shuff0 = {0x0B09080605030200, 0x17151412110F0E0C};
|
||||
__m128i shuff1 = {0x0301001E1D1B1A18, 0x0F0D0C0A09070604};
|
||||
__m128i shuff2 = {0x000A000700040001, 0x001600130010000D};
|
||||
__m128i shuff3 = {0x0002001F001C0019, 0x000E000B00080005};
|
||||
|
||||
for (x = 0; x < len; x++) {
|
||||
src0 = __lsx_vld(src_raw, 0);
|
||||
src1 = __lsx_vld(src_raw, 16);
|
||||
src2 = __lsx_vld(src_raw, 32);
|
||||
tmp0 = __lsx_vshuf_b(src1, src0, shuff0);
|
||||
tmp1 = __lsx_vshuf_b(src1, src2, shuff1);
|
||||
tmp2 = __lsx_vshuf_b(src1, src0, shuff2);
|
||||
tmp3 = __lsx_vshuf_b(src1, src2, shuff3);
|
||||
reg0 = __lsx_vmaddwev_h_bu(const_1080, tmp2, const_129);
|
||||
reg1 = __lsx_vmaddwev_h_bu(const_1080, tmp3, const_129);
|
||||
reg0 = __lsx_vdp2add_h_bu(reg0, const_br, tmp0);
|
||||
reg1 = __lsx_vdp2add_h_bu(reg1, const_br, tmp1);
|
||||
dst0 = __lsx_vsrlni_b_h(reg1, reg0, 8);
|
||||
__lsx_vst(dst0, dst_y, 0);
|
||||
dst_y += 16;
|
||||
src_raw += 48;
|
||||
}
|
||||
}
|
||||
|
||||
void RAWToUVRow_LSX(const uint8_t* src_raw,
|
||||
int src_stride_raw,
|
||||
uint8_t* dst_u,
|
||||
@ -914,62 +848,6 @@ void SobelXYRow_LSX(const uint8_t* src_sobelx,
|
||||
}
|
||||
}
|
||||
|
||||
void ARGBToYJRow_LSX(const uint8_t* src_argb, uint8_t* dst_y, int width) {
|
||||
int x;
|
||||
int len = width / 16;
|
||||
__m128i src0, src1, src2, src3, dst0;
|
||||
__m128i tmp0, tmp1, tmp2, tmp3;
|
||||
__m128i reg0, reg1;
|
||||
__m128i const_128 = __lsx_vldi(0x480);
|
||||
__m128i const_150 = __lsx_vldi(0x96);
|
||||
__m128i const_br = {0x4D1D4D1D4D1D4D1D, 0x4D1D4D1D4D1D4D1D};
|
||||
|
||||
for (x = 0; x < len; x++) {
|
||||
DUP4_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src_argb, 32, src_argb, 48,
|
||||
src0, src1, src2, src3);
|
||||
tmp0 = __lsx_vpickev_b(src1, src0);
|
||||
tmp1 = __lsx_vpickod_b(src1, src0);
|
||||
tmp2 = __lsx_vpickev_b(src3, src2);
|
||||
tmp3 = __lsx_vpickod_b(src3, src2);
|
||||
reg0 = __lsx_vmaddwev_h_bu(const_128, tmp1, const_150);
|
||||
reg1 = __lsx_vmaddwev_h_bu(const_128, tmp3, const_150);
|
||||
reg0 = __lsx_vdp2add_h_bu(reg0, const_br, tmp0);
|
||||
reg1 = __lsx_vdp2add_h_bu(reg1, const_br, tmp2);
|
||||
dst0 = __lsx_vpickod_b(reg1, reg0);
|
||||
__lsx_vst(dst0, dst_y, 0);
|
||||
dst_y += 16;
|
||||
src_argb += 64;
|
||||
}
|
||||
}
|
||||
|
||||
void BGRAToYRow_LSX(const uint8_t* src_bgra, uint8_t* dst_y, int width) {
|
||||
int x;
|
||||
int len = width / 16;
|
||||
__m128i src0, src1, src2, src3, dst0;
|
||||
__m128i tmp0, tmp1, tmp2, tmp3;
|
||||
__m128i reg0, reg1;
|
||||
__m128i const_129 = __lsx_vldi(0x81);
|
||||
__m128i const_br = {0x1942194219421942, 0x1942194219421942};
|
||||
__m128i const_1080 = {0x1080108010801080, 0x1080108010801080};
|
||||
|
||||
for (x = 0; x < len; x++) {
|
||||
DUP4_ARG2(__lsx_vld, src_bgra, 0, src_bgra, 16, src_bgra, 32, src_bgra, 48,
|
||||
src0, src1, src2, src3);
|
||||
tmp0 = __lsx_vpickod_b(src1, src0);
|
||||
tmp1 = __lsx_vpickev_b(src1, src0);
|
||||
tmp2 = __lsx_vpickod_b(src3, src2);
|
||||
tmp3 = __lsx_vpickev_b(src3, src2);
|
||||
reg0 = __lsx_vmaddwod_h_bu(const_1080, tmp1, const_129);
|
||||
reg1 = __lsx_vmaddwod_h_bu(const_1080, tmp3, const_129);
|
||||
reg0 = __lsx_vdp2add_h_bu(reg0, const_br, tmp0);
|
||||
reg1 = __lsx_vdp2add_h_bu(reg1, const_br, tmp2);
|
||||
dst0 = __lsx_vsrlni_b_h(reg1, reg0, 8);
|
||||
__lsx_vst(dst0, dst_y, 0);
|
||||
dst_y += 16;
|
||||
src_bgra += 64;
|
||||
}
|
||||
}
|
||||
|
||||
void BGRAToUVRow_LSX(const uint8_t* src_bgra,
|
||||
int src_stride_bgra,
|
||||
uint8_t* dst_u,
|
||||
@ -1018,34 +896,6 @@ void BGRAToUVRow_LSX(const uint8_t* src_bgra,
|
||||
}
|
||||
}
|
||||
|
||||
void ABGRToYRow_LSX(const uint8_t* src_abgr, uint8_t* dst_y, int width) {
|
||||
int x;
|
||||
int len = width / 16;
|
||||
__m128i src0, src1, src2, src3, dst0;
|
||||
__m128i tmp0, tmp1, tmp2, tmp3;
|
||||
__m128i reg0, reg1;
|
||||
__m128i const_129 = __lsx_vldi(0x81);
|
||||
__m128i const_br = {0x1942194219421942, 0x1942194219421942};
|
||||
__m128i const_1080 = {0x1080108010801080, 0x1080108010801080};
|
||||
|
||||
for (x = 0; x < len; x++) {
|
||||
DUP4_ARG2(__lsx_vld, src_abgr, 0, src_abgr, 16, src_abgr, 32, src_abgr, 48,
|
||||
src0, src1, src2, src3);
|
||||
tmp0 = __lsx_vpickev_b(src1, src0);
|
||||
tmp1 = __lsx_vpickod_b(src1, src0);
|
||||
tmp2 = __lsx_vpickev_b(src3, src2);
|
||||
tmp3 = __lsx_vpickod_b(src3, src2);
|
||||
reg0 = __lsx_vmaddwev_h_bu(const_1080, tmp1, const_129);
|
||||
reg1 = __lsx_vmaddwev_h_bu(const_1080, tmp3, const_129);
|
||||
reg0 = __lsx_vdp2add_h_bu(reg0, const_br, tmp0);
|
||||
reg1 = __lsx_vdp2add_h_bu(reg1, const_br, tmp2);
|
||||
dst0 = __lsx_vsrlni_b_h(reg1, reg0, 8);
|
||||
__lsx_vst(dst0, dst_y, 0);
|
||||
dst_y += 16;
|
||||
src_abgr += 64;
|
||||
}
|
||||
}
|
||||
|
||||
void ABGRToUVRow_LSX(const uint8_t* src_abgr,
|
||||
int src_stride_abgr,
|
||||
uint8_t* dst_u,
|
||||
@ -1094,34 +944,6 @@ void ABGRToUVRow_LSX(const uint8_t* src_abgr,
|
||||
}
|
||||
}
|
||||
|
||||
void RGBAToYRow_LSX(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
|
||||
int x;
|
||||
int len = width / 16;
|
||||
__m128i src0, src1, src2, src3, dst0;
|
||||
__m128i tmp0, tmp1, tmp2, tmp3;
|
||||
__m128i reg0, reg1;
|
||||
__m128i const_129 = __lsx_vldi(0x81);
|
||||
__m128i const_br = {0x4219421942194219, 0x4219421942194219};
|
||||
__m128i const_1080 = {0x1080108010801080, 0x1080108010801080};
|
||||
|
||||
for (x = 0; x < len; x++) {
|
||||
DUP4_ARG2(__lsx_vld, src_rgba, 0, src_rgba, 16, src_rgba, 32, src_rgba, 48,
|
||||
src0, src1, src2, src3);
|
||||
tmp0 = __lsx_vpickod_b(src1, src0);
|
||||
tmp1 = __lsx_vpickev_b(src1, src0);
|
||||
tmp2 = __lsx_vpickod_b(src3, src2);
|
||||
tmp3 = __lsx_vpickev_b(src3, src2);
|
||||
reg0 = __lsx_vmaddwod_h_bu(const_1080, tmp1, const_129);
|
||||
reg1 = __lsx_vmaddwod_h_bu(const_1080, tmp3, const_129);
|
||||
reg0 = __lsx_vdp2add_h_bu(reg0, const_br, tmp0);
|
||||
reg1 = __lsx_vdp2add_h_bu(reg1, const_br, tmp2);
|
||||
dst0 = __lsx_vsrlni_b_h(reg1, reg0, 8);
|
||||
__lsx_vst(dst0, dst_y, 0);
|
||||
dst_y += 16;
|
||||
src_rgba += 64;
|
||||
}
|
||||
}
|
||||
|
||||
void RGBAToUVRow_LSX(const uint8_t* src_rgba,
|
||||
int src_stride_rgba,
|
||||
uint8_t* dst_u,
|
||||
@ -1821,6 +1643,212 @@ void HalfFloatRow_LSX(const uint16_t* src,
|
||||
}
|
||||
}
|
||||
|
||||
struct RgbConstants {
|
||||
uint8_t kRGBToY[4];
|
||||
uint16_t kAddY;
|
||||
uint16_t pad;
|
||||
};
|
||||
|
||||
// RGB to JPeg coefficients
|
||||
// B * 0.1140 coefficient = 29
|
||||
// G * 0.5870 coefficient = 150
|
||||
// R * 0.2990 coefficient = 77
|
||||
// Add 0.5 = 0x80
|
||||
static const struct RgbConstants kRgb24JPEGConstants = {{29, 150, 77, 0},
|
||||
128,
|
||||
0};
|
||||
|
||||
static const struct RgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, 128, 0};
|
||||
|
||||
// RGB to BT.601 coefficients
|
||||
// B * 0.1016 coefficient = 25
|
||||
// G * 0.5078 coefficient = 129
|
||||
// R * 0.2578 coefficient = 66
|
||||
// Add 16.5 = 0x1080
|
||||
|
||||
static const struct RgbConstants kRgb24I601Constants = {{25, 129, 66, 0},
|
||||
0x1080,
|
||||
0};
|
||||
|
||||
static const struct RgbConstants kRawI601Constants = {{66, 129, 25, 0},
|
||||
0x1080,
|
||||
0};
|
||||
|
||||
// ARGB expects first 3 values to contain RGB and 4th value is ignored.
|
||||
static void ARGBToYMatrixRow_LSX(const uint8_t* src_argb,
|
||||
uint8_t* dst_y,
|
||||
int width,
|
||||
const struct RgbConstants* rgbconstants) {
|
||||
asm volatile(
|
||||
"vldrepl.b $vr0, %3, 0 \n\t" // load rgbconstants
|
||||
"vldrepl.b $vr1, %3, 1 \n\t" // load rgbconstants
|
||||
"vldrepl.b $vr2, %3, 2 \n\t" // load rgbconstants
|
||||
"vldrepl.h $vr3, %3, 4 \n\t" // load rgbconstants
|
||||
"1: \n\t"
|
||||
"vld $vr4, %0, 0 \n\t"
|
||||
"vld $vr5, %0, 16 \n\t"
|
||||
"vld $vr6, %0, 32 \n\t"
|
||||
"vld $vr7, %0, 48 \n\t" // load 16 pixels of ARGB
|
||||
"vor.v $vr12, $vr3, $vr3 \n\t"
|
||||
"vor.v $vr13, $vr3, $vr3 \n\t"
|
||||
"addi.d %2, %2, -16 \n\t" // 16 processed per loop.
|
||||
"vpickev.b $vr8, $vr5, $vr4 \n\t" //BR
|
||||
"vpickev.b $vr10, $vr7, $vr6 \n\t"
|
||||
"vpickod.b $vr9, $vr5, $vr4 \n\t" //GA
|
||||
"vpickod.b $vr11, $vr7, $vr6 \n\t"
|
||||
"vmaddwev.h.bu $vr12, $vr8, $vr0 \n\t" //B
|
||||
"vmaddwev.h.bu $vr13, $vr10, $vr0 \n\t"
|
||||
"vmaddwev.h.bu $vr12, $vr9, $vr1 \n\t" //G
|
||||
"vmaddwev.h.bu $vr13, $vr11, $vr1 \n\t"
|
||||
"vmaddwod.h.bu $vr12, $vr8, $vr2 \n\t" //R
|
||||
"vmaddwod.h.bu $vr13, $vr10, $vr2 \n\t"
|
||||
"addi.d %0, %0, 64 \n\t"
|
||||
"vpickod.b $vr10, $vr13, $vr12 \n\t"
|
||||
"vst $vr10, %1, 0 \n\t"
|
||||
"addi.d %1, %1, 16 \n\t"
|
||||
"bnez %2, 1b \n\t"
|
||||
: "+&r"(src_argb), // %0
|
||||
"+&r"(dst_y), // %1
|
||||
"+&r"(width) // %2
|
||||
: "r"(rgbconstants)
|
||||
: "memory"
|
||||
);
|
||||
}
|
||||
|
||||
void ARGBToYRow_LSX(const uint8_t* src_argb, uint8_t* dst_y, int width) {
|
||||
ARGBToYMatrixRow_LSX(src_argb, dst_y, width, &kRgb24I601Constants);
|
||||
}
|
||||
|
||||
void ARGBToYJRow_LSX(const uint8_t* src_argb, uint8_t* dst_yj, int width) {
|
||||
ARGBToYMatrixRow_LSX(src_argb, dst_yj, width, &kRgb24JPEGConstants);
|
||||
}
|
||||
|
||||
void ABGRToYRow_LSX(const uint8_t* src_abgr, uint8_t* dst_y, int width) {
|
||||
ARGBToYMatrixRow_LSX(src_abgr, dst_y, width, &kRawI601Constants);
|
||||
}
|
||||
|
||||
void ABGRToYJRow_LSX(const uint8_t* src_abgr, uint8_t* dst_yj, int width) {
|
||||
ARGBToYMatrixRow_LSX(src_abgr, dst_yj, width, &kRawJPEGConstants);
|
||||
}
|
||||
|
||||
// RGBA expects first value to be A and ignored, then 3 values to contain RGB.
|
||||
// Same code as ARGB, except the LD4
|
||||
static void RGBAToYMatrixRow_LSX(const uint8_t* src_rgba,
|
||||
uint8_t* dst_y,
|
||||
int width,
|
||||
const struct RgbConstants* rgbconstants) {
|
||||
asm volatile(
|
||||
"vldrepl.b $vr0, %3, 0 \n\t" // load rgbconstants
|
||||
"vldrepl.b $vr1, %3, 1 \n\t" // load rgbconstants
|
||||
"vldrepl.b $vr2, %3, 2 \n\t" // load rgbconstants
|
||||
"vldrepl.h $vr3, %3, 4 \n\t" // load rgbconstants
|
||||
"1: \n\t"
|
||||
"vld $vr4, %0, 0 \n\t"
|
||||
"vld $vr5, %0, 16 \n\t"
|
||||
"vld $vr6, %0, 32 \n\t"
|
||||
"vld $vr7, %0, 48 \n\t" // load 16 pixels of RGBA
|
||||
"vor.v $vr12, $vr3, $vr3 \n\t"
|
||||
"vor.v $vr13, $vr3, $vr3 \n\t"
|
||||
"addi.d %2, %2, -16 \n\t" // 16 processed per loop.
|
||||
"vpickev.b $vr8, $vr5, $vr4 \n\t" //AG
|
||||
"vpickev.b $vr10, $vr7, $vr6 \n\t"
|
||||
"vpickod.b $vr9, $vr5, $vr4 \n\t" //BR
|
||||
"vpickod.b $vr11, $vr7, $vr6 \n\t"
|
||||
"vmaddwev.h.bu $vr12, $vr9, $vr0 \n\t" //B
|
||||
"vmaddwev.h.bu $vr13, $vr11, $vr0 \n\t"
|
||||
"vmaddwod.h.bu $vr12, $vr8, $vr1 \n\t" //G
|
||||
"vmaddwod.h.bu $vr13, $vr10, $vr1 \n\t"
|
||||
"vmaddwod.h.bu $vr12, $vr9, $vr2 \n\t" //R
|
||||
"vmaddwod.h.bu $vr13, $vr11, $vr2 \n\t"
|
||||
"addi.d %0, %0, 64 \n\t"
|
||||
"vpickod.b $vr10, $vr13, $vr12 \n\t"
|
||||
"vst $vr10, %1, 0 \n\t"
|
||||
"addi.d %1, %1, 16 \n\t"
|
||||
"bnez %2, 1b \n\t"
|
||||
: "+&r"(src_rgba), // %0
|
||||
"+&r"(dst_y), // %1
|
||||
"+&r"(width) // %2
|
||||
: "r"(rgbconstants)
|
||||
: "memory"
|
||||
);
|
||||
}
|
||||
|
||||
void RGBAToYRow_LSX(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
|
||||
RGBAToYMatrixRow_LSX(src_rgba, dst_y, width, &kRgb24I601Constants);
|
||||
}
|
||||
|
||||
void RGBAToYJRow_LSX(const uint8_t* src_rgba, uint8_t* dst_yj, int width) {
|
||||
RGBAToYMatrixRow_LSX(src_rgba, dst_yj, width, &kRgb24JPEGConstants);
|
||||
}
|
||||
|
||||
void BGRAToYRow_LSX(const uint8_t* src_bgra, uint8_t* dst_y, int width) {
|
||||
RGBAToYMatrixRow_LSX(src_bgra, dst_y, width, &kRawI601Constants);
|
||||
}
|
||||
|
||||
static void RGBToYMatrixRow_LSX(const uint8_t* src_rgba,
|
||||
uint8_t* dst_y,
|
||||
int width,
|
||||
const struct RgbConstants* rgbconstants) {
|
||||
int8_t shuff[64] = {0, 2, 3, 5, 6, 8, 9, 11, 12, 14, 15, 17, 18, 20, 21, 23,
|
||||
24, 26, 27, 29, 30, 0, 1, 3, 4, 6, 7, 9, 10, 12, 13, 15,
|
||||
1, 0, 4, 0, 7, 0, 10, 0, 13, 0, 16, 0, 19, 0, 22, 0,
|
||||
25, 0, 28, 0, 31, 0, 2, 0, 5, 0, 8, 0, 11, 0, 14, 0};
|
||||
asm volatile(
|
||||
"vldrepl.b $vr0, %3, 0 \n\t" // load rgbconstants
|
||||
"vldrepl.b $vr1, %3, 1 \n\t" // load rgbconstants
|
||||
"vldrepl.b $vr2, %3, 2 \n\t" // load rgbconstants
|
||||
"vldrepl.h $vr3, %3, 4 \n\t" // load rgbconstants
|
||||
"vld $vr4, %4, 0 \n\t" // load shuff
|
||||
"vld $vr5, %4, 16 \n\t"
|
||||
"vld $vr6, %4, 32 \n\t"
|
||||
"vld $vr7, %4, 48 \n\t"
|
||||
"1: \n\t"
|
||||
"vld $vr8, %0, 0 \n\t"
|
||||
"vld $vr9, %0, 16 \n\t"
|
||||
"vld $vr10, %0, 32 \n\t" // load 16 pixels of RGB
|
||||
"vor.v $vr12, $vr3, $vr3 \n\t"
|
||||
"vor.v $vr13, $vr3, $vr3 \n\t"
|
||||
"addi.d %2, %2, -16 \n\t" // 16 processed per loop.
|
||||
"vshuf.b $vr14, $vr9, $vr8, $vr4 \n\t"
|
||||
"vshuf.b $vr15, $vr9, $vr10, $vr5 \n\t"
|
||||
"vshuf.b $vr16, $vr9, $vr8, $vr6 \n\t"
|
||||
"vshuf.b $vr17, $vr9, $vr10, $vr7 \n\t"
|
||||
"vmaddwev.h.bu $vr12, $vr16, $vr1 \n\t" //G
|
||||
"vmaddwev.h.bu $vr13, $vr17, $vr1 \n\t"
|
||||
"vmaddwev.h.bu $vr12, $vr14, $vr0 \n\t" //B
|
||||
"vmaddwev.h.bu $vr13, $vr15, $vr0 \n\t"
|
||||
"vmaddwod.h.bu $vr12, $vr14, $vr2 \n\t" //R
|
||||
"vmaddwod.h.bu $vr13, $vr15, $vr2 \n\t"
|
||||
"addi.d %0, %0, 48 \n\t"
|
||||
"vpickod.b $vr10, $vr13, $vr12 \n\t"
|
||||
"vst $vr10, %1, 0 \n\t"
|
||||
"addi.d %1, %1, 16 \n\t"
|
||||
"bnez %2, 1b \n\t"
|
||||
: "+&r"(src_rgba), // %0
|
||||
"+&r"(dst_y), // %1
|
||||
"+&r"(width) // %2
|
||||
: "r"(rgbconstants), // %3
|
||||
"r"(shuff) // %4
|
||||
: "memory"
|
||||
);
|
||||
}
|
||||
|
||||
void RGB24ToYJRow_LSX(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) {
|
||||
RGBToYMatrixRow_LSX(src_rgb24, dst_yj, width, &kRgb24JPEGConstants);
|
||||
}
|
||||
|
||||
void RAWToYJRow_LSX(const uint8_t* src_raw, uint8_t* dst_yj, int width) {
|
||||
RGBToYMatrixRow_LSX(src_raw, dst_yj, width, &kRawJPEGConstants);
|
||||
}
|
||||
|
||||
void RGB24ToYRow_LSX(const uint8_t* src_rgb24, uint8_t* dst_y, int width) {
|
||||
RGBToYMatrixRow_LSX(src_rgb24, dst_y, width, &kRgb24I601Constants);
|
||||
}
|
||||
|
||||
void RAWToYRow_LSX(const uint8_t* src_raw, uint8_t* dst_y, int width) {
|
||||
RGBToYMatrixRow_LSX(src_raw, dst_y, width, &kRawI601Constants);
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
} // namespace libyuv
|
||||
|
@ -720,6 +720,60 @@ void DetileToYUY2_NEON(const uint8_t* src_y,
|
||||
}
|
||||
#endif
|
||||
|
||||
void UnpackMT2T_NEON(const uint8_t* src, uint16_t* dst, size_t size) {
|
||||
const uint8_t* src_lower_bits = src;
|
||||
const uint8_t* src_upper_bits = src + 16;
|
||||
asm volatile(
|
||||
"1: \n"
|
||||
"vld4.8 {d1, d3, d5, d7}, [%1]! \n" // Load 32 bytes of upper
|
||||
// bits.
|
||||
"vld1.8 {d6}, [%0]! \n" // Load 8 bytes of lower
|
||||
// bits.
|
||||
"vshl.u8 d4, d6, #2 \n" // Align lower bits.
|
||||
"vshl.u8 d2, d6, #4 \n"
|
||||
"vshl.u8 d0, d6, #6 \n"
|
||||
"vzip.u8 d0, d1 \n" // Zip lower and upper
|
||||
// bits together.
|
||||
"vzip.u8 d2, d3 \n"
|
||||
"vzip.u8 d4, d5 \n"
|
||||
"vzip.u8 d6, d7 \n"
|
||||
"vsri.u16 q0, q0, #10 \n" // Copy upper 6 bits into
|
||||
// lower 6 bits for better
|
||||
// accuracy in
|
||||
// conversions.
|
||||
"vsri.u16 q1, q1, #10 \n"
|
||||
"vsri.u16 q2, q2, #10 \n"
|
||||
"vsri.u16 q3, q3, #10 \n"
|
||||
"vst4.16 {d0, d2, d4, d6}, [%2]! \n" // Store 32 pixels
|
||||
"vst4.16 {d1, d3, d5, d7}, [%2]! \n"
|
||||
"vld4.8 {d1, d3, d5, d7}, [%1]! \n" // Process last 32 pixels
|
||||
// in the block
|
||||
"vld1.8 {d6}, [%0]! \n"
|
||||
"vshl.u8 d4, d6, #2 \n"
|
||||
"vshl.u8 d2, d6, #4 \n"
|
||||
"vshl.u8 d0, d6, #6 \n"
|
||||
"vzip.u8 d0, d1 \n"
|
||||
"vzip.u8 d2, d3 \n"
|
||||
"vzip.u8 d4, d5 \n"
|
||||
"vzip.u8 d6, d7 \n"
|
||||
"vsri.u16 q0, q0, #10 \n"
|
||||
"vsri.u16 q1, q1, #10 \n"
|
||||
"vsri.u16 q2, q2, #10 \n"
|
||||
"vsri.u16 q3, q3, #10 \n"
|
||||
"vst4.16 {d0, d2, d4, d6}, [%2]! \n"
|
||||
"vst4.16 {d1, d3, d5, d7}, [%2]! \n"
|
||||
"mov %0, %1 \n"
|
||||
"add %1, %0, #16 \n"
|
||||
"subs %3, %3, #80 \n"
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_lower_bits), // %0
|
||||
"+r"(src_upper_bits), // %1
|
||||
"+r"(dst), // %2
|
||||
"+r"(size) // %3
|
||||
:
|
||||
: "cc", "memory", "q0", "q1", "q2", "q3");
|
||||
}
|
||||
|
||||
// Reads 16 U's and V's and writes out 16 pairs of UV.
|
||||
void MergeUVRow_NEON(const uint8_t* src_u,
|
||||
const uint8_t* src_v,
|
||||
@ -3857,31 +3911,25 @@ void DivideRow_16_NEON(const uint16_t* src_y,
|
||||
int scale,
|
||||
int width) {
|
||||
asm volatile(
|
||||
"vdup.16 q0, %3 \n"
|
||||
"vdup.16 d8, %3 \n"
|
||||
"1: \n"
|
||||
"vld1.16 {q1}, [%0]! \n"
|
||||
"vld1.16 {q2}, [%0]! \n"
|
||||
"vmovl.u16 q3, d2 \n"
|
||||
"vmovl.u16 q1, d3 \n"
|
||||
"vmovl.u16 q4, d4 \n"
|
||||
"vmovl.u16 q2, d5 \n"
|
||||
"vshl.u32 q3, q3, q0 \n"
|
||||
"vshl.u32 q4, q4, q0 \n"
|
||||
"vshl.u32 q1, q1, q0 \n"
|
||||
"vshl.u32 q2, q2, q0 \n"
|
||||
"vmovn.u32 d2, q3 \n"
|
||||
"vmovn.u32 d3, q1 \n"
|
||||
"vmovn.u32 d4, q4 \n"
|
||||
"vmovn.u32 d5, q2 \n"
|
||||
"vst1.16 {q1}, [%1]! \n"
|
||||
"vst1.16 {q2}, [%1]! \n"
|
||||
"vld1.16 {q2, q3}, [%0]! \n"
|
||||
"vmull.u16 q0, d4, d8 \n"
|
||||
"vmull.u16 q1, d5, d8 \n"
|
||||
"vmull.u16 q2, d6, d8 \n"
|
||||
"vmull.u16 q3, d7, d8 \n"
|
||||
"vshrn.u32 d0, q0, #16 \n"
|
||||
"vshrn.u32 d1, q1, #16 \n"
|
||||
"vshrn.u32 d2, q2, #16 \n"
|
||||
"vshrn.u32 d3, q3, #16 \n"
|
||||
"vst1.16 {q0, q1}, [%1]! \n" // store 16 pixels
|
||||
"subs %2, %2, #16 \n" // 16 src pixels per loop
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_y), // %0
|
||||
"+r"(dst_y), // %1
|
||||
"+r"(width) // %2
|
||||
: "r"(scale) // %3
|
||||
: "cc", "memory", "q0", "q1", "q2", "q3", "q4");
|
||||
: "cc", "memory", "q0", "q1", "q2", "q3", "d8");
|
||||
}
|
||||
|
||||
// Use scale to convert lsb formats to msb, depending how many bits there are:
|
||||
|
@ -749,6 +749,54 @@ void DetileToYUY2_NEON(const uint8_t* src_y,
|
||||
}
|
||||
#endif
|
||||
|
||||
// Unpack MT2T into tiled P010 64 pixels at a time. See
|
||||
// tinyurl.com/mtk-10bit-video-format for format documentation.
|
||||
void UnpackMT2T_NEON(const uint8_t* src, uint16_t* dst, size_t size) {
|
||||
const uint8_t* src_lower_bits = src;
|
||||
const uint8_t* src_upper_bits = src + 16;
|
||||
asm volatile(
|
||||
"1: \n"
|
||||
"ld4 {v0.8b, v1.8b, v2.8b, v3.8b}, [%1], #32 \n"
|
||||
"ld1 {v7.8b}, [%0], #8 \n"
|
||||
"shl v6.8b, v7.8b, #2 \n"
|
||||
"shl v5.8b, v7.8b, #4 \n"
|
||||
"shl v4.8b, v7.8b, #6 \n"
|
||||
"zip1 v0.16b, v4.16b, v0.16b \n"
|
||||
"zip1 v1.16b, v5.16b, v1.16b \n"
|
||||
"zip1 v2.16b, v6.16b, v2.16b \n"
|
||||
"zip1 v3.16b, v7.16b, v3.16b \n"
|
||||
"sri v0.8h, v0.8h, #10 \n"
|
||||
"sri v1.8h, v1.8h, #10 \n"
|
||||
"sri v2.8h, v2.8h, #10 \n"
|
||||
"sri v3.8h, v3.8h, #10 \n"
|
||||
"st4 {v0.8h, v1.8h, v2.8h, v3.8h}, [%2], #64 \n"
|
||||
"ld4 {v0.8b, v1.8b, v2.8b, v3.8b}, [%1], #32 \n"
|
||||
"ld1 {v7.8b}, [%0], #8 \n"
|
||||
"shl v6.8b, v7.8b, #2 \n"
|
||||
"shl v5.8b, v7.8b, #4 \n"
|
||||
"shl v4.8b, v7.8b, #6 \n"
|
||||
"zip1 v0.16b, v4.16b, v0.16b \n"
|
||||
"zip1 v1.16b, v5.16b, v1.16b \n"
|
||||
"zip1 v2.16b, v6.16b, v2.16b \n"
|
||||
"zip1 v3.16b, v7.16b, v3.16b \n"
|
||||
"sri v0.8h, v0.8h, #10 \n"
|
||||
"sri v1.8h, v1.8h, #10 \n"
|
||||
"sri v2.8h, v2.8h, #10 \n"
|
||||
"sri v3.8h, v3.8h, #10 \n"
|
||||
"st4 {v0.8h, v1.8h, v2.8h, v3.8h}, [%2], #64 \n"
|
||||
"mov %0, %1 \n"
|
||||
"add %1, %0, #16 \n"
|
||||
"subs %3, %3, #80 \n"
|
||||
"b.gt 1b \n"
|
||||
: "+r"(src_lower_bits), // %0
|
||||
"+r"(src_upper_bits), // %1
|
||||
"+r"(dst), // %2
|
||||
"+r"(size) // %3
|
||||
:
|
||||
: "cc", "memory", "w0", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
|
||||
"v8", "v9", "v10", "v11", "v12");
|
||||
}
|
||||
|
||||
#if LIBYUV_USE_ST2
|
||||
// Reads 16 U's and V's and writes out 16 pairs of UV.
|
||||
void MergeUVRow_NEON(const uint8_t* src_u,
|
||||
@ -4413,23 +4461,19 @@ void DivideRow_16_NEON(const uint16_t* src_y,
|
||||
int scale,
|
||||
int width) {
|
||||
asm volatile(
|
||||
"dup v0.8h, %w3 \n"
|
||||
"dup v4.8h, %w3 \n"
|
||||
"1: \n"
|
||||
"ldp q1, q2, [%0], #32 \n"
|
||||
"ushll v3.4s, v1.4h, #0 \n"
|
||||
"ushll v4.4s, v2.4h, #0 \n"
|
||||
"ldp q2, q3, [%0], #32 \n"
|
||||
"umull v0.4s, v2.4h, v4.4h \n"
|
||||
"umull2 v1.4s, v2.8h, v4.8h \n"
|
||||
"umull v2.4s, v3.4h, v4.4h \n"
|
||||
"umull2 v3.4s, v3.8h, v4.8h \n"
|
||||
"prfm pldl1keep, [%0, 448] \n"
|
||||
"ushll2 v1.4s, v1.8h, #0 \n"
|
||||
"ushll2 v2.4s, v2.8h, #0 \n"
|
||||
"mul v3.4s, v0.4s, v3.4s \n"
|
||||
"mul v4.4s, v0.4s, v4.4s \n"
|
||||
"mul v1.4s, v0.4s, v1.4s \n"
|
||||
"mul v2.4s, v0.4s, v2.4s \n"
|
||||
"shrn v3.4h, v3.4s, #16 \n"
|
||||
"shrn v4.4h, v4.4s, #16 \n"
|
||||
"shrn2 v3.8h, v1.4s, #16 \n"
|
||||
"shrn2 v4.8h, v2.4s, #16 \n"
|
||||
"stp q3, q3, [%1], #32 \n" // store 16 pixels
|
||||
"shrn v0.4h, v0.4s, #16 \n"
|
||||
"shrn2 v0.8h, v1.4s, #16 \n"
|
||||
"shrn v1.4h, v2.4s, #16 \n"
|
||||
"shrn2 v1.8h, v3.4s, #16 \n"
|
||||
"stp q0, q1, [%1], #32 \n" // store 16 pixels
|
||||
"subs %w2, %w2, #16 \n" // 16 src pixels per loop
|
||||
"b.gt 1b \n"
|
||||
: "+r"(src_y), // %0
|
||||
|
@ -198,6 +198,51 @@ static void ScalePlaneDown2_16(int src_width,
|
||||
}
|
||||
}
|
||||
|
||||
void ScalePlaneDown2_16To8(int src_width,
|
||||
int src_height,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
int src_stride,
|
||||
int dst_stride,
|
||||
const uint16_t* src_ptr,
|
||||
uint8_t* dst_ptr,
|
||||
int scale,
|
||||
enum FilterMode filtering) {
|
||||
int y;
|
||||
void (*ScaleRowDown2)(const uint16_t* src_ptr, ptrdiff_t src_stride,
|
||||
uint8_t* dst_ptr, int dst_width, int scale) =
|
||||
(src_width & 1)
|
||||
? (filtering == kFilterNone
|
||||
? ScaleRowDown2_16To8_Odd_C
|
||||
: (filtering == kFilterLinear ? ScaleRowDown2Linear_16To8_Odd_C
|
||||
: ScaleRowDown2Box_16To8_Odd_C))
|
||||
: (filtering == kFilterNone
|
||||
? ScaleRowDown2_16To8_C
|
||||
: (filtering == kFilterLinear ? ScaleRowDown2Linear_16To8_C
|
||||
: ScaleRowDown2Box_16To8_C));
|
||||
int row_stride = src_stride * 2;
|
||||
(void)dst_height;
|
||||
if (!filtering) {
|
||||
src_ptr += src_stride; // Point to odd rows.
|
||||
src_stride = 0;
|
||||
}
|
||||
|
||||
if (filtering == kFilterLinear) {
|
||||
src_stride = 0;
|
||||
}
|
||||
for (y = 0; y < src_height / 2; ++y) {
|
||||
ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width, scale);
|
||||
src_ptr += row_stride;
|
||||
dst_ptr += dst_stride;
|
||||
}
|
||||
if (src_height & 1) {
|
||||
if (!filtering) {
|
||||
src_ptr -= src_stride; // Point to last row.
|
||||
}
|
||||
ScaleRowDown2(src_ptr, 0, dst_ptr, dst_width, scale);
|
||||
}
|
||||
}
|
||||
|
||||
// Scale plane, 1/4
|
||||
// This is an optimized version for scaling down a plane to 1/4 of
|
||||
// its original size.
|
||||
@ -775,9 +820,9 @@ static void ScaleAddCols2_C(int dst_width,
|
||||
int ix = x >> 16;
|
||||
x += dx;
|
||||
boxwidth = MIN1((x >> 16) - ix);
|
||||
*dst_ptr++ =
|
||||
SumPixels(boxwidth, src_ptr + ix) * scaletbl[boxwidth - minboxwidth] >>
|
||||
16;
|
||||
*dst_ptr++ = (uint8_t)(SumPixels(boxwidth, src_ptr + ix) *
|
||||
scaletbl[boxwidth - minboxwidth] >>
|
||||
16);
|
||||
}
|
||||
}
|
||||
|
||||
@ -814,7 +859,7 @@ static void ScaleAddCols0_C(int dst_width,
|
||||
(void)dx;
|
||||
src_ptr += (x >> 16);
|
||||
for (i = 0; i < dst_width; ++i) {
|
||||
*dst_ptr++ = src_ptr[i] * scaleval >> 16;
|
||||
*dst_ptr++ = (uint8_t)(src_ptr[i] * scaleval >> 16);
|
||||
}
|
||||
}
|
||||
|
||||
@ -829,7 +874,7 @@ static void ScaleAddCols1_C(int dst_width,
|
||||
int i;
|
||||
x >>= 16;
|
||||
for (i = 0; i < dst_width; ++i) {
|
||||
*dst_ptr++ = SumPixels(boxwidth, src_ptr + x) * scaleval >> 16;
|
||||
*dst_ptr++ = (uint8_t)(SumPixels(boxwidth, src_ptr + x) * scaleval >> 16);
|
||||
x += boxwidth;
|
||||
}
|
||||
}
|
||||
|
@ -58,9 +58,9 @@ static void ScaleARGBDown2(int src_width,
|
||||
assert((dy & 0x1ffff) == 0); // Test vertical scale is multiple of 2.
|
||||
// Advance to odd row, even column.
|
||||
if (filtering == kFilterBilinear) {
|
||||
src_argb += (y >> 16) * (int64_t)src_stride + (x >> 16) * 4;
|
||||
src_argb += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4;
|
||||
} else {
|
||||
src_argb += (y >> 16) * (int64_t)src_stride + ((x >> 16) - 1) * 4;
|
||||
src_argb += (y >> 16) * (intptr_t)src_stride + ((x >> 16) - 1) * 4;
|
||||
}
|
||||
|
||||
#if defined(HAS_SCALEARGBROWDOWN2_SSE2)
|
||||
@ -162,7 +162,7 @@ static void ScaleARGBDown4Box(int src_width,
|
||||
uint8_t* dst_argb, int dst_width) =
|
||||
ScaleARGBRowDown2Box_C;
|
||||
// Advance to odd row, even column.
|
||||
src_argb += (y >> 16) * (int64_t)src_stride + (x >> 16) * 4;
|
||||
src_argb += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4;
|
||||
(void)src_width;
|
||||
(void)src_height;
|
||||
(void)dx;
|
||||
@ -214,7 +214,7 @@ static void ScaleARGBDownEven(int src_width,
|
||||
enum FilterMode filtering) {
|
||||
int j;
|
||||
int col_step = dx >> 16;
|
||||
int row_stride = (dy >> 16) * (int64_t)src_stride;
|
||||
ptrdiff_t row_stride = (ptrdiff_t)((dy >> 16) * (intptr_t)src_stride);
|
||||
void (*ScaleARGBRowDownEven)(const uint8_t* src_argb, ptrdiff_t src_stride,
|
||||
int src_step, uint8_t* dst_argb, int dst_width) =
|
||||
filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C;
|
||||
@ -222,7 +222,7 @@ static void ScaleARGBDownEven(int src_width,
|
||||
(void)src_height;
|
||||
assert(IS_ALIGNED(src_width, 2));
|
||||
assert(IS_ALIGNED(src_height, 2));
|
||||
src_argb += (y >> 16) * (int64_t)src_stride + (x >> 16) * 4;
|
||||
src_argb += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4;
|
||||
#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2)) {
|
||||
ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_SSE2
|
||||
@ -289,10 +289,10 @@ static void ScaleARGBBilinearDown(int src_width,
|
||||
int dy,
|
||||
enum FilterMode filtering) {
|
||||
int j;
|
||||
void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb,
|
||||
void (*InterpolateRow)(uint8_t* dst_argb, const uint8_t* src_argb,
|
||||
ptrdiff_t src_stride, int dst_width,
|
||||
int source_y_fraction) = InterpolateRow_C;
|
||||
void (*ScaleARGBFilterCols)(uint8_t * dst_argb, const uint8_t* src_argb,
|
||||
void (*ScaleARGBFilterCols)(uint8_t* dst_argb, const uint8_t* src_argb,
|
||||
int dst_width, int x, int dx) =
|
||||
(src_width >= 32768) ? ScaleARGBFilterCols64_C : ScaleARGBFilterCols_C;
|
||||
int64_t xlast = x + (int64_t)(dst_width - 1) * dx;
|
||||
@ -388,7 +388,7 @@ static void ScaleARGBBilinearDown(int src_width,
|
||||
}
|
||||
for (j = 0; j < dst_height; ++j) {
|
||||
int yi = y >> 16;
|
||||
const uint8_t* src = src_argb + yi * (int64_t)src_stride;
|
||||
const uint8_t* src = src_argb + yi * (intptr_t)src_stride;
|
||||
if (filtering == kFilterLinear) {
|
||||
ScaleARGBFilterCols(dst_argb, src, dst_width, x, dx);
|
||||
} else {
|
||||
@ -421,10 +421,10 @@ static void ScaleARGBBilinearUp(int src_width,
|
||||
int dy,
|
||||
enum FilterMode filtering) {
|
||||
int j;
|
||||
void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb,
|
||||
void (*InterpolateRow)(uint8_t* dst_argb, const uint8_t* src_argb,
|
||||
ptrdiff_t src_stride, int dst_width,
|
||||
int source_y_fraction) = InterpolateRow_C;
|
||||
void (*ScaleARGBFilterCols)(uint8_t * dst_argb, const uint8_t* src_argb,
|
||||
void (*ScaleARGBFilterCols)(uint8_t* dst_argb, const uint8_t* src_argb,
|
||||
int dst_width, int x, int dx) =
|
||||
filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
|
||||
const int max_y = (src_height - 1) << 16;
|
||||
@ -545,7 +545,7 @@ static void ScaleARGBBilinearUp(int src_width,
|
||||
|
||||
{
|
||||
int yi = y >> 16;
|
||||
const uint8_t* src = src_argb + yi * (int64_t)src_stride;
|
||||
const uint8_t* src = src_argb + yi * (intptr_t)src_stride;
|
||||
|
||||
// Allocate 2 rows of ARGB.
|
||||
const int row_size = (dst_width * 4 + 31) & ~31;
|
||||
@ -570,7 +570,7 @@ static void ScaleARGBBilinearUp(int src_width,
|
||||
if (y > max_y) {
|
||||
y = max_y;
|
||||
yi = y >> 16;
|
||||
src = src_argb + yi * (int64_t)src_stride;
|
||||
src = src_argb + yi * (intptr_t)src_stride;
|
||||
}
|
||||
if (yi != lasty) {
|
||||
ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
|
||||
@ -668,7 +668,7 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
|
||||
}
|
||||
#endif
|
||||
|
||||
void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb,
|
||||
void (*InterpolateRow)(uint8_t* dst_argb, const uint8_t* src_argb,
|
||||
ptrdiff_t src_stride, int dst_width,
|
||||
int source_y_fraction) = InterpolateRow_C;
|
||||
#if defined(HAS_INTERPOLATEROW_SSSE3)
|
||||
@ -712,7 +712,7 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
|
||||
}
|
||||
#endif
|
||||
|
||||
void (*ScaleARGBFilterCols)(uint8_t * dst_argb, const uint8_t* src_argb,
|
||||
void (*ScaleARGBFilterCols)(uint8_t* dst_argb, const uint8_t* src_argb,
|
||||
int dst_width, int x, int dx) =
|
||||
filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
|
||||
if (src_width >= 32768) {
|
||||
@ -793,9 +793,9 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
|
||||
const int kYShift = 1; // Shift Y by 1 to convert Y plane to UV coordinate.
|
||||
int yi = y >> 16;
|
||||
int uv_yi = yi >> kYShift;
|
||||
const uint8_t* src_row_y = src_y + yi * (int64_t)src_stride_y;
|
||||
const uint8_t* src_row_u = src_u + uv_yi * (int64_t)src_stride_u;
|
||||
const uint8_t* src_row_v = src_v + uv_yi * (int64_t)src_stride_v;
|
||||
const uint8_t* src_row_y = src_y + yi * (intptr_t)src_stride_y;
|
||||
const uint8_t* src_row_u = src_u + uv_yi * (intptr_t)src_stride_u;
|
||||
const uint8_t* src_row_v = src_v + uv_yi * (intptr_t)src_stride_v;
|
||||
|
||||
// Allocate 2 rows of ARGB.
|
||||
const int row_size = (dst_width * 4 + 31) & ~31;
|
||||
@ -833,9 +833,9 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
|
||||
y = max_y;
|
||||
yi = y >> 16;
|
||||
uv_yi = yi >> kYShift;
|
||||
src_row_y = src_y + yi * (int64_t)src_stride_y;
|
||||
src_row_u = src_u + uv_yi * (int64_t)src_stride_u;
|
||||
src_row_v = src_v + uv_yi * (int64_t)src_stride_v;
|
||||
src_row_y = src_y + yi * (intptr_t)src_stride_y;
|
||||
src_row_u = src_u + uv_yi * (intptr_t)src_stride_u;
|
||||
src_row_v = src_v + uv_yi * (intptr_t)src_stride_v;
|
||||
}
|
||||
if (yi != lasty) {
|
||||
// TODO(fbarchard): Convert the clipped region of row.
|
||||
@ -883,7 +883,7 @@ static void ScaleARGBSimple(int src_width,
|
||||
int y,
|
||||
int dy) {
|
||||
int j;
|
||||
void (*ScaleARGBCols)(uint8_t * dst_argb, const uint8_t* src_argb,
|
||||
void (*ScaleARGBCols)(uint8_t* dst_argb, const uint8_t* src_argb,
|
||||
int dst_width, int x, int dx) =
|
||||
(src_width >= 32768) ? ScaleARGBCols64_C : ScaleARGBCols_C;
|
||||
(void)src_height;
|
||||
@ -926,7 +926,7 @@ static void ScaleARGBSimple(int src_width,
|
||||
}
|
||||
|
||||
for (j = 0; j < dst_height; ++j) {
|
||||
ScaleARGBCols(dst_argb, src_argb + (y >> 16) * (int64_t)src_stride,
|
||||
ScaleARGBCols(dst_argb, src_argb + (y >> 16) * (intptr_t)src_stride,
|
||||
dst_width, x, dx);
|
||||
dst_argb += dst_stride;
|
||||
y += dy;
|
||||
@ -962,7 +962,7 @@ static void ScaleARGB(const uint8_t* src,
|
||||
// Negative src_height means invert the image.
|
||||
if (src_height < 0) {
|
||||
src_height = -src_height;
|
||||
src = src + (src_height - 1) * (int64_t)src_stride;
|
||||
src = src + (src_height - 1) * (intptr_t)src_stride;
|
||||
src_stride = -src_stride;
|
||||
}
|
||||
ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
|
||||
@ -977,7 +977,7 @@ static void ScaleARGB(const uint8_t* src,
|
||||
if (clip_y) {
|
||||
int64_t clipf = (int64_t)(clip_y)*dy;
|
||||
y += (clipf & 0xffff);
|
||||
src += (clipf >> 16) * (int64_t)src_stride;
|
||||
src += (clipf >> 16) * (intptr_t)src_stride;
|
||||
dst += clip_y * dst_stride;
|
||||
}
|
||||
|
||||
@ -1011,7 +1011,7 @@ static void ScaleARGB(const uint8_t* src,
|
||||
filtering = kFilterNone;
|
||||
if (dx == 0x10000 && dy == 0x10000) {
|
||||
// Straight copy.
|
||||
ARGBCopy(src + (y >> 16) * (int64_t)src_stride + (x >> 16) * 4,
|
||||
ARGBCopy(src + (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4,
|
||||
src_stride, dst, dst_stride, clip_width, clip_height);
|
||||
return;
|
||||
}
|
||||
|
@ -23,6 +23,25 @@ namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
#define STATIC_CAST(type, expr) static_cast<type>(expr)
|
||||
#else
|
||||
#define STATIC_CAST(type, expr) (type)(expr)
|
||||
#endif
|
||||
|
||||
// TODO(fbarchard): make clamp255 preserve negative values.
|
||||
static __inline int32_t clamp255(int32_t v) {
|
||||
return (-(v >= 255) | v) & 255;
|
||||
}
|
||||
|
||||
// Use scale to convert lsb formats to msb, depending how many bits there are:
|
||||
// 32768 = 9 bits
|
||||
// 16384 = 10 bits
|
||||
// 4096 = 12 bits
|
||||
// 256 = 16 bits
|
||||
// TODO(fbarchard): change scale to bits
|
||||
#define C16TO8(v, scale) clamp255(((v) * (scale)) >> 16)
|
||||
|
||||
static __inline int Abs(int v) {
|
||||
return v >= 0 ? v : -v;
|
||||
}
|
||||
@ -62,6 +81,50 @@ void ScaleRowDown2_16_C(const uint16_t* src_ptr,
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleRowDown2_16To8_C(const uint16_t* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8_t* dst,
|
||||
int dst_width,
|
||||
int scale) {
|
||||
int x;
|
||||
(void)src_stride;
|
||||
assert(scale >= 256);
|
||||
assert(scale <= 32768);
|
||||
for (x = 0; x < dst_width - 1; x += 2) {
|
||||
dst[0] = STATIC_CAST(uint8_t, C16TO8(src_ptr[1], scale));
|
||||
dst[1] = STATIC_CAST(uint8_t, C16TO8(src_ptr[3], scale));
|
||||
dst += 2;
|
||||
src_ptr += 4;
|
||||
}
|
||||
if (dst_width & 1) {
|
||||
dst[0] = STATIC_CAST(uint8_t, C16TO8(src_ptr[1], scale));
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleRowDown2_16To8_Odd_C(const uint16_t* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8_t* dst,
|
||||
int dst_width,
|
||||
int scale) {
|
||||
int x;
|
||||
(void)src_stride;
|
||||
assert(scale >= 256);
|
||||
assert(scale <= 32768);
|
||||
dst_width -= 1;
|
||||
for (x = 0; x < dst_width - 1; x += 2) {
|
||||
dst[0] = STATIC_CAST(uint8_t, C16TO8(src_ptr[1], scale));
|
||||
dst[1] = STATIC_CAST(uint8_t, C16TO8(src_ptr[3], scale));
|
||||
dst += 2;
|
||||
src_ptr += 4;
|
||||
}
|
||||
if (dst_width & 1) {
|
||||
dst[0] = STATIC_CAST(uint8_t, C16TO8(src_ptr[1], scale));
|
||||
dst += 1;
|
||||
src_ptr += 2;
|
||||
}
|
||||
dst[0] = STATIC_CAST(uint8_t, C16TO8(src_ptr[0], scale));
|
||||
}
|
||||
|
||||
void ScaleRowDown2Linear_C(const uint8_t* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8_t* dst,
|
||||
@ -98,6 +161,52 @@ void ScaleRowDown2Linear_16_C(const uint16_t* src_ptr,
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleRowDown2Linear_16To8_C(const uint16_t* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8_t* dst,
|
||||
int dst_width,
|
||||
int scale) {
|
||||
const uint16_t* s = src_ptr;
|
||||
int x;
|
||||
(void)src_stride;
|
||||
assert(scale >= 256);
|
||||
assert(scale <= 32768);
|
||||
for (x = 0; x < dst_width - 1; x += 2) {
|
||||
dst[0] = STATIC_CAST(uint8_t, C16TO8((s[0] + s[1] + 1) >> 1, scale));
|
||||
dst[1] = STATIC_CAST(uint8_t, C16TO8((s[2] + s[3] + 1) >> 1, scale));
|
||||
dst += 2;
|
||||
s += 4;
|
||||
}
|
||||
if (dst_width & 1) {
|
||||
dst[0] = STATIC_CAST(uint8_t, C16TO8((s[0] + s[1] + 1) >> 1, scale));
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleRowDown2Linear_16To8_Odd_C(const uint16_t* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8_t* dst,
|
||||
int dst_width,
|
||||
int scale) {
|
||||
const uint16_t* s = src_ptr;
|
||||
int x;
|
||||
(void)src_stride;
|
||||
assert(scale >= 256);
|
||||
assert(scale <= 32768);
|
||||
dst_width -= 1;
|
||||
for (x = 0; x < dst_width - 1; x += 2) {
|
||||
dst[0] = STATIC_CAST(uint8_t, C16TO8((s[0] + s[1] + 1) >> 1, scale));
|
||||
dst[1] = STATIC_CAST(uint8_t, C16TO8((s[2] + s[3] + 1) >> 1, scale));
|
||||
dst += 2;
|
||||
s += 4;
|
||||
}
|
||||
if (dst_width & 1) {
|
||||
dst[0] = STATIC_CAST(uint8_t, C16TO8((s[0] + s[1] + 1) >> 1, scale));
|
||||
dst += 1;
|
||||
s += 2;
|
||||
}
|
||||
dst[0] = STATIC_CAST(uint8_t, C16TO8(s[0], scale));
|
||||
}
|
||||
|
||||
void ScaleRowDown2Box_C(const uint8_t* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8_t* dst,
|
||||
@ -160,6 +269,61 @@ void ScaleRowDown2Box_16_C(const uint16_t* src_ptr,
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleRowDown2Box_16To8_C(const uint16_t* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8_t* dst,
|
||||
int dst_width,
|
||||
int scale) {
|
||||
const uint16_t* s = src_ptr;
|
||||
const uint16_t* t = src_ptr + src_stride;
|
||||
int x;
|
||||
assert(scale >= 256);
|
||||
assert(scale <= 32768);
|
||||
for (x = 0; x < dst_width - 1; x += 2) {
|
||||
dst[0] = STATIC_CAST(uint8_t,
|
||||
C16TO8((s[0] + s[1] + t[0] + t[1] + 2) >> 2, scale));
|
||||
dst[1] = STATIC_CAST(uint8_t,
|
||||
C16TO8((s[2] + s[3] + t[2] + t[3] + 2) >> 2, scale));
|
||||
dst += 2;
|
||||
s += 4;
|
||||
t += 4;
|
||||
}
|
||||
if (dst_width & 1) {
|
||||
dst[0] = STATIC_CAST(uint8_t,
|
||||
C16TO8((s[0] + s[1] + t[0] + t[1] + 2) >> 2, scale));
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleRowDown2Box_16To8_Odd_C(const uint16_t* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8_t* dst,
|
||||
int dst_width,
|
||||
int scale) {
|
||||
const uint16_t* s = src_ptr;
|
||||
const uint16_t* t = src_ptr + src_stride;
|
||||
int x;
|
||||
assert(scale >= 256);
|
||||
assert(scale <= 32768);
|
||||
dst_width -= 1;
|
||||
for (x = 0; x < dst_width - 1; x += 2) {
|
||||
dst[0] = STATIC_CAST(uint8_t,
|
||||
C16TO8((s[0] + s[1] + t[0] + t[1] + 2) >> 2, scale));
|
||||
dst[1] = STATIC_CAST(uint8_t,
|
||||
C16TO8((s[2] + s[3] + t[2] + t[3] + 2) >> 2, scale));
|
||||
dst += 2;
|
||||
s += 4;
|
||||
t += 4;
|
||||
}
|
||||
if (dst_width & 1) {
|
||||
dst[0] = STATIC_CAST(uint8_t,
|
||||
C16TO8((s[0] + s[1] + t[0] + t[1] + 2) >> 2, scale));
|
||||
dst += 1;
|
||||
s += 2;
|
||||
t += 2;
|
||||
}
|
||||
dst[0] = STATIC_CAST(uint8_t, C16TO8((s[0] + t[0] + 1) >> 1, scale));
|
||||
}
|
||||
|
||||
void ScaleRowDown4_C(const uint8_t* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8_t* dst,
|
||||
|
@ -83,9 +83,9 @@ static void ScaleUVDown2(int src_width,
|
||||
assert((dy & 0x1ffff) == 0); // Test vertical scale is multiple of 2.
|
||||
// Advance to odd row, even column.
|
||||
if (filtering == kFilterBilinear) {
|
||||
src_uv += (y >> 16) * (int64_t)src_stride + (x >> 16) * 2;
|
||||
src_uv += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2;
|
||||
} else {
|
||||
src_uv += (y >> 16) * (int64_t)src_stride + ((x >> 16) - 1) * 2;
|
||||
src_uv += (y >> 16) * (intptr_t)src_stride + ((x >> 16) - 1) * 2;
|
||||
}
|
||||
|
||||
#if defined(HAS_SCALEUVROWDOWN2BOX_SSSE3)
|
||||
@ -200,7 +200,7 @@ static void ScaleUVDown4Box(int src_width,
|
||||
uint8_t* dst_uv, int dst_width) =
|
||||
ScaleUVRowDown2Box_C;
|
||||
// Advance to odd row, even column.
|
||||
src_uv += (y >> 16) * (int64_t)src_stride + (x >> 16) * 2;
|
||||
src_uv += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2;
|
||||
(void)src_width;
|
||||
(void)src_height;
|
||||
(void)dx;
|
||||
@ -263,7 +263,7 @@ static void ScaleUVDownEven(int src_width,
|
||||
enum FilterMode filtering) {
|
||||
int j;
|
||||
int col_step = dx >> 16;
|
||||
int row_stride = (dy >> 16) * (int64_t)src_stride;
|
||||
ptrdiff_t row_stride = (ptrdiff_t)((dy >> 16) * (intptr_t)src_stride);
|
||||
void (*ScaleUVRowDownEven)(const uint8_t* src_uv, ptrdiff_t src_stride,
|
||||
int src_step, uint8_t* dst_uv, int dst_width) =
|
||||
filtering ? ScaleUVRowDownEvenBox_C : ScaleUVRowDownEven_C;
|
||||
@ -271,7 +271,7 @@ static void ScaleUVDownEven(int src_width,
|
||||
(void)src_height;
|
||||
assert(IS_ALIGNED(src_width, 2));
|
||||
assert(IS_ALIGNED(src_height, 2));
|
||||
src_uv += (y >> 16) * (int64_t)src_stride + (x >> 16) * 2;
|
||||
src_uv += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2;
|
||||
#if defined(HAS_SCALEUVROWDOWNEVEN_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
ScaleUVRowDownEven = filtering ? ScaleUVRowDownEvenBox_Any_SSSE3
|
||||
@ -338,10 +338,10 @@ static void ScaleUVBilinearDown(int src_width,
|
||||
int dy,
|
||||
enum FilterMode filtering) {
|
||||
int j;
|
||||
void (*InterpolateRow)(uint8_t * dst_uv, const uint8_t* src_uv,
|
||||
void (*InterpolateRow)(uint8_t* dst_uv, const uint8_t* src_uv,
|
||||
ptrdiff_t src_stride, int dst_width,
|
||||
int source_y_fraction) = InterpolateRow_C;
|
||||
void (*ScaleUVFilterCols)(uint8_t * dst_uv, const uint8_t* src_uv,
|
||||
void (*ScaleUVFilterCols)(uint8_t* dst_uv, const uint8_t* src_uv,
|
||||
int dst_width, int x, int dx) =
|
||||
(src_width >= 32768) ? ScaleUVFilterCols64_C : ScaleUVFilterCols_C;
|
||||
int64_t xlast = x + (int64_t)(dst_width - 1) * dx;
|
||||
@ -429,7 +429,7 @@ static void ScaleUVBilinearDown(int src_width,
|
||||
}
|
||||
for (j = 0; j < dst_height; ++j) {
|
||||
int yi = y >> 16;
|
||||
const uint8_t* src = src_uv + yi * (int64_t)src_stride;
|
||||
const uint8_t* src = src_uv + yi * (intptr_t)src_stride;
|
||||
if (filtering == kFilterLinear) {
|
||||
ScaleUVFilterCols(dst_uv, src, dst_width, x, dx);
|
||||
} else {
|
||||
@ -464,10 +464,10 @@ static void ScaleUVBilinearUp(int src_width,
|
||||
int dy,
|
||||
enum FilterMode filtering) {
|
||||
int j;
|
||||
void (*InterpolateRow)(uint8_t * dst_uv, const uint8_t* src_uv,
|
||||
void (*InterpolateRow)(uint8_t* dst_uv, const uint8_t* src_uv,
|
||||
ptrdiff_t src_stride, int dst_width,
|
||||
int source_y_fraction) = InterpolateRow_C;
|
||||
void (*ScaleUVFilterCols)(uint8_t * dst_uv, const uint8_t* src_uv,
|
||||
void (*ScaleUVFilterCols)(uint8_t* dst_uv, const uint8_t* src_uv,
|
||||
int dst_width, int x, int dx) =
|
||||
filtering ? ScaleUVFilterCols_C : ScaleUVCols_C;
|
||||
const int max_y = (src_height - 1) << 16;
|
||||
@ -571,7 +571,7 @@ static void ScaleUVBilinearUp(int src_width,
|
||||
|
||||
{
|
||||
int yi = y >> 16;
|
||||
const uint8_t* src = src_uv + yi * (int64_t)src_stride;
|
||||
const uint8_t* src = src_uv + yi * (intptr_t)src_stride;
|
||||
|
||||
// Allocate 2 rows of UV.
|
||||
const int row_size = (dst_width * 2 + 15) & ~15;
|
||||
@ -596,7 +596,7 @@ static void ScaleUVBilinearUp(int src_width,
|
||||
if (y > max_y) {
|
||||
y = max_y;
|
||||
yi = y >> 16;
|
||||
src = src_uv + yi * (int64_t)src_stride;
|
||||
src = src_uv + yi * (intptr_t)src_stride;
|
||||
}
|
||||
if (yi != lasty) {
|
||||
ScaleUVFilterCols(rowptr, src, dst_width, x, dx);
|
||||
@ -663,13 +663,13 @@ void ScaleUVLinearUp2(int src_width,
|
||||
#endif
|
||||
|
||||
if (dst_height == 1) {
|
||||
ScaleRowUp(src_uv + ((src_height - 1) / 2) * (int64_t)src_stride, dst_uv,
|
||||
ScaleRowUp(src_uv + ((src_height - 1) / 2) * (intptr_t)src_stride, dst_uv,
|
||||
dst_width);
|
||||
} else {
|
||||
dy = FixedDiv(src_height - 1, dst_height - 1);
|
||||
y = (1 << 15) - 1;
|
||||
for (i = 0; i < dst_height; ++i) {
|
||||
ScaleRowUp(src_uv + (y >> 16) * (int64_t)src_stride, dst_uv, dst_width);
|
||||
ScaleRowUp(src_uv + (y >> 16) * (intptr_t)src_stride, dst_uv, dst_width);
|
||||
dst_uv += dst_stride;
|
||||
y += dy;
|
||||
}
|
||||
@ -770,13 +770,13 @@ void ScaleUVLinearUp2_16(int src_width,
|
||||
#endif
|
||||
|
||||
if (dst_height == 1) {
|
||||
ScaleRowUp(src_uv + ((src_height - 1) / 2) * (int64_t)src_stride, dst_uv,
|
||||
ScaleRowUp(src_uv + ((src_height - 1) / 2) * (intptr_t)src_stride, dst_uv,
|
||||
dst_width);
|
||||
} else {
|
||||
dy = FixedDiv(src_height - 1, dst_height - 1);
|
||||
y = (1 << 15) - 1;
|
||||
for (i = 0; i < dst_height; ++i) {
|
||||
ScaleRowUp(src_uv + (y >> 16) * (int64_t)src_stride, dst_uv, dst_width);
|
||||
ScaleRowUp(src_uv + (y >> 16) * (intptr_t)src_stride, dst_uv, dst_width);
|
||||
dst_uv += dst_stride;
|
||||
y += dy;
|
||||
}
|
||||
@ -854,7 +854,7 @@ static void ScaleUVSimple(int src_width,
|
||||
int y,
|
||||
int dy) {
|
||||
int j;
|
||||
void (*ScaleUVCols)(uint8_t * dst_uv, const uint8_t* src_uv, int dst_width,
|
||||
void (*ScaleUVCols)(uint8_t* dst_uv, const uint8_t* src_uv, int dst_width,
|
||||
int x, int dx) =
|
||||
(src_width >= 32768) ? ScaleUVCols64_C : ScaleUVCols_C;
|
||||
(void)src_height;
|
||||
@ -889,7 +889,7 @@ static void ScaleUVSimple(int src_width,
|
||||
}
|
||||
|
||||
for (j = 0; j < dst_height; ++j) {
|
||||
ScaleUVCols(dst_uv, src_uv + (y >> 16) * (int64_t)src_stride, dst_width, x,
|
||||
ScaleUVCols(dst_uv, src_uv + (y >> 16) * (intptr_t)src_stride, dst_width, x,
|
||||
dx);
|
||||
dst_uv += dst_stride;
|
||||
y += dy;
|
||||
@ -910,7 +910,7 @@ static int UVCopy(const uint8_t* src_uv,
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
src_uv = src_uv + (height - 1) * (int64_t)src_stride_uv;
|
||||
src_uv = src_uv + (height - 1) * (intptr_t)src_stride_uv;
|
||||
src_stride_uv = -src_stride_uv;
|
||||
}
|
||||
|
||||
@ -930,7 +930,7 @@ static int UVCopy_16(const uint16_t* src_uv,
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
src_uv = src_uv + (height - 1) * (int64_t)src_stride_uv;
|
||||
src_uv = src_uv + (height - 1) * (intptr_t)src_stride_uv;
|
||||
src_stride_uv = -src_stride_uv;
|
||||
}
|
||||
|
||||
@ -968,7 +968,7 @@ static void ScaleUV(const uint8_t* src,
|
||||
// Negative src_height means invert the image.
|
||||
if (src_height < 0) {
|
||||
src_height = -src_height;
|
||||
src = src + (src_height - 1) * (int64_t)src_stride;
|
||||
src = src + (src_height - 1) * (intptr_t)src_stride;
|
||||
src_stride = -src_stride;
|
||||
}
|
||||
ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
|
||||
@ -983,7 +983,7 @@ static void ScaleUV(const uint8_t* src,
|
||||
if (clip_y) {
|
||||
int64_t clipf = (int64_t)(clip_y)*dy;
|
||||
y += (clipf & 0xffff);
|
||||
src += (clipf >> 16) * (int64_t)src_stride;
|
||||
src += (clipf >> 16) * (intptr_t)src_stride;
|
||||
dst += clip_y * dst_stride;
|
||||
}
|
||||
|
||||
@ -1024,7 +1024,7 @@ static void ScaleUV(const uint8_t* src,
|
||||
#ifdef HAS_UVCOPY
|
||||
if (dx == 0x10000 && dy == 0x10000) {
|
||||
// Straight copy.
|
||||
UVCopy(src + (y >> 16) * (int64_t)src_stride + (x >> 16) * 2,
|
||||
UVCopy(src + (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2,
|
||||
src_stride, dst, dst_stride, clip_width, clip_height);
|
||||
return;
|
||||
}
|
||||
@ -1118,7 +1118,7 @@ int UVScale_16(const uint16_t* src_uv,
|
||||
// Negative src_height means invert the image.
|
||||
if (src_height < 0) {
|
||||
src_height = -src_height;
|
||||
src_uv = src_uv + (src_height - 1) * (int64_t)src_stride_uv;
|
||||
src_uv = src_uv + (src_height - 1) * (intptr_t)src_stride_uv;
|
||||
src_stride_uv = -src_stride_uv;
|
||||
}
|
||||
src_width = Abs(src_width);
|
||||
@ -1126,13 +1126,13 @@ int UVScale_16(const uint16_t* src_uv,
|
||||
#ifdef HAS_UVCOPY
|
||||
if (!filtering && src_width == dst_width && (src_height % dst_height == 0)) {
|
||||
if (dst_height == 1) {
|
||||
UVCopy_16(src_uv + ((src_height - 1) / 2) * (int64_t)src_stride_uv,
|
||||
UVCopy_16(src_uv + ((src_height - 1) / 2) * (intptr_t)src_stride_uv,
|
||||
src_stride_uv, dst_uv, dst_stride_uv, dst_width, dst_height);
|
||||
} else {
|
||||
dy = src_height / dst_height;
|
||||
UVCopy_16(src_uv + ((dy - 1) / 2) * (int64_t)src_stride_uv,
|
||||
dy * (int64_t)src_stride_uv, dst_uv, dst_stride_uv, dst_width,
|
||||
dst_height);
|
||||
UVCopy_16(src_uv + ((dy - 1) / 2) * (intptr_t)src_stride_uv,
|
||||
(int)(dy * (intptr_t)src_stride_uv), dst_uv, dst_stride_uv,
|
||||
dst_width, dst_height);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@ -48,6 +48,7 @@ namespace libyuv {
|
||||
#define AR30ToAR30 ARGBCopy
|
||||
#define ABGRToABGR ARGBCopy
|
||||
|
||||
// subsample amount uses a divide.
|
||||
#define SUBSAMPLE(v, a) ((((v) + (a)-1)) / (a))
|
||||
|
||||
// Planar test
|
||||
@ -180,6 +181,7 @@ TESTPLANARTOP(I212, uint16_t, 2, 2, 1, I012, uint16_t, 2, 2, 2, 12)
|
||||
TESTPLANARTOP(I010, uint16_t, 2, 2, 2, I420, uint8_t, 1, 2, 2, 10)
|
||||
TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I420, uint8_t, 1, 2, 2, 10)
|
||||
TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I422, uint8_t, 1, 2, 1, 10)
|
||||
TESTPLANARTOP(I410, uint16_t, 2, 1, 1, I420, uint8_t, 1, 2, 2, 10)
|
||||
TESTPLANARTOP(I410, uint16_t, 2, 1, 1, I444, uint8_t, 1, 1, 1, 10)
|
||||
TESTPLANARTOP(I012, uint16_t, 2, 2, 2, I420, uint8_t, 1, 2, 2, 12)
|
||||
TESTPLANARTOP(I212, uint16_t, 2, 2, 1, I422, uint8_t, 1, 2, 1, 12)
|
||||
@ -417,131 +419,136 @@ TESTPLANARTOBP(I210, uint16_t, 2, 2, 1, P210, uint16_t, 2, 2, 1, 10)
|
||||
TESTPLANARTOBP(I012, uint16_t, 2, 2, 2, P012, uint16_t, 2, 2, 2, 12)
|
||||
TESTPLANARTOBP(I212, uint16_t, 2, 2, 1, P212, uint16_t, 2, 2, 1, 12)
|
||||
|
||||
#define TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
|
||||
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \
|
||||
DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF, \
|
||||
DOY, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
|
||||
TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
|
||||
static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported"); \
|
||||
static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \
|
||||
static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2, \
|
||||
"SRC_SUBSAMP_X unsupported"); \
|
||||
static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2, \
|
||||
"SRC_SUBSAMP_Y unsupported"); \
|
||||
static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2, \
|
||||
"DST_SUBSAMP_X unsupported"); \
|
||||
static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2, \
|
||||
"DST_SUBSAMP_Y unsupported"); \
|
||||
const int kWidth = W1280; \
|
||||
const int kHeight = benchmark_height_; \
|
||||
const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X); \
|
||||
const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \
|
||||
const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y); \
|
||||
const int kPaddedWidth = (kWidth + (TILE_WIDTH - 1)) & ~(TILE_WIDTH - 1); \
|
||||
const int kPaddedHeight = \
|
||||
(kHeight + (TILE_HEIGHT - 1)) & ~(TILE_HEIGHT - 1); \
|
||||
const int kSrcHalfPaddedWidth = SUBSAMPLE(kPaddedWidth, SRC_SUBSAMP_X); \
|
||||
const int kSrcHalfPaddedHeight = SUBSAMPLE(kPaddedHeight, SRC_SUBSAMP_Y); \
|
||||
align_buffer_page_end(src_y, kPaddedWidth* kPaddedHeight* SRC_BPC + OFF); \
|
||||
align_buffer_page_end( \
|
||||
src_uv, \
|
||||
2 * kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * SRC_BPC + OFF); \
|
||||
align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \
|
||||
align_buffer_page_end(dst_uv_c, \
|
||||
2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
|
||||
align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \
|
||||
align_buffer_page_end(dst_uv_opt, \
|
||||
2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
|
||||
SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF); \
|
||||
SRC_T* src_uv_p = reinterpret_cast<SRC_T*>(src_uv + OFF); \
|
||||
for (int i = 0; i < kPaddedWidth * kPaddedHeight; ++i) { \
|
||||
src_y_p[i] = \
|
||||
(fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \
|
||||
} \
|
||||
for (int i = 0; i < kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * 2; ++i) { \
|
||||
src_uv_p[i] = \
|
||||
(fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \
|
||||
} \
|
||||
memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \
|
||||
memset(dst_uv_c, 2, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
|
||||
memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \
|
||||
memset(dst_uv_opt, 102, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
|
||||
MaskCpuFlags(disable_cpu_flags_); \
|
||||
SRC_FMT_PLANAR##To##FMT_PLANAR( \
|
||||
src_y_p, kWidth, src_uv_p, 2 * kSrcHalfWidth, \
|
||||
DOY ? reinterpret_cast<DST_T*>(dst_y_c) : NULL, kWidth, \
|
||||
reinterpret_cast<DST_T*>(dst_uv_c), 2 * kDstHalfWidth, kWidth, \
|
||||
NEG kHeight); \
|
||||
MaskCpuFlags(benchmark_cpu_info_); \
|
||||
for (int i = 0; i < benchmark_iterations_; ++i) { \
|
||||
SRC_FMT_PLANAR##To##FMT_PLANAR( \
|
||||
src_y_p, kWidth, src_uv_p, 2 * kSrcHalfWidth, \
|
||||
DOY ? reinterpret_cast<DST_T*>(dst_y_opt) : NULL, kWidth, \
|
||||
reinterpret_cast<DST_T*>(dst_uv_opt), 2 * kDstHalfWidth, kWidth, \
|
||||
NEG kHeight); \
|
||||
} \
|
||||
if (DOY) { \
|
||||
for (int i = 0; i < kHeight; ++i) { \
|
||||
for (int j = 0; j < kWidth; ++j) { \
|
||||
EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
for (int i = 0; i < kDstHalfHeight; ++i) { \
|
||||
for (int j = 0; j < 2 * kDstHalfWidth; ++j) { \
|
||||
EXPECT_EQ(dst_uv_c[i * 2 * kDstHalfWidth + j], \
|
||||
dst_uv_opt[i * 2 * kDstHalfWidth + j]); \
|
||||
} \
|
||||
} \
|
||||
free_aligned_buffer_page_end(dst_y_c); \
|
||||
free_aligned_buffer_page_end(dst_uv_c); \
|
||||
free_aligned_buffer_page_end(dst_y_opt); \
|
||||
free_aligned_buffer_page_end(dst_uv_opt); \
|
||||
free_aligned_buffer_page_end(src_y); \
|
||||
free_aligned_buffer_page_end(src_uv); \
|
||||
#define TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
|
||||
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
|
||||
DST_SUBSAMP_Y, W1280, N, NEG, OFF, DOY, SRC_DEPTH, \
|
||||
TILE_WIDTH, TILE_HEIGHT) \
|
||||
TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
|
||||
static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \
|
||||
static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2, \
|
||||
"SRC_SUBSAMP_X unsupported"); \
|
||||
static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2, \
|
||||
"SRC_SUBSAMP_Y unsupported"); \
|
||||
static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2, \
|
||||
"DST_SUBSAMP_X unsupported"); \
|
||||
static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2, \
|
||||
"DST_SUBSAMP_Y unsupported"); \
|
||||
const int kWidth = W1280; \
|
||||
const int kHeight = benchmark_height_; \
|
||||
const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X); \
|
||||
const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \
|
||||
const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y); \
|
||||
const int kPaddedWidth = (kWidth + (TILE_WIDTH - 1)) & ~(TILE_WIDTH - 1); \
|
||||
const int kPaddedHeight = \
|
||||
(kHeight + (TILE_HEIGHT - 1)) & ~(TILE_HEIGHT - 1); \
|
||||
const int kSrcHalfPaddedWidth = SUBSAMPLE(kPaddedWidth, SRC_SUBSAMP_X); \
|
||||
const int kSrcHalfPaddedHeight = SUBSAMPLE(kPaddedHeight, SRC_SUBSAMP_Y); \
|
||||
align_buffer_page_end(src_y, kPaddedWidth* kPaddedHeight* SRC_BPC + OFF); \
|
||||
align_buffer_page_end( \
|
||||
src_uv, \
|
||||
2 * kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * SRC_BPC + OFF); \
|
||||
align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \
|
||||
align_buffer_page_end(dst_uv_c, \
|
||||
2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
|
||||
align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \
|
||||
align_buffer_page_end(dst_uv_opt, \
|
||||
2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
|
||||
SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF); \
|
||||
SRC_T* src_uv_p = reinterpret_cast<SRC_T*>(src_uv + OFF); \
|
||||
for (int i = 0; \
|
||||
i < kPaddedWidth * kPaddedHeight * SRC_BPC / (int)sizeof(SRC_T); \
|
||||
++i) { \
|
||||
src_y_p[i] = \
|
||||
(fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \
|
||||
} \
|
||||
for (int i = 0; i < kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * 2 * \
|
||||
SRC_BPC / (int)sizeof(SRC_T); \
|
||||
++i) { \
|
||||
src_uv_p[i] = \
|
||||
(fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \
|
||||
} \
|
||||
memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \
|
||||
memset(dst_uv_c, 2, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
|
||||
memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \
|
||||
memset(dst_uv_opt, 102, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
|
||||
MaskCpuFlags(disable_cpu_flags_); \
|
||||
SRC_FMT_PLANAR##To##FMT_PLANAR( \
|
||||
src_y_p, kWidth* SRC_BPC / (int)sizeof(SRC_T), src_uv_p, \
|
||||
2 * kSrcHalfWidth * SRC_BPC / (int)sizeof(SRC_T), \
|
||||
DOY ? reinterpret_cast<DST_T*>(dst_y_c) : NULL, kWidth, \
|
||||
reinterpret_cast<DST_T*>(dst_uv_c), 2 * kDstHalfWidth, kWidth, \
|
||||
NEG kHeight); \
|
||||
MaskCpuFlags(benchmark_cpu_info_); \
|
||||
for (int i = 0; i < benchmark_iterations_; ++i) { \
|
||||
SRC_FMT_PLANAR##To##FMT_PLANAR( \
|
||||
src_y_p, kWidth* SRC_BPC / (int)sizeof(SRC_T), src_uv_p, \
|
||||
2 * kSrcHalfWidth * SRC_BPC / (int)sizeof(SRC_T), \
|
||||
DOY ? reinterpret_cast<DST_T*>(dst_y_opt) : NULL, kWidth, \
|
||||
reinterpret_cast<DST_T*>(dst_uv_opt), 2 * kDstHalfWidth, kWidth, \
|
||||
NEG kHeight); \
|
||||
} \
|
||||
if (DOY) { \
|
||||
for (int i = 0; i < kHeight; ++i) { \
|
||||
for (int j = 0; j < kWidth; ++j) { \
|
||||
EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
for (int i = 0; i < kDstHalfHeight; ++i) { \
|
||||
for (int j = 0; j < 2 * kDstHalfWidth; ++j) { \
|
||||
EXPECT_EQ(dst_uv_c[i * 2 * kDstHalfWidth + j], \
|
||||
dst_uv_opt[i * 2 * kDstHalfWidth + j]); \
|
||||
} \
|
||||
} \
|
||||
free_aligned_buffer_page_end(dst_y_c); \
|
||||
free_aligned_buffer_page_end(dst_uv_c); \
|
||||
free_aligned_buffer_page_end(dst_y_opt); \
|
||||
free_aligned_buffer_page_end(dst_uv_opt); \
|
||||
free_aligned_buffer_page_end(src_y); \
|
||||
free_aligned_buffer_page_end(src_uv); \
|
||||
}
|
||||
|
||||
#define TESTBIPLANARTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
|
||||
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \
|
||||
DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, \
|
||||
TILE_HEIGHT) \
|
||||
TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
|
||||
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
|
||||
DST_SUBSAMP_Y, benchmark_width_ + 1, _Any, +, 0, 1, \
|
||||
SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
|
||||
TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
|
||||
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
|
||||
DST_SUBSAMP_Y, benchmark_width_, _Unaligned, +, 2, 1, \
|
||||
SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
|
||||
TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
|
||||
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
|
||||
DST_SUBSAMP_Y, benchmark_width_, _Invert, -, 0, 1, \
|
||||
SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
|
||||
TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
|
||||
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
|
||||
DST_SUBSAMP_Y, benchmark_width_, _Opt, +, 0, 1, SRC_DEPTH, \
|
||||
TILE_WIDTH, TILE_HEIGHT) \
|
||||
TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
|
||||
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
|
||||
DST_SUBSAMP_Y, benchmark_width_, _NullY, +, 0, 0, \
|
||||
SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT)
|
||||
#define TESTBPTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
|
||||
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
|
||||
DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
|
||||
TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
|
||||
FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
|
||||
benchmark_width_ + 1, _Any, +, 0, 1, SRC_DEPTH, TILE_WIDTH, \
|
||||
TILE_HEIGHT) \
|
||||
TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
|
||||
FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
|
||||
benchmark_width_, _Unaligned, +, 2, 1, SRC_DEPTH, TILE_WIDTH, \
|
||||
TILE_HEIGHT) \
|
||||
TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
|
||||
FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
|
||||
benchmark_width_, _Invert, -, 0, 1, SRC_DEPTH, TILE_WIDTH, \
|
||||
TILE_HEIGHT) \
|
||||
TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
|
||||
FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
|
||||
benchmark_width_, _Opt, +, 0, 1, SRC_DEPTH, TILE_WIDTH, \
|
||||
TILE_HEIGHT) \
|
||||
TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
|
||||
FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
|
||||
benchmark_width_, _NullY, +, 0, 0, SRC_DEPTH, TILE_WIDTH, \
|
||||
TILE_HEIGHT)
|
||||
|
||||
TESTBIPLANARTOBP(NV21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 1, 1)
|
||||
TESTBIPLANARTOBP(NV12, uint8_t, 1, 2, 2, NV12Mirror, uint8_t, 1, 2, 2, 8, 1, 1)
|
||||
TESTBIPLANARTOBP(NV12, uint8_t, 1, 2, 2, NV24, uint8_t, 1, 1, 1, 8, 1, 1)
|
||||
TESTBIPLANARTOBP(NV16, uint8_t, 1, 2, 1, NV24, uint8_t, 1, 1, 1, 8, 1, 1)
|
||||
TESTBIPLANARTOBP(P010, uint16_t, 2, 2, 2, P410, uint16_t, 2, 1, 1, 10, 1, 1)
|
||||
TESTBIPLANARTOBP(P210, uint16_t, 2, 2, 1, P410, uint16_t, 2, 1, 1, 10, 1, 1)
|
||||
TESTBIPLANARTOBP(P012, uint16_t, 2, 2, 2, P412, uint16_t, 2, 1, 1, 10, 1, 1)
|
||||
TESTBIPLANARTOBP(P212, uint16_t, 2, 2, 1, P412, uint16_t, 2, 1, 1, 12, 1, 1)
|
||||
TESTBIPLANARTOBP(P016, uint16_t, 2, 2, 2, P416, uint16_t, 2, 1, 1, 12, 1, 1)
|
||||
TESTBIPLANARTOBP(P216, uint16_t, 2, 2, 1, P416, uint16_t, 2, 1, 1, 12, 1, 1)
|
||||
TESTBIPLANARTOBP(MM21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 16, 32)
|
||||
TESTBPTOBP(NV21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 1, 1)
|
||||
TESTBPTOBP(NV12, uint8_t, 1, 2, 2, NV12Mirror, uint8_t, 1, 2, 2, 8, 1, 1)
|
||||
TESTBPTOBP(NV12, uint8_t, 1, 2, 2, NV24, uint8_t, 1, 1, 1, 8, 1, 1)
|
||||
TESTBPTOBP(NV16, uint8_t, 1, 2, 1, NV24, uint8_t, 1, 1, 1, 8, 1, 1)
|
||||
TESTBPTOBP(P010, uint16_t, 2, 2, 2, P410, uint16_t, 2, 1, 1, 10, 1, 1)
|
||||
TESTBPTOBP(P210, uint16_t, 2, 2, 1, P410, uint16_t, 2, 1, 1, 10, 1, 1)
|
||||
TESTBPTOBP(P012, uint16_t, 2, 2, 2, P412, uint16_t, 2, 1, 1, 10, 1, 1)
|
||||
TESTBPTOBP(P212, uint16_t, 2, 2, 1, P412, uint16_t, 2, 1, 1, 12, 1, 1)
|
||||
TESTBPTOBP(P016, uint16_t, 2, 2, 2, P416, uint16_t, 2, 1, 1, 12, 1, 1)
|
||||
TESTBPTOBP(P216, uint16_t, 2, 2, 1, P416, uint16_t, 2, 1, 1, 12, 1, 1)
|
||||
TESTBPTOBP(MM21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 16, 32)
|
||||
TESTBPTOBP(MT2T, uint8_t, 10 / 8, 2, 2, P010, uint16_t, 2, 2, 2, 10, 16, 32)
|
||||
|
||||
#define TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
|
||||
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \
|
||||
DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF, \
|
||||
SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
|
||||
#define TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
|
||||
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
|
||||
DST_SUBSAMP_Y, W1280, N, NEG, OFF, SRC_DEPTH, TILE_WIDTH, \
|
||||
TILE_HEIGHT) \
|
||||
TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
|
||||
static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported"); \
|
||||
static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \
|
||||
@ -621,30 +628,30 @@ TESTBIPLANARTOBP(MM21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 16, 32)
|
||||
free_aligned_buffer_page_end(src_uv); \
|
||||
}
|
||||
|
||||
#define TESTBIPLANARTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
|
||||
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \
|
||||
DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, \
|
||||
TILE_HEIGHT) \
|
||||
TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
|
||||
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
|
||||
DST_SUBSAMP_Y, benchmark_width_ + 1, _Any, +, 0, SRC_DEPTH, \
|
||||
TILE_WIDTH, TILE_HEIGHT) \
|
||||
TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
|
||||
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
|
||||
DST_SUBSAMP_Y, benchmark_width_, _Unaligned, +, 2, \
|
||||
SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
|
||||
TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
|
||||
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
|
||||
DST_SUBSAMP_Y, benchmark_width_, _Invert, -, 0, SRC_DEPTH, \
|
||||
TILE_WIDTH, TILE_HEIGHT) \
|
||||
TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
|
||||
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
|
||||
DST_SUBSAMP_Y, benchmark_width_, _Opt, +, 0, SRC_DEPTH, \
|
||||
TILE_WIDTH, TILE_HEIGHT)
|
||||
#define TESTBPTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
|
||||
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
|
||||
DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
|
||||
TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
|
||||
FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
|
||||
benchmark_width_ + 1, _Any, +, 0, SRC_DEPTH, TILE_WIDTH, \
|
||||
TILE_HEIGHT) \
|
||||
TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
|
||||
FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
|
||||
benchmark_width_, _Unaligned, +, 2, SRC_DEPTH, TILE_WIDTH, \
|
||||
TILE_HEIGHT) \
|
||||
TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
|
||||
FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
|
||||
benchmark_width_, _Invert, -, 0, SRC_DEPTH, TILE_WIDTH, \
|
||||
TILE_HEIGHT) \
|
||||
TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
|
||||
FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
|
||||
benchmark_width_, _Opt, +, 0, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT)
|
||||
|
||||
TESTBIPLANARTOP(NV12, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1)
|
||||
TESTBIPLANARTOP(NV21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1)
|
||||
TESTBIPLANARTOP(MM21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 16, 32)
|
||||
TESTBPTOP(NV12, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1)
|
||||
TESTBPTOP(NV21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1)
|
||||
TESTBPTOP(MM21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 16, 32)
|
||||
TESTBPTOP(P010, uint16_t, 2, 2, 2, I010, uint16_t, 2, 2, 2, 10, 1, 1)
|
||||
TESTBPTOP(P012, uint16_t, 2, 2, 2, I012, uint16_t, 2, 2, 2, 12, 1, 1)
|
||||
|
||||
// Provide matrix wrappers for full range bt.709
|
||||
#define F420ToABGR(a, b, c, d, e, f, g, h, i, j) \
|
||||
@ -1069,8 +1076,8 @@ TESTQPLANARTOB(I420Alpha, 2, 2, ARGBFilter, 4, 4, 1)
|
||||
TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1)
|
||||
#endif
|
||||
|
||||
#define TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, \
|
||||
BPP_B, W1280, N, NEG, OFF) \
|
||||
#define TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
|
||||
W1280, N, NEG, OFF) \
|
||||
TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \
|
||||
const int kWidth = W1280; \
|
||||
const int kHeight = benchmark_height_; \
|
||||
@ -1123,15 +1130,15 @@ TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1)
|
||||
free_aligned_buffer_page_end(dst_argb32_opt); \
|
||||
}
|
||||
|
||||
#define TESTBIPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B) \
|
||||
TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
|
||||
benchmark_width_ + 1, _Any, +, 0) \
|
||||
TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
|
||||
benchmark_width_, _Unaligned, +, 2) \
|
||||
TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
|
||||
benchmark_width_, _Invert, -, 0) \
|
||||
TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
|
||||
benchmark_width_, _Opt, +, 0)
|
||||
#define TESTBPTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B) \
|
||||
TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
|
||||
benchmark_width_ + 1, _Any, +, 0) \
|
||||
TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
|
||||
benchmark_width_, _Unaligned, +, 2) \
|
||||
TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
|
||||
benchmark_width_, _Invert, -, 0) \
|
||||
TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
|
||||
benchmark_width_, _Opt, +, 0)
|
||||
|
||||
#define JNV12ToARGB(a, b, c, d, e, f, g, h) \
|
||||
NV12ToARGBMatrix(a, b, c, d, e, f, &kYuvJPEGConstants, g, h)
|
||||
@ -1152,29 +1159,29 @@ TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1)
|
||||
#define JNV12ToRGB565(a, b, c, d, e, f, g, h) \
|
||||
NV12ToRGB565Matrix(a, b, c, d, e, f, &kYuvJPEGConstants, g, h)
|
||||
|
||||
TESTBIPLANARTOB(JNV12, 2, 2, ARGB, ARGB, 4)
|
||||
TESTBIPLANARTOB(JNV21, 2, 2, ARGB, ARGB, 4)
|
||||
TESTBIPLANARTOB(JNV12, 2, 2, ABGR, ABGR, 4)
|
||||
TESTBIPLANARTOB(JNV21, 2, 2, ABGR, ABGR, 4)
|
||||
TESTBIPLANARTOB(JNV12, 2, 2, RGB24, RGB24, 3)
|
||||
TESTBIPLANARTOB(JNV21, 2, 2, RGB24, RGB24, 3)
|
||||
TESTBIPLANARTOB(JNV12, 2, 2, RAW, RAW, 3)
|
||||
TESTBIPLANARTOB(JNV21, 2, 2, RAW, RAW, 3)
|
||||
TESTBPTOB(JNV12, 2, 2, ARGB, ARGB, 4)
|
||||
TESTBPTOB(JNV21, 2, 2, ARGB, ARGB, 4)
|
||||
TESTBPTOB(JNV12, 2, 2, ABGR, ABGR, 4)
|
||||
TESTBPTOB(JNV21, 2, 2, ABGR, ABGR, 4)
|
||||
TESTBPTOB(JNV12, 2, 2, RGB24, RGB24, 3)
|
||||
TESTBPTOB(JNV21, 2, 2, RGB24, RGB24, 3)
|
||||
TESTBPTOB(JNV12, 2, 2, RAW, RAW, 3)
|
||||
TESTBPTOB(JNV21, 2, 2, RAW, RAW, 3)
|
||||
#ifdef LITTLE_ENDIAN_ONLY_TEST
|
||||
TESTBIPLANARTOB(JNV12, 2, 2, RGB565, RGB565, 2)
|
||||
TESTBPTOB(JNV12, 2, 2, RGB565, RGB565, 2)
|
||||
#endif
|
||||
|
||||
TESTBIPLANARTOB(NV12, 2, 2, ARGB, ARGB, 4)
|
||||
TESTBIPLANARTOB(NV21, 2, 2, ARGB, ARGB, 4)
|
||||
TESTBIPLANARTOB(NV12, 2, 2, ABGR, ABGR, 4)
|
||||
TESTBIPLANARTOB(NV21, 2, 2, ABGR, ABGR, 4)
|
||||
TESTBIPLANARTOB(NV12, 2, 2, RGB24, RGB24, 3)
|
||||
TESTBIPLANARTOB(NV21, 2, 2, RGB24, RGB24, 3)
|
||||
TESTBIPLANARTOB(NV12, 2, 2, RAW, RAW, 3)
|
||||
TESTBIPLANARTOB(NV21, 2, 2, RAW, RAW, 3)
|
||||
TESTBIPLANARTOB(NV21, 2, 2, YUV24, RAW, 3)
|
||||
TESTBPTOB(NV12, 2, 2, ARGB, ARGB, 4)
|
||||
TESTBPTOB(NV21, 2, 2, ARGB, ARGB, 4)
|
||||
TESTBPTOB(NV12, 2, 2, ABGR, ABGR, 4)
|
||||
TESTBPTOB(NV21, 2, 2, ABGR, ABGR, 4)
|
||||
TESTBPTOB(NV12, 2, 2, RGB24, RGB24, 3)
|
||||
TESTBPTOB(NV21, 2, 2, RGB24, RGB24, 3)
|
||||
TESTBPTOB(NV12, 2, 2, RAW, RAW, 3)
|
||||
TESTBPTOB(NV21, 2, 2, RAW, RAW, 3)
|
||||
TESTBPTOB(NV21, 2, 2, YUV24, RAW, 3)
|
||||
#ifdef LITTLE_ENDIAN_ONLY_TEST
|
||||
TESTBIPLANARTOB(NV12, 2, 2, RGB565, RGB565, 2)
|
||||
TESTBPTOB(NV12, 2, 2, RGB565, RGB565, 2)
|
||||
#endif
|
||||
|
||||
#define TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
|
||||
@ -1269,8 +1276,8 @@ TESTATOPLANAR(UYVY, 2, 1, I422, 2, 1)
|
||||
TESTATOPLANAR(YUY2, 2, 1, I420, 2, 2)
|
||||
TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1)
|
||||
|
||||
#define TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, \
|
||||
SUBSAMP_Y, W1280, N, NEG, OFF) \
|
||||
#define TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
|
||||
W1280, N, NEG, OFF) \
|
||||
TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \
|
||||
const int kWidth = W1280; \
|
||||
const int kHeight = benchmark_height_; \
|
||||
@ -1316,25 +1323,25 @@ TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1)
|
||||
free_aligned_buffer_page_end(src_argb); \
|
||||
}
|
||||
|
||||
#define TESTATOBIPLANAR(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
|
||||
TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
|
||||
benchmark_width_ + 1, _Any, +, 0) \
|
||||
TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
|
||||
benchmark_width_, _Unaligned, +, 2) \
|
||||
TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
|
||||
benchmark_width_, _Invert, -, 0) \
|
||||
TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
|
||||
benchmark_width_, _Opt, +, 0)
|
||||
#define TESTATOBP(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
|
||||
TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
|
||||
benchmark_width_ + 1, _Any, +, 0) \
|
||||
TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
|
||||
benchmark_width_, _Unaligned, +, 2) \
|
||||
TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
|
||||
benchmark_width_, _Invert, -, 0) \
|
||||
TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
|
||||
benchmark_width_, _Opt, +, 0)
|
||||
|
||||
TESTATOBIPLANAR(ARGB, 1, 4, NV12, 2, 2)
|
||||
TESTATOBIPLANAR(ARGB, 1, 4, NV21, 2, 2)
|
||||
TESTATOBIPLANAR(ABGR, 1, 4, NV12, 2, 2)
|
||||
TESTATOBIPLANAR(ABGR, 1, 4, NV21, 2, 2)
|
||||
TESTATOBIPLANAR(RAW, 1, 3, JNV21, 2, 2)
|
||||
TESTATOBIPLANAR(YUY2, 2, 4, NV12, 2, 2)
|
||||
TESTATOBIPLANAR(UYVY, 2, 4, NV12, 2, 2)
|
||||
TESTATOBIPLANAR(AYUV, 1, 4, NV12, 2, 2)
|
||||
TESTATOBIPLANAR(AYUV, 1, 4, NV21, 2, 2)
|
||||
TESTATOBP(ARGB, 1, 4, NV12, 2, 2)
|
||||
TESTATOBP(ARGB, 1, 4, NV21, 2, 2)
|
||||
TESTATOBP(ABGR, 1, 4, NV12, 2, 2)
|
||||
TESTATOBP(ABGR, 1, 4, NV21, 2, 2)
|
||||
TESTATOBP(RAW, 1, 3, JNV21, 2, 2)
|
||||
TESTATOBP(YUY2, 2, 4, NV12, 2, 2)
|
||||
TESTATOBP(UYVY, 2, 4, NV12, 2, 2)
|
||||
TESTATOBP(AYUV, 1, 4, NV12, 2, 2)
|
||||
TESTATOBP(AYUV, 1, 4, NV21, 2, 2)
|
||||
|
||||
#define TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, \
|
||||
EPP_B, STRIDE_B, HEIGHT_B, W1280, N, NEG, OFF) \
|
||||
@ -3915,8 +3922,8 @@ TESTQPLANAR16TOB(I010Alpha, 2, 2, ARGBFilter, 4, 4, 1, 10)
|
||||
TESTQPLANAR16TOB(I210Alpha, 2, 1, ARGBFilter, 4, 4, 1, 10)
|
||||
#endif // DISABLE_SLOW_TESTS
|
||||
|
||||
#define TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
|
||||
ALIGN, YALIGN, W1280, N, NEG, SOFF, DOFF, S_DEPTH) \
|
||||
#define TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
|
||||
YALIGN, W1280, N, NEG, SOFF, DOFF, S_DEPTH) \
|
||||
TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \
|
||||
const int kWidth = W1280; \
|
||||
const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
|
||||
@ -3959,16 +3966,16 @@ TESTQPLANAR16TOB(I210Alpha, 2, 1, ARGBFilter, 4, 4, 1, 10)
|
||||
free_aligned_buffer_page_end(dst_argb_opt); \
|
||||
}
|
||||
|
||||
#define TESTBIPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
|
||||
ALIGN, YALIGN, S_DEPTH) \
|
||||
TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
|
||||
YALIGN, benchmark_width_ + 1, _Any, +, 0, 0, S_DEPTH) \
|
||||
TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
|
||||
YALIGN, benchmark_width_, _Unaligned, +, 4, 4, S_DEPTH) \
|
||||
TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
|
||||
YALIGN, benchmark_width_, _Invert, -, 0, 0, S_DEPTH) \
|
||||
TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
|
||||
YALIGN, benchmark_width_, _Opt, +, 0, 0, S_DEPTH)
|
||||
#define TESTBP16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
|
||||
YALIGN, S_DEPTH) \
|
||||
TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \
|
||||
benchmark_width_ + 1, _Any, +, 0, 0, S_DEPTH) \
|
||||
TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \
|
||||
benchmark_width_, _Unaligned, +, 4, 4, S_DEPTH) \
|
||||
TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \
|
||||
benchmark_width_, _Invert, -, 0, 0, S_DEPTH) \
|
||||
TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \
|
||||
benchmark_width_, _Opt, +, 0, 0, S_DEPTH)
|
||||
|
||||
#define P010ToARGB(a, b, c, d, e, f, g, h) \
|
||||
P010ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
|
||||
@ -4011,23 +4018,23 @@ TESTQPLANAR16TOB(I210Alpha, 2, 1, ARGBFilter, 4, 4, 1, 10)
|
||||
kFilterBilinear)
|
||||
|
||||
#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__)
|
||||
TESTBIPLANAR16TOB(P010, 2, 2, ARGB, 4, 4, 1, 10)
|
||||
TESTBIPLANAR16TOB(P210, 2, 1, ARGB, 4, 4, 1, 10)
|
||||
TESTBIPLANAR16TOB(P012, 2, 2, ARGB, 4, 4, 1, 12)
|
||||
TESTBIPLANAR16TOB(P212, 2, 1, ARGB, 4, 4, 1, 12)
|
||||
TESTBIPLANAR16TOB(P016, 2, 2, ARGB, 4, 4, 1, 16)
|
||||
TESTBIPLANAR16TOB(P216, 2, 1, ARGB, 4, 4, 1, 16)
|
||||
TESTBIPLANAR16TOB(P010, 2, 2, ARGBFilter, 4, 4, 1, 10)
|
||||
TESTBIPLANAR16TOB(P210, 2, 1, ARGBFilter, 4, 4, 1, 10)
|
||||
TESTBP16TOB(P010, 2, 2, ARGB, 4, 4, 1, 10)
|
||||
TESTBP16TOB(P210, 2, 1, ARGB, 4, 4, 1, 10)
|
||||
TESTBP16TOB(P012, 2, 2, ARGB, 4, 4, 1, 12)
|
||||
TESTBP16TOB(P212, 2, 1, ARGB, 4, 4, 1, 12)
|
||||
TESTBP16TOB(P016, 2, 2, ARGB, 4, 4, 1, 16)
|
||||
TESTBP16TOB(P216, 2, 1, ARGB, 4, 4, 1, 16)
|
||||
TESTBP16TOB(P010, 2, 2, ARGBFilter, 4, 4, 1, 10)
|
||||
TESTBP16TOB(P210, 2, 1, ARGBFilter, 4, 4, 1, 10)
|
||||
#ifdef LITTLE_ENDIAN_ONLY_TEST
|
||||
TESTBIPLANAR16TOB(P010, 2, 2, AR30, 4, 4, 1, 10)
|
||||
TESTBIPLANAR16TOB(P210, 2, 1, AR30, 4, 4, 1, 10)
|
||||
TESTBIPLANAR16TOB(P012, 2, 2, AR30, 4, 4, 1, 12)
|
||||
TESTBIPLANAR16TOB(P212, 2, 1, AR30, 4, 4, 1, 12)
|
||||
TESTBIPLANAR16TOB(P016, 2, 2, AR30, 4, 4, 1, 16)
|
||||
TESTBIPLANAR16TOB(P216, 2, 1, AR30, 4, 4, 1, 16)
|
||||
TESTBIPLANAR16TOB(P010, 2, 2, AR30Filter, 4, 4, 1, 10)
|
||||
TESTBIPLANAR16TOB(P210, 2, 1, AR30Filter, 4, 4, 1, 10)
|
||||
TESTBP16TOB(P010, 2, 2, AR30, 4, 4, 1, 10)
|
||||
TESTBP16TOB(P210, 2, 1, AR30, 4, 4, 1, 10)
|
||||
TESTBP16TOB(P012, 2, 2, AR30, 4, 4, 1, 12)
|
||||
TESTBP16TOB(P212, 2, 1, AR30, 4, 4, 1, 12)
|
||||
TESTBP16TOB(P016, 2, 2, AR30, 4, 4, 1, 16)
|
||||
TESTBP16TOB(P216, 2, 1, AR30, 4, 4, 1, 16)
|
||||
TESTBP16TOB(P010, 2, 2, AR30Filter, 4, 4, 1, 10)
|
||||
TESTBP16TOB(P210, 2, 1, AR30Filter, 4, 4, 1, 10)
|
||||
#endif // LITTLE_ENDIAN_ONLY_TEST
|
||||
#endif // DISABLE_SLOW_TESTS
|
||||
|
||||
|
@ -225,4 +225,110 @@ TEST_F(LibYUVRotateTest, RotatePlane90_TestStride) {
|
||||
free_aligned_buffer_page_end(src_argb);
|
||||
}
|
||||
|
||||
static void TestRotatePlane_16(int src_width,
|
||||
int src_height,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
libyuv::RotationMode mode,
|
||||
int benchmark_iterations,
|
||||
int disable_cpu_flags,
|
||||
int benchmark_cpu_info) {
|
||||
if (src_width < 1) {
|
||||
src_width = 1;
|
||||
}
|
||||
if (src_height < 1) {
|
||||
src_height = 1;
|
||||
}
|
||||
if (dst_width < 1) {
|
||||
dst_width = 1;
|
||||
}
|
||||
if (dst_height < 1) {
|
||||
dst_height = 1;
|
||||
}
|
||||
int src_stride = src_width;
|
||||
int src_plane_size = src_stride * abs(src_height);
|
||||
align_buffer_page_end_16(src, src_plane_size);
|
||||
for (int i = 0; i < src_plane_size; ++i) {
|
||||
src[i] = fastrand() & 0xff;
|
||||
}
|
||||
|
||||
int dst_stride = dst_width;
|
||||
int dst_plane_size = dst_stride * dst_height;
|
||||
align_buffer_page_end_16(dst_c, dst_plane_size);
|
||||
align_buffer_page_end_16(dst_opt, dst_plane_size);
|
||||
memset(dst_c, 2, dst_plane_size);
|
||||
memset(dst_opt, 3, dst_plane_size);
|
||||
|
||||
MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
|
||||
RotatePlane_16(src, src_stride, dst_c, dst_stride, src_width, src_height,
|
||||
mode);
|
||||
|
||||
MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
|
||||
for (int i = 0; i < benchmark_iterations; ++i) {
|
||||
RotatePlane_16(src, src_stride, dst_opt, dst_stride, src_width, src_height,
|
||||
mode);
|
||||
}
|
||||
|
||||
// Rotation should be exact.
|
||||
for (int i = 0; i < dst_plane_size; ++i) {
|
||||
EXPECT_EQ(dst_c[i], dst_opt[i]);
|
||||
}
|
||||
|
||||
free_aligned_buffer_page_end_16(dst_c);
|
||||
free_aligned_buffer_page_end_16(dst_opt);
|
||||
free_aligned_buffer_page_end_16(src);
|
||||
}
|
||||
|
||||
TEST_F(LibYUVRotateTest, RotatePlane0_16_Opt) {
|
||||
TestRotatePlane_16(benchmark_width_, benchmark_height_, benchmark_width_,
|
||||
benchmark_height_, kRotate0, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_);
|
||||
}
|
||||
|
||||
TEST_F(LibYUVRotateTest, RotatePlane90_16_Opt) {
|
||||
TestRotatePlane_16(benchmark_width_, benchmark_height_, benchmark_height_,
|
||||
benchmark_width_, kRotate90, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_);
|
||||
}
|
||||
|
||||
TEST_F(LibYUVRotateTest, RotatePlane180_16_Opt) {
|
||||
TestRotatePlane_16(benchmark_width_, benchmark_height_, benchmark_width_,
|
||||
benchmark_height_, kRotate180, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_);
|
||||
}
|
||||
|
||||
TEST_F(LibYUVRotateTest, RotatePlane270_16_Opt) {
|
||||
TestRotatePlane_16(benchmark_width_, benchmark_height_, benchmark_height_,
|
||||
benchmark_width_, kRotate270, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_);
|
||||
}
|
||||
|
||||
TEST_F(LibYUVRotateTest, RotatePlane0_16_Odd) {
|
||||
TestRotatePlane_16(benchmark_width_ + 1, benchmark_height_ + 1,
|
||||
benchmark_width_ + 1, benchmark_height_ + 1, kRotate0,
|
||||
benchmark_iterations_, disable_cpu_flags_,
|
||||
benchmark_cpu_info_);
|
||||
}
|
||||
|
||||
TEST_F(LibYUVRotateTest, RotatePlane90_16_Odd) {
|
||||
TestRotatePlane_16(benchmark_width_ + 1, benchmark_height_ + 1,
|
||||
benchmark_height_ + 1, benchmark_width_ + 1, kRotate90,
|
||||
benchmark_iterations_, disable_cpu_flags_,
|
||||
benchmark_cpu_info_);
|
||||
}
|
||||
|
||||
TEST_F(LibYUVRotateTest, RotatePlane180_16_Odd) {
|
||||
TestRotatePlane_16(benchmark_width_ + 1, benchmark_height_ + 1,
|
||||
benchmark_width_ + 1, benchmark_height_ + 1, kRotate180,
|
||||
benchmark_iterations_, disable_cpu_flags_,
|
||||
benchmark_cpu_info_);
|
||||
}
|
||||
|
||||
TEST_F(LibYUVRotateTest, RotatePlane270_16_Odd) {
|
||||
TestRotatePlane_16(benchmark_width_ + 1, benchmark_height_ + 1,
|
||||
benchmark_height_ + 1, benchmark_width_ + 1, kRotate270,
|
||||
benchmark_iterations_, disable_cpu_flags_,
|
||||
benchmark_cpu_info_);
|
||||
}
|
||||
|
||||
} // namespace libyuv
|
||||
|
@ -596,4 +596,266 @@ TESTAPLANARTOP(Android420, NV21, 2, 1, 0, 2, 2, I420, 2, 2)
|
||||
#undef TESTAPLANARTOP
|
||||
#undef TESTAPLANARTOPI
|
||||
|
||||
static void I010TestRotate(int src_width,
|
||||
int src_height,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
libyuv::RotationMode mode,
|
||||
int benchmark_iterations,
|
||||
int disable_cpu_flags,
|
||||
int benchmark_cpu_info) {
|
||||
if (src_width < 1) {
|
||||
src_width = 1;
|
||||
}
|
||||
if (src_height == 0) {
|
||||
src_height = 1;
|
||||
}
|
||||
if (dst_width < 1) {
|
||||
dst_width = 1;
|
||||
}
|
||||
if (dst_height < 1) {
|
||||
dst_height = 1;
|
||||
}
|
||||
int src_i010_y_size = src_width * Abs(src_height);
|
||||
int src_i010_uv_size = ((src_width + 1) / 2) * ((Abs(src_height) + 1) / 2);
|
||||
int src_i010_size = src_i010_y_size + src_i010_uv_size * 2;
|
||||
align_buffer_page_end_16(src_i010, src_i010_size);
|
||||
for (int i = 0; i < src_i010_size; ++i) {
|
||||
src_i010[i] = fastrand() & 0x3ff;
|
||||
}
|
||||
|
||||
int dst_i010_y_size = dst_width * dst_height;
|
||||
int dst_i010_uv_size = ((dst_width + 1) / 2) * ((dst_height + 1) / 2);
|
||||
int dst_i010_size = dst_i010_y_size + dst_i010_uv_size * 2;
|
||||
align_buffer_page_end_16(dst_i010_c, dst_i010_size);
|
||||
align_buffer_page_end_16(dst_i010_opt, dst_i010_size);
|
||||
memset(dst_i010_c, 2, dst_i010_size * 2);
|
||||
memset(dst_i010_opt, 3, dst_i010_size * 2);
|
||||
|
||||
MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
|
||||
I010Rotate(src_i010, src_width, src_i010 + src_i010_y_size,
|
||||
(src_width + 1) / 2, src_i010 + src_i010_y_size + src_i010_uv_size,
|
||||
(src_width + 1) / 2, dst_i010_c, dst_width,
|
||||
dst_i010_c + dst_i010_y_size, (dst_width + 1) / 2,
|
||||
dst_i010_c + dst_i010_y_size + dst_i010_uv_size,
|
||||
(dst_width + 1) / 2, src_width, src_height, mode);
|
||||
|
||||
MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
|
||||
for (int i = 0; i < benchmark_iterations; ++i) {
|
||||
I010Rotate(
|
||||
src_i010, src_width, src_i010 + src_i010_y_size, (src_width + 1) / 2,
|
||||
src_i010 + src_i010_y_size + src_i010_uv_size, (src_width + 1) / 2,
|
||||
dst_i010_opt, dst_width, dst_i010_opt + dst_i010_y_size,
|
||||
(dst_width + 1) / 2, dst_i010_opt + dst_i010_y_size + dst_i010_uv_size,
|
||||
(dst_width + 1) / 2, src_width, src_height, mode);
|
||||
}
|
||||
|
||||
// Rotation should be exact.
|
||||
for (int i = 0; i < dst_i010_size; ++i) {
|
||||
EXPECT_EQ(dst_i010_c[i], dst_i010_opt[i]);
|
||||
}
|
||||
|
||||
free_aligned_buffer_page_end_16(dst_i010_c);
|
||||
free_aligned_buffer_page_end_16(dst_i010_opt);
|
||||
free_aligned_buffer_page_end_16(src_i010);
|
||||
}
|
||||
|
||||
TEST_F(LibYUVRotateTest, I010Rotate0_Opt) {
|
||||
I010TestRotate(benchmark_width_, benchmark_height_, benchmark_width_,
|
||||
benchmark_height_, kRotate0, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_);
|
||||
}
|
||||
|
||||
TEST_F(LibYUVRotateTest, I010Rotate90_Opt) {
|
||||
I010TestRotate(benchmark_width_, benchmark_height_, benchmark_height_,
|
||||
benchmark_width_, kRotate90, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_);
|
||||
}
|
||||
|
||||
TEST_F(LibYUVRotateTest, I010Rotate180_Opt) {
|
||||
I010TestRotate(benchmark_width_, benchmark_height_, benchmark_width_,
|
||||
benchmark_height_, kRotate180, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_);
|
||||
}
|
||||
|
||||
TEST_F(LibYUVRotateTest, I010Rotate270_Opt) {
|
||||
I010TestRotate(benchmark_width_, benchmark_height_, benchmark_height_,
|
||||
benchmark_width_, kRotate270, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_);
|
||||
}
|
||||
|
||||
static void I210TestRotate(int src_width,
|
||||
int src_height,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
libyuv::RotationMode mode,
|
||||
int benchmark_iterations,
|
||||
int disable_cpu_flags,
|
||||
int benchmark_cpu_info) {
|
||||
if (src_width < 1) {
|
||||
src_width = 1;
|
||||
}
|
||||
if (src_height == 0) {
|
||||
src_height = 1;
|
||||
}
|
||||
if (dst_width < 1) {
|
||||
dst_width = 1;
|
||||
}
|
||||
if (dst_height < 1) {
|
||||
dst_height = 1;
|
||||
}
|
||||
int src_i210_y_size = src_width * Abs(src_height);
|
||||
int src_i210_uv_size = ((src_width + 1) / 2) * Abs(src_height);
|
||||
int src_i210_size = src_i210_y_size + src_i210_uv_size * 2;
|
||||
align_buffer_page_end_16(src_i210, src_i210_size);
|
||||
for (int i = 0; i < src_i210_size; ++i) {
|
||||
src_i210[i] = fastrand() & 0x3ff;
|
||||
}
|
||||
|
||||
int dst_i210_y_size = dst_width * dst_height;
|
||||
int dst_i210_uv_size = ((dst_width + 1) / 2) * dst_height;
|
||||
int dst_i210_size = dst_i210_y_size + dst_i210_uv_size * 2;
|
||||
align_buffer_page_end_16(dst_i210_c, dst_i210_size);
|
||||
align_buffer_page_end_16(dst_i210_opt, dst_i210_size);
|
||||
memset(dst_i210_c, 2, dst_i210_size * 2);
|
||||
memset(dst_i210_opt, 3, dst_i210_size * 2);
|
||||
|
||||
MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
|
||||
I210Rotate(src_i210, src_width, src_i210 + src_i210_y_size,
|
||||
(src_width + 1) / 2, src_i210 + src_i210_y_size + src_i210_uv_size,
|
||||
(src_width + 1) / 2, dst_i210_c, dst_width,
|
||||
dst_i210_c + dst_i210_y_size, (dst_width + 1) / 2,
|
||||
dst_i210_c + dst_i210_y_size + dst_i210_uv_size,
|
||||
(dst_width + 1) / 2, src_width, src_height, mode);
|
||||
|
||||
MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
|
||||
for (int i = 0; i < benchmark_iterations; ++i) {
|
||||
I210Rotate(
|
||||
src_i210, src_width, src_i210 + src_i210_y_size, (src_width + 1) / 2,
|
||||
src_i210 + src_i210_y_size + src_i210_uv_size, (src_width + 1) / 2,
|
||||
dst_i210_opt, dst_width, dst_i210_opt + dst_i210_y_size,
|
||||
(dst_width + 1) / 2, dst_i210_opt + dst_i210_y_size + dst_i210_uv_size,
|
||||
(dst_width + 1) / 2, src_width, src_height, mode);
|
||||
}
|
||||
|
||||
// Rotation should be exact.
|
||||
for (int i = 0; i < dst_i210_size; ++i) {
|
||||
EXPECT_EQ(dst_i210_c[i], dst_i210_opt[i]);
|
||||
}
|
||||
|
||||
free_aligned_buffer_page_end_16(dst_i210_c);
|
||||
free_aligned_buffer_page_end_16(dst_i210_opt);
|
||||
free_aligned_buffer_page_end_16(src_i210);
|
||||
}
|
||||
|
||||
TEST_F(LibYUVRotateTest, I210Rotate0_Opt) {
|
||||
I210TestRotate(benchmark_width_, benchmark_height_, benchmark_width_,
|
||||
benchmark_height_, kRotate0, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_);
|
||||
}
|
||||
|
||||
TEST_F(LibYUVRotateTest, I210Rotate90_Opt) {
|
||||
I210TestRotate(benchmark_width_, benchmark_height_, benchmark_height_,
|
||||
benchmark_width_, kRotate90, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_);
|
||||
}
|
||||
|
||||
TEST_F(LibYUVRotateTest, I210Rotate180_Opt) {
|
||||
I210TestRotate(benchmark_width_, benchmark_height_, benchmark_width_,
|
||||
benchmark_height_, kRotate180, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_);
|
||||
}
|
||||
|
||||
TEST_F(LibYUVRotateTest, I210Rotate270_Opt) {
|
||||
I210TestRotate(benchmark_width_, benchmark_height_, benchmark_height_,
|
||||
benchmark_width_, kRotate270, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_);
|
||||
}
|
||||
|
||||
static void I410TestRotate(int src_width,
|
||||
int src_height,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
libyuv::RotationMode mode,
|
||||
int benchmark_iterations,
|
||||
int disable_cpu_flags,
|
||||
int benchmark_cpu_info) {
|
||||
if (src_width < 1) {
|
||||
src_width = 1;
|
||||
}
|
||||
if (src_height == 0) {
|
||||
src_height = 1;
|
||||
}
|
||||
if (dst_width < 1) {
|
||||
dst_width = 1;
|
||||
}
|
||||
if (dst_height < 1) {
|
||||
dst_height = 1;
|
||||
}
|
||||
int src_i410_y_size = src_width * Abs(src_height);
|
||||
int src_i410_uv_size = src_width * Abs(src_height);
|
||||
int src_i410_size = src_i410_y_size + src_i410_uv_size * 2;
|
||||
align_buffer_page_end_16(src_i410, src_i410_size);
|
||||
for (int i = 0; i < src_i410_size; ++i) {
|
||||
src_i410[i] = fastrand() & 0x3ff;
|
||||
}
|
||||
|
||||
int dst_i410_y_size = dst_width * dst_height;
|
||||
int dst_i410_uv_size = dst_width * dst_height;
|
||||
int dst_i410_size = dst_i410_y_size + dst_i410_uv_size * 2;
|
||||
align_buffer_page_end_16(dst_i410_c, dst_i410_size);
|
||||
align_buffer_page_end_16(dst_i410_opt, dst_i410_size);
|
||||
memset(dst_i410_c, 2, dst_i410_size * 2);
|
||||
memset(dst_i410_opt, 3, dst_i410_size * 2);
|
||||
|
||||
MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
|
||||
I410Rotate(src_i410, src_width, src_i410 + src_i410_y_size, src_width,
|
||||
src_i410 + src_i410_y_size + src_i410_uv_size, src_width,
|
||||
dst_i410_c, dst_width, dst_i410_c + dst_i410_y_size, dst_width,
|
||||
dst_i410_c + dst_i410_y_size + dst_i410_uv_size, dst_width,
|
||||
src_width, src_height, mode);
|
||||
|
||||
MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
|
||||
for (int i = 0; i < benchmark_iterations; ++i) {
|
||||
I410Rotate(src_i410, src_width, src_i410 + src_i410_y_size, src_width,
|
||||
src_i410 + src_i410_y_size + src_i410_uv_size, src_width,
|
||||
dst_i410_opt, dst_width, dst_i410_opt + dst_i410_y_size,
|
||||
dst_width, dst_i410_opt + dst_i410_y_size + dst_i410_uv_size,
|
||||
dst_width, src_width, src_height, mode);
|
||||
}
|
||||
|
||||
// Rotation should be exact.
|
||||
for (int i = 0; i < dst_i410_size; ++i) {
|
||||
EXPECT_EQ(dst_i410_c[i], dst_i410_opt[i]);
|
||||
}
|
||||
|
||||
free_aligned_buffer_page_end_16(dst_i410_c);
|
||||
free_aligned_buffer_page_end_16(dst_i410_opt);
|
||||
free_aligned_buffer_page_end_16(src_i410);
|
||||
}
|
||||
|
||||
// I410 (4:4:4, 10-bit) identity rotation at benchmark size:
// destination dimensions match the source.
TEST_F(LibYUVRotateTest, I410Rotate0_Opt) {
  I410TestRotate(benchmark_width_, benchmark_height_,
                 benchmark_width_, benchmark_height_,
                 kRotate0, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_);
}
|
||||
|
||||
// I410 (4:4:4, 10-bit) 90-degree rotation at benchmark size:
// destination width/height are swapped relative to the source.
TEST_F(LibYUVRotateTest, I410Rotate90_Opt) {
  I410TestRotate(benchmark_width_, benchmark_height_,
                 benchmark_height_, benchmark_width_,
                 kRotate90, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_);
}
|
||||
|
||||
// I410 (4:4:4, 10-bit) 180-degree rotation at benchmark size:
// destination dimensions match the source.
TEST_F(LibYUVRotateTest, I410Rotate180_Opt) {
  I410TestRotate(benchmark_width_, benchmark_height_,
                 benchmark_width_, benchmark_height_,
                 kRotate180, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_);
}
|
||||
|
||||
// I410 (4:4:4, 10-bit) 270-degree rotation at benchmark size:
// destination width/height are swapped relative to the source.
TEST_F(LibYUVRotateTest, I410Rotate270_Opt) {
  I410TestRotate(benchmark_width_, benchmark_height_,
                 benchmark_height_, benchmark_width_,
                 kRotate270, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_);
}
|
||||
|
||||
} // namespace libyuv
|
||||
|
@ -11,6 +11,7 @@
|
||||
#ifndef UNIT_TEST_UNIT_TEST_H_ // NOLINT
|
||||
#define UNIT_TEST_UNIT_TEST_H_
|
||||
|
||||
#include <stddef.h> // For NULL
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
#else
|
||||
@ -76,7 +77,18 @@ static inline bool SizeValid(int src_width,
|
||||
|
||||
#define free_aligned_buffer_page_end(var) \
|
||||
free(var##_mem); \
|
||||
var = 0
|
||||
var = NULL
|
||||
|
||||
// Allocates a buffer of `size` 16-bit elements positioned so that it ends at
// (or within 64 bytes of) the end of a page-rounded malloc block, with the
// start aligned down to 64 bytes — so out-of-bounds reads past the buffer
// land near the allocation's end rather than in padding at the front.
// Defines both `var` (uint16_t*) and `var##_mem` (the raw malloc pointer);
// pair with free_aligned_buffer_page_end_16.
// NOTE(review): malloc result is not checked — assumes test allocations
// cannot fail.
#define align_buffer_page_end_16(var, size)                                \
  uint8_t* var##_mem =                                                     \
      reinterpret_cast<uint8_t*>(malloc(((size)*2 + 4095 + 63) & ~4095)); \
  uint16_t* var = reinterpret_cast<uint16_t*>(                             \
      (intptr_t)(var##_mem + (((size)*2 + 4095 + 63) & ~4095) - (size)*2) & \
      ~63)
|
||||
|
||||
// Releases a buffer created with align_buffer_page_end_16 (frees the raw
// var##_mem allocation) and nulls the aligned pointer to guard against
// use-after-free in the test body.
#define free_aligned_buffer_page_end_16(var) \
  free(var##_mem);                           \
  var = NULL
|
||||
|
||||
#ifdef WIN32
|
||||
static inline double get_time() {
|
||||
|
@ -43,9 +43,10 @@
|
||||
// #define BR (-VR * 128 + YB)
|
||||
|
||||
int main(int argc, const char* argv[]) {
|
||||
if (argc < 2) {
|
||||
printf("yuvconstants Kr Kb\n");
|
||||
printf(" MC BT KR = 0.2126; KB = 0.0722\n");
|
||||
if (argc < 3) {
|
||||
printf("yuvconstants [KR] [KB]\n");
|
||||
printf(" e.g. yuvconstants 0.2126 0.0722\n");
|
||||
printf(" MC BT KR KB\n");
|
||||
printf(" 1 BT.709 KR = 0.2126; KB = 0.0722\n");
|
||||
printf(" 4 FCC KR = 0.30; KB = 0.11\n");
|
||||
printf(" 6 BT.601 KR = 0.299; KB = 0.114\n");
|
||||
|
@ -23,7 +23,7 @@ origin:
|
||||
|
||||
# Revision to pull in
|
||||
# Must be a long or short commit SHA (long preferred)
|
||||
revision: ea26d7adb1da4c1bd80e99b9d2f6e9ca0b9cde0e
|
||||
revision: b2528b0be934de1918e20c85fc170d809eeb49ab
|
||||
|
||||
# The package's license, where possible using the mnemonic from
|
||||
# https://spdx.org/licenses/
|
||||
|
Loading…
x
Reference in New Issue
Block a user