Bug 1891458 - Update libvpx to 85dafa9c61f99330f484e77297684b42af6ff37d r=media-playback-reviewers,padenot

This patch runs `./mach vendor media/libvpx/moz.yaml --patch-mode=none`
to update libvpx to 85dafa9c61f99330f484e77297684b42af6ff37d.

Depends on D207805

Differential Revision: https://phabricator.services.mozilla.com/D207806
This commit is contained in:
Chun-Min Chang 2024-04-18 15:58:07 +00:00
parent 61c284e79f
commit 61521428c9
82 changed files with 1450 additions and 640 deletions

View File

@ -73,6 +73,10 @@ sub vpx_config($) {
}
sub specialize {
if (@_ <= 1) {
die "'specialize' must be called with a function name and at least one ",
"architecture ('C' is implied): \n@_\n";
}
my $fn=$_[0];
shift;
foreach my $opt (@_) {

View File

@ -16,6 +16,7 @@
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "./vpx_dsp_rtcd.h"
#include "vpx/vpx_integer.h"
#include "vpx_config.h"
#include "vpx_dsp/postproc.h"
#include "vpx_mem/vpx_mem.h"

View File

@ -12,6 +12,7 @@
#include "test/encode_test_driver.h"
#include "test/i420_video_source.h"
#include "test/util.h"
#include "vpx_config.h"
namespace {
#if CONFIG_VP8_ENCODER

View File

@ -10,6 +10,7 @@
#include <stdio.h>
#include <algorithm>
#include <cstdlib>
#include "test/bench.h"
#include "vpx_ports/vpx_timer.h"

View File

@ -14,6 +14,7 @@
#include "test/encode_test_driver.h"
#include "test/i420_video_source.h"
#include "test/util.h"
#include "vpx_config.h"
namespace {
@ -79,6 +80,11 @@ TEST_P(BordersTest, TestLowBitrate) {
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
}
#if CONFIG_REALTIME_ONLY
VP9_INSTANTIATE_TEST_SUITE(BordersTest,
::testing::Values(::libvpx_test::kRealTime));
#else
VP9_INSTANTIATE_TEST_SUITE(BordersTest,
::testing::Values(::libvpx_test::kTwoPassGood));
#endif
} // namespace

View File

@ -15,6 +15,7 @@
#include "test/acm_random.h"
#include "test/buffer.h"
#include "test/register_state_check.h"
#include "vpx_config.h"
#include "vpx_ports/vpx_timer.h"
namespace {

View File

@ -1228,10 +1228,16 @@ WRAP(convolve8_avg_horiz_sve, 12)
#endif // HAVE_SVE
#if HAVE_SVE2
WRAP(convolve8_sve2, 8)
WRAP(convolve8_avg_sve2, 8)
WRAP(convolve8_vert_sve2, 8)
WRAP(convolve8_avg_vert_sve2, 8)
WRAP(convolve8_sve2, 10)
WRAP(convolve8_avg_sve2, 10)
WRAP(convolve8_vert_sve2, 10)
WRAP(convolve8_avg_vert_sve2, 10)
WRAP(convolve8_sve2, 12)
WRAP(convolve8_avg_sve2, 12)
WRAP(convolve8_vert_sve2, 12)
WRAP(convolve8_avg_vert_sve2, 12)
#endif // HAVE_SVE2
@ -1495,23 +1501,23 @@ INSTANTIATE_TEST_SUITE_P(SVE, ConvolveTest,
const ConvolveFunctions convolve8_sve2(
wrap_convolve_copy_c_8, wrap_convolve_avg_c_8, wrap_convolve8_horiz_c_8,
wrap_convolve8_avg_horiz_c_8, wrap_convolve8_vert_sve2_8,
wrap_convolve8_avg_vert_sve2_8, wrap_convolve8_c_8, wrap_convolve8_avg_c_8,
wrap_convolve8_horiz_c_8, wrap_convolve8_avg_horiz_c_8,
wrap_convolve8_vert_c_8, wrap_convolve8_avg_vert_c_8, wrap_convolve8_c_8,
wrap_convolve8_avg_c_8, 8);
wrap_convolve8_avg_vert_sve2_8, wrap_convolve8_sve2_8,
wrap_convolve8_avg_sve2_8, wrap_convolve8_horiz_c_8,
wrap_convolve8_avg_horiz_c_8, wrap_convolve8_vert_c_8,
wrap_convolve8_avg_vert_c_8, wrap_convolve8_c_8, wrap_convolve8_avg_c_8, 8);
const ConvolveFunctions convolve10_sve2(
wrap_convolve_copy_c_10, wrap_convolve_avg_c_10, wrap_convolve8_horiz_c_10,
wrap_convolve8_avg_horiz_c_10, wrap_convolve8_vert_sve2_10,
wrap_convolve8_avg_vert_sve2_10, wrap_convolve8_c_10,
wrap_convolve8_avg_c_10, wrap_convolve8_horiz_c_10,
wrap_convolve8_avg_vert_sve2_10, wrap_convolve8_sve2_10,
wrap_convolve8_avg_sve2_10, wrap_convolve8_horiz_c_10,
wrap_convolve8_avg_horiz_c_10, wrap_convolve8_vert_c_10,
wrap_convolve8_avg_vert_c_10, wrap_convolve8_c_10, wrap_convolve8_avg_c_10,
10);
const ConvolveFunctions convolve12_sve2(
wrap_convolve_copy_c_12, wrap_convolve_avg_c_12, wrap_convolve8_horiz_c_12,
wrap_convolve8_avg_horiz_c_12, wrap_convolve8_vert_sve2_12,
wrap_convolve8_avg_vert_sve2_12, wrap_convolve8_c_12,
wrap_convolve8_avg_c_12, wrap_convolve8_horiz_c_12,
wrap_convolve8_avg_vert_sve2_12, wrap_convolve8_sve2_12,
wrap_convolve8_avg_sve2_12, wrap_convolve8_horiz_c_12,
wrap_convolve8_avg_horiz_c_12, wrap_convolve8_vert_c_12,
wrap_convolve8_avg_vert_c_12, wrap_convolve8_c_12, wrap_convolve8_avg_c_12,
12);

View File

@ -148,9 +148,6 @@ TEST_P(CpuSpeedTest, TestLowBitrate) {
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
}
VP9_INSTANTIATE_TEST_SUITE(CpuSpeedTest,
::testing::Values(::libvpx_test::kTwoPassGood,
::libvpx_test::kOnePassGood,
::libvpx_test::kRealTime),
VP9_INSTANTIATE_TEST_SUITE(CpuSpeedTest, ONE_PASS_TEST_MODES,
::testing::Range(0, 10));
} // namespace

View File

@ -14,6 +14,7 @@
#include "test/encode_test_driver.h"
#include "test/i420_video_source.h"
#include "test/util.h"
#include "vpx_config.h"
namespace {
@ -104,6 +105,10 @@ CQTest::BitrateMap CQTest::bitrates_;
TEST_P(CQTest, LinearPSNRIsHigherForCQLevel) {
// The macro name must be spelled exactly: an undefined identifier inside #if
// evaluates to 0, which would silently disable the skip below.
#if CONFIG_REALTIME_ONLY
GTEST_SKIP()
<< "Non-zero g_lag_in_frames is unsupported with CONFIG_REALTIME_ONLY";
#else
// Declared inside the #else branch so it is not an unused local when the
// test is skipped.
const vpx_rational timebase = { 33333333, 1000000000 };
cfg_.g_timebase = timebase;
cfg_.rc_target_bitrate = kCQTargetBitrate;
cfg_.g_lag_in_frames = 25;
@ -124,6 +129,7 @@ TEST_P(CQTest, LinearPSNRIsHigherForCQLevel) {
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
const double vbr_psnr_lin = GetLinearPSNROverBitrate();
EXPECT_GE(cq_psnr_lin, vbr_psnr_lin);
#endif // CONFIG_REALTIME_ONLY
}
VP8_INSTANTIATE_TEST_SUITE(CQTest, ::testing::Range(kCQLevelMin, kCQLevelMax,

View File

@ -25,6 +25,7 @@
#include "vp9/common/vp9_scan.h"
#include "vpx/vpx_codec.h"
#include "vpx/vpx_integer.h"
#include "vpx_config.h"
#include "vpx_ports/mem.h"
#include "vpx_ports/msvc.h" // for round()
#include "vpx_ports/vpx_timer.h"

View File

@ -22,6 +22,7 @@
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#include "test/util.h"
#include "vpx_config.h"
#include "vpx/vpx_codec.h"
#include "vpx/vpx_integer.h"
#include "vpx_dsp/vpx_dsp_common.h"

View File

@ -23,6 +23,7 @@
#include "test/register_state_check.h"
#include "test/util.h"
#include "vp9/common/vp9_entropy.h"
#include "vpx_config.h"
#include "vpx/vpx_codec.h"
#include "vpx/vpx_integer.h"
#include "vpx_ports/mem.h"

View File

@ -16,6 +16,7 @@
#include "test/encode_test_driver.h"
#include "test/util.h"
#include "test/i420_video_source.h"
#include "vpx_config.h"
#include "vpx_mem/vpx_mem.h"
namespace {

View File

@ -847,6 +847,10 @@ TEST(EncodeAPI, PtsOrDurationTooBig) {
// 32-bit systems defined by VPX_MAX_ALLOCABLE_MEMORY
#if VPX_ARCH_X86_64 || VPX_ARCH_AARCH64
TEST(EncodeAPI, ConfigLargeTargetBitrateVp9) {
#ifdef CHROMIUM
// Adjacent string literals are concatenated verbatim, so the separating space
// has to be part of one of them.
GTEST_SKIP() << "Under Chromium's configuration the allocator is unable "
"to provide the space required for the frames below.";
#else
constexpr int kWidth = 12383;
constexpr int kHeight = 8192;
constexpr auto *iface = &vpx_codec_vp9_cx_algo;
@ -868,6 +872,7 @@ TEST(EncodeAPI, ConfigLargeTargetBitrateVp9) {
EXPECT_NO_FATAL_FAILURE(InitCodec(*iface, kWidth, kHeight, &enc.ctx, &cfg))
<< "target bitrate: " << cfg.rc_target_bitrate << " framerate: "
<< static_cast<double>(cfg.g_timebase.den) / cfg.g_timebase.num;
#endif // defined(CHROMIUM)
}
#endif // VPX_ARCH_X86_64 || VPX_ARCH_AARCH64
@ -875,12 +880,14 @@ TEST(EncodeAPI, ConfigLargeTargetBitrateVp9) {
class VP9Encoder {
public:
explicit VP9Encoder(int speed)
: speed_(speed), bit_depth_(VPX_BITS_8), fmt_(VPX_IMG_FMT_I420) {}
: speed_(speed), row_mt_(0), bit_depth_(VPX_BITS_8),
fmt_(VPX_IMG_FMT_I420) {}
// The image format `fmt` must not have the VPX_IMG_FMT_HIGHBITDEPTH bit set.
// If bit_depth > 8, we will set the VPX_IMG_FMT_HIGHBITDEPTH bit before
// passing the image format to vpx_img_alloc().
VP9Encoder(int speed, vpx_bit_depth_t bit_depth, vpx_img_fmt_t fmt)
: speed_(speed), bit_depth_(bit_depth), fmt_(fmt) {}
VP9Encoder(int speed, unsigned int row_mt, vpx_bit_depth_t bit_depth,
vpx_img_fmt_t fmt)
: speed_(speed), row_mt_(row_mt), bit_depth_(bit_depth), fmt_(fmt) {}
~VP9Encoder();
void Configure(unsigned int threads, unsigned int width, unsigned int height,
@ -889,6 +896,7 @@ class VP9Encoder {
private:
const int speed_;
const unsigned int row_mt_;
const vpx_bit_depth_t bit_depth_;
const vpx_img_fmt_t fmt_;
bool initialized_ = false;
@ -938,6 +946,7 @@ void VP9Encoder::Configure(unsigned int threads, unsigned int width,
high_bit_depth ? VPX_CODEC_USE_HIGHBITDEPTH : 0),
VPX_CODEC_OK);
ASSERT_EQ(vpx_codec_control(&enc_, VP8E_SET_CPUUSED, speed_), VPX_CODEC_OK);
ASSERT_EQ(vpx_codec_control(&enc_, VP9E_SET_ROW_MT, row_mt_), VPX_CODEC_OK);
initialized_ = true;
return;
}
@ -1312,6 +1321,145 @@ TEST(EncodeAPI, Buganizer319964497) {
encoder.Encode(/*key_frame=*/false);
}
TEST(EncodeAPI, Buganizer329088759RowMT0) {
VP9Encoder encoder(8, 0, VPX_BITS_8, VPX_IMG_FMT_I444);
encoder.Configure(/*threads=*/8, /*width=*/1686, /*height=*/398, VPX_VBR,
VPX_DL_REALTIME);
encoder.Encode(/*key_frame=*/true);
encoder.Encode(/*key_frame=*/false);
encoder.Configure(/*threads=*/0, /*width=*/1686, /*height=*/1, VPX_VBR,
VPX_DL_REALTIME);
encoder.Encode(/*key_frame=*/true);
encoder.Configure(/*threads=*/0, /*width=*/1482, /*height=*/113, VPX_CBR,
VPX_DL_REALTIME);
encoder.Encode(/*key_frame=*/true);
encoder.Configure(/*threads=*/0, /*width=*/881, /*height=*/59, VPX_CBR,
VPX_DL_REALTIME);
encoder.Configure(/*threads=*/13, /*width=*/1271, /*height=*/385, VPX_CBR,
VPX_DL_REALTIME);
encoder.Encode(/*key_frame=*/false);
encoder.Configure(/*threads=*/2, /*width=*/1, /*height=*/62, VPX_VBR,
VPX_DL_REALTIME);
}
TEST(EncodeAPI, Buganizer329088759RowMT1) {
VP9Encoder encoder(8, 1, VPX_BITS_8, VPX_IMG_FMT_I444);
encoder.Configure(/*threads=*/8, /*width=*/1686, /*height=*/398, VPX_VBR,
VPX_DL_REALTIME);
encoder.Encode(/*key_frame=*/true);
encoder.Encode(/*key_frame=*/false);
// Needs to set threads to non-zero to repro the issue.
encoder.Configure(/*threads=*/2, /*width=*/1686, /*height=*/1, VPX_VBR,
VPX_DL_REALTIME);
encoder.Encode(/*key_frame=*/true);
encoder.Configure(/*threads=*/2, /*width=*/1482, /*height=*/113, VPX_CBR,
VPX_DL_REALTIME);
encoder.Encode(/*key_frame=*/true);
encoder.Configure(/*threads=*/2, /*width=*/881, /*height=*/59, VPX_CBR,
VPX_DL_REALTIME);
encoder.Configure(/*threads=*/13, /*width=*/1271, /*height=*/385, VPX_CBR,
VPX_DL_REALTIME);
encoder.Encode(/*key_frame=*/false);
encoder.Configure(/*threads=*/2, /*width=*/1, /*height=*/62, VPX_VBR,
VPX_DL_REALTIME);
}
TEST(EncodeAPI, Buganizer331086799) {
VP9Encoder encoder(6, 1, VPX_BITS_8, VPX_IMG_FMT_I420);
encoder.Configure(0, 1385, 1, VPX_CBR, VPX_DL_REALTIME);
encoder.Configure(0, 1, 1, VPX_VBR, VPX_DL_REALTIME);
encoder.Encode(false);
encoder.Configure(16, 1385, 1, VPX_VBR, VPX_DL_GOOD_QUALITY);
encoder.Encode(false);
encoder.Encode(false);
encoder.Configure(0, 1, 1, VPX_CBR, VPX_DL_REALTIME);
encoder.Encode(true);
}
TEST(EncodeAPI, Buganizer331108729) {
VP9Encoder encoder(1, 1, VPX_BITS_8, VPX_IMG_FMT_I422);
encoder.Configure(0, 1919, 260, VPX_VBR, VPX_DL_REALTIME);
encoder.Configure(9, 440, 1, VPX_CBR, VPX_DL_GOOD_QUALITY);
encoder.Encode(true);
encoder.Configure(8, 1919, 260, VPX_VBR, VPX_DL_REALTIME);
encoder.Encode(false);
}
TEST(EncodeAPI, Buganizer331108922BitDepth8) {
VP9Encoder encoder(9, 1, VPX_BITS_8, VPX_IMG_FMT_I420);
encoder.Configure(/*threads=*/1, /*width=*/1, /*height=*/1080, VPX_VBR,
VPX_DL_REALTIME);
encoder.Encode(/*key_frame=*/false);
encoder.Configure(/*threads=*/0, /*width=*/1, /*height=*/1080, VPX_CBR,
VPX_DL_GOOD_QUALITY);
encoder.Configure(/*threads=*/16, /*width=*/1, /*height=*/394, VPX_CBR,
VPX_DL_REALTIME);
encoder.Encode(/*key_frame=*/false);
encoder.Encode(/*key_frame=*/true);
encoder.Configure(/*threads=*/16, /*width=*/1, /*height=*/798, VPX_CBR,
VPX_DL_REALTIME);
encoder.Encode(/*key_frame=*/false);
}
#if CONFIG_VP9_HIGHBITDEPTH
TEST(EncodeAPI, Buganizer329674887RowMT0BitDepth12) {
VP9Encoder encoder(8, 0, VPX_BITS_12, VPX_IMG_FMT_I444);
encoder.Configure(/*threads=*/2, /*width=*/1030, /*height=*/583, VPX_VBR,
VPX_DL_REALTIME);
encoder.Encode(/*key_frame=*/true);
encoder.Configure(/*threads=*/0, /*width=*/1030, /*height=*/1, VPX_CBR,
VPX_DL_REALTIME);
encoder.Encode(/*key_frame=*/true);
encoder.Configure(/*threads=*/0, /*width=*/548, /*height=*/322, VPX_VBR,
VPX_DL_REALTIME);
encoder.Encode(/*key_frame=*/false);
encoder.Configure(/*threads=*/16, /*width=*/24, /*height=*/583, VPX_CBR,
VPX_DL_GOOD_QUALITY);
}
TEST(EncodeAPI, Buganizer329179808RowMT0BitDepth10) {
VP9Encoder encoder(4, 0, VPX_BITS_10, VPX_IMG_FMT_I444);
encoder.Configure(/*threads=*/16, /*width=*/1488, /*height=*/5, VPX_VBR,
VPX_DL_REALTIME);
encoder.Encode(/*key_frame=*/true);
encoder.Configure(/*threads=*/16, /*width=*/839, /*height=*/1, VPX_CBR,
VPX_DL_REALTIME);
encoder.Encode(/*key_frame=*/false);
encoder.Configure(/*threads=*/11, /*width=*/657, /*height=*/5, VPX_CBR,
VPX_DL_REALTIME);
encoder.Encode(/*key_frame=*/false);
}
TEST(EncodeAPI, Buganizer329179808RowMT1BitDepth10) {
VP9Encoder encoder(4, 1, VPX_BITS_10, VPX_IMG_FMT_I444);
encoder.Configure(/*threads=*/16, /*width=*/1488, /*height=*/5, VPX_VBR,
VPX_DL_REALTIME);
encoder.Encode(/*key_frame=*/true);
encoder.Configure(/*threads=*/16, /*width=*/839, /*height=*/1, VPX_CBR,
VPX_DL_REALTIME);
encoder.Encode(/*key_frame=*/false);
encoder.Configure(/*threads=*/11, /*width=*/657, /*height=*/5, VPX_CBR,
VPX_DL_REALTIME);
encoder.Encode(/*key_frame=*/false);
}
TEST(EncodeAPI, Buganizer331108922BitDepth12) {
VP9Encoder encoder(9, 1, VPX_BITS_12, VPX_IMG_FMT_I444);
encoder.Configure(/*threads=*/1, /*width=*/1, /*height=*/1080, VPX_VBR,
VPX_DL_REALTIME);
encoder.Encode(/*key_frame=*/false);
encoder.Configure(/*threads=*/0, /*width=*/1, /*height=*/1080, VPX_CBR,
VPX_DL_GOOD_QUALITY);
encoder.Configure(/*threads=*/16, /*width=*/1, /*height=*/394, VPX_CBR,
VPX_DL_REALTIME);
encoder.Encode(/*key_frame=*/false);
encoder.Encode(/*key_frame=*/true);
encoder.Configure(/*threads=*/16, /*width=*/1, /*height=*/798, VPX_CBR,
VPX_DL_REALTIME);
encoder.Encode(/*key_frame=*/false);
}
#endif
#endif // CONFIG_VP9_ENCODER
} // namespace

View File

@ -33,15 +33,24 @@ enum TestMode {
kTwoPassGood,
kTwoPassBest
};
#if CONFIG_REALTIME_ONLY
#define ALL_TEST_MODES ::testing::Values(::libvpx_test::kRealTime)
#define ONE_PASS_TEST_MODES ::testing::Values(::libvpx_test::kRealTime)
#define ONE_OR_TWO_PASS_TEST_MODES ::testing::Values(::libvpx_test::kRealTime)
#else
#define ALL_TEST_MODES \
::testing::Values(::libvpx_test::kRealTime, ::libvpx_test::kOnePassGood, \
::libvpx_test::kOnePassBest, ::libvpx_test::kTwoPassGood, \
::libvpx_test::kTwoPassBest)
#define ONE_PASS_TEST_MODES \
::testing::Values(::libvpx_test::kRealTime, ::libvpx_test::kOnePassGood, \
::libvpx_test::kOnePassBest)
#define ONE_OR_TWO_PASS_TEST_MODES \
::testing::Values(::libvpx_test::kOnePassGood, ::libvpx_test::kTwoPassGood)
#endif
#define TWO_PASS_TEST_MODES \
::testing::Values(::libvpx_test::kTwoPassGood, ::libvpx_test::kTwoPassBest)

View File

@ -13,6 +13,7 @@
#include "test/encode_test_driver.h"
#include "test/i420_video_source.h"
#include "test/util.h"
#include "vpx_config.h"
namespace {
@ -194,6 +195,10 @@ class ErrorResilienceTestLarge
};
TEST_P(ErrorResilienceTestLarge, OnVersusOff) {
#if CONFIG_REALTIME_ONLY
GTEST_SKIP()
<< "Non-zero g_lag_in_frames is unsupported with CONFIG_REALTIME_ONLY";
#else
const vpx_rational timebase = { 33333333, 1000000000 };
cfg_.g_timebase = timebase;
cfg_.rc_target_bitrate = 2000;
@ -222,6 +227,7 @@ TEST_P(ErrorResilienceTestLarge, OnVersusOff) {
EXPECT_GE(psnr_ratio, 0.9);
EXPECT_LE(psnr_ratio, 1.1);
}
#endif // CONFIG_REALTIME_ONLY
}
// Check for successful decoding and no encoder/decoder mismatch

View File

@ -23,6 +23,7 @@
#include "test/util.h"
#include "vp9/common/vp9_entropy.h"
#include "vp9/common/vp9_scan.h"
#include "vpx_config.h"
#include "vpx/vpx_codec.h"
#include "vpx/vpx_integer.h"
#include "vpx_ports/mem.h"

View File

@ -13,6 +13,7 @@
#include "test/codec_factory.h"
#include "test/register_state_check.h"
#include "test/video_source.h"
#include "vpx_config.h"
namespace {
@ -167,6 +168,9 @@ class VP9FrameSizeTestsLarge : public ::libvpx_test::EncoderTest,
};
TEST_F(VP9FrameSizeTestsLarge, TestInvalidSizes) {
#ifdef CHROMIUM
GTEST_SKIP() << "16K framebuffers are not supported by Chromium's allocator.";
#else
::libvpx_test::RandomVideoSource video;
#if CONFIG_SIZE_LIMIT
@ -175,9 +179,16 @@ TEST_F(VP9FrameSizeTestsLarge, TestInvalidSizes) {
expected_res_ = VPX_CODEC_MEM_ERROR;
ASSERT_NO_FATAL_FAILURE(RunLoop(&video, expected_res_));
#endif
#endif
}
TEST_F(VP9FrameSizeTestsLarge, ValidSizes) {
#ifdef CHROMIUM
// Adjacent string literals are concatenated verbatim, so the separating space
// has to be part of one of them.
GTEST_SKIP()
<< "Under Chromium's configuration the allocator is unable to provide "
"the space required for a single frame at the maximum resolution.";
#else
::libvpx_test::RandomVideoSource video;
#if CONFIG_SIZE_LIMIT
@ -202,6 +213,8 @@ TEST_F(VP9FrameSizeTestsLarge, ValidSizes) {
expected_res_ = VPX_CODEC_OK;
ASSERT_NO_FATAL_FAILURE(::libvpx_test::EncoderTest::RunLoop(&video));
#endif
#endif // defined(CHROMIUM)
}
TEST_F(VP9FrameSizeTestsLarge, OneByOneVideo) {

View File

@ -17,6 +17,7 @@
#include "test/acm_random.h"
#include "test/register_state_check.h"
#include "vpx_config.h"
namespace {

View File

@ -12,6 +12,7 @@
#include "test/encode_test_driver.h"
#include "test/i420_video_source.h"
#include "test/util.h"
#include "vpx_config.h"
namespace {
class LevelTest
@ -67,6 +68,9 @@ class LevelTest
};
TEST_P(LevelTest, TestTargetLevel11Large) {
#if CONFIG_REALTIME_ONLY
GTEST_SKIP();
#else
ASSERT_NE(encoding_mode_, ::libvpx_test::kRealTime);
::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
60);
@ -74,9 +78,13 @@ TEST_P(LevelTest, TestTargetLevel11Large) {
cfg_.rc_target_bitrate = 150;
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
ASSERT_GE(target_level_, level_);
#endif
}
TEST_P(LevelTest, TestTargetLevel20Large) {
#if CONFIG_REALTIME_ONLY
GTEST_SKIP();
#else
ASSERT_NE(encoding_mode_, ::libvpx_test::kRealTime);
::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
30, 1, 0, 60);
@ -84,9 +92,13 @@ TEST_P(LevelTest, TestTargetLevel20Large) {
cfg_.rc_target_bitrate = 1200;
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
ASSERT_GE(target_level_, level_);
#endif
}
TEST_P(LevelTest, TestTargetLevel31Large) {
#if CONFIG_REALTIME_ONLY
GTEST_SKIP();
#else
ASSERT_NE(encoding_mode_, ::libvpx_test::kRealTime);
::libvpx_test::I420VideoSource video("niklas_1280_720_30.y4m", 1280, 720, 30,
1, 0, 60);
@ -94,6 +106,7 @@ TEST_P(LevelTest, TestTargetLevel31Large) {
cfg_.rc_target_bitrate = 8000;
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
ASSERT_GE(target_level_, level_);
#endif
}
// Test for keeping level stats only
@ -140,8 +153,6 @@ TEST_P(LevelTest, TestTargetLevelApi) {
EXPECT_EQ(VPX_CODEC_OK, vpx_codec_destroy(&enc));
}
VP9_INSTANTIATE_TEST_SUITE(LevelTest,
::testing::Values(::libvpx_test::kTwoPassGood,
::libvpx_test::kOnePassGood),
VP9_INSTANTIATE_TEST_SUITE(LevelTest, ONE_OR_TWO_PASS_TEST_MODES,
::testing::Range(0, 9));
} // namespace

View File

@ -13,6 +13,7 @@
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "vpx_config.h"
#include "./vpx_dsp_rtcd.h"
#include "vpx/vpx_integer.h"
#include "vpx_mem/vpx_mem.h"

View File

@ -25,6 +25,7 @@
#include "vp9/common/vp9_blockd.h"
#include "vp9/common/vp9_scan.h"
#include "vpx/vpx_integer.h"
#include "vpx_config.h"
#include "vpx_ports/vpx_timer.h"
using libvpx_test::ACMRandom;

View File

@ -14,6 +14,7 @@
#include "test/util.h"
#include "test/video_source.h"
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "vpx_config.h"
namespace {
@ -94,6 +95,9 @@ TEST_P(RealtimeTest, RealtimeDefaultCpuUsed) {
TEST_P(RealtimeTest, IntegerOverflow) { TestIntegerOverflow(2048, 2048); }
TEST_P(RealtimeTest, IntegerOverflowLarge) {
#ifdef CHROMIUM
GTEST_SKIP() << "16K framebuffers are not supported by Chromium's allocator.";
#else
if (IsVP9()) {
#if VPX_ARCH_AARCH64 || VPX_ARCH_X86_64
TestIntegerOverflow(16384, 16384);
@ -107,6 +111,7 @@ TEST_P(RealtimeTest, IntegerOverflowLarge) {
"warnings are fixed.";
// TestIntegerOverflow(16383, 16383);
}
#endif // defined(CHROMIUM)
}
VP8_INSTANTIATE_TEST_SUITE(RealtimeTest,

View File

@ -15,6 +15,7 @@
#include "test/i420_video_source.h"
#include "test/video_source.h"
#include "test/util.h"
#include "vpx_config.h"
// Enable(1) or Disable(0) writing of the compressed bitstream.
#define WRITE_COMPRESSED_STREAM 0

View File

@ -21,9 +21,12 @@ LIBVPX_TEST_SRCS-yes += video_source.h
## Black box tests only use the public API.
##
LIBVPX_TEST_SRCS-yes += ../md5_utils.h ../md5_utils.c
LIBVPX_TEST_SRCS-yes += vpx_image_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += ivf_video_source.h
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += ../y4minput.h ../y4minput.c
ifneq ($(CONFIG_REALTIME_ONLY),yes)
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += altref_test.cc
endif
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += encode_api_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += error_resilience_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += i420_video_source.h
@ -32,7 +35,9 @@ LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += resize_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += y4m_video_source.h
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += yuv_video_source.h
ifneq ($(CONFIG_REALTIME_ONLY),yes)
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += config_test.cc
endif
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += cq_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += keyframe_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += vp8_datarate_test.cc
@ -43,7 +48,9 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += external_frame_buffer_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += user_priv_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += active_map_refresh_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += active_map_test.cc
ifneq ($(CONFIG_REALTIME_ONLY),yes)
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += alt_ref_aq_segment_test.cc
endif
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += aq_segment_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += borders_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += cpu_speed_test.cc
@ -60,7 +67,9 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += svc_test.h
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += svc_end_to_end_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += timestamp_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_datarate_test.cc
ifneq ($(CONFIG_REALTIME_ONLY),yes)
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_ext_ratectrl_test.cc
endif
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += ../vp9/simple_encode.h
LIBVPX_TEST_SRCS-yes += decode_test_driver.cc

View File

@ -9,6 +9,7 @@
*/
#include "test/test_vectors.h"
#include "vpx_config.h"
namespace libvpx_test {

View File

@ -12,6 +12,7 @@
#include "test/util.h"
#include "test/video_source.h"
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "vpx_config.h"
namespace {
@ -94,8 +95,15 @@ TEST_P(TimestampTest, TestVpxRollover) {
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
}
#if CONFIG_REALTIME_ONLY
VP8_INSTANTIATE_TEST_SUITE(TimestampTest,
::testing::Values(::libvpx_test::kRealTime));
VP9_INSTANTIATE_TEST_SUITE(TimestampTest,
::testing::Values(::libvpx_test::kRealTime));
#else
VP8_INSTANTIATE_TEST_SUITE(TimestampTest,
::testing::Values(::libvpx_test::kTwoPassGood));
VP9_INSTANTIATE_TEST_SUITE(TimestampTest,
::testing::Values(::libvpx_test::kTwoPassGood));
#endif
} // namespace

View File

@ -21,6 +21,7 @@
#include "vp8/encoder/denoising.h"
#include "vp8/common/reconinter.h"
#include "vpx/vpx_integer.h"
#include "vpx_config.h"
#include "vpx_mem/vpx_mem.h"
using libvpx_test::ACMRandom;

View File

@ -18,6 +18,7 @@
#include "test/y4m_video_source.h"
#include "test/yuv_video_source.h"
#include "vp9/encoder/vp9_ratectrl.h"
#include "vpx_config.h"
namespace {

View File

@ -53,7 +53,7 @@ TEST(VP9, TestBitIO) {
ACMRandom bit_rnd(random_seed);
vpx_writer bw;
uint8_t bw_buffer[kBufferSize];
vpx_start_encode(&bw, bw_buffer);
vpx_start_encode(&bw, bw_buffer, sizeof(bw_buffer));
int bit = (bit_method == 0) ? 0 : (bit_method == 1) ? 1 : 0;
for (int i = 0; i < kBitsToTest; ++i) {
@ -65,7 +65,7 @@ TEST(VP9, TestBitIO) {
vpx_write(&bw, bit, static_cast<int>(probas[i]));
}
vpx_stop_encode(&bw);
GTEST_ASSERT_EQ(vpx_stop_encode(&bw), 0);
// vpx_reader_fill() may read into uninitialized data that
// isn't used meaningfully, but may trigger an MSan warning.
memset(bw_buffer + bw.pos, 0, sizeof(BD_VALUE) - 1);
@ -90,3 +90,24 @@ TEST(VP9, TestBitIO) {
}
}
}
// Starts the writer with a declared buffer size of 0 (the backing array still
// has one byte) and expects vpx_stop_encode() to return -1.
// NOTE(review): -1 presumably signals a writer error -- confirm against the
// vpx_stop_encode() declaration.
TEST(VP9, TestBitIOBufferSize0) {
vpx_writer bw;
uint8_t bw_buffer[1];
vpx_start_encode(&bw, bw_buffer, 0);
GTEST_ASSERT_EQ(vpx_stop_encode(&bw), -1);
}
// Starts the writer on a one-byte buffer, writes nothing, and expects
// vpx_stop_encode() to return -1.
// NOTE(review): presumably one byte is too small for what the writer emits on
// stop -- confirm against the vpx_writer implementation.
TEST(VP9, TestBitIOBufferSize1) {
vpx_writer bw;
uint8_t bw_buffer[1];
vpx_start_encode(&bw, bw_buffer, sizeof(bw_buffer));
GTEST_ASSERT_EQ(vpx_stop_encode(&bw), -1);
}
// Starts the writer on a two-byte buffer, writes nothing, and expects
// vpx_stop_encode() to return 0, in contrast to the 0- and 1-byte cases.
TEST(VP9, TestBitIOBufferSize2) {
vpx_writer bw;
uint8_t bw_buffer[2];
vpx_start_encode(&bw, bw_buffer, sizeof(bw_buffer));
GTEST_ASSERT_EQ(vpx_stop_encode(&bw), 0);
}

View File

@ -24,6 +24,7 @@
#include "vp9/common/vp9_reconinter.h"
#include "vp9/encoder/vp9_context_tree.h"
#include "vp9/encoder/vp9_denoiser.h"
#include "vpx_config.h"
using libvpx_test::ACMRandom;

View File

@ -18,6 +18,7 @@
#include "test/util.h"
#include "test/y4m_video_source.h"
#include "test/yuv_video_source.h"
#include "vpx_config.h"
namespace {
@ -65,7 +66,9 @@ const TestVideoParam kTestVectorsNv12[] = {
// Encoding modes tested
const libvpx_test::TestMode kEncodingModeVectors[] = {
#if !CONFIG_REALTIME_ONLY
::libvpx_test::kTwoPassGood, ::libvpx_test::kOnePassGood,
#endif
::libvpx_test::kRealTime
};

View File

@ -17,6 +17,7 @@
#include "test/util.h"
#include "test/y4m_video_source.h"
#include "vp9/encoder/vp9_firstpass.h"
#include "vpx_config.h"
namespace {
// FIRSTPASS_STATS struct:
@ -168,6 +169,9 @@ static void compare_fp_stats_md5(vpx_fixed_buf_t *fp_stats) {
}
TEST_P(VPxFirstPassEncoderThreadTest, FirstPassStatsTest) {
#if CONFIG_REALTIME_ONLY
GTEST_SKIP();
#else
::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60);
first_pass_only_ = true;
@ -216,6 +220,7 @@ TEST_P(VPxFirstPassEncoderThreadTest, FirstPassStatsTest) {
// Compare to check if stats match with row-mt=0/1.
compare_fp_stats_md5(&firstpass_stats_);
#endif // CONFIG_REALTIME_ONLY
}
class VPxEncoderThreadTest
@ -407,23 +412,17 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Combine(
::testing::Values(
static_cast<const libvpx_test::CodecFactory *>(&libvpx_test::kVP9)),
::testing::Values(::libvpx_test::kTwoPassGood,
::libvpx_test::kOnePassGood,
::libvpx_test::kRealTime),
::testing::Range(3, 10), // cpu_used
::testing::Range(0, 3), // tile_columns
::testing::Range(2, 5))); // threads
ONE_PASS_TEST_MODES, ::testing::Range(3, 10), // cpu_used
::testing::Range(0, 3), // tile_columns
::testing::Range(2, 5))); // threads
INSTANTIATE_TEST_SUITE_P(
VP9Large, VPxEncoderThreadTest,
::testing::Combine(
::testing::Values(
static_cast<const libvpx_test::CodecFactory *>(&libvpx_test::kVP9)),
::testing::Values(::libvpx_test::kTwoPassGood,
::libvpx_test::kOnePassGood,
::libvpx_test::kRealTime),
::testing::Range(0, 3), // cpu_used
::testing::Range(0, 3), // tile_columns
::testing::Range(2, 5))); // threads
ONE_PASS_TEST_MODES, ::testing::Range(0, 3), // cpu_used
::testing::Range(0, 3), // tile_columns
::testing::Range(2, 5))); // threads
} // namespace

View File

@ -51,6 +51,22 @@ class RateControllerForTest {
gop_decision.use_key_frame = current_gop_ == 0 ? 1 : 0;
gop_decision.use_alt_ref = 1;
gop_decision.gop_coding_frames = kFixedGOPSize;
// First frame is key frame
gop_decision.update_type[0] = VPX_RC_KF_UPDATE;
// Every frame after the first gets the LF update type, update_ref_index 0,
// and three reference slots (LAST, GOLDEN, ALTREF) that all use index 0.
for (int i = 1; i < kFixedGOPSize; i++) {
gop_decision.update_type[i] = VPX_RC_LF_UPDATE;
gop_decision.update_ref_index[i] = 0;
gop_decision.ref_frame_list[i].index[0] = 0;
gop_decision.ref_frame_list[i].name[0] = VPX_RC_LAST_FRAME;
gop_decision.ref_frame_list[i].index[1] = 0;
gop_decision.ref_frame_list[i].name[1] = VPX_RC_GOLDEN_FRAME;
gop_decision.ref_frame_list[i].index[2] = 0;
// Slot 2 carries the ALTREF entry. Writing name[1] here would overwrite the
// GOLDEN entry set just above and leave name[2] unset.
gop_decision.ref_frame_list[i].name[2] = VPX_RC_ALTREF_FRAME;
}
// Second frame is altref
gop_decision.update_type[1] = VPX_RC_ARF_UPDATE;
gop_decision.update_ref_index[1] = 2;
return gop_decision;
}
@ -136,7 +152,11 @@ class ExtRateCtrlTest : public ::libvpx_test::EncoderTest,
void SetUp() override {
InitializeConfig();
#if CONFIG_REALTIME_ONLY
SetMode(::libvpx_test::kRealTime);
#else
SetMode(::libvpx_test::kTwoPassGood);
#endif
}
void PreEncodeFrameHook(::libvpx_test::VideoSource *video,

View File

@ -118,8 +118,13 @@ TEST_P(LosslessTest, TestLossLessEncodingCtrl) {
EXPECT_GE(psnr_lossless, kMaxPsnr);
}
#if CONFIG_REALTIME_ONLY
VP9_INSTANTIATE_TEST_SUITE(LosslessTest,
::testing::Values(::libvpx_test::kRealTime));
#else
VP9_INSTANTIATE_TEST_SUITE(LosslessTest,
::testing::Values(::libvpx_test::kRealTime,
::libvpx_test::kOnePassGood,
::libvpx_test::kTwoPassGood));
#endif
} // namespace

View File

@ -16,6 +16,7 @@
#include "test/encode_test_driver.h"
#include "test/util.h"
#include "test/yuv_video_source.h"
#include "vpx_config.h"
namespace {
#define MAX_EXTREME_MV 1
@ -23,7 +24,9 @@ namespace {
// Encoding modes
const libvpx_test::TestMode kEncodingModeVectors[] = {
#if !CONFIG_REALTIME_ONLY
::libvpx_test::kTwoPassGood, ::libvpx_test::kOnePassGood,
#endif
::libvpx_test::kRealTime
};

View File

@ -0,0 +1,127 @@
/*
* Copyright (c) 2024 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <climits>
#include "vpx/vpx_image.h"
#include "third_party/googletest/src/include/gtest/gtest.h"
TEST(VpxImageTest, VpxImgWrapInvalidAlign) {
const int kWidth = 128;
const int kHeight = 128;
unsigned char buf[kWidth * kHeight * 3];
vpx_image_t img;
// Set img_data and img_data_owner to junk values. vpx_img_wrap() should
// not read these values on failure.
unsigned char empty[] = "";
img.img_data = empty;
img.img_data_owner = 1;
vpx_img_fmt_t format = VPX_IMG_FMT_I444;
// 'align' must be a power of 2 but is not. This causes the vpx_img_wrap()
// call to fail. The test verifies we do not read the junk values in 'img'.
unsigned int align = 31;
EXPECT_EQ(vpx_img_wrap(&img, format, kWidth, kHeight, align, buf), nullptr);
}
// Wraps a caller-owned buffer in a vpx_image_t and checks that
// vpx_img_set_rect() accepts the full-frame rectangle but rejects an origin
// of (UINT_MAX, UINT_MAX).
TEST(VpxImageTest, VpxImgSetRectOverflow) {
const int kWidth = 128;
const int kHeight = 128;
unsigned char buf[kWidth * kHeight * 3];
vpx_image_t img;
vpx_img_fmt_t format = VPX_IMG_FMT_I444;
unsigned int align = 32;
// Fatal assertion: if wrapping fails, 'img' is uninitialised and the
// vpx_img_set_rect() calls below must not run on it.
ASSERT_EQ(vpx_img_wrap(&img, format, kWidth, kHeight, align, buf), &img);
EXPECT_EQ(vpx_img_set_rect(&img, 0, 0, kWidth, kHeight), 0);
// This would result in overflow because -1 is cast to UINT_MAX.
EXPECT_NE(vpx_img_set_rect(&img, static_cast<unsigned int>(-1),
static_cast<unsigned int>(-1), kWidth, kHeight),
0);
}
TEST(VpxImageTest, VpxImgAllocNone) {
const int kWidth = 128;
const int kHeight = 128;
vpx_image_t img;
vpx_img_fmt_t format = VPX_IMG_FMT_NONE;
unsigned int align = 32;
ASSERT_EQ(vpx_img_alloc(&img, format, kWidth, kHeight, align), nullptr);
}
// Allocates an NV12 image and checks the plane layout: U and V use the same
// stride as Y, and the V plane pointer is one byte past the U plane pointer.
TEST(VpxImageTest, VpxImgAllocNv12) {
const int kWidth = 128;
const int kHeight = 128;
vpx_image_t img;
vpx_img_fmt_t format = VPX_IMG_FMT_NV12;
unsigned int align = 32;
// Fatal assertion: if allocation fails, 'img' is uninitialised, so the
// stride/plane reads and vpx_img_free() below must not run on it.
ASSERT_EQ(vpx_img_alloc(&img, format, kWidth, kHeight, align), &img);
EXPECT_EQ(img.stride[VPX_PLANE_U], img.stride[VPX_PLANE_Y]);
EXPECT_EQ(img.stride[VPX_PLANE_V], img.stride[VPX_PLANE_U]);
EXPECT_EQ(img.planes[VPX_PLANE_V], img.planes[VPX_PLANE_U] + 1);
vpx_img_free(&img);
}
// Calls vpx_img_alloc() with very large widths. The first three calls are
// required to fail. For the remaining sizes no result is asserted: the call
// may return an image or nullptr, and any returned image is freed.
TEST(VpxImageTest, VpxImgAllocHugeWidth) {
// The stride (0x80000000 * 2) would overflow unsigned int.
vpx_image_t *image =
vpx_img_alloc(nullptr, VPX_IMG_FMT_I42016, 0x80000000, 1, 1);
ASSERT_EQ(image, nullptr);
// The stride (0x80000000) would overflow int.
image = vpx_img_alloc(nullptr, VPX_IMG_FMT_I420, 0x80000000, 1, 1);
ASSERT_EQ(image, nullptr);
// The aligned width (UINT_MAX + 1) would overflow unsigned int.
image = vpx_img_alloc(nullptr, VPX_IMG_FMT_I420, UINT_MAX, 1, 1);
ASSERT_EQ(image, nullptr);
// From here on only "does not crash" is checked; success is optional.
image = vpx_img_alloc(nullptr, VPX_IMG_FMT_I420, 0x7ffffffe, 1, 1);
if (image) {
vpx_img_free(image);
}
// Same width and height tried with three different 8-bit formats.
image = vpx_img_alloc(nullptr, VPX_IMG_FMT_I420, 285245883, 64, 1);
if (image) {
vpx_img_free(image);
}
image = vpx_img_alloc(nullptr, VPX_IMG_FMT_NV12, 285245883, 64, 1);
if (image) {
vpx_img_free(image);
}
image = vpx_img_alloc(nullptr, VPX_IMG_FMT_YV12, 285245883, 64, 1);
if (image) {
vpx_img_free(image);
}
// For the 16-bit format, also write through the Y plane at index 0 and at
// index d_w - 1 when the allocation succeeds.
// NOTE(review): presumably these are the first and last samples of the first
// row -- confirm that d_w is the row width in samples.
image = vpx_img_alloc(nullptr, VPX_IMG_FMT_I42016, 65536, 2, 1);
if (image) {
uint16_t *y_plane =
reinterpret_cast<uint16_t *>(image->planes[VPX_PLANE_Y]);
y_plane[0] = 0;
y_plane[image->d_w - 1] = 0;
vpx_img_free(image);
}
image = vpx_img_alloc(nullptr, VPX_IMG_FMT_I42016, 285245883, 2, 1);
if (image) {
uint16_t *y_plane =
reinterpret_cast<uint16_t *>(image->planes[VPX_PLANE_Y]);
y_plane[0] = 0;
y_plane[image->d_w - 1] = 0;
vpx_img_free(image);
}
}

View File

@ -14,6 +14,7 @@
#include "test/acm_random.h"
#include "test/buffer.h"
#include "test/register_state_check.h"
#include "vpx_config.h"
#include "vpx_ports/vpx_timer.h"
namespace {

View File

@ -64,19 +64,4 @@ void vp8_machine_specific_config(VP8_COMMON *ctx) {
#if CONFIG_MULTITHREAD
ctx->processor_core_count = get_cpu_count();
#endif /* CONFIG_MULTITHREAD */
#if VPX_ARCH_ARM
ctx->cpu_caps = arm_cpu_caps();
#elif VPX_ARCH_X86 || VPX_ARCH_X86_64
ctx->cpu_caps = x86_simd_caps();
#elif VPX_ARCH_PPC
ctx->cpu_caps = ppc_simd_caps();
#elif VPX_ARCH_MIPS
ctx->cpu_caps = mips_cpu_caps();
#elif VPX_ARCH_LOONGARCH
ctx->cpu_caps = loongarch_cpu_caps();
#else
// generic-gnu targets.
ctx->cpu_caps = 0;
#endif
}

View File

@ -167,7 +167,6 @@ typedef struct VP8Common {
#if CONFIG_POSTPROC
struct postproc_state postproc_state;
#endif
int cpu_caps;
} VP8_COMMON;
#ifdef __cplusplus

View File

@ -501,7 +501,7 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) {
int ct[4];
vp8_find_near_mvs(xd, m, &n1, &n2, &best_mv, ct, rf,
cpi->common.ref_frame_sign_bias);
pc->ref_frame_sign_bias);
vp8_clamp_mv2(&best_mv, xd);
vp8_mv_ref_probs(mv_ref_p, ct);
@ -1021,7 +1021,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest,
bc[0].error = &pc->error;
validate_buffer(cx_data, 3, cx_data_end, &cpi->common.error);
validate_buffer(cx_data, 3, cx_data_end, &pc->error);
cx_data += 3;
#if defined(SECTIONBITS_OUTPUT)
@ -1034,7 +1034,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest,
if (oh.type == KEY_FRAME) {
int v;
validate_buffer(cx_data, 7, cx_data_end, &cpi->common.error);
validate_buffer(cx_data, 7, cx_data_end, &pc->error);
/* Start / synch code */
cx_data[0] = 0x9D;
@ -1243,7 +1243,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest,
#else
if (pc->refresh_entropy_probs == 0) {
/* save a copy for later refresh */
memcpy(&cpi->common.lfc, &cpi->common.fc, sizeof(cpi->common.fc));
pc->lfc = pc->fc;
}
vp8_update_coef_probs(cpi);

View File

@ -15,7 +15,6 @@
#include "vp8/common/common.h"
#include "vp8/common/entropymode.h"
#include "vp8/common/extend.h"
#include "vp8/common/findnearmv.h"
#include "vp8/common/invtrans.h"
#include "vp8/common/quant_common.h"
#include "vp8/common/reconinter.h"
@ -25,11 +24,9 @@
#include "vp8/encoder/encodeframe.h"
#include "vp8/encoder/encodeintra.h"
#include "vp8/encoder/encodemb.h"
#include "vp8/encoder/encodemv.h"
#include "vp8/encoder/onyx_int.h"
#include "vp8/encoder/pickinter.h"
#include "vp8/encoder/rdopt.h"
#include "vp8/encoder/segmentation.h"
#include "vp8_rtcd.h"
#include "vpx/internal/vpx_codec_internal.h"
#include "vpx_dsp_rtcd.h"

View File

@ -23,7 +23,6 @@
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/static_assert.h"
#include "vpx_ports/system_state.h"
#include "vpx_util/vpx_thread.h"
#include "vpx_util/vpx_timestamp.h"
#if CONFIG_MULTITHREAD
#include "vp8/encoder/ethreading.h"
@ -152,7 +151,7 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
RANGE_CHECK_HI(cfg, g_profile, 3);
RANGE_CHECK_HI(cfg, rc_max_quantizer, 63);
RANGE_CHECK_HI(cfg, rc_min_quantizer, cfg->rc_max_quantizer);
RANGE_CHECK_HI(cfg, g_threads, MAX_NUM_THREADS);
RANGE_CHECK_HI(cfg, g_threads, 64);
#if CONFIG_REALTIME_ONLY
RANGE_CHECK_HI(cfg, g_lag_in_frames, 0);
#elif CONFIG_MULTI_RES_ENCODING
@ -710,6 +709,7 @@ static vpx_codec_err_t vp8e_init(vpx_codec_ctx_t *ctx,
priv->cx_data = malloc(priv->cx_data_sz);
if (!priv->cx_data) {
priv->cx_data_sz = 0;
return VPX_CODEC_MEM_ERROR;
}
@ -994,19 +994,10 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx,
res = image2yuvconfig(img, &sd);
if (sd.y_width != ctx->cfg.g_w || sd.y_height != ctx->cfg.g_h) {
/* from vpx_encoder.h for g_w/g_h:
"Note that the frames passed as input to the encoder must have this
resolution"
*/
ctx->base.err_detail = "Invalid input frame resolution";
res = VPX_CODEC_INVALID_PARAM;
} else {
if (vp8_receive_raw_frame(ctx->cpi, ctx->next_frame_flag | lib_flags,
&sd, dst_time_stamp, dst_end_time_stamp)) {
VP8_COMP *cpi = (VP8_COMP *)ctx->cpi;
res = update_error_state(ctx, &cpi->common.error);
}
if (vp8_receive_raw_frame(ctx->cpi, ctx->next_frame_flag | lib_flags, &sd,
dst_time_stamp, dst_end_time_stamp)) {
VP8_COMP *cpi = (VP8_COMP *)ctx->cpi;
res = update_error_state(ctx, &cpi->common.error);
}
/* reset for next frame */

View File

@ -68,6 +68,7 @@ static int decode_unsigned_max(struct vpx_read_bit_buffer *rb, int max) {
// Reads the transform mode from the bitstream: a 2-bit literal, followed by
// one extra bit that is added to the value when the literal equals
// ALLOW_32X32. The result is asserted to be below TX_MODES.
static TX_MODE read_tx_mode(vpx_reader *r) {
  TX_MODE mode = vpx_read_literal(r, 2);
  if (mode == ALLOW_32X32) {
    mode += vpx_read_bit(r);
  }
  assert(mode < TX_MODES);
  return mode;
}

View File

@ -9,6 +9,7 @@
*/
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <limits.h>
@ -943,12 +944,11 @@ static int encode_tile_worker(void *arg1, void *arg2) {
VP9BitstreamWorkerData *data = (VP9BitstreamWorkerData *)arg2;
MACROBLOCKD *const xd = &data->xd;
const int tile_row = 0;
vpx_start_encode(&data->bit_writer, data->dest);
vpx_start_encode(&data->bit_writer, data->dest, data->dest_size);
write_modes(cpi, xd, &cpi->tile_data[data->tile_idx].tile_info,
&data->bit_writer, tile_row, data->tile_idx,
&data->max_mv_magnitude, data->interp_filter_selected);
vpx_stop_encode(&data->bit_writer);
return 1;
return vpx_stop_encode(&data->bit_writer) == 0;
}
void vp9_bitstream_encode_tiles_buffer_dealloc(VP9_COMP *const cpi) {
@ -962,7 +962,18 @@ void vp9_bitstream_encode_tiles_buffer_dealloc(VP9_COMP *const cpi) {
}
}
static void encode_tiles_buffer_alloc(VP9_COMP *const cpi) {
// Returns a buffer size in bytes derived from the configured frame size:
// oxcf.width * oxcf.height * image_bps / 8, where image_bps is
// (8 + 2 * (8 >> (subsampling_x + subsampling_y))), doubled when the bit
// depth is greater than 8. The product is formed in 64 bits before being
// converted to size_t.
static size_t encode_tiles_buffer_alloc_size(const VP9_COMP *cpi) {
  const VP9_COMMON *cm = &cpi->common;
  const int subsampling_shift = cm->subsampling_x + cm->subsampling_y;
  // 1 for bit depths up to 8, 2 otherwise.
  const int depth_factor = 1 + (cm->bit_depth > 8);
  const int image_bps = (8 + 2 * (8 >> subsampling_shift)) * depth_factor;
  const int64_t size =
      (int64_t)cpi->oxcf.width * cpi->oxcf.height * image_bps / 8;
  return (size_t)size;
}
static void encode_tiles_buffer_alloc(VP9_COMP *const cpi,
size_t buffer_alloc_size) {
VP9_COMMON *const cm = &cpi->common;
int i;
const size_t worker_data_size =
@ -971,14 +982,14 @@ static void encode_tiles_buffer_alloc(VP9_COMP *const cpi) {
vpx_memalign(16, worker_data_size));
memset(cpi->vp9_bitstream_worker_data, 0, worker_data_size);
for (i = 1; i < cpi->num_workers; ++i) {
cpi->vp9_bitstream_worker_data[i].dest_size =
cpi->oxcf.width * cpi->oxcf.height;
CHECK_MEM_ERROR(&cm->error, cpi->vp9_bitstream_worker_data[i].dest,
vpx_malloc(cpi->vp9_bitstream_worker_data[i].dest_size));
vpx_malloc(buffer_alloc_size));
cpi->vp9_bitstream_worker_data[i].dest_size = buffer_alloc_size;
}
}
static size_t encode_tiles_mt(VP9_COMP *cpi, uint8_t *data_ptr) {
static size_t encode_tiles_mt(VP9_COMP *cpi, uint8_t *data_ptr,
size_t data_size) {
const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
VP9_COMMON *const cm = &cpi->common;
const int tile_cols = 1 << cm->log2_tile_cols;
@ -986,11 +997,11 @@ static size_t encode_tiles_mt(VP9_COMP *cpi, uint8_t *data_ptr) {
size_t total_size = 0;
int tile_col = 0;
const size_t buffer_alloc_size = encode_tiles_buffer_alloc_size(cpi);
if (!cpi->vp9_bitstream_worker_data ||
cpi->vp9_bitstream_worker_data[1].dest_size >
(cpi->oxcf.width * cpi->oxcf.height)) {
cpi->vp9_bitstream_worker_data[1].dest_size != buffer_alloc_size) {
vp9_bitstream_encode_tiles_buffer_dealloc(cpi);
encode_tiles_buffer_alloc(cpi);
encode_tiles_buffer_alloc(cpi, buffer_alloc_size);
}
while (tile_col < tile_cols) {
@ -1010,8 +1021,13 @@ static size_t encode_tiles_mt(VP9_COMP *cpi, uint8_t *data_ptr) {
if (i == 0) {
// If this worker happens to be for the last tile, then do not offset it
// by 4 for the tile size.
data->dest =
data_ptr + total_size + (tile_col == tile_cols - 1 ? 0 : 4);
const size_t offset = total_size + (tile_col == tile_cols - 1 ? 0 : 4);
if (data_size < offset) {
vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
"encode_tiles_mt: output buffer full");
}
data->dest = data_ptr + offset;
data->dest_size = data_size - offset;
}
worker->data1 = cpi;
worker->data2 = data;
@ -1032,7 +1048,10 @@ static size_t encode_tiles_mt(VP9_COMP *cpi, uint8_t *data_ptr) {
uint32_t tile_size;
int k;
if (!winterface->sync(worker)) return 0;
if (!winterface->sync(worker)) {
vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
"encode_tiles_mt: worker had error");
}
tile_size = data->bit_writer.pos;
// Aggregate per-thread bitstream stats.
@ -1044,10 +1063,18 @@ static size_t encode_tiles_mt(VP9_COMP *cpi, uint8_t *data_ptr) {
// Prefix the size of the tile on all but the last.
if (tile_col != tile_cols || j < i - 1) {
if (data_size - total_size < 4) {
vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
"encode_tiles_mt: output buffer full");
}
mem_put_be32(data_ptr + total_size, tile_size);
total_size += 4;
}
if (j > 0) {
if (data_size - total_size < tile_size) {
vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
"encode_tiles_mt: output buffer full");
}
memcpy(data_ptr + total_size, data->dest, tile_size);
}
total_size += tile_size;
@ -1056,7 +1083,7 @@ static size_t encode_tiles_mt(VP9_COMP *cpi, uint8_t *data_ptr) {
return total_size;
}
static size_t encode_tiles(VP9_COMP *cpi, uint8_t *data_ptr) {
static size_t encode_tiles(VP9_COMP *cpi, uint8_t *data_ptr, size_t data_size) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
vpx_writer residual_bc;
@ -1073,23 +1100,32 @@ static size_t encode_tiles(VP9_COMP *cpi, uint8_t *data_ptr) {
// that it does not make the overall process worse in any case.
if (cpi->oxcf.mode == REALTIME && cpi->num_workers > 1 && tile_rows == 1 &&
tile_cols > 1) {
return encode_tiles_mt(cpi, data_ptr);
return encode_tiles_mt(cpi, data_ptr, data_size);
}
for (tile_row = 0; tile_row < tile_rows; tile_row++) {
for (tile_col = 0; tile_col < tile_cols; tile_col++) {
int tile_idx = tile_row * tile_cols + tile_col;
size_t offset;
if (tile_col < tile_cols - 1 || tile_row < tile_rows - 1)
vpx_start_encode(&residual_bc, data_ptr + total_size + 4);
offset = total_size + 4;
else
vpx_start_encode(&residual_bc, data_ptr + total_size);
offset = total_size;
if (data_size < offset) {
vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
"encode_tiles: output buffer full");
}
vpx_start_encode(&residual_bc, data_ptr + offset, data_size - offset);
write_modes(cpi, xd, &cpi->tile_data[tile_idx].tile_info, &residual_bc,
tile_row, tile_col, &cpi->max_mv_magnitude,
cpi->interp_filter_selected);
vpx_stop_encode(&residual_bc);
if (vpx_stop_encode(&residual_bc)) {
vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
"encode_tiles: output buffer full");
}
if (tile_col < tile_cols - 1 || tile_row < tile_rows - 1) {
// size of this tile
mem_put_be32(data_ptr + total_size, residual_bc.pos);
@ -1271,14 +1307,15 @@ static void write_uncompressed_header(VP9_COMP *cpi,
write_tile_info(cm, wb);
}
static size_t write_compressed_header(VP9_COMP *cpi, uint8_t *data) {
static size_t write_compressed_header(VP9_COMP *cpi, uint8_t *data,
size_t data_size) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
FRAME_CONTEXT *const fc = cm->fc;
FRAME_COUNTS *counts = cpi->td.counts;
vpx_writer header_bc;
vpx_start_encode(&header_bc, data);
vpx_start_encode(&header_bc, data, data_size);
if (xd->lossless)
cm->tx_mode = ONLY_4X4;
@ -1342,46 +1379,68 @@ static size_t write_compressed_header(VP9_COMP *cpi, uint8_t *data) {
&counts->mv);
}
vpx_stop_encode(&header_bc);
assert(header_bc.pos <= 0xffff);
if (vpx_stop_encode(&header_bc)) {
vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
"write_compressed_header: output buffer full");
}
return header_bc.pos;
}
void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, size_t *size) {
void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, size_t dest_size,
size_t *size) {
VP9_COMMON *const cm = &cpi->common;
uint8_t *data = dest;
size_t first_part_size, uncompressed_hdr_size;
struct vpx_write_bit_buffer wb = { data, 0 };
size_t data_size = dest_size;
size_t uncompressed_hdr_size, compressed_hdr_size;
struct vpx_write_bit_buffer wb;
struct vpx_write_bit_buffer saved_wb;
#if CONFIG_BITSTREAM_DEBUG
bitstream_queue_reset_write();
#endif
vpx_wb_init(&wb, data, data_size);
write_uncompressed_header(cpi, &wb);
if (vpx_wb_has_error(&wb)) {
vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
"vp9_pack_bitstream: output buffer full");
}
// Skip the rest coding process if use show existing frame.
if (cpi->common.show_existing_frame) {
if (cm->show_existing_frame) {
uncompressed_hdr_size = vpx_wb_bytes_written(&wb);
data += uncompressed_hdr_size;
data_size -= uncompressed_hdr_size;
*size = data - dest;
return;
}
saved_wb = wb;
vpx_wb_write_literal(&wb, 0, 16); // don't know in advance first part. size
// don't know in advance compressed header size
vpx_wb_write_literal(&wb, 0, 16);
if (vpx_wb_has_error(&wb)) {
vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
"vp9_pack_bitstream: output buffer full");
}
uncompressed_hdr_size = vpx_wb_bytes_written(&wb);
data += uncompressed_hdr_size;
data_size -= uncompressed_hdr_size;
vpx_clear_system_state();
first_part_size = write_compressed_header(cpi, data);
data += first_part_size;
// TODO(jbb): Figure out what to do if first_part_size > 16 bits.
vpx_wb_write_literal(&saved_wb, (int)first_part_size, 16);
compressed_hdr_size = write_compressed_header(cpi, data, data_size);
data += compressed_hdr_size;
data_size -= compressed_hdr_size;
if (compressed_hdr_size > UINT16_MAX) {
vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
"compressed_hdr_size > 16 bits");
}
vpx_wb_write_literal(&saved_wb, (int)compressed_hdr_size, 16);
assert(!vpx_wb_has_error(&saved_wb));
data += encode_tiles(cpi, data);
data += encode_tiles(cpi, data, data_size);
*size = data - dest;
}

View File

@ -19,7 +19,7 @@ extern "C" {
typedef struct VP9BitstreamWorkerData {
uint8_t *dest;
int dest_size;
size_t dest_size;
vpx_writer bit_writer;
int tile_idx;
unsigned int max_mv_magnitude;
@ -35,7 +35,8 @@ int vp9_get_refresh_mask(VP9_COMP *cpi);
void vp9_bitstream_encode_tiles_buffer_dealloc(VP9_COMP *const cpi);
void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, size_t *size);
void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, size_t dest_size,
size_t *size);
static INLINE int vp9_preserve_existing_gf(VP9_COMP *cpi) {
return cpi->refresh_golden_frame && cpi->rc.is_src_frame_alt_ref &&

View File

@ -5853,7 +5853,12 @@ void vp9_init_tile_data(VP9_COMP *cpi) {
int tplist_count = 0;
if (cpi->tile_data == NULL || cpi->allocated_tiles < tile_cols * tile_rows) {
if (cpi->tile_data != NULL) vpx_free(cpi->tile_data);
if (cpi->tile_data != NULL) {
// Free the row mt memory in cpi->tile_data first.
vp9_row_mt_mem_dealloc(cpi);
vpx_free(cpi->tile_data);
}
cpi->allocated_tiles = 0;
CHECK_MEM_ERROR(
&cm->error, cpi->tile_data,
vpx_malloc(tile_cols * tile_rows * sizeof(*cpi->tile_data)));
@ -5883,9 +5888,9 @@ void vp9_init_tile_data(VP9_COMP *cpi) {
for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
TileInfo *tile_info = &this_tile->tile_info;
if (cpi->sf.adaptive_rd_thresh_row_mt &&
this_tile->row_base_thresh_freq_fact == NULL)
if (cpi->sf.adaptive_rd_thresh_row_mt) {
vp9_row_mt_alloc_rd_thresh(cpi, this_tile);
}
vp9_tile_init(tile_info, cm, tile_row, tile_col);
cpi->tile_tok[tile_row][tile_col] = pre_tok + tile_tok;

View File

@ -3901,7 +3901,7 @@ static void set_frame_size(VP9_COMP *cpi) {
}
if (!frame_is_intra_only(cm) && !has_valid_ref_frame) {
vpx_internal_error(
&cm->error, VPX_CODEC_CORRUPT_FRAME,
&cm->error, VPX_CODEC_ERROR,
"Can't find at least one reference frame with valid size");
}
@ -3973,7 +3973,7 @@ static YV12_BUFFER_CONFIG *svc_twostage_scale(
}
static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
uint8_t *dest) {
uint8_t *dest, size_t dest_size) {
VP9_COMMON *const cm = &cpi->common;
SVC *const svc = &cpi->svc;
int q = 0, bottom_index = 0, top_index = 0;
@ -4269,7 +4269,7 @@ static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
int frame_size = 0;
// Get an estimate of the encoded frame size.
save_coding_context(cpi);
vp9_pack_bitstream(cpi, dest, size);
vp9_pack_bitstream(cpi, dest, dest_size, size);
restore_coding_context(cpi);
frame_size = (int)(*size) << 3;
// Check if encoded frame will overshoot too much, and if so, set the q and
@ -4472,7 +4472,8 @@ static void rq_model_update(const RATE_QINDEX_HISTORY *rq_history,
}
#endif // CONFIG_RATE_CTRL
static void encode_with_recode_loop(VP9_COMP *cpi, size_t *size, uint8_t *dest
static void encode_with_recode_loop(VP9_COMP *cpi, size_t *size, uint8_t *dest,
size_t dest_size
#if CONFIG_RATE_CTRL
,
RATE_QINDEX_HISTORY *rq_history
@ -4665,7 +4666,8 @@ static void encode_with_recode_loop(VP9_COMP *cpi, size_t *size, uint8_t *dest
// to recode.
if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF) {
save_coding_context(cpi);
if (!cpi->sf.use_nonrd_pick_mode) vp9_pack_bitstream(cpi, dest, size);
if (!cpi->sf.use_nonrd_pick_mode)
vp9_pack_bitstream(cpi, dest, dest_size, size);
rc->projected_frame_size = (int)(*size) << 3;
@ -5173,7 +5175,7 @@ static void spatial_denoise_frame(VP9_COMP *cpi) {
#if !CONFIG_REALTIME_ONLY
static void vp9_try_disable_lookahead_aq(VP9_COMP *cpi, size_t *size,
uint8_t *dest) {
uint8_t *dest, size_t dest_size) {
if (cpi->common.seg.enabled)
if (ALT_REF_AQ_PROTECT_GAIN) {
size_t nsize = *size;
@ -5184,7 +5186,7 @@ static void vp9_try_disable_lookahead_aq(VP9_COMP *cpi, size_t *size,
save_coding_context(cpi);
vp9_disable_segmentation(&cpi->common.seg);
vp9_pack_bitstream(cpi, dest, &nsize);
vp9_pack_bitstream(cpi, dest, dest_size, &nsize);
restore_coding_context(cpi);
overhead = (int)*size - (int)nsize;
@ -5477,8 +5479,8 @@ static void update_encode_frame_result_simple_encode(
#endif // !CONFIG_REALTIME_ONLY
static void encode_frame_to_data_rate(
VP9_COMP *cpi, size_t *size, uint8_t *dest, unsigned int *frame_flags,
ENCODE_FRAME_RESULT *encode_frame_result) {
VP9_COMP *cpi, size_t *size, uint8_t *dest, size_t dest_size,
unsigned int *frame_flags, ENCODE_FRAME_RESULT *encode_frame_result) {
VP9_COMMON *const cm = &cpi->common;
const VP9EncoderConfig *const oxcf = &cpi->oxcf;
struct segmentation *const seg = &cm->seg;
@ -5585,16 +5587,17 @@ static void encode_frame_to_data_rate(
}
if (cpi->sf.recode_loop == DISALLOW_RECODE) {
if (!encode_without_recode_loop(cpi, size, dest)) return;
if (!encode_without_recode_loop(cpi, size, dest, dest_size)) return;
} else {
#if !CONFIG_REALTIME_ONLY
#if CONFIG_RATE_CTRL
encode_with_recode_loop(cpi, size, dest, &encode_frame_result->rq_history);
encode_with_recode_loop(cpi, size, dest, dest_size,
&encode_frame_result->rq_history);
#else // CONFIG_RATE_CTRL
#if CONFIG_COLLECT_COMPONENT_TIMING
start_timing(cpi, encode_with_recode_loop_time);
#endif
encode_with_recode_loop(cpi, size, dest);
encode_with_recode_loop(cpi, size, dest, dest_size);
#if CONFIG_COLLECT_COMPONENT_TIMING
end_timing(cpi, encode_with_recode_loop_time);
#endif
@ -5614,7 +5617,7 @@ static void encode_frame_to_data_rate(
#if !CONFIG_REALTIME_ONLY
// Disable segmentation if it decrease rate/distortion ratio
if (cpi->oxcf.aq_mode == LOOKAHEAD_AQ)
vp9_try_disable_lookahead_aq(cpi, size, dest);
vp9_try_disable_lookahead_aq(cpi, size, dest, dest_size);
#endif
#if CONFIG_VP9_TEMPORAL_DENOISING
@ -5671,7 +5674,7 @@ static void encode_frame_to_data_rate(
start_timing(cpi, vp9_pack_bitstream_time);
#endif
// build the bitstream
vp9_pack_bitstream(cpi, dest, size);
vp9_pack_bitstream(cpi, dest, dest_size, size);
#if CONFIG_COLLECT_COMPONENT_TIMING
end_timing(cpi, vp9_pack_bitstream_time);
#endif
@ -5862,32 +5865,33 @@ static void encode_frame_to_data_rate(
}
static void SvcEncode(VP9_COMP *cpi, size_t *size, uint8_t *dest,
unsigned int *frame_flags) {
size_t dest_size, unsigned int *frame_flags) {
vp9_rc_get_svc_params(cpi);
encode_frame_to_data_rate(cpi, size, dest, frame_flags,
encode_frame_to_data_rate(cpi, size, dest, dest_size, frame_flags,
/*encode_frame_result = */ NULL);
}
static void Pass0Encode(VP9_COMP *cpi, size_t *size, uint8_t *dest,
unsigned int *frame_flags) {
size_t dest_size, unsigned int *frame_flags) {
if (cpi->oxcf.rc_mode == VPX_CBR) {
vp9_rc_get_one_pass_cbr_params(cpi);
} else {
vp9_rc_get_one_pass_vbr_params(cpi);
}
encode_frame_to_data_rate(cpi, size, dest, frame_flags,
encode_frame_to_data_rate(cpi, size, dest, dest_size, frame_flags,
/*encode_frame_result = */ NULL);
}
#if !CONFIG_REALTIME_ONLY
static void Pass2Encode(VP9_COMP *cpi, size_t *size, uint8_t *dest,
unsigned int *frame_flags,
size_t dest_size, unsigned int *frame_flags,
ENCODE_FRAME_RESULT *encode_frame_result) {
cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED;
#if CONFIG_MISMATCH_DEBUG
mismatch_move_frame_idx_w();
#endif
encode_frame_to_data_rate(cpi, size, dest, frame_flags, encode_frame_result);
encode_frame_to_data_rate(cpi, size, dest, dest_size, frame_flags,
encode_frame_result);
}
#endif // !CONFIG_REALTIME_ONLY
@ -6300,8 +6304,8 @@ void vp9_init_encode_frame_result(ENCODE_FRAME_RESULT *encode_frame_result) {
}
int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
size_t *size, uint8_t *dest, int64_t *time_stamp,
int64_t *time_end, int flush,
size_t *size, uint8_t *dest, size_t dest_size,
int64_t *time_stamp, int64_t *time_end, int flush,
ENCODE_FRAME_RESULT *encode_frame_result) {
const VP9EncoderConfig *const oxcf = &cpi->oxcf;
VP9_COMMON *const cm = &cpi->common;
@ -6583,10 +6587,10 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
#if CONFIG_REALTIME_ONLY
(void)encode_frame_result;
if (cpi->use_svc) {
SvcEncode(cpi, size, dest, frame_flags);
SvcEncode(cpi, size, dest, dest_size, frame_flags);
} else {
// One pass encode
Pass0Encode(cpi, size, dest, frame_flags);
Pass0Encode(cpi, size, dest, dest_size, frame_flags);
}
#else // !CONFIG_REALTIME_ONLY
if (oxcf->pass == 1 && !cpi->use_svc) {
@ -6609,16 +6613,16 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
// Accumulate 2nd pass time in 2-pass case.
start_timing(cpi, Pass2Encode_time);
#endif
Pass2Encode(cpi, size, dest, frame_flags, encode_frame_result);
Pass2Encode(cpi, size, dest, dest_size, frame_flags, encode_frame_result);
vp9_twopass_postencode_update(cpi);
#if CONFIG_COLLECT_COMPONENT_TIMING
end_timing(cpi, Pass2Encode_time);
#endif
} else if (cpi->use_svc) {
SvcEncode(cpi, size, dest, frame_flags);
SvcEncode(cpi, size, dest, dest_size, frame_flags);
} else {
// One pass encode
Pass0Encode(cpi, size, dest, frame_flags);
Pass0Encode(cpi, size, dest, dest_size, frame_flags);
}
#endif // CONFIG_REALTIME_ONLY

View File

@ -339,6 +339,10 @@ typedef struct TileDataEnc {
// Used for adaptive_rd_thresh with row multithreading
int *row_base_thresh_freq_fact;
// The value of sb_rows when row_base_thresh_freq_fact is allocated.
// The row_base_thresh_freq_fact array has sb_rows * BLOCK_SIZES * MAX_MODES
// elements.
int sb_rows;
MV firstpass_top_mv;
} TileDataEnc;
@ -1221,8 +1225,8 @@ int vp9_receive_raw_frame(VP9_COMP *cpi, vpx_enc_frame_flags_t frame_flags,
int64_t end_time);
int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
size_t *size, uint8_t *dest, int64_t *time_stamp,
int64_t *time_end, int flush,
size_t *size, uint8_t *dest, size_t dest_size,
int64_t *time_stamp, int64_t *time_end, int flush,
ENCODE_FRAME_RESULT *encode_frame_result);
int vp9_get_preview_raw_frame(VP9_COMP *cpi, YV12_BUFFER_CONFIG *dest,

View File

@ -19,6 +19,7 @@ extern "C" {
#define MAX_NUM_TILE_COLS (1 << 6)
#define MAX_NUM_TILE_ROWS 4
#define MAX_NUM_THREADS 64
struct VP9_COMP;
struct ThreadData;

View File

@ -2302,6 +2302,44 @@ static void define_gf_group_structure(VP9_COMP *cpi) {
gf_group->gf_group_size = frame_index;
}
// Fills cpi->twopass.gf_group from an externally supplied GOP decision.
//
// When the current frame is not a key frame, slot 0 is first set up through
// set_gf_overlay_frame_type() using rc->source_alt_ref_active. Slots
// 1 .. gop_coding_frames - 1 then take their update type from
// gop_decision->update_type[]; that array is read at the same index for a key
// frame and at index - 1 otherwise. ARF, LF and overlay updates additionally
// set the per-frame rf_level / layer_depth / arf_src_offset / frame_gop_index
// fields as shown below; other update types only have update_type stored.
// max_layer_depth is always set to 2.
static void ext_rc_define_gf_group_structure(
    VP9_COMP *cpi, vpx_rc_gop_decision_t *gop_decision) {
  RATE_CONTROL *const rc = &cpi->rc;
  GF_GROUP *const gf_group = &cpi->twopass.gf_group;
  const int is_key_frame = cpi->common.frame_type == KEY_FRAME;
  // gop_decision->update_type[] is offset by one slot relative to gf_group
  // when the group does not start with a key frame.
  const int ext_index_offset = is_key_frame ? 0 : -1;

  if (!is_key_frame) {
    set_gf_overlay_frame_type(gf_group, 0, rc->source_alt_ref_active);
  }

  for (int idx = 1; idx < gop_decision->gop_coding_frames; idx++) {
    const vpx_rc_frame_update_type_t update_type =
        gop_decision->update_type[idx + ext_index_offset];
    gf_group->update_type[idx] = (FRAME_UPDATE_TYPE)update_type;

    switch (update_type) {
      case VPX_RC_ARF_UPDATE:
        gf_group->rf_level[idx] = GF_ARF_STD;
        gf_group->layer_depth[idx] = 1;
        gf_group->arf_src_offset[idx] =
            (unsigned char)(rc->baseline_gf_interval - 1);
        gf_group->frame_gop_index[idx] = rc->baseline_gf_interval;
        break;
      case VPX_RC_LF_UPDATE:
        gf_group->frame_gop_index[idx] = idx;
        gf_group->arf_src_offset[idx] = 0;
        gf_group->rf_level[idx] = INTER_NORMAL;
        gf_group->layer_depth[idx] = 2;
        break;
      case VPX_RC_OVERLAY_UPDATE:
        set_gf_overlay_frame_type(gf_group, idx, rc->source_alt_ref_pending);
        gf_group->arf_src_offset[idx] = 0;
        gf_group->frame_gop_index[idx] = rc->baseline_gf_interval;
        break;
      default:
        // No additional per-frame fields are written for other update types.
        break;
    }
  }

  gf_group->max_layer_depth = 2;
}
static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits,
int gf_arf_bits) {
VP9EncoderConfig *const oxcf = &cpi->oxcf;
@ -3604,7 +3642,7 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
rc->baseline_gf_interval =
gop_decision.gop_coding_frames - rc->source_alt_ref_pending;
rc->frames_till_gf_update_due = rc->baseline_gf_interval;
define_gf_group_structure(cpi);
ext_rc_define_gf_group_structure(cpi, &gop_decision);
}
} else {
// Keyframe and section processing.

View File

@ -55,16 +55,23 @@ void *vp9_enc_grp_get_next_job(MultiThreadHandle *multi_thread_ctxt,
void vp9_row_mt_alloc_rd_thresh(VP9_COMP *const cpi,
TileDataEnc *const this_tile) {
VP9_COMMON *const cm = &cpi->common;
const int sb_rows =
(mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2) + 1;
const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
int i;
if (this_tile->row_base_thresh_freq_fact != NULL) {
if (sb_rows <= this_tile->sb_rows) {
return;
}
vpx_free(this_tile->row_base_thresh_freq_fact);
this_tile->row_base_thresh_freq_fact = NULL;
}
CHECK_MEM_ERROR(
&cm->error, this_tile->row_base_thresh_freq_fact,
(int *)vpx_calloc(sb_rows * BLOCK_SIZES * MAX_MODES,
sizeof(*(this_tile->row_base_thresh_freq_fact))));
for (i = 0; i < sb_rows * BLOCK_SIZES * MAX_MODES; i++)
this_tile->row_base_thresh_freq_fact[i] = RD_THRESH_INIT_FACT;
this_tile->sb_rows = sb_rows;
}
void vp9_row_mt_mem_alloc(VP9_COMP *cpi) {
@ -101,13 +108,6 @@ void vp9_row_mt_mem_alloc(VP9_COMP *cpi) {
for (tile_col = 0; tile_col < tile_cols; tile_col++) {
TileDataEnc *this_tile = &cpi->tile_data[tile_col];
vp9_row_mt_sync_mem_alloc(&this_tile->row_mt_sync, cm, jobs_per_tile_col);
if (cpi->sf.adaptive_rd_thresh_row_mt) {
if (this_tile->row_base_thresh_freq_fact != NULL) {
vpx_free(this_tile->row_base_thresh_freq_fact);
this_tile->row_base_thresh_freq_fact = NULL;
}
vp9_row_mt_alloc_rd_thresh(cpi, this_tile);
}
}
// Assign the sync pointer of tile row zero for every tile row > 0
@ -136,14 +136,17 @@ void vp9_row_mt_mem_dealloc(VP9_COMP *cpi) {
#endif
// Deallocate memory for job queue
if (multi_thread_ctxt->job_queue) vpx_free(multi_thread_ctxt->job_queue);
if (multi_thread_ctxt->job_queue) {
vpx_free(multi_thread_ctxt->job_queue);
multi_thread_ctxt->job_queue = NULL;
}
#if CONFIG_MULTITHREAD
// Destroy mutex for each tile
for (tile_col = 0; tile_col < multi_thread_ctxt->allocated_tile_cols;
tile_col++) {
RowMTInfo *row_mt_info = &multi_thread_ctxt->row_mt_info[tile_col];
if (row_mt_info) pthread_mutex_destroy(&row_mt_info->job_mutex);
pthread_mutex_destroy(&row_mt_info->job_mutex);
}
#endif
@ -169,6 +172,10 @@ void vp9_row_mt_mem_dealloc(VP9_COMP *cpi) {
}
}
#endif
multi_thread_ctxt->allocated_tile_cols = 0;
multi_thread_ctxt->allocated_tile_rows = 0;
multi_thread_ctxt->allocated_vert_unit_rows = 0;
}
void vp9_multi_thread_tile_init(VP9_COMP *cpi) {

View File

@ -1698,7 +1698,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
MV_REFERENCE_FRAME usable_ref_frame, second_ref_frame;
int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
uint8_t mode_checked[MB_MODE_COUNT][MAX_REF_FRAMES];
struct buf_2d yv12_mb[4][MAX_MB_PLANE];
struct buf_2d yv12_mb[4][MAX_MB_PLANE] = { 0 };
RD_COST this_rdc, best_rdc;
// var_y and sse_y are saved to be used in skipping checking
unsigned int var_y = UINT_MAX;

View File

@ -3435,6 +3435,14 @@ int vp9_active_edge_sb(VP9_COMP *cpi, int mi_row, int mi_col) {
}
#if !CONFIG_REALTIME_ONLY
// Sets every entry of the MB_MODE_COUNT x MAX_REF_FRAMES motion vector table
// to INVALID_MV.
// NOTE(review): no prototype for this function is visible in this hunk and
// the name has no vp9_ prefix; if it is only used in this file, consider
// giving it internal linkage — confirm against the rest of the tree.
void init_frame_mv(int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES]) {
  for (int m = 0; m < MB_MODE_COUNT; ++m) {
    int_mv *const mode_row = frame_mv[m];
    for (int rf = 0; rf < MAX_REF_FRAMES; ++rf) {
      mode_row[rf].as_int = INVALID_MV;
    }
  }
}
void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data,
MACROBLOCK *x, int mi_row, int mi_col,
RD_COST *rd_cost, BLOCK_SIZE bsize,
@ -3452,7 +3460,7 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data,
unsigned char segment_id = mi->segment_id;
int comp_pred, i, k;
int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
struct buf_2d yv12_mb[4][MAX_MB_PLANE];
struct buf_2d yv12_mb[4][MAX_MB_PLANE] = { 0 };
int_mv single_newmv[MAX_REF_FRAMES] = { { 0 } };
INTERP_FILTER single_inter_filter[MB_MODE_COUNT][MAX_REF_FRAMES];
int single_skippable[MB_MODE_COUNT][MAX_REF_FRAMES];
@ -3530,6 +3538,8 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data,
rd_cost->rate = INT_MAX;
init_frame_mv(frame_mv);
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
x->pred_mv_sad[ref_frame] = INT_MAX;
if ((cpi->ref_frame_flags & ref_frame_to_flag(ref_frame)) &&
@ -4297,7 +4307,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, TileDataEnc *tile_data,
unsigned char segment_id = mi->segment_id;
int comp_pred, i;
int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
struct buf_2d yv12_mb[4][MAX_MB_PLANE];
struct buf_2d yv12_mb[4][MAX_MB_PLANE] = { 0 };
int64_t best_rd = best_rd_so_far;
int64_t best_yrd = best_rd_so_far; // FIXME(rbultje) more precise
int64_t best_pred_diff[REFERENCE_MODES];

View File

@ -306,7 +306,7 @@ typedef struct SPEED_FEATURES {
// Turned off when (row_mt_bit_exact == 1 && adaptive_rd_thresh_row_mt == 0).
int adaptive_rd_thresh;
// Flag to use adaptive_rd_thresh when row-mt it enabled, only for non-rd
// Flag to use adaptive_rd_thresh when row-mt is enabled, only for non-rd
// pickmode.
int adaptive_rd_thresh_row_mt;

View File

@ -305,7 +305,9 @@ int VP9RateControlRTC::GetLoopfilterLevel() const {
bool VP9RateControlRTC::GetSegmentationData(
VP9SegmentationData *segmentation_data) const {
if (!cpi_->cyclic_refresh->apply_cyclic_refresh) return false;
if (!cpi_->cyclic_refresh || !cpi_->cyclic_refresh->apply_cyclic_refresh) {
return false;
}
segmentation_data->segmentation_map = cpi_->segmentation_map;
segmentation_data->segmentation_map_size =

View File

@ -502,6 +502,7 @@ static bool init_encode_frame_result(EncodeFrameResult *encode_frame_result,
encode_frame_result->coding_data.reset(
new (std::nothrow) uint8_t[max_coding_data_byte_size]);
encode_frame_result->max_coding_data_byte_size = max_coding_data_byte_size;
encode_frame_result->num_rows_4x4 = get_num_unit_4x4(frame_height);
encode_frame_result->num_cols_4x4 = get_num_unit_4x4(frame_width);
@ -512,6 +513,7 @@ static bool init_encode_frame_result(EncodeFrameResult *encode_frame_result,
encode_frame_result->tpl_stats_info.resize(MAX_LAG_BUFFERS);
if (encode_frame_result->coding_data.get() == nullptr) {
encode_frame_result->max_coding_data_byte_size = 0;
return false;
}
return init_image_buffer(&encode_frame_result->coded_frame, frame_width,
@ -919,7 +921,7 @@ void SimpleEncode::ComputeFirstPassStats() {
ENCODE_FRAME_RESULT encode_frame_info;
vp9_init_encode_frame_result(&encode_frame_info);
// TODO(angiebird): Call vp9_first_pass directly
vp9_get_compressed_data(impl_ptr_->cpi, &frame_flags, &size, nullptr,
vp9_get_compressed_data(impl_ptr_->cpi, &frame_flags, &size, nullptr, 0,
&time_stamp, &time_end, flush,
&encode_frame_info);
// vp9_get_compressed_data only generates first pass stats not
@ -1205,8 +1207,9 @@ void SimpleEncode::EncodeFrame(EncodeFrameResult *encode_frame_result) {
&encode_frame_info.coded_frame);
vp9_get_compressed_data(cpi, &frame_flags,
&encode_frame_result->coding_data_byte_size,
encode_frame_result->coding_data.get(), &time_stamp,
&time_end, flush, &encode_frame_info);
encode_frame_result->coding_data.get(),
encode_frame_result->max_coding_data_byte_size,
&time_stamp, &time_end, flush, &encode_frame_info);
if (out_file_ != nullptr) {
ivf_write_frame_header(out_file_, time_stamp,
encode_frame_result->coding_data_byte_size);
@ -1220,10 +1223,8 @@ void SimpleEncode::EncodeFrame(EncodeFrameResult *encode_frame_result) {
fprintf(stderr, "Coding data size <= 0\n");
abort();
}
const size_t max_coding_data_byte_size =
get_max_coding_data_byte_size(frame_width_, frame_height_);
if (encode_frame_result->coding_data_byte_size >
max_coding_data_byte_size) {
encode_frame_result->max_coding_data_byte_size) {
fprintf(stderr, "Coding data size exceeds the maximum.\n");
abort();
}

View File

@ -263,6 +263,7 @@ struct EncodeFrameResult {
// The EncodeFrame will allocate a buffer, write the coding data into the
// buffer and give the ownership of the buffer to coding_data.
std::unique_ptr<unsigned char[]> coding_data;
size_t max_coding_data_byte_size;
double psnr;
uint64_t sse;
int quantize_index;

View File

@ -19,11 +19,11 @@
#include "vpx_dsp/psnr.h"
#include "vpx_ports/static_assert.h"
#include "vpx_ports/system_state.h"
#include "vpx_util/vpx_thread.h"
#include "vpx_util/vpx_timestamp.h"
#include "vpx/internal/vpx_codec_internal.h"
#include "./vpx_version.h"
#include "vp9/encoder/vp9_encoder.h"
#include "vp9/encoder/vp9_ethread.h"
#include "vpx/vp8cx.h"
#include "vp9/common/vp9_alloccommon.h"
#include "vp9/common/vp9_scale.h"
@ -1436,22 +1436,13 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx,
timebase_units_to_ticks(timebase_in_ts, pts_end);
res = image2yuvconfig(img, &sd);
if (sd.y_width != ctx->cfg.g_w || sd.y_height != ctx->cfg.g_h) {
/* from vpx_encoder.h for g_w/g_h:
"Note that the frames passed as input to the encoder must have this
resolution"
*/
ctx->base.err_detail = "Invalid input frame resolution";
res = VPX_CODEC_INVALID_PARAM;
} else {
// Store the original flags in to the frame buffer. Will extract the
// key frame flag when we actually encode this frame.
if (vp9_receive_raw_frame(cpi, flags | ctx->next_frame_flags, &sd,
// Store the original flags in to the frame buffer. Will extract the
// key frame flag when we actually encode this frame.
if (vp9_receive_raw_frame(cpi, flags | ctx->next_frame_flags, &sd,
dst_time_stamp, dst_end_time_stamp)) {
res = update_error_state(ctx, &cpi->common.error);
}
ctx->next_frame_flags = 0;
res = update_error_state(ctx, &cpi->common.error);
}
ctx->next_frame_flags = 0;
}
cx_data = ctx->cx_data;
@ -1459,13 +1450,14 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx,
/* Any pending invisible frames? */
if (ctx->pending_cx_data) {
assert(cx_data_sz >= ctx->pending_cx_data_sz);
memmove(cx_data, ctx->pending_cx_data, ctx->pending_cx_data_sz);
ctx->pending_cx_data = cx_data;
cx_data += ctx->pending_cx_data_sz;
cx_data_sz -= ctx->pending_cx_data_sz;
/* TODO: this is a minimal check, the underlying codec doesn't respect
* the buffer size anyway.
/* TODO(webm:1844): this is a minimal check, the underlying codec doesn't
* respect the buffer size anyway.
*/
if (cx_data_sz < ctx->cx_data_sz / 2) {
vpx_internal_error(&cpi->common.error, VPX_CODEC_ERROR,
@ -1484,9 +1476,9 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx,
ENCODE_FRAME_RESULT encode_frame_result;
vp9_init_encode_frame_result(&encode_frame_result);
// TODO(angiebird): Call vp9_first_pass directly
ret = vp9_get_compressed_data(cpi, &lib_flags, &size, cx_data,
&dst_time_stamp, &dst_end_time_stamp,
!img, &encode_frame_result);
ret = vp9_get_compressed_data(
cpi, &lib_flags, &size, cx_data, cx_data_sz, &dst_time_stamp,
&dst_end_time_stamp, !img, &encode_frame_result);
assert(size == 0); // There is no compressed data in the first pass
(void)ret;
assert(ret == 0);
@ -1510,8 +1502,9 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx,
vp9_init_encode_frame_result(&encode_frame_result);
while (cx_data_sz >= ctx->cx_data_sz / 2 &&
-1 != vp9_get_compressed_data(cpi, &lib_flags, &size, cx_data,
&dst_time_stamp, &dst_end_time_stamp,
!img, &encode_frame_result)) {
cx_data_sz, &dst_time_stamp,
&dst_end_time_stamp, !img,
&encode_frame_result)) {
// Pack psnr pkt
if (size > 0 && !cpi->use_svc) {
// TODO(angiebird): Figure out why we don't need psnr pkt when
@ -1528,7 +1521,7 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx,
if (!cpi->common.show_frame ||
(cpi->use_svc && cpi->svc.spatial_layer_id <
cpi->svc.number_spatial_layers - 1)) {
if (ctx->pending_cx_data == 0) ctx->pending_cx_data = cx_data;
if (ctx->pending_cx_data == NULL) ctx->pending_cx_data = cx_data;
ctx->pending_cx_data_sz += size;
if (size)
ctx->pending_frame_sizes[ctx->pending_frame_count++] = size;

View File

@ -14,6 +14,3 @@ text vpx_img_flip
text vpx_img_free
text vpx_img_set_rect
text vpx_img_wrap
text vpx_free_tpl_gop_stats
text vpx_read_tpl_gop_stats
text vpx_write_tpl_gop_stats

View File

@ -8,6 +8,7 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include <assert.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
@ -21,14 +22,23 @@ static vpx_image_t *img_alloc_helper(vpx_image_t *img, vpx_img_fmt_t fmt,
unsigned int buf_align,
unsigned int stride_align,
unsigned char *img_data) {
unsigned int h, w, s, xcs, ycs, bps;
unsigned int stride_in_bytes;
unsigned int h, w, xcs, ycs, bps;
uint64_t s;
int stride_in_bytes;
unsigned int align;
if (img != NULL) memset(img, 0, sizeof(vpx_image_t));
if (fmt == VPX_IMG_FMT_NONE) goto fail;
/* Impose maximum values on input parameters so that this function can
* perform arithmetic operations without worrying about overflows.
*/
if (d_w > 0x08000000 || d_h > 0x08000000 || buf_align > 65536 ||
stride_align > 65536) {
goto fail;
}
/* Treat align==0 like align==1 */
if (!buf_align) buf_align = 1;
@ -78,13 +88,28 @@ static vpx_image_t *img_alloc_helper(vpx_image_t *img, vpx_img_fmt_t fmt,
default: ycs = 0; break;
}
/* Calculate storage sizes. If the buffer was allocated externally, the width
* and height shouldn't be adjusted. */
w = d_w;
h = d_h;
s = (fmt & VPX_IMG_FMT_PLANAR) ? w : bps * w / 8;
s = (s + stride_align - 1) & ~(stride_align - 1);
stride_in_bytes = (fmt & VPX_IMG_FMT_HIGHBITDEPTH) ? s * 2 : s;
/* Calculate storage sizes. */
if (img_data) {
/* If the buffer was allocated externally, the width and height shouldn't
* be adjusted. */
w = d_w;
h = d_h;
} else {
/* Calculate storage sizes given the chroma subsampling */
align = (1 << xcs) - 1;
w = (d_w + align) & ~align;
assert(d_w <= w);
align = (1 << ycs) - 1;
h = (d_h + align) & ~align;
assert(d_h <= h);
}
s = (fmt & VPX_IMG_FMT_PLANAR) ? w : (uint64_t)bps * w / 8;
s = (fmt & VPX_IMG_FMT_HIGHBITDEPTH) ? s * 2 : s;
s = (s + stride_align - 1) & ~((uint64_t)stride_align - 1);
if (s > INT_MAX) goto fail;
stride_in_bytes = (int)s;
s = (fmt & VPX_IMG_FMT_HIGHBITDEPTH) ? s / 2 : s;
/* Allocate the new image */
if (!img) {
@ -99,15 +124,6 @@ static vpx_image_t *img_alloc_helper(vpx_image_t *img, vpx_img_fmt_t fmt,
if (!img_data) {
uint64_t alloc_size;
/* Calculate storage sizes given the chroma subsampling */
align = (1 << xcs) - 1;
w = (d_w + align) & ~align;
align = (1 << ycs) - 1;
h = (d_h + align) & ~align;
s = (fmt & VPX_IMG_FMT_PLANAR) ? w : bps * w / 8;
s = (s + stride_align - 1) & ~(stride_align - 1);
stride_in_bytes = (fmt & VPX_IMG_FMT_HIGHBITDEPTH) ? s * 2 : s;
alloc_size = (fmt & VPX_IMG_FMT_PLANAR) ? (uint64_t)h * s * bps / 8
: (uint64_t)h * s;
@ -148,8 +164,8 @@ vpx_image_t *vpx_img_alloc(vpx_image_t *img, vpx_img_fmt_t fmt,
vpx_image_t *vpx_img_wrap(vpx_image_t *img, vpx_img_fmt_t fmt, unsigned int d_w,
unsigned int d_h, unsigned int stride_align,
unsigned char *img_data) {
/* By setting buf_align = 1, we don't change buffer alignment in this
* function. */
/* Set buf_align = 1. It is ignored by img_alloc_helper because img_data is
* not NULL. */
return img_alloc_helper(img, fmt, d_w, d_h, 1, stride_align, img_data);
}
@ -172,34 +188,33 @@ int vpx_img_set_rect(vpx_image_t *img, unsigned int x, unsigned int y,
if (img->fmt & VPX_IMG_FMT_HAS_ALPHA) {
img->planes[VPX_PLANE_ALPHA] =
data + x * bytes_per_sample + y * img->stride[VPX_PLANE_ALPHA];
data += img->h * img->stride[VPX_PLANE_ALPHA];
data += (size_t)img->h * img->stride[VPX_PLANE_ALPHA];
}
img->planes[VPX_PLANE_Y] =
data + x * bytes_per_sample + y * img->stride[VPX_PLANE_Y];
data += img->h * img->stride[VPX_PLANE_Y];
data += (size_t)img->h * img->stride[VPX_PLANE_Y];
unsigned int uv_x = x >> img->x_chroma_shift;
unsigned int uv_y = y >> img->y_chroma_shift;
if (img->fmt == VPX_IMG_FMT_NV12) {
img->planes[VPX_PLANE_U] =
data + (x >> img->x_chroma_shift) +
(y >> img->y_chroma_shift) * img->stride[VPX_PLANE_U];
data + uv_x + uv_y * img->stride[VPX_PLANE_U];
img->planes[VPX_PLANE_V] = img->planes[VPX_PLANE_U] + 1;
} else if (!(img->fmt & VPX_IMG_FMT_UV_FLIP)) {
img->planes[VPX_PLANE_U] =
data + (x >> img->x_chroma_shift) * bytes_per_sample +
(y >> img->y_chroma_shift) * img->stride[VPX_PLANE_U];
data += (img->h >> img->y_chroma_shift) * img->stride[VPX_PLANE_U];
data + uv_x * bytes_per_sample + uv_y * img->stride[VPX_PLANE_U];
data +=
(size_t)(img->h >> img->y_chroma_shift) * img->stride[VPX_PLANE_U];
img->planes[VPX_PLANE_V] =
data + (x >> img->x_chroma_shift) * bytes_per_sample +
(y >> img->y_chroma_shift) * img->stride[VPX_PLANE_V];
data + uv_x * bytes_per_sample + uv_y * img->stride[VPX_PLANE_V];
} else {
img->planes[VPX_PLANE_V] =
data + (x >> img->x_chroma_shift) * bytes_per_sample +
(y >> img->y_chroma_shift) * img->stride[VPX_PLANE_V];
data += (img->h >> img->y_chroma_shift) * img->stride[VPX_PLANE_V];
data + uv_x * bytes_per_sample + uv_y * img->stride[VPX_PLANE_V];
data +=
(size_t)(img->h >> img->y_chroma_shift) * img->stride[VPX_PLANE_V];
img->planes[VPX_PLANE_U] =
data + (x >> img->x_chroma_shift) * bytes_per_sample +
(y >> img->y_chroma_shift) * img->stride[VPX_PLANE_U];
data + uv_x * bytes_per_sample + uv_y * img->stride[VPX_PLANE_U];
}
}
return 0;

View File

@ -1,107 +0,0 @@
/*
* Copyright (c) 2023 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <stdlib.h>
#include "vpx/vpx_codec.h"
#include "vpx/vpx_tpl.h"
#include "vpx_mem/vpx_mem.h"
/* Returns VPX_CODEC_ERROR from the enclosing function when a fprintf-style
 * call reports failure (negative return value).
 *
 * The argument is parenthesized so that an expression containing an operator
 * of lower precedence than '<' (e.g. a conditional expression) is compared as
 * a whole. The do/while(0) wrapper makes the macro behave as one statement.
 */
#define CHECK_FPRINTF_ERROR(expr) \
  do {                            \
    if ((expr) < 0) {             \
      return VPX_CODEC_ERROR;     \
    }                             \
  } while (0)

/* Returns VPX_CODEC_ERROR from the enclosing function when a fscanf-style
 * call did not convert exactly |expected_value| items.
 *
 * Both arguments are parenthesized for the same precedence reason as above.
 * NOTE: this macro returns immediately; it must not be used once the caller
 * owns allocations that need to be released on failure.
 */
#define CHECK_FSCANF_ERROR(expr, expected_value) \
  do {                                           \
    if ((expr) != (expected_value)) {            \
      return VPX_CODEC_ERROR;                    \
    }                                            \
  } while (0)
/* Writes |tpl_gop_stats| to |tpl_file| as text.
 *
 * Layout: one line holding the frame count; then, per frame, one line with
 * "<width> <height> <num_blocks>" followed by one line per block with
 * inter_cost, intra_cost, mv_c, mv_r, srcrf_dist, srcrf_rate and
 * ref_frame_index.
 *
 * Returns VPX_CODEC_INVALID_PARAM if either pointer is NULL, VPX_CODEC_ERROR
 * as soon as any fprintf call fails, and VPX_CODEC_OK otherwise.
 */
vpx_codec_err_t vpx_write_tpl_gop_stats(FILE *tpl_file,
                                        const VpxTplGopStats *tpl_gop_stats) {
  int frame_idx;

  if (tpl_file == NULL || tpl_gop_stats == NULL) {
    return VPX_CODEC_INVALID_PARAM;
  }

  /* Header: number of frames in the GOP. */
  CHECK_FPRINTF_ERROR(fprintf(tpl_file, "%d\n", tpl_gop_stats->size));

  for (frame_idx = 0; frame_idx < tpl_gop_stats->size; ++frame_idx) {
    const VpxTplFrameStats *const frame =
        &tpl_gop_stats->frame_stats_list[frame_idx];
    const int block_count = frame->num_blocks;
    int block_idx;

    /* Per-frame header. */
    CHECK_FPRINTF_ERROR(fprintf(tpl_file, "%d %d %d\n", frame->frame_width,
                                frame->frame_height, block_count));

    /* One record per block. */
    for (block_idx = 0; block_idx < block_count; ++block_idx) {
      const VpxTplBlockStats *const blk = &frame->block_stats_list[block_idx];
      CHECK_FPRINTF_ERROR(
          fprintf(tpl_file,
                  "%" PRId64 " %" PRId64 " %" PRId16 " %" PRId16 " %" PRId64
                  " %" PRId64 " %d\n",
                  blk->inter_cost, blk->intra_cost, blk->mv_c, blk->mv_r,
                  blk->srcrf_dist, blk->srcrf_rate, blk->ref_frame_index));
    }
  }

  return VPX_CODEC_OK;
}
/* Reads TPL stats previously written by vpx_write_tpl_gop_stats() from
 * |tpl_file| into |tpl_gop_stats|, allocating the frame list and each frame's
 * block list.
 *
 * Returns:
 *   VPX_CODEC_INVALID_PARAM  if either pointer is NULL.
 *   VPX_CODEC_ERROR          if the file content cannot be parsed or holds a
 *                            negative count.
 *   VPX_CODEC_MEM_ERROR      if an allocation fails.
 *   VPX_CODEC_OK             on success; the caller then owns the lists and
 *                            releases them with vpx_free_tpl_gop_stats().
 *
 * If the frame count itself cannot be read, |tpl_gop_stats| is not touched.
 * On every later failure all memory allocated by this call is released and
 * |tpl_gop_stats| is left empty (size == 0, frame_stats_list == NULL), so a
 * subsequent vpx_free_tpl_gop_stats() by the caller is harmless.
 */
vpx_codec_err_t vpx_read_tpl_gop_stats(FILE *tpl_file,
                                       VpxTplGopStats *tpl_gop_stats) {
  int i, frame_list_size;
  vpx_codec_err_t status;

  if (tpl_file == NULL || tpl_gop_stats == NULL) return VPX_CODEC_INVALID_PARAM;

  /* Nothing has been allocated yet, so an immediate return cannot leak. */
  CHECK_FSCANF_ERROR(fscanf(tpl_file, "%d\n", &frame_list_size), 1);

  /* Start from an empty result so that no failure path below can leave a
   * non-zero size paired with a NULL or dangling list. */
  tpl_gop_stats->size = 0;
  tpl_gop_stats->frame_stats_list = NULL;

  /* A negative count can only come from a corrupt file; do not let it be
   * converted to a huge unsigned allocation request. */
  if (frame_list_size < 0) return VPX_CODEC_ERROR;

  tpl_gop_stats->frame_stats_list = (VpxTplFrameStats *)vpx_calloc(
      frame_list_size, sizeof(tpl_gop_stats->frame_stats_list[0]));
  if (tpl_gop_stats->frame_stats_list == NULL) {
    return VPX_CODEC_MEM_ERROR;
  }
  tpl_gop_stats->size = frame_list_size;

  for (i = 0; i < frame_list_size; i++) {
    VpxTplFrameStats *frame_stats = &tpl_gop_stats->frame_stats_list[i];
    int num_blocks, width, height, block;

    if (fscanf(tpl_file, "%d %d %d\n", &width, &height, &num_blocks) != 3 ||
        num_blocks < 0) {
      status = VPX_CODEC_ERROR;
      goto fail;
    }
    frame_stats->num_blocks = num_blocks;
    frame_stats->frame_width = width;
    frame_stats->frame_height = height;
    frame_stats->block_stats_list = (VpxTplBlockStats *)vpx_calloc(
        num_blocks, sizeof(frame_stats->block_stats_list[0]));
    if (frame_stats->block_stats_list == NULL) {
      status = VPX_CODEC_MEM_ERROR;
      goto fail;
    }

    for (block = 0; block < num_blocks; block++) {
      VpxTplBlockStats *block_stats = &frame_stats->block_stats_list[block];
      if (fscanf(tpl_file,
                 "%" SCNd64 " %" SCNd64 " %" SCNd16 " %" SCNd16 " %" SCNd64
                 " %" SCNd64 " %d\n",
                 &block_stats->inter_cost, &block_stats->intra_cost,
                 &block_stats->mv_c, &block_stats->mv_r,
                 &block_stats->srcrf_dist, &block_stats->srcrf_rate,
                 &block_stats->ref_frame_index) != 7) {
        status = VPX_CODEC_ERROR;
        goto fail;
      }
    }
  }
  return VPX_CODEC_OK;

fail:
  /* Frames that were never reached still carry the block_stats_list value
   * written by vpx_calloc (expected to be NULL), which the free routine
   * passes to vpx_free just as the original out-of-memory path did. */
  vpx_free_tpl_gop_stats(tpl_gop_stats);
  /* Clear the fields here as well so the result is empty regardless of
   * whether the free routine resets them. */
  tpl_gop_stats->frame_stats_list = NULL;
  tpl_gop_stats->size = 0;
  return status;
}
/* Releases the memory owned by |tpl_gop_stats|: every frame's block list and
 * then the frame list itself. The VpxTplGopStats object is not freed.
 *
 * Accepts NULL and an object whose frame_stats_list is NULL. After the call
 * the object is empty (frame_stats_list == NULL, size == 0), so calling this
 * function again on the same object is a no-op rather than a double free.
 */
void vpx_free_tpl_gop_stats(VpxTplGopStats *tpl_gop_stats) {
  int frame;
  if (tpl_gop_stats == NULL) return;

  /* Guard the per-frame loop: a NULL list with a non-zero size (e.g. after a
   * failed allocation) must not be dereferenced. */
  if (tpl_gop_stats->frame_stats_list != NULL) {
    for (frame = 0; frame < tpl_gop_stats->size; frame++) {
      vpx_free(tpl_gop_stats->frame_stats_list[frame].block_stats_list);
    }
    vpx_free(tpl_gop_stats->frame_stats_list);
  }

  /* Drop the stale pointer and count so the object can be safely reused or
   * freed again. */
  tpl_gop_stats->frame_stats_list = NULL;
  tpl_gop_stats->size = 0;
}

View File

@ -37,7 +37,6 @@ API_SRCS-yes += internal/vpx_codec_internal.h
API_SRCS-yes += internal/vpx_ratectrl_rtc.h
API_SRCS-yes += src/vpx_codec.c
API_SRCS-yes += src/vpx_image.c
API_SRCS-yes += src/vpx_tpl.c
API_SRCS-yes += vpx_codec.h
API_SRCS-yes += vpx_codec.mk
API_SRCS-yes += vpx_frame_buffer.h

View File

@ -1019,6 +1019,8 @@ typedef unsigned long vpx_enc_deadline_t;
*
* \param[in] ctx Pointer to this instance's context
* \param[in] img Image data to encode, NULL to flush.
* Encoding sample values outside the range
* [0..(1<<img->bit_depth)-1] is undefined behavior.
* \param[in] pts Presentation time stamp, in timebase units.
* \param[in] duration Duration to show frame, in timebase units.
* \param[in] flags Flags to use for encoding this frame.

View File

@ -28,6 +28,15 @@ extern "C" {
*/
#define VPX_EXT_RATECTRL_ABI_VERSION (5 + VPX_TPL_ABI_VERSION)
/*!\brief Corresponds to MAX_STATIC_GF_GROUP_LENGTH defined in
* vp9_ratectrl.h.
*/
#define VPX_RC_MAX_STATIC_GF_GROUP_LENGTH 250
/*!\brief Max number of ref frames returned by the external RC. Corresponds to
* MAX_REF_FRAMES defined in vp9_blockd.h. */
#define VPX_RC_MAX_REF_FRAMES 4
/*!\brief The control type of the inference API.
* In VPX_RC_QP mode, the external rate control model determines the
* quantization parameter (QP) for each frame.
@ -56,6 +65,29 @@ typedef enum vpx_ext_rc_mode {
VPX_RC_CQ = 2,
} vpx_ext_rc_mode_t;
/*!\brief Corresponds to FRAME_UPDATE_TYPE defined in vp9_firstpass.h.
*/
typedef enum vpx_rc_frame_update_type {
VPX_RC_INVALID_UPDATE_TYPE = -1,
VPX_RC_KF_UPDATE = 0,
VPX_RC_LF_UPDATE = 1,
VPX_RC_GF_UPDATE = 2,
VPX_RC_ARF_UPDATE = 3,
VPX_RC_OVERLAY_UPDATE = 4,
VPX_RC_MID_OVERLAY_UPDATE = 5,
VPX_RC_USE_BUF_FRAME = 6,
} vpx_rc_frame_update_type_t;
/*!\brief Name for the ref frames returned by the external RC. Corresponds to
* the ref frames defined in vp9_blockd.h. */
typedef enum vpx_rc_ref_name {
VPX_RC_INVALID_REF_FRAME = -1,
VPX_RC_INTRA_FRAME = 0,
VPX_RC_LAST_FRAME = 1,
VPX_RC_GOLDEN_FRAME = 2,
VPX_RC_ALTREF_FRAME = 3,
} vpx_rc_ref_name_t;
/*!\brief Abstract rate control model handler
*
* The encoder will receive the model handler from create_model() defined in
@ -318,75 +350,12 @@ typedef struct vpx_rc_config {
int base_qp; /**< base QP for leaf frames, 0-255 */
} vpx_rc_config_t;
/*!\brief Information passed to the external rate control model to
* help make GOP decisions.
/*!\brief Control what ref frame to use and its index.
*/
typedef struct vpx_rc_gop_info {
/*!
* Minimum allowed gf interval, fixed for the whole clip.
* Note that it will be modified to match vp9's level constraints
* in the encoder.
* The level constraint is defined in vp9_encoder.c:
* const Vp9LevelSpec vp9_level_defs[VP9_LEVELS].
*/
int min_gf_interval;
/*!
* Maximum allowed gf interval, fixed for the whole clip.
*/
int max_gf_interval;
/*!
* Minimum allowed gf interval for the current GOP, determined
* by the encoder.
*/
int active_min_gf_interval;
/*!
* Maximum allowed gf interval for the current GOP, determined
* by the encoder.
*/
int active_max_gf_interval;
/*!
* Whether to allow the use of alt ref, determined by the encoder.
* It is fixed for the entire encode.
* See function "is_altref_enabled" in vp9_encoder.h.
*/
int allow_alt_ref;
/*!
* Is the current frame a key frame.
*/
int is_key_frame;
/*!
* Does the previous gop use alt ref or not.
*/
int last_gop_use_alt_ref;
/*!
* Current frame distance to the last keyframe, e.g., if Nth frame is a key,
* then the value of the N+1 th frame is 1.
*/
int frames_since_key;
/*!
* Current frame distance to the next keyframe, e.g. if Nth frame is a key,
* then the value of frame N - 1 is 1.
*/
int frames_to_key;
/*!
* Number of lookahead source frames.
*/
int lag_in_frames;
/*!
* Display index (temporal stamp) of this frame in the whole clip,
* starts from zero.
*/
int show_index;
/*!
* Coding index of this frame in the whole clip, starts from zero.
*/
int coding_index;
/*!
* The index of the current gop, starts from zero, resets to zero
* when a keyframe is set.
*/
int gop_global_index;
} vpx_rc_gop_info_t;
typedef struct vpx_rc_ref_frame {
int index[VPX_RC_MAX_REF_FRAMES];
vpx_rc_ref_name_t name[VPX_RC_MAX_REF_FRAMES];
} vpx_rc_ref_frame_t;
/*!\brief The decision made by the external rate control model to set the
* group of picture.
@ -395,6 +364,14 @@ typedef struct vpx_rc_gop_decision {
int gop_coding_frames; /**< The number of frames of this GOP */
int use_alt_ref; /**< Whether to use alt ref for this GOP */
int use_key_frame; /**< Whether to set key frame for this GOP */
// Frame type for each frame in this GOP.
// This will be populated to |update_type| in GF_GROUP defined in
// vp9_firstpass.h
vpx_rc_frame_update_type_t update_type[VPX_RC_MAX_STATIC_GF_GROUP_LENGTH + 2];
// Ref frame buffer index to be updated for each frame in this GOP.
int update_ref_index[VPX_RC_MAX_STATIC_GF_GROUP_LENGTH + 2];
// Ref frame list to be used for each frame in this GOP.
vpx_rc_ref_frame_t ref_frame_list[VPX_RC_MAX_STATIC_GF_GROUP_LENGTH + 2];
} vpx_rc_gop_decision_t;
/*!\brief Create an external rate control model callback prototype

View File

@ -64,8 +64,12 @@ typedef enum vpx_color_space {
/*!\brief List of supported color range */
typedef enum vpx_color_range {
VPX_CR_STUDIO_RANGE = 0, /**< Y [16..235], UV [16..240] */
VPX_CR_FULL_RANGE = 1 /**< YUV/RGB [0..255] */
VPX_CR_STUDIO_RANGE = 0, /**<- Y [16..235], UV [16..240] (bit depth 8) */
/**<- Y [64..940], UV [64..960] (bit depth 10) */
/**<- Y [256..3760], UV [256..3840] (bit depth 12) */
VPX_CR_FULL_RANGE = 1 /**<- YUV/RGB [0..255] (bit depth 8) */
/**<- YUV/RGB [0..1023] (bit depth 10) */
/**<- YUV/RGB [0..4095] (bit depth 12) */
} vpx_color_range_t; /**< alias for enum vpx_color_range */
/**\brief Image Descriptor */
@ -132,10 +136,13 @@ typedef struct vpx_image_rect {
* is NULL, the storage for the descriptor will be
* allocated on the heap.
* \param[in] fmt Format for the image
* \param[in] d_w Width of the image
* \param[in] d_h Height of the image
* \param[in] d_w Width of the image. Must not exceed 0x08000000
* (2^27).
* \param[in] d_h Height of the image. Must not exceed 0x08000000
* (2^27).
* \param[in] align Alignment, in bytes, of the image buffer and
* each row in the image(stride).
* each row in the image (stride). Must not exceed
* 65536.
*
* \return Returns a pointer to the initialized image descriptor. If the img
* parameter is non-null, the value of the img parameter will be
@ -155,9 +162,12 @@ vpx_image_t *vpx_img_alloc(vpx_image_t *img, vpx_img_fmt_t fmt,
* parameter is NULL, the storage for the descriptor
* will be allocated on the heap.
* \param[in] fmt Format for the image
* \param[in] d_w Width of the image
* \param[in] d_h Height of the image
* \param[in] stride_align Alignment, in bytes, of each row in the image.
* \param[in] d_w Width of the image. Must not exceed 0x08000000
* (2^27).
* \param[in] d_h Height of the image. Must not exceed 0x08000000
* (2^27).
* \param[in] stride_align Alignment, in bytes, of each row in the image
* (stride). Must not exceed 65536.
* \param[in] img_data Storage to use for the image
*
* \return Returns a pointer to the initialized image descriptor. If the img

View File

@ -15,8 +15,6 @@
#ifndef VPX_VPX_VPX_TPL_H_
#define VPX_VPX_VPX_TPL_H_
#include <stdio.h>
#include "./vpx_integer.h"
#include "./vpx_codec.h"
@ -32,7 +30,7 @@ extern "C" {
* types, removing or reassigning enums, adding/removing/rearranging
* fields to structures
*/
#define VPX_TPL_ABI_VERSION (3) /**<\hideinitializer*/
#define VPX_TPL_ABI_VERSION 4 /**<\hideinitializer*/
/*!\brief Temporal dependency model stats for each block before propagation */
typedef struct VpxTplBlockStats {
@ -63,40 +61,6 @@ typedef struct VpxTplGopStats {
VpxTplFrameStats *frame_stats_list; /**< List of tpl stats for each frame */
} VpxTplGopStats;
/*!\brief Write VpxTplGopStats to file
*
* Accepts an opened file handle and writes \p tpl_gop_stats.
*
* \param[in] tpl_file A FILE pointer that's already been opened.
* \param[in] tpl_gop_stats VpxTplGopStats that contains TPL stats for the
* whole GOP.
*
* \return VPX_CODEC_OK if TPL stats are successfully written.
*/
vpx_codec_err_t vpx_write_tpl_gop_stats(FILE *tpl_file,
const VpxTplGopStats *tpl_gop_stats);
/*!\brief Read VpxTplGopStats from file
*
* Accepts an opened file handle and reads TPL stats and stores them into
* \p tpl_gop_stats. Allocates memory for TPL stats.
*
* \param[in] tpl_file A FILE pointer that's already been opened.
* \param[out] tpl_gop_stats VpxTplGopStats that contains TPL stats for the
* whole GOP.
*
* \return VPX_CODEC_OK if TPL stats are successfully read from file.
*/
vpx_codec_err_t vpx_read_tpl_gop_stats(FILE *tpl_file,
VpxTplGopStats *tpl_gop_stats);
/*!\brief Free the memory allocated for VpxTplGopStats
*
* \param[in] tpl_gop_stats VpxTplGopStats that contains TPL stats for the
* whole GOP.
*/
void vpx_free_tpl_gop_stats(VpxTplGopStats *tpl_gop_stats);
#ifdef __cplusplus
} // extern "C"
#endif

View File

@ -0,0 +1,46 @@
/*
* Copyright (c) 2024 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VPX_VPX_DSP_ARM_HIGHBD_CONVOLVE8_NEON_H_
#define VPX_VPX_DSP_ARM_HIGHBD_CONVOLVE8_NEON_H_
#include <arm_neon.h>
// 4-tap filter over four lanes.
//
// Computes s0 * filters[0] + s1 * filters[1] + s2 * filters[2] +
// s3 * filters[3] per lane in 32-bit precision, applies a rounding right
// shift by FILTER_BITS with saturating narrowing to unsigned 16 bits, and
// clamps each lane to |max|.
static INLINE uint16x4_t highbd_convolve4_4_neon(
    const int16x4_t s0, const int16x4_t s1, const int16x4_t s2,
    const int16x4_t s3, const int16x4_t filters, const uint16x4_t max) {
  // Widening multiply for tap 0, then widening multiply-accumulate for the
  // remaining taps.
  const int32x4_t tap0 = vmull_lane_s16(s0, filters, 0);
  const int32x4_t tap01 = vmlal_lane_s16(tap0, s1, filters, 1);
  const int32x4_t tap012 = vmlal_lane_s16(tap01, s2, filters, 2);
  const int32x4_t acc = vmlal_lane_s16(tap012, s3, filters, 3);

  // Round, shift and saturate to u16 before clamping.
  const uint16x4_t narrowed = vqrshrun_n_s32(acc, FILTER_BITS);
  return vmin_u16(narrowed, max);
}
// 4-tap filter over eight lanes.
//
// Same arithmetic as the four-lane variant, carried out separately on the low
// and high halves of each input vector: per lane, the four tap products are
// summed in 32-bit precision, rounded and shifted right by FILTER_BITS with
// saturating narrowing to unsigned 16 bits, and finally clamped to |max|.
static INLINE uint16x8_t highbd_convolve4_8_neon(
    const int16x8_t s0, const int16x8_t s1, const int16x8_t s2,
    const int16x8_t s3, const int16x4_t filters, const uint16x8_t max) {
  // Tap 0 starts both accumulators.
  int32x4_t acc_lo = vmull_lane_s16(vget_low_s16(s0), filters, 0);
  int32x4_t acc_hi = vmull_lane_s16(vget_high_s16(s0), filters, 0);

  // Taps 1..3 accumulate into the matching half.
  acc_lo = vmlal_lane_s16(acc_lo, vget_low_s16(s1), filters, 1);
  acc_hi = vmlal_lane_s16(acc_hi, vget_high_s16(s1), filters, 1);
  acc_lo = vmlal_lane_s16(acc_lo, vget_low_s16(s2), filters, 2);
  acc_hi = vmlal_lane_s16(acc_hi, vget_high_s16(s2), filters, 2);
  acc_lo = vmlal_lane_s16(acc_lo, vget_low_s16(s3), filters, 3);
  acc_hi = vmlal_lane_s16(acc_hi, vget_high_s16(s3), filters, 3);

  // Narrow each half, rejoin, then clamp.
  const uint16x4_t out_lo = vqrshrun_n_s32(acc_lo, FILTER_BITS);
  const uint16x4_t out_hi = vqrshrun_n_s32(acc_hi, FILTER_BITS);
  return vminq_u16(vcombine_u16(out_lo, out_hi), max);
}
#endif // VPX_VPX_DSP_ARM_HIGHBD_CONVOLVE8_NEON_H_

View File

@ -0,0 +1,99 @@
/*
* Copyright (c) 2024 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VPX_VPX_DSP_ARM_HIGHBD_CONVOLVE8_SVE_H_
#define VPX_VPX_DSP_ARM_HIGHBD_CONVOLVE8_SVE_H_
#include <arm_neon.h>
#include "vpx_dsp/arm/vpx_neon_sve_bridge.h"
// 4-tap filter producing four output lanes via the dot-product helper.
//
// The four 4-sample inputs are packed in pairs into 8-lane vectors, each pair
// is passed to vpx_dotq_lane_s16 with lane 0 of |filter| and a zero
// accumulator, and the 64-bit results are truncated to 32 bits. The combined
// vector is then rounded and shifted right by FILTER_BITS with saturating
// narrowing to unsigned 16 bits, and clamped to |max|.
// NOTE(review): presumably vpx_dotq_lane_s16 yields one 64-bit dot product per
// group of four 16-bit samples - confirm in vpx_neon_sve_bridge.h.
static INLINE uint16x4_t highbd_convolve4_4_sve(const int16x4_t s[4],
                                                const int16x8_t filter,
                                                const uint16x4_t max) {
  const int64x2_t zero = vdupq_n_s64(0);

  const int64x2_t dot01 =
      vpx_dotq_lane_s16(zero, vcombine_s16(s[0], s[1]), filter, 0);
  const int64x2_t dot23 =
      vpx_dotq_lane_s16(zero, vcombine_s16(s[2], s[3]), filter, 0);

  const int32x4_t acc = vcombine_s32(vmovn_s64(dot01), vmovn_s64(dot23));
  return vmin_u16(vqrshrun_n_s32(acc, FILTER_BITS), max);
}
// 4-tap filter producing eight output lanes via the dot-product helper.
//
// Each of the four 8-lane inputs is passed to vpx_dotq_lane_s16 with lane 0
// of |filter| and a zero accumulator. The 64-bit results are truncated to 32
// bits, rounded and shifted right by FILTER_BITS with saturating narrowing to
// unsigned 16 bits, rearranged with vpx_tbl_u16 using |idx|, and clamped to
// |max|.
// NOTE(review): |idx| presumably restores natural lane order after the
// pairwise dot products - confirm against the callers.
static INLINE uint16x8_t highbd_convolve4_8_sve(const int16x8_t s[4],
                                                const int16x8_t filter,
                                                const uint16x8_t max,
                                                uint16x8_t idx) {
  const int64x2_t zero = vdupq_n_s64(0);

  const int32x2_t dot0 = vmovn_s64(vpx_dotq_lane_s16(zero, s[0], filter, 0));
  const int32x2_t dot1 = vmovn_s64(vpx_dotq_lane_s16(zero, s[1], filter, 0));
  const int32x2_t dot2 = vmovn_s64(vpx_dotq_lane_s16(zero, s[2], filter, 0));
  const int32x2_t dot3 = vmovn_s64(vpx_dotq_lane_s16(zero, s[3], filter, 0));

  const uint16x4_t out_lo =
      vqrshrun_n_s32(vcombine_s32(dot0, dot1), FILTER_BITS);
  const uint16x4_t out_hi =
      vqrshrun_n_s32(vcombine_s32(dot2, dot3), FILTER_BITS);

  const uint16x8_t permuted = vpx_tbl_u16(vcombine_u16(out_lo, out_hi), idx);
  return vminq_u16(permuted, max);
}
// 8-tap filter producing four output lanes via the dot-product helper.
//
// Each of the four inputs is passed to vpx_dotq_s16 with |filter| and a zero
// accumulator. Adjacent results are reduced with a pairwise 64-bit add,
// truncated to 32 bits, rounded and shifted right by FILTER_BITS with
// saturating narrowing to unsigned 16 bits, and clamped to |max|.
// NOTE(review): presumably each vpx_dotq_s16 result holds two partial sums
// (taps 0-3 and 4-7) that the pairwise add folds together - confirm in
// vpx_neon_sve_bridge.h.
static INLINE uint16x4_t highbd_convolve8_4(const int16x8_t s[4],
                                            const int16x8_t filter,
                                            const uint16x4_t max) {
  const int64x2_t zero = vdupq_n_s64(0);

  const int64x2_t pair01 = vpaddq_s64(vpx_dotq_s16(zero, s[0], filter),
                                      vpx_dotq_s16(zero, s[1], filter));
  const int64x2_t pair23 = vpaddq_s64(vpx_dotq_s16(zero, s[2], filter),
                                      vpx_dotq_s16(zero, s[3], filter));

  const int32x4_t acc = vcombine_s32(vmovn_s64(pair01), vmovn_s64(pair23));
  return vmin_u16(vqrshrun_n_s32(acc, FILTER_BITS), max);
}
// 8-tap filter producing eight output lanes via the dot-product helper.
//
// Each of the eight inputs is passed to vpx_dotq_s16 with |filter| and a zero
// accumulator. Adjacent results are reduced with a pairwise 64-bit add,
// truncated to 32 bits, rounded and shifted right by FILTER_BITS with
// saturating narrowing to unsigned 16 bits, and clamped to |max|.
// NOTE(review): presumably each vpx_dotq_s16 result holds two partial sums
// (taps 0-3 and 4-7) that the pairwise add folds together - confirm in
// vpx_neon_sve_bridge.h.
static INLINE uint16x8_t highbd_convolve8_8(const int16x8_t s[8],
                                            const int16x8_t filter,
                                            const uint16x8_t max) {
  const int64x2_t zero = vdupq_n_s64(0);

  const int64x2_t pair01 = vpaddq_s64(vpx_dotq_s16(zero, s[0], filter),
                                      vpx_dotq_s16(zero, s[1], filter));
  const int64x2_t pair23 = vpaddq_s64(vpx_dotq_s16(zero, s[2], filter),
                                      vpx_dotq_s16(zero, s[3], filter));
  const int64x2_t pair45 = vpaddq_s64(vpx_dotq_s16(zero, s[4], filter),
                                      vpx_dotq_s16(zero, s[5], filter));
  const int64x2_t pair67 = vpaddq_s64(vpx_dotq_s16(zero, s[6], filter),
                                      vpx_dotq_s16(zero, s[7], filter));

  const int32x4_t acc_lo = vcombine_s32(vmovn_s64(pair01), vmovn_s64(pair23));
  const int32x4_t acc_hi = vcombine_s32(vmovn_s64(pair45), vmovn_s64(pair67));

  const uint16x8_t narrowed = vcombine_u16(
      vqrshrun_n_s32(acc_lo, FILTER_BITS), vqrshrun_n_s32(acc_hi, FILTER_BITS));
  return vminq_u16(narrowed, max);
}
#endif // VPX_VPX_DSP_ARM_HIGHBD_CONVOLVE8_SVE_H_

View File

@ -14,42 +14,13 @@
#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
#include "vpx/vpx_integer.h"
#include "vpx_dsp/arm/highbd_convolve8_neon.h"
#include "vpx_dsp/arm/mem_neon.h"
#include "vpx_dsp/arm/transpose_neon.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_dsp/vpx_filter.h"
#include "vpx_ports/mem.h"
static INLINE uint16x4_t highbd_convolve4_4(
const int16x4_t s0, const int16x4_t s1, const int16x4_t s2,
const int16x4_t s3, const int16x4_t filters, const uint16x4_t max) {
int32x4_t sum = vmull_lane_s16(s0, filters, 0);
sum = vmlal_lane_s16(sum, s1, filters, 1);
sum = vmlal_lane_s16(sum, s2, filters, 2);
sum = vmlal_lane_s16(sum, s3, filters, 3);
uint16x4_t res = vqrshrun_n_s32(sum, FILTER_BITS);
return vmin_u16(res, max);
}
static INLINE uint16x8_t highbd_convolve4_8(
const int16x8_t s0, const int16x8_t s1, const int16x8_t s2,
const int16x8_t s3, const int16x4_t filters, const uint16x8_t max) {
int32x4_t sum0 = vmull_lane_s16(vget_low_s16(s0), filters, 0);
sum0 = vmlal_lane_s16(sum0, vget_low_s16(s1), filters, 1);
sum0 = vmlal_lane_s16(sum0, vget_low_s16(s2), filters, 2);
sum0 = vmlal_lane_s16(sum0, vget_low_s16(s3), filters, 3);
int32x4_t sum1 = vmull_lane_s16(vget_high_s16(s0), filters, 0);
sum1 = vmlal_lane_s16(sum1, vget_high_s16(s1), filters, 1);
sum1 = vmlal_lane_s16(sum1, vget_high_s16(s2), filters, 2);
sum1 = vmlal_lane_s16(sum1, vget_high_s16(s3), filters, 3);
uint16x8_t res = vcombine_u16(vqrshrun_n_s32(sum0, FILTER_BITS),
vqrshrun_n_s32(sum1, FILTER_BITS));
return vminq_u16(res, max);
}
static INLINE uint16x4_t
highbd_convolve8_4(const int16x4_t s0, const int16x4_t s1, const int16x4_t s2,
const int16x4_t s3, const int16x4_t s4, const int16x4_t s5,
@ -118,13 +89,13 @@ static INLINE void highbd_convolve_4tap_horiz_neon(
load_s16_4x4(s + 3 * src_stride, 1, &s3[0], &s3[1], &s3[2], &s3[3]);
uint16x4_t d0 =
highbd_convolve4_4(s0[0], s0[1], s0[2], s0[3], filter, max);
highbd_convolve4_4_neon(s0[0], s0[1], s0[2], s0[3], filter, max);
uint16x4_t d1 =
highbd_convolve4_4(s1[0], s1[1], s1[2], s1[3], filter, max);
highbd_convolve4_4_neon(s1[0], s1[1], s1[2], s1[3], filter, max);
uint16x4_t d2 =
highbd_convolve4_4(s2[0], s2[1], s2[2], s2[3], filter, max);
highbd_convolve4_4_neon(s2[0], s2[1], s2[2], s2[3], filter, max);
uint16x4_t d3 =
highbd_convolve4_4(s3[0], s3[1], s3[2], s3[3], filter, max);
highbd_convolve4_4_neon(s3[0], s3[1], s3[2], s3[3], filter, max);
store_u16_4x4(d, dst_stride, d0, d1, d2, d3);
@ -148,13 +119,13 @@ static INLINE void highbd_convolve_4tap_horiz_neon(
load_s16_8x4(s + 3 * src_stride, 1, &s3[0], &s3[1], &s3[2], &s3[3]);
uint16x8_t d0 =
highbd_convolve4_8(s0[0], s0[1], s0[2], s0[3], filter, max);
highbd_convolve4_8_neon(s0[0], s0[1], s0[2], s0[3], filter, max);
uint16x8_t d1 =
highbd_convolve4_8(s1[0], s1[1], s1[2], s1[3], filter, max);
highbd_convolve4_8_neon(s1[0], s1[1], s1[2], s1[3], filter, max);
uint16x8_t d2 =
highbd_convolve4_8(s2[0], s2[1], s2[2], s2[3], filter, max);
highbd_convolve4_8_neon(s2[0], s2[1], s2[2], s2[3], filter, max);
uint16x8_t d3 =
highbd_convolve4_8(s3[0], s3[1], s3[2], s3[3], filter, max);
highbd_convolve4_8_neon(s3[0], s3[1], s3[2], s3[3], filter, max);
store_u16_8x4(d, dst_stride, d0, d1, d2, d3);
@ -393,10 +364,10 @@ static INLINE void highbd_convolve_4tap_vert_neon(
int16x4_t s3, s4, s5, s6;
load_s16_4x4(s, src_stride, &s3, &s4, &s5, &s6);
uint16x4_t d0 = highbd_convolve4_4(s0, s1, s2, s3, filter, max);
uint16x4_t d1 = highbd_convolve4_4(s1, s2, s3, s4, filter, max);
uint16x4_t d2 = highbd_convolve4_4(s2, s3, s4, s5, filter, max);
uint16x4_t d3 = highbd_convolve4_4(s3, s4, s5, s6, filter, max);
uint16x4_t d0 = highbd_convolve4_4_neon(s0, s1, s2, s3, filter, max);
uint16x4_t d1 = highbd_convolve4_4_neon(s1, s2, s3, s4, filter, max);
uint16x4_t d2 = highbd_convolve4_4_neon(s2, s3, s4, s5, filter, max);
uint16x4_t d3 = highbd_convolve4_4_neon(s3, s4, s5, s6, filter, max);
store_u16_4x4(d, dst_stride, d0, d1, d2, d3);
@ -424,10 +395,10 @@ static INLINE void highbd_convolve_4tap_vert_neon(
int16x8_t s3, s4, s5, s6;
load_s16_8x4(s, src_stride, &s3, &s4, &s5, &s6);
uint16x8_t d0 = highbd_convolve4_8(s0, s1, s2, s3, filter, max);
uint16x8_t d1 = highbd_convolve4_8(s1, s2, s3, s4, filter, max);
uint16x8_t d2 = highbd_convolve4_8(s2, s3, s4, s5, filter, max);
uint16x8_t d3 = highbd_convolve4_8(s3, s4, s5, s6, filter, max);
uint16x8_t d0 = highbd_convolve4_8_neon(s0, s1, s2, s3, filter, max);
uint16x8_t d1 = highbd_convolve4_8_neon(s1, s2, s3, s4, filter, max);
uint16x8_t d2 = highbd_convolve4_8_neon(s2, s3, s4, s5, filter, max);
uint16x8_t d3 = highbd_convolve4_8_neon(s3, s4, s5, s6, filter, max);
store_u16_8x4(d, dst_stride, d0, d1, d2, d3);
@ -686,12 +657,12 @@ static INLINE void highbd_convolve_2d_4tap_neon(
load_s16_4x4(s + 1 * src_stride, 1, &h_s1[0], &h_s1[1], &h_s1[2], &h_s1[3]);
load_s16_4x4(s + 2 * src_stride, 1, &h_s2[0], &h_s2[1], &h_s2[2], &h_s2[3]);
int16x4_t v_s0 = vreinterpret_s16_u16(
highbd_convolve4_4(h_s0[0], h_s0[1], h_s0[2], h_s0[3], x_filter, max));
int16x4_t v_s1 = vreinterpret_s16_u16(
highbd_convolve4_4(h_s1[0], h_s1[1], h_s1[2], h_s1[3], x_filter, max));
int16x4_t v_s2 = vreinterpret_s16_u16(
highbd_convolve4_4(h_s2[0], h_s2[1], h_s2[2], h_s2[3], x_filter, max));
int16x4_t v_s0 = vreinterpret_s16_u16(highbd_convolve4_4_neon(
h_s0[0], h_s0[1], h_s0[2], h_s0[3], x_filter, max));
int16x4_t v_s1 = vreinterpret_s16_u16(highbd_convolve4_4_neon(
h_s1[0], h_s1[1], h_s1[2], h_s1[3], x_filter, max));
int16x4_t v_s2 = vreinterpret_s16_u16(highbd_convolve4_4_neon(
h_s2[0], h_s2[1], h_s2[2], h_s2[3], x_filter, max));
s += 3 * src_stride;
@ -706,19 +677,23 @@ static INLINE void highbd_convolve_2d_4tap_neon(
load_s16_4x4(s + 3 * src_stride, 1, &h_s6[0], &h_s6[1], &h_s6[2],
&h_s6[3]);
int16x4_t v_s3 = vreinterpret_s16_u16(highbd_convolve4_4(
int16x4_t v_s3 = vreinterpret_s16_u16(highbd_convolve4_4_neon(
h_s3[0], h_s3[1], h_s3[2], h_s3[3], x_filter, max));
int16x4_t v_s4 = vreinterpret_s16_u16(highbd_convolve4_4(
int16x4_t v_s4 = vreinterpret_s16_u16(highbd_convolve4_4_neon(
h_s4[0], h_s4[1], h_s4[2], h_s4[3], x_filter, max));
int16x4_t v_s5 = vreinterpret_s16_u16(highbd_convolve4_4(
int16x4_t v_s5 = vreinterpret_s16_u16(highbd_convolve4_4_neon(
h_s5[0], h_s5[1], h_s5[2], h_s5[3], x_filter, max));
int16x4_t v_s6 = vreinterpret_s16_u16(highbd_convolve4_4(
int16x4_t v_s6 = vreinterpret_s16_u16(highbd_convolve4_4_neon(
h_s6[0], h_s6[1], h_s6[2], h_s6[3], x_filter, max));
uint16x4_t d0 = highbd_convolve4_4(v_s0, v_s1, v_s2, v_s3, y_filter, max);
uint16x4_t d1 = highbd_convolve4_4(v_s1, v_s2, v_s3, v_s4, y_filter, max);
uint16x4_t d2 = highbd_convolve4_4(v_s2, v_s3, v_s4, v_s5, y_filter, max);
uint16x4_t d3 = highbd_convolve4_4(v_s3, v_s4, v_s5, v_s6, y_filter, max);
uint16x4_t d0 =
highbd_convolve4_4_neon(v_s0, v_s1, v_s2, v_s3, y_filter, max);
uint16x4_t d1 =
highbd_convolve4_4_neon(v_s1, v_s2, v_s3, v_s4, y_filter, max);
uint16x4_t d2 =
highbd_convolve4_4_neon(v_s2, v_s3, v_s4, v_s5, y_filter, max);
uint16x4_t d3 =
highbd_convolve4_4_neon(v_s3, v_s4, v_s5, v_s6, y_filter, max);
store_u16_4x4(d, dst_stride, d0, d1, d2, d3);
@ -745,12 +720,12 @@ static INLINE void highbd_convolve_2d_4tap_neon(
load_s16_8x4(s + 1 * src_stride, 1, &h_s1[0], &h_s1[1], &h_s1[2], &h_s1[3]);
load_s16_8x4(s + 2 * src_stride, 1, &h_s2[0], &h_s2[1], &h_s2[2], &h_s2[3]);
int16x8_t v_s0 = vreinterpretq_s16_u16(
highbd_convolve4_8(h_s0[0], h_s0[1], h_s0[2], h_s0[3], x_filter, max));
int16x8_t v_s1 = vreinterpretq_s16_u16(
highbd_convolve4_8(h_s1[0], h_s1[1], h_s1[2], h_s1[3], x_filter, max));
int16x8_t v_s2 = vreinterpretq_s16_u16(
highbd_convolve4_8(h_s2[0], h_s2[1], h_s2[2], h_s2[3], x_filter, max));
int16x8_t v_s0 = vreinterpretq_s16_u16(highbd_convolve4_8_neon(
h_s0[0], h_s0[1], h_s0[2], h_s0[3], x_filter, max));
int16x8_t v_s1 = vreinterpretq_s16_u16(highbd_convolve4_8_neon(
h_s1[0], h_s1[1], h_s1[2], h_s1[3], x_filter, max));
int16x8_t v_s2 = vreinterpretq_s16_u16(highbd_convolve4_8_neon(
h_s2[0], h_s2[1], h_s2[2], h_s2[3], x_filter, max));
s += 3 * src_stride;
@ -765,19 +740,23 @@ static INLINE void highbd_convolve_2d_4tap_neon(
load_s16_8x4(s + 3 * src_stride, 1, &h_s6[0], &h_s6[1], &h_s6[2],
&h_s6[3]);
int16x8_t v_s3 = vreinterpretq_s16_u16(highbd_convolve4_8(
int16x8_t v_s3 = vreinterpretq_s16_u16(highbd_convolve4_8_neon(
h_s3[0], h_s3[1], h_s3[2], h_s3[3], x_filter, max));
int16x8_t v_s4 = vreinterpretq_s16_u16(highbd_convolve4_8(
int16x8_t v_s4 = vreinterpretq_s16_u16(highbd_convolve4_8_neon(
h_s4[0], h_s4[1], h_s4[2], h_s4[3], x_filter, max));
int16x8_t v_s5 = vreinterpretq_s16_u16(highbd_convolve4_8(
int16x8_t v_s5 = vreinterpretq_s16_u16(highbd_convolve4_8_neon(
h_s5[0], h_s5[1], h_s5[2], h_s5[3], x_filter, max));
int16x8_t v_s6 = vreinterpretq_s16_u16(highbd_convolve4_8(
int16x8_t v_s6 = vreinterpretq_s16_u16(highbd_convolve4_8_neon(
h_s6[0], h_s6[1], h_s6[2], h_s6[3], x_filter, max));
uint16x8_t d0 = highbd_convolve4_8(v_s0, v_s1, v_s2, v_s3, y_filter, max);
uint16x8_t d1 = highbd_convolve4_8(v_s1, v_s2, v_s3, v_s4, y_filter, max);
uint16x8_t d2 = highbd_convolve4_8(v_s2, v_s3, v_s4, v_s5, y_filter, max);
uint16x8_t d3 = highbd_convolve4_8(v_s3, v_s4, v_s5, v_s6, y_filter, max);
uint16x8_t d0 =
highbd_convolve4_8_neon(v_s0, v_s1, v_s2, v_s3, y_filter, max);
uint16x8_t d1 =
highbd_convolve4_8_neon(v_s1, v_s2, v_s3, v_s4, y_filter, max);
uint16x8_t d2 =
highbd_convolve4_8_neon(v_s2, v_s3, v_s4, v_s5, y_filter, max);
uint16x8_t d3 =
highbd_convolve4_8_neon(v_s3, v_s4, v_s5, v_s6, y_filter, max);
store_u16_8x4(d, dst_stride, d0, d1, d2, d3);

View File

@ -15,6 +15,7 @@
#include "./vpx_dsp_rtcd.h"
#include "vpx/vpx_integer.h"
#include "vpx_dsp/arm/highbd_convolve8_sve.h"
#include "vpx_dsp/arm/mem_neon.h"
#include "vpx_dsp/arm/transpose_neon.h"
#include "vpx_dsp/arm/vpx_neon_sve_bridge.h"
@ -22,87 +23,6 @@
DECLARE_ALIGNED(16, static const uint16_t, kTblConv4_8[8]) = { 0, 2, 4, 6,
1, 3, 5, 7 };
// 4-tap filter producing four output samples via the dot-product helper.
//
// s[0..3] are combined pairwise into 128-bit vectors and fed to
// vpx_dotq_lane_s16 with lane 0 of `filter` (presumably the 64-bit group
// holding the four coefficients -- confirm against vpx_neon_sve_bridge.h).
// The 64-bit sums are narrowed to 32 bits, rounded and shifted right by
// FILTER_BITS with unsigned saturation, then clamped to `max`.
static INLINE uint16x4_t highbd_convolve4_4(const int16x4_t s[4],
                                            const int16x8_t filter,
                                            const uint16x4_t max) {
  const int16x8_t pair01 = vcombine_s16(s[0], s[1]);
  const int16x8_t pair23 = vcombine_s16(s[2], s[3]);

  const int64x2_t dot01 = vpx_dotq_lane_s16(vdupq_n_s64(0), pair01, filter, 0);
  const int64x2_t dot23 = vpx_dotq_lane_s16(vdupq_n_s64(0), pair23, filter, 0);

  const int32x4_t sums = vcombine_s32(vmovn_s64(dot01), vmovn_s64(dot23));

  return vmin_u16(vqrshrun_n_s32(sums, FILTER_BITS), max);
}
// 4-tap filter producing eight output samples via the dot-product helper.
//
// Each of s[0..3] yields two 64-bit sums (named sum04, sum15, ... in the
// original, so presumably outputs i and i + 4 -- confirm). After narrowing,
// rounding and shifting by FILTER_BITS, the lanes are permuted with
// vpx_tbl_u16 using `idx` supplied by the caller, then clamped to `max`.
static INLINE uint16x8_t highbd_convolve4_8(const int16x8_t s[4],
                                            const int16x8_t filter,
                                            const uint16x8_t max,
                                            uint16x8_t idx) {
  const int64x2_t dot0 = vpx_dotq_lane_s16(vdupq_n_s64(0), s[0], filter, 0);
  const int64x2_t dot1 = vpx_dotq_lane_s16(vdupq_n_s64(0), s[1], filter, 0);
  const int64x2_t dot2 = vpx_dotq_lane_s16(vdupq_n_s64(0), s[2], filter, 0);
  const int64x2_t dot3 = vpx_dotq_lane_s16(vdupq_n_s64(0), s[3], filter, 0);

  const int32x4_t sums_lo = vcombine_s32(vmovn_s64(dot0), vmovn_s64(dot1));
  const int32x4_t sums_hi = vcombine_s32(vmovn_s64(dot2), vmovn_s64(dot3));

  const uint16x8_t unordered = vcombine_u16(
      vqrshrun_n_s32(sums_lo, FILTER_BITS), vqrshrun_n_s32(sums_hi, FILTER_BITS));

  // Reorder lanes as dictated by the caller's index table before clamping.
  const uint16x8_t ordered = vpx_tbl_u16(unordered, idx);
  return vminq_u16(ordered, max);
}
// 8-tap filter producing four output samples.
//
// Each s[i] is dotted against the full `filter` vector, giving two 64-bit
// partial sums per output; pairwise addition folds them into one sum per
// output. The sums are narrowed to 32 bits, rounded and shifted right by
// FILTER_BITS with unsigned saturation, then clamped to `max`.
static INLINE uint16x4_t highbd_convolve8_4(const int16x8_t s[4],
                                            const int16x8_t filter,
                                            const uint16x4_t max) {
  const int64x2_t dot0 = vpx_dotq_s16(vdupq_n_s64(0), s[0], filter);
  const int64x2_t dot1 = vpx_dotq_s16(vdupq_n_s64(0), s[1], filter);
  const int64x2_t dot2 = vpx_dotq_s16(vdupq_n_s64(0), s[2], filter);
  const int64x2_t dot3 = vpx_dotq_s16(vdupq_n_s64(0), s[3], filter);

  // Fold the two halves of every dot product into a single lane.
  const int64x2_t folded01 = vpaddq_s64(dot0, dot1);
  const int64x2_t folded23 = vpaddq_s64(dot2, dot3);

  const int32x4_t sums =
      vcombine_s32(vmovn_s64(folded01), vmovn_s64(folded23));

  return vmin_u16(vqrshrun_n_s32(sums, FILTER_BITS), max);
}
// 8-tap filter producing eight output samples.
//
// Same scheme as the four-sample variant, applied to eight input vectors:
// dot each s[i] with `filter`, fold the partial sums pairwise, narrow to
// 32 bits, round/shift by FILTER_BITS with unsigned saturation, clamp to `max`.
static INLINE uint16x8_t highbd_convolve8_8(const int16x8_t s[8],
                                            const int16x8_t filter,
                                            const uint16x8_t max) {
  int64x2_t dot[8];
  int i;

  for (i = 0; i < 8; ++i) {
    dot[i] = vpx_dotq_s16(vdupq_n_s64(0), s[i], filter);
  }

  // Fold the two halves of every dot product into a single lane.
  const int64x2_t folded01 = vpaddq_s64(dot[0], dot[1]);
  const int64x2_t folded23 = vpaddq_s64(dot[2], dot[3]);
  const int64x2_t folded45 = vpaddq_s64(dot[4], dot[5]);
  const int64x2_t folded67 = vpaddq_s64(dot[6], dot[7]);

  const int32x4_t sums_lo =
      vcombine_s32(vmovn_s64(folded01), vmovn_s64(folded23));
  const int32x4_t sums_hi =
      vcombine_s32(vmovn_s64(folded45), vmovn_s64(folded67));

  const uint16x8_t narrowed = vcombine_u16(
      vqrshrun_n_s32(sums_lo, FILTER_BITS), vqrshrun_n_s32(sums_hi, FILTER_BITS));

  return vminq_u16(narrowed, max);
}
static INLINE void highbd_convolve_4tap_horiz_sve(
const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst,
ptrdiff_t dst_stride, int w, int h, const int16x4_t filters, int bd) {
@ -120,10 +40,10 @@ static INLINE void highbd_convolve_4tap_horiz_sve(
load_s16_4x4(s + 2 * src_stride, 1, &s2[0], &s2[1], &s2[2], &s2[3]);
load_s16_4x4(s + 3 * src_stride, 1, &s3[0], &s3[1], &s3[2], &s3[3]);
uint16x4_t d0 = highbd_convolve4_4(s0, filter, max);
uint16x4_t d1 = highbd_convolve4_4(s1, filter, max);
uint16x4_t d2 = highbd_convolve4_4(s2, filter, max);
uint16x4_t d3 = highbd_convolve4_4(s3, filter, max);
uint16x4_t d0 = highbd_convolve4_4_sve(s0, filter, max);
uint16x4_t d1 = highbd_convolve4_4_sve(s1, filter, max);
uint16x4_t d2 = highbd_convolve4_4_sve(s2, filter, max);
uint16x4_t d3 = highbd_convolve4_4_sve(s3, filter, max);
store_u16_4x4(d, dst_stride, d0, d1, d2, d3);
@ -147,10 +67,10 @@ static INLINE void highbd_convolve_4tap_horiz_sve(
load_s16_8x4(s + 2 * src_stride, 1, &s2[0], &s2[1], &s2[2], &s2[3]);
load_s16_8x4(s + 3 * src_stride, 1, &s3[0], &s3[1], &s3[2], &s3[3]);
uint16x8_t d0 = highbd_convolve4_8(s0, filter, max, idx);
uint16x8_t d1 = highbd_convolve4_8(s1, filter, max, idx);
uint16x8_t d2 = highbd_convolve4_8(s2, filter, max, idx);
uint16x8_t d3 = highbd_convolve4_8(s3, filter, max, idx);
uint16x8_t d0 = highbd_convolve4_8_sve(s0, filter, max, idx);
uint16x8_t d1 = highbd_convolve4_8_sve(s1, filter, max, idx);
uint16x8_t d2 = highbd_convolve4_8_sve(s2, filter, max, idx);
uint16x8_t d3 = highbd_convolve4_8_sve(s3, filter, max, idx);
store_u16_8x4(d, dst_stride, d0, d1, d2, d3);

View File

@ -15,6 +15,8 @@
#include "./vpx_dsp_rtcd.h"
#include "vpx/vpx_integer.h"
#include "vpx_dsp/arm/highbd_convolve8_neon.h"
#include "vpx_dsp/arm/highbd_convolve8_sve.h"
#include "vpx_dsp/arm/mem_neon.h"
#include "vpx_dsp/arm/transpose_neon.h"
#include "vpx_dsp/arm/vpx_neon_sve_bridge.h"
@ -31,6 +33,9 @@ DECLARE_ALIGNED(16, static const uint16_t, kDotProdMergeBlockTbl[24]) = {
};
// clang-format on
DECLARE_ALIGNED(16, static const uint16_t, kTblConv4_8[8]) = { 0, 2, 4, 6,
1, 3, 5, 7 };
static INLINE void transpose_concat_4x4(const int16x4_t s0, const int16x4_t s1,
const int16x4_t s2, const int16x4_t s3,
int16x8_t res[2]) {
@ -450,3 +455,334 @@ void vpx_highbd_convolve8_avg_vert_sve2(const uint16_t *src,
} while (w != 0);
}
}
// Combined horizontal + vertical 4-tap high bit-depth convolution.
//
// The horizontal pass uses the dot-product helpers (highbd_convolve4_*_sve)
// and the vertical pass uses the multiply-accumulate helpers
// (highbd_convolve4_*_neon). Rows are processed four at a time while a
// three-row window of horizontally filtered data (v_s0..v_s2) is carried from
// one iteration to the next.
//
// Loop termination is by exact equality, so `h` must be a positive multiple of
// 4 and, when w != 4, `w` must be a positive multiple of 8.
// `src` is expected to already point at the first row/column the filters read
// (callers apply the tap offset before calling).
static INLINE void highbd_convolve_2d_4tap_sve2(
const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst,
ptrdiff_t dst_stride, int w, int h, const int16x4_t x_filters,
const int16x4_t y_filters, int bd) {
// Widen the horizontal taps to a 128-bit vector (upper four lanes zero) as
// required by the *_sve helpers.
const int16x8_t x_filter = vcombine_s16(x_filters, vdup_n_s16(0));
if (w == 4) {
// Largest representable pixel value for this bit depth.
const uint16x4_t max = vdup_n_u16((1 << bd) - 1);
const int16_t *s = (const int16_t *)src;
uint16_t *d = dst;
// Prime the vertical window: horizontally filter the first three rows.
int16x4_t h_s0[4], h_s1[4], h_s2[4];
load_s16_4x4(s + 0 * src_stride, 1, &h_s0[0], &h_s0[1], &h_s0[2], &h_s0[3]);
load_s16_4x4(s + 1 * src_stride, 1, &h_s1[0], &h_s1[1], &h_s1[2], &h_s1[3]);
load_s16_4x4(s + 2 * src_stride, 1, &h_s2[0], &h_s2[1], &h_s2[2], &h_s2[3]);
int16x4_t v_s0 =
vreinterpret_s16_u16(highbd_convolve4_4_sve(h_s0, x_filter, max));
int16x4_t v_s1 =
vreinterpret_s16_u16(highbd_convolve4_4_sve(h_s1, x_filter, max));
int16x4_t v_s2 =
vreinterpret_s16_u16(highbd_convolve4_4_sve(h_s2, x_filter, max));
s += 3 * src_stride;
do {
// Horizontally filter the next four rows.
int16x4_t h_s3[4], h_s4[4], h_s5[4], h_s6[4];
load_s16_4x4(s + 0 * src_stride, 1, &h_s3[0], &h_s3[1], &h_s3[2],
&h_s3[3]);
load_s16_4x4(s + 1 * src_stride, 1, &h_s4[0], &h_s4[1], &h_s4[2],
&h_s4[3]);
load_s16_4x4(s + 2 * src_stride, 1, &h_s5[0], &h_s5[1], &h_s5[2],
&h_s5[3]);
load_s16_4x4(s + 3 * src_stride, 1, &h_s6[0], &h_s6[1], &h_s6[2],
&h_s6[3]);
int16x4_t v_s3 =
vreinterpret_s16_u16(highbd_convolve4_4_sve(h_s3, x_filter, max));
int16x4_t v_s4 =
vreinterpret_s16_u16(highbd_convolve4_4_sve(h_s4, x_filter, max));
int16x4_t v_s5 =
vreinterpret_s16_u16(highbd_convolve4_4_sve(h_s5, x_filter, max));
int16x4_t v_s6 =
vreinterpret_s16_u16(highbd_convolve4_4_sve(h_s6, x_filter, max));
// Vertical pass: each output row uses four consecutive filtered rows.
uint16x4_t d0 =
highbd_convolve4_4_neon(v_s0, v_s1, v_s2, v_s3, y_filters, max);
uint16x4_t d1 =
highbd_convolve4_4_neon(v_s1, v_s2, v_s3, v_s4, y_filters, max);
uint16x4_t d2 =
highbd_convolve4_4_neon(v_s2, v_s3, v_s4, v_s5, y_filters, max);
uint16x4_t d3 =
highbd_convolve4_4_neon(v_s3, v_s4, v_s5, v_s6, y_filters, max);
store_u16_4x4(d, dst_stride, d0, d1, d2, d3);
// Slide the window: the last three filtered rows seed the next iteration.
v_s0 = v_s4;
v_s1 = v_s5;
v_s2 = v_s6;
s += 4 * src_stride;
d += 4 * dst_stride;
h -= 4;
} while (h != 0);
} else {
const uint16x8_t max = vdupq_n_u16((1 << bd) - 1);
// Lane permutation handed to highbd_convolve4_8_sve.
const uint16x8_t idx = vld1q_u16(kTblConv4_8);
// Outer loop walks across the block in 8-column strips.
do {
const int16_t *s = (const int16_t *)src;
uint16_t *d = dst;
int height = h;
// Prime the vertical window for this strip.
int16x8_t h_s0[4], h_s1[4], h_s2[4];
load_s16_8x4(s + 0 * src_stride, 1, &h_s0[0], &h_s0[1], &h_s0[2],
&h_s0[3]);
load_s16_8x4(s + 1 * src_stride, 1, &h_s1[0], &h_s1[1], &h_s1[2],
&h_s1[3]);
load_s16_8x4(s + 2 * src_stride, 1, &h_s2[0], &h_s2[1], &h_s2[2],
&h_s2[3]);
int16x8_t v_s0 = vreinterpretq_s16_u16(
highbd_convolve4_8_sve(h_s0, x_filter, max, idx));
int16x8_t v_s1 = vreinterpretq_s16_u16(
highbd_convolve4_8_sve(h_s1, x_filter, max, idx));
int16x8_t v_s2 = vreinterpretq_s16_u16(
highbd_convolve4_8_sve(h_s2, x_filter, max, idx));
s += 3 * src_stride;
// Inner loop walks down the strip four rows at a time.
do {
int16x8_t h_s3[4], h_s4[4], h_s5[4], h_s6[4];
load_s16_8x4(s + 0 * src_stride, 1, &h_s3[0], &h_s3[1], &h_s3[2],
&h_s3[3]);
load_s16_8x4(s + 1 * src_stride, 1, &h_s4[0], &h_s4[1], &h_s4[2],
&h_s4[3]);
load_s16_8x4(s + 2 * src_stride, 1, &h_s5[0], &h_s5[1], &h_s5[2],
&h_s5[3]);
load_s16_8x4(s + 3 * src_stride, 1, &h_s6[0], &h_s6[1], &h_s6[2],
&h_s6[3]);
int16x8_t v_s3 = vreinterpretq_s16_u16(
highbd_convolve4_8_sve(h_s3, x_filter, max, idx));
int16x8_t v_s4 = vreinterpretq_s16_u16(
highbd_convolve4_8_sve(h_s4, x_filter, max, idx));
int16x8_t v_s5 = vreinterpretq_s16_u16(
highbd_convolve4_8_sve(h_s5, x_filter, max, idx));
int16x8_t v_s6 = vreinterpretq_s16_u16(
highbd_convolve4_8_sve(h_s6, x_filter, max, idx));
uint16x8_t d0 =
highbd_convolve4_8_neon(v_s0, v_s1, v_s2, v_s3, y_filters, max);
uint16x8_t d1 =
highbd_convolve4_8_neon(v_s1, v_s2, v_s3, v_s4, y_filters, max);
uint16x8_t d2 =
highbd_convolve4_8_neon(v_s2, v_s3, v_s4, v_s5, y_filters, max);
uint16x8_t d3 =
highbd_convolve4_8_neon(v_s3, v_s4, v_s5, v_s6, y_filters, max);
store_u16_8x4(d, dst_stride, d0, d1, d2, d3);
// Slide the window down by four rows.
v_s0 = v_s4;
v_s1 = v_s5;
v_s2 = v_s6;
s += 4 * src_stride;
d += 4 * dst_stride;
height -= 4;
} while (height != 0);
src += 8;
dst += 8;
w -= 8;
} while (w != 0);
}
}
// Horizontal 8-tap pass of the 2D high bit-depth convolution.
//
// Filters `h` rows of `src` into `dst` using the kernel filter[x0_q4].
// `h` is the intermediate height, asserted to satisfy h % 4 == 3 and h >= 7:
// rows are processed in groups of four and the remaining three rows are
// handled by a dedicated tail.
// x_step_q4 must be 16 (unscaled); y0_q4 and y_step_q4 are unused and only
// present so the signature mirrors the public convolve functions.
// When w != 4 the column loops terminate on width == 0 after steps of 8, so
// `w` must then be a positive multiple of 8.
static INLINE void highbd_convolve8_2d_horiz_sve2(
const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst,
ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4,
int y0_q4, int y_step_q4, int w, int h, int bd) {
assert((intptr_t)dst % 4 == 0);
assert(dst_stride % 4 == 0);
assert(x_step_q4 == 16);
assert(h % 4 == 3 && h >= 7);
(void)x_step_q4;
(void)y0_q4;
(void)y_step_q4;
// All eight horizontal taps.
const int16x8_t filters = vld1q_s16(filter[x0_q4]);
// Step back three samples so the 8-tap window starts at its first tap.
src -= 3;
if (w == 4) {
// Largest representable pixel value for this bit depth.
const uint16x4_t max = vdup_n_u16((1 << bd) - 1);
const int16_t *s = (const int16_t *)src;
uint16_t *d = dst;
// Main loop: four rows per iteration until exactly three rows remain.
do {
int16x8_t s0[4], s1[4], s2[4], s3[4];
load_s16_8x4(s + 0 * src_stride, 1, &s0[0], &s0[1], &s0[2], &s0[3]);
load_s16_8x4(s + 1 * src_stride, 1, &s1[0], &s1[1], &s1[2], &s1[3]);
load_s16_8x4(s + 2 * src_stride, 1, &s2[0], &s2[1], &s2[2], &s2[3]);
load_s16_8x4(s + 3 * src_stride, 1, &s3[0], &s3[1], &s3[2], &s3[3]);
uint16x4_t d0 = highbd_convolve8_4(s0, filters, max);
uint16x4_t d1 = highbd_convolve8_4(s1, filters, max);
uint16x4_t d2 = highbd_convolve8_4(s2, filters, max);
uint16x4_t d3 = highbd_convolve8_4(s3, filters, max);
store_u16_4x4(d, dst_stride, d0, d1, d2, d3);
s += 4 * src_stride;
d += 4 * dst_stride;
h -= 4;
} while (h != 3);
// Process final three rows (h % 4 == 3).
int16x8_t s0[4], s1[4], s2[4];
load_s16_8x4(s + 0 * src_stride, 1, &s0[0], &s0[1], &s0[2], &s0[3]);
load_s16_8x4(s + 1 * src_stride, 1, &s1[0], &s1[1], &s1[2], &s1[3]);
load_s16_8x4(s + 2 * src_stride, 1, &s2[0], &s2[1], &s2[2], &s2[3]);
uint16x4_t d0 = highbd_convolve8_4(s0, filters, max);
uint16x4_t d1 = highbd_convolve8_4(s1, filters, max);
uint16x4_t d2 = highbd_convolve8_4(s2, filters, max);
store_u16_4x3(d, dst_stride, d0, d1, d2);
} else {
const uint16x8_t max = vdupq_n_u16((1 << bd) - 1);
// Outer loop: groups of four rows; inner loop: 8-column strips.
do {
const int16_t *s = (const int16_t *)src;
uint16_t *d = dst;
int width = w;
do {
int16x8_t s0[8], s1[8], s2[8], s3[8];
load_s16_8x8(s + 0 * src_stride, 1, &s0[0], &s0[1], &s0[2], &s0[3],
&s0[4], &s0[5], &s0[6], &s0[7]);
load_s16_8x8(s + 1 * src_stride, 1, &s1[0], &s1[1], &s1[2], &s1[3],
&s1[4], &s1[5], &s1[6], &s1[7]);
load_s16_8x8(s + 2 * src_stride, 1, &s2[0], &s2[1], &s2[2], &s2[3],
&s2[4], &s2[5], &s2[6], &s2[7]);
load_s16_8x8(s + 3 * src_stride, 1, &s3[0], &s3[1], &s3[2], &s3[3],
&s3[4], &s3[5], &s3[6], &s3[7]);
uint16x8_t d0 = highbd_convolve8_8(s0, filters, max);
uint16x8_t d1 = highbd_convolve8_8(s1, filters, max);
uint16x8_t d2 = highbd_convolve8_8(s2, filters, max);
uint16x8_t d3 = highbd_convolve8_8(s3, filters, max);
store_u16_8x4(d, dst_stride, d0, d1, d2, d3);
s += 8;
d += 8;
width -= 8;
} while (width != 0);
src += 4 * src_stride;
dst += 4 * dst_stride;
h -= 4;
} while (h != 3);
// Process final three rows (h % 4 == 3).
const int16_t *s = (const int16_t *)src;
uint16_t *d = dst;
int width = w;
do {
int16x8_t s0[8], s1[8], s2[8];
load_s16_8x8(s + 0 * src_stride, 1, &s0[0], &s0[1], &s0[2], &s0[3],
&s0[4], &s0[5], &s0[6], &s0[7]);
load_s16_8x8(s + 1 * src_stride, 1, &s1[0], &s1[1], &s1[2], &s1[3],
&s1[4], &s1[5], &s1[6], &s1[7]);
load_s16_8x8(s + 2 * src_stride, 1, &s2[0], &s2[1], &s2[2], &s2[3],
&s2[4], &s2[5], &s2[6], &s2[7]);
uint16x8_t d0 = highbd_convolve8_8(s0, filters, max);
uint16x8_t d1 = highbd_convolve8_8(s1, filters, max);
uint16x8_t d2 = highbd_convolve8_8(s2, filters, max);
store_u16_8x3(d, dst_stride, d0, d1, d2);
s += 8;
d += 8;
width -= 8;
} while (width != 0);
}
}
// 2D high bit-depth convolution (horizontal pass followed by vertical pass).
//
// src/src_stride:  input samples and row stride (in samples).
// dst/dst_stride:  output samples and row stride (in samples).
// filter:          kernel table; filter[x0_q4] / filter[y0_q4] select the
//                  horizontal / vertical kernels.
// x_step_q4/y_step_q4: sub-pixel step; anything other than 16 (i.e. scaling)
//                  is delegated to the C implementation.
// w, h:            block dimensions. bd: bit depth used for clamping.
//
// When BOTH kernels are reported as having at most four taps, a fused 4-tap
// path is used. Otherwise the block is filtered horizontally into a temporary
// buffer and then vertically with the 8-tap kernels.
void vpx_highbd_convolve8_sve2(const uint16_t *src, ptrdiff_t src_stride,
                               uint16_t *dst, ptrdiff_t dst_stride,
                               const InterpKernel *filter, int x0_q4,
                               int x_step_q4, int y0_q4, int y_step_q4, int w,
                               int h, int bd) {
  if (x_step_q4 != 16 || y_step_q4 != 16) {
    vpx_highbd_convolve8_c(src, src_stride, dst, dst_stride, filter, x0_q4,
                           x_step_q4, y0_q4, y_step_q4, w, h, bd);
    return;
  }

  assert(y_step_q4 == 16);
  assert(x_step_q4 == 16);

  const int horiz_filter_taps = vpx_get_filter_taps(filter[x0_q4]) <= 4 ? 4 : 8;
  const int vert_filter_taps = vpx_get_filter_taps(filter[y0_q4]) <= 4 ? 4 : 8;

  // The fused path loads only coefficients 2..5 of each kernel, so it is
  // valid only when both kernels are 4-tap. Taking it when just one of them
  // is 4-tap would silently drop the outer taps of the other kernel.
  if (horiz_filter_taps == 4 && vert_filter_taps == 4) {
    // Account for needing filter_taps / 2 - 1 samples before the current
    // position, both horizontally and vertically.
    const ptrdiff_t horiz_offset = horiz_filter_taps / 2 - 1;
    const ptrdiff_t vert_offset = (vert_filter_taps / 2 - 1) * src_stride;
    const int16x4_t x_filter = vld1_s16(filter[x0_q4] + 2);
    const int16x4_t y_filter = vld1_s16(filter[y0_q4] + 2);

    highbd_convolve_2d_4tap_sve2(src - horiz_offset - vert_offset, src_stride,
                                 dst, dst_stride, w, h, x_filter, y_filter, bd);
    return;
  }

  // Given our constraints: w <= 64, h <= 64, taps <= 8 we can reduce the
  // maximum buffer size to 64 * (64 + 7).
  DECLARE_ALIGNED(32, uint16_t, im_block[64 * 71]);
  const int im_stride = 64;

  // Account for the vertical phase needing SUBPEL_TAPS / 2 - 1 lines prior
  // and SUBPEL_TAPS / 2 lines post.
  const int im_height = h + SUBPEL_TAPS - 1;
  const ptrdiff_t border_offset = SUBPEL_TAPS / 2 - 1;

  highbd_convolve8_2d_horiz_sve2(src - src_stride * border_offset, src_stride,
                                 im_block, im_stride, filter, x0_q4, x_step_q4,
                                 y0_q4, y_step_q4, w, im_height, bd);

  // Step into the temporary buffer border_offset rows to get actual frame data.
  vpx_highbd_convolve8_vert_sve2(im_block + im_stride * border_offset,
                                 im_stride, dst, dst_stride, filter, x0_q4,
                                 x_step_q4, y0_q4, y_step_q4, w, h, bd);
}
// 2D high bit-depth convolution whose result is averaged with the existing
// contents of `dst`.
//
// src/src_stride:  input samples and row stride (in samples).
// dst/dst_stride:  destination, read and written; row stride in samples.
// filter:          kernel table; filter[x0_q4] / filter[y0_q4] select the
//                  horizontal / vertical kernels.
// x_step_q4/y_step_q4: sub-pixel step; anything other than 16 (i.e. scaling)
//                  is delegated to the C implementation.
// w, h:            block dimensions. bd: bit depth used for clamping.
void vpx_highbd_convolve8_avg_sve2(const uint16_t *src, ptrdiff_t src_stride,
                                   uint16_t *dst, ptrdiff_t dst_stride,
                                   const InterpKernel *filter, int x0_q4,
                                   int x_step_q4, int y0_q4, int y_step_q4,
                                   int w, int h, int bd) {
  if (x_step_q4 != 16 || y_step_q4 != 16) {
    // The fallback must be the averaging variant: the plain convolve would
    // overwrite dst instead of blending with it.
    vpx_highbd_convolve8_avg_c(src, src_stride, dst, dst_stride, filter, x0_q4,
                               x_step_q4, y0_q4, y_step_q4, w, h, bd);
    return;
  }

  assert(y_step_q4 == 16);
  assert(x_step_q4 == 16);

  // Given our constraints: w <= 64, h <= 64, taps <= 8 we can reduce the
  // maximum buffer size to 64 * (64 + 7).
  DECLARE_ALIGNED(32, uint16_t, im_block[64 * 71]);
  const int im_stride = 64;

  // Account for the vertical phase needing SUBPEL_TAPS / 2 - 1 lines prior
  // and SUBPEL_TAPS / 2 lines post.
  const int im_height = h + SUBPEL_TAPS - 1;
  const ptrdiff_t border_offset = SUBPEL_TAPS / 2 - 1;

  highbd_convolve8_2d_horiz_sve2(src - src_stride * border_offset, src_stride,
                                 im_block, im_stride, filter, x0_q4, x_step_q4,
                                 y0_q4, y_step_q4, w, im_height, bd);

  // Step into the temporary buffer border_offset rows to get actual frame data.
  vpx_highbd_convolve8_avg_vert_sve2(im_block + im_stride * border_offset,
                                     im_stride, dst, dst_stride, filter, x0_q4,
                                     x_step_q4, y0_q4, y_step_q4, w, h, bd);
}

View File

@ -18,6 +18,7 @@
#include "vpx_dsp/arm/mem_neon.h"
#include "vpx_dsp/arm/transpose_neon.h"
#include "vpx_dsp/arm/vpx_convolve8_neon.h"
#include "vpx_dsp/vpx_filter.h"
#include "vpx_ports/mem.h"
static INLINE void scaledconvolve_horiz_neon(

View File

@ -9,6 +9,7 @@
*/
#include <assert.h>
#include <limits.h>
#include "./bitwriter.h"
@ -16,16 +17,20 @@
#include "vpx_util/vpx_debug_util.h"
#endif
void vpx_start_encode(vpx_writer *br, uint8_t *source) {
void vpx_start_encode(vpx_writer *br, uint8_t *source, size_t size) {
br->lowvalue = 0;
br->range = 255;
br->count = -24;
br->buffer = source;
br->error = 0;
br->pos = 0;
// Make sure it is safe to cast br->pos to int in vpx_write().
if (size > INT_MAX) size = INT_MAX;
br->size = (unsigned int)size;
br->buffer = source;
vpx_write_bit(br, 0);
}
void vpx_stop_encode(vpx_writer *br) {
int vpx_stop_encode(vpx_writer *br) {
int i;
#if CONFIG_BITSTREAM_DEBUG
@ -34,9 +39,17 @@ void vpx_stop_encode(vpx_writer *br) {
for (i = 0; i < 32; i++) vpx_write_bit(br, 0);
// Ensure there's no ambiguous collision with any index marker bytes
if ((br->buffer[br->pos - 1] & 0xe0) == 0xc0) br->buffer[br->pos++] = 0;
if (!br->error && (br->buffer[br->pos - 1] & 0xe0) == 0xc0) {
if (br->pos < br->size) {
br->buffer[br->pos++] = 0;
} else {
br->error = 1;
}
}
#if CONFIG_BITSTREAM_DEBUG
bitstream_queue_set_skip_write(0);
#endif
return br->error ? -1 : 0;
}

View File

@ -29,12 +29,19 @@ typedef struct vpx_writer {
unsigned int lowvalue;
unsigned int range;
int count;
// Whether there has been an error.
int error;
// We maintain the invariant that pos <= size, i.e., we never write beyond
// the end of the buffer. If pos would be incremented to be greater than
// size, leave pos unchanged and set error to 1.
unsigned int pos;
unsigned int size;
uint8_t *buffer;
} vpx_writer;
void vpx_start_encode(vpx_writer *br, uint8_t *source);
void vpx_stop_encode(vpx_writer *br);
void vpx_start_encode(vpx_writer *br, uint8_t *source, size_t size);
// Returns 0 on success and returns -1 in case of error.
int vpx_stop_encode(vpx_writer *br);
static INLINE VPX_NO_UNSIGNED_SHIFT_CHECK void vpx_write(vpx_writer *br,
int bit,
@ -77,18 +84,25 @@ static INLINE VPX_NO_UNSIGNED_SHIFT_CHECK void vpx_write(vpx_writer *br,
if (count >= 0) {
int offset = shift - count;
if ((lowvalue << (offset - 1)) & 0x80000000) {
int x = br->pos - 1;
if (!br->error) {
if ((lowvalue << (offset - 1)) & 0x80000000) {
int x = (int)br->pos - 1;
while (x >= 0 && br->buffer[x] == 0xff) {
br->buffer[x] = 0;
x--;
while (x >= 0 && br->buffer[x] == 0xff) {
br->buffer[x] = 0;
x--;
}
// TODO(wtc): How to prove x >= 0?
br->buffer[x] += 1;
}
br->buffer[x] += 1;
if (br->pos < br->size) {
br->buffer[br->pos++] = (lowvalue >> (24 - offset)) & 0xff;
} else {
br->error = 1;
}
}
br->buffer[br->pos++] = (lowvalue >> (24 - offset)) & 0xff;
lowvalue <<= offset;
shift = count;
lowvalue &= 0xffffff;

View File

@ -8,24 +8,43 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include <assert.h>
#include <limits.h>
#include <stdlib.h>
#include "./vpx_config.h"
#include "./bitwriter_buffer.h"
// Prepares `wb` for writing into `bit_buffer`, which holds `size` bytes.
// The write position is reset to bit 0 and the error flag is cleared.
void vpx_wb_init(struct vpx_write_bit_buffer *wb, uint8_t *bit_buffer,
                 size_t size) {
  // Destination storage and its capacity in bytes.
  wb->bit_buffer = bit_buffer;
  wb->size = size;

  // Fresh writer state: nothing written yet, no error recorded.
  wb->bit_offset = 0;
  wb->error = 0;
}
int vpx_wb_has_error(const struct vpx_write_bit_buffer *wb) {
return wb->error;
}
// Returns the number of bytes touched so far, counting a partially filled
// final byte as a whole byte. Must not be called once an error was recorded.
size_t vpx_wb_bytes_written(const struct vpx_write_bit_buffer *wb) {
  assert(!wb->error);

  const size_t full_bytes = wb->bit_offset / CHAR_BIT;
  const size_t trailing_bits = wb->bit_offset % CHAR_BIT;

  if (trailing_bits > 0) {
    return full_bytes + 1;
  }
  return full_bytes;
}
void vpx_wb_write_bit(struct vpx_write_bit_buffer *wb, int bit) {
if (wb->error) return;
const int off = (int)wb->bit_offset;
const int p = off / CHAR_BIT;
const int q = CHAR_BIT - 1 - off % CHAR_BIT;
if ((size_t)p >= wb->size) {
wb->error = 1;
return;
}
if (q == CHAR_BIT - 1) {
wb->bit_buffer[p] = bit << q;
} else {
wb->bit_buffer[p] &= ~(1 << q);
assert((wb->bit_buffer[p] & (1 << q)) == 0);
wb->bit_buffer[p] |= bit << q;
}
wb->bit_offset = off + 1;

View File

@ -18,10 +18,24 @@ extern "C" {
#endif
struct vpx_write_bit_buffer {
uint8_t *bit_buffer;
// Whether there has been an error.
int error;
// We maintain the invariant that bit_offset <= size * CHAR_BIT, i.e., we
// never write beyond the end of bit_buffer. If bit_offset would be
// incremented to be greater than size * CHAR_BIT, leave bit_offset unchanged
// and set error to 1.
size_t bit_offset;
// Size of bit_buffer in bytes.
size_t size;
uint8_t *bit_buffer;
};
void vpx_wb_init(struct vpx_write_bit_buffer *wb, uint8_t *bit_buffer,
size_t size);
int vpx_wb_has_error(const struct vpx_write_bit_buffer *wb);
// Must not be called if vpx_wb_has_error(wb) returns true.
size_t vpx_wb_bytes_written(const struct vpx_write_bit_buffer *wb);
void vpx_wb_write_bit(struct vpx_write_bit_buffer *wb, int bit);

View File

@ -424,7 +424,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vpx_highbd_convolve_avg sse2 avx2 neon/;
add_proto qw/void vpx_highbd_convolve8/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd";
specialize qw/vpx_highbd_convolve8 avx2 neon/, "$sse2_x86_64";
specialize qw/vpx_highbd_convolve8 avx2 neon sve2/, "$sse2_x86_64";
add_proto qw/void vpx_highbd_convolve8_horiz/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd";
specialize qw/vpx_highbd_convolve8_horiz avx2 neon sve/, "$sse2_x86_64";
@ -433,7 +433,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vpx_highbd_convolve8_vert avx2 neon sve2/, "$sse2_x86_64";
add_proto qw/void vpx_highbd_convolve8_avg/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd";
specialize qw/vpx_highbd_convolve8_avg avx2 neon/, "$sse2_x86_64";
specialize qw/vpx_highbd_convolve8_avg avx2 neon sve2/, "$sse2_x86_64";
add_proto qw/void vpx_highbd_convolve8_avg_horiz/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd";
specialize qw/vpx_highbd_convolve8_avg_horiz avx2 neon sve/, "$sse2_x86_64";

View File

@ -9,13 +9,14 @@
*/
#include "./vpx_config.h"
#include "arm_cpudetect.h"
#include "vpx_ports/arm.h"
#include "vpx_ports/arm_cpudetect.h"
#if defined(__APPLE__)
#include <sys/sysctl.h>
#endif
#if !CONFIG_RUNTIME_CPU_DETECT || defined(__OpenBSD__)
#if !CONFIG_RUNTIME_CPU_DETECT
static int arm_get_cpu_caps(void) {
// This function should actually be a no-op. There is no way to adjust any of
@ -28,7 +29,7 @@ static int arm_get_cpu_caps(void) {
return flags;
}
#elif defined(__APPLE__) // end !CONFIG_RUNTIME_CPU_DETECT || defined(__OpenBSD__)
#elif defined(__APPLE__) // end !CONFIG_RUNTIME_CPU_DETECT
// sysctlbyname() parameter documentation for instruction set characteristics:
// https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics

View File

@ -19,8 +19,6 @@
extern "C" {
#endif
#define MAX_NUM_THREADS 64
// State of the worker thread object
typedef enum {
VPX_WORKER_STATUS_NOT_OK = 0, // object is unusable

View File

@ -20,11 +20,11 @@ origin:
# Human-readable identifier for this version/release
# Generally "version NNN", "tag SSS", "bookmark SSS"
release: 7fb8ceccf92c35cd5131b05c0502916715ebc76b (Fri Mar 15 01:11:50 2024).
release: 85dafa9c61f99330f484e77297684b42af6ff37d (Tue Apr 16 18:51:27 2024).
# Revision to pull in
# Must be a long or short commit SHA (long preferred)
revision: 7fb8ceccf92c35cd5131b05c0502916715ebc76b
revision: 85dafa9c61f99330f484e77297684b42af6ff37d
# The package's license, where possible using the mnemonic from
# https://spdx.org/licenses/

View File

@ -201,7 +201,6 @@ files = {
'libvpx/vpx/src/vpx_decoder.c',
'libvpx/vpx/src/vpx_encoder.c',
'libvpx/vpx/src/vpx_image.c',
'libvpx/vpx/src/vpx_tpl.c',
'libvpx/vpx_dsp/add_noise.c',
'libvpx/vpx_dsp/avg.c',
'libvpx/vpx_dsp/bitreader.c',
@ -479,7 +478,6 @@ files = {
'libvpx/vpx/src/vpx_decoder.c',
'libvpx/vpx/src/vpx_encoder.c',
'libvpx/vpx/src/vpx_image.c',
'libvpx/vpx/src/vpx_tpl.c',
'libvpx/vpx_dsp/add_noise.c',
'libvpx/vpx_dsp/avg.c',
'libvpx/vpx_dsp/bitreader.c',
@ -757,7 +755,6 @@ files = {
'libvpx/vpx/src/vpx_decoder.c',
'libvpx/vpx/src/vpx_encoder.c',
'libvpx/vpx/src/vpx_image.c',
'libvpx/vpx/src/vpx_tpl.c',
'libvpx/vpx_dsp/add_noise.c',
'libvpx/vpx_dsp/avg.c',
'libvpx/vpx_dsp/bitreader.c',
@ -1034,7 +1031,6 @@ files = {
'libvpx/vpx/src/vpx_decoder.c',
'libvpx/vpx/src/vpx_encoder.c',
'libvpx/vpx/src/vpx_image.c',
'libvpx/vpx/src/vpx_tpl.c',
'libvpx/vpx_dsp/add_noise.c',
'libvpx/vpx_dsp/avg.c',
'libvpx/vpx_dsp/bitreader.c',
@ -1307,7 +1303,6 @@ files = {
'libvpx/vpx/src/vpx_decoder.c',
'libvpx/vpx/src/vpx_encoder.c',
'libvpx/vpx/src/vpx_image.c',
'libvpx/vpx/src/vpx_tpl.c',
'libvpx/vpx_dsp/add_noise.c',
'libvpx/vpx_dsp/avg.c',
'libvpx/vpx_dsp/bitreader.c',
@ -1580,7 +1575,6 @@ files = {
'libvpx/vpx/src/vpx_decoder.c',
'libvpx/vpx/src/vpx_encoder.c',
'libvpx/vpx/src/vpx_image.c',
'libvpx/vpx/src/vpx_tpl.c',
'libvpx/vpx_dsp/add_noise.c',
'libvpx/vpx_dsp/avg.c',
'libvpx/vpx_dsp/bitreader.c',
@ -1829,7 +1823,6 @@ files = {
'libvpx/vpx/src/vpx_decoder.c',
'libvpx/vpx/src/vpx_encoder.c',
'libvpx/vpx/src/vpx_image.c',
'libvpx/vpx/src/vpx_tpl.c',
'libvpx/vpx_dsp/arm/avg_neon.c',
'libvpx/vpx_dsp/arm/avg_pred_neon.c',
'libvpx/vpx_dsp/arm/fdct16x16_neon.c',
@ -2081,7 +2074,6 @@ files = {
'libvpx/vpx/src/vpx_decoder.c',
'libvpx/vpx/src/vpx_encoder.c',
'libvpx/vpx/src/vpx_image.c',
'libvpx/vpx/src/vpx_tpl.c',
'libvpx/vpx_dsp/arm/avg_neon.c',
'libvpx/vpx_dsp/arm/avg_pred_neon.c',
'libvpx/vpx_dsp/arm/fdct16x16_neon.c',
@ -2326,7 +2318,6 @@ files = {
'libvpx/vpx/src/vpx_decoder.c',
'libvpx/vpx/src/vpx_encoder.c',
'libvpx/vpx/src/vpx_image.c',
'libvpx/vpx/src/vpx_tpl.c',
'libvpx/vpx_dsp/arm/avg_neon.c',
'libvpx/vpx_dsp/arm/avg_pred_neon.c',
'libvpx/vpx_dsp/arm/fdct16x16_neon.c',
@ -2552,7 +2543,6 @@ files = {
'libvpx/vpx/src/vpx_decoder.c',
'libvpx/vpx/src/vpx_encoder.c',
'libvpx/vpx/src/vpx_image.c',
'libvpx/vpx/src/vpx_tpl.c',
'libvpx/vpx_dsp/avg.c',
'libvpx/vpx_dsp/bitreader.c',
'libvpx/vpx_dsp/bitreader_buffer.c',