Bug 1509327 - Update dav1d from upstream to 36b807a. r=TD-Linux

Differential Revision: https://phabricator.services.mozilla.com/D13426

--HG--
extra : moz-landing-system : lando
Alex Chronopoulos 2018-12-01 21:59:40 +00:00
parent 5d689219f2
commit 04fe7aa697
69 changed files with 6807 additions and 1897 deletions

@ -21,4 +21,4 @@ To update to a fork, use
The last update was pulled from https://code.videolan.org/videolan/dav1d
The git commit ID used was 46e2a2d0cc451e1d6bb929f80088f8a7b8940dd0 (2018-10-25T16:51:31.000Z).
The git commit ID used was 197a19ad702d5e7472852efcde98feeb07f373e0 (2018-11-26T12:15:41.000Z).

@ -20,7 +20,7 @@ origin:
# Human-readable identifier for this version/release
# Generally "version NNN", "tag SSS", "bookmark SSS"
release: commit 46e2a2d0cc451e1d6bb929f80088f8a7b8940dd0
release: commit 36b807afe75040d9953bf63f68b67e6cd2fe4fc0
# The package's license, where possible using the mnemonic from
# https://spdx.org/licenses/

@ -9,7 +9,7 @@ build-debian:
- debian
- amd64
script:
- env CFLAGS='-Werror' meson build --buildtype release
- meson build --buildtype release --werror
- ninja -C build
- cd build && meson test -v
@ -20,7 +20,7 @@ build-debian-static:
- debian
- amd64
script:
- env CFLAGS='-Werror' meson build --buildtype release --default-library static
- meson build --buildtype release --default-library static --werror
- ninja -C build
- cd build && meson test -v
@ -30,12 +30,12 @@ build-win32:
tags:
- win32
script:
- env CFLAGS='-Werror'
meson build --buildtype release
--libdir lib
--prefix "$(pwd)/build/dav1d_install"
--cross-file /opt/crossfiles/i686-w64-mingw32.meson
-Ddefault_library=both
- meson build --buildtype release
--werror
--libdir lib
--prefix "$(pwd)/build/dav1d_install"
--cross-file /opt/crossfiles/i686-w64-mingw32.meson
-Ddefault_library=both
- ninja -C build
- ninja -C build install
artifacts:
@ -50,12 +50,12 @@ build-win64:
tags:
- win64
script:
- env CFLAGS='-Werror'
meson build --buildtype release
--libdir lib
--prefix "$(pwd)/build/dav1d_install"
--cross-file /opt/crossfiles/x86_64-w64-mingw32.meson
-Ddefault_library=both
- meson build --buildtype release
--werror
--libdir lib
--prefix "$(pwd)/build/dav1d_install"
--cross-file /opt/crossfiles/x86_64-w64-mingw32.meson
-Ddefault_library=both
- ninja -C build
- ninja -C build install
artifacts:
@ -66,19 +66,20 @@ build-win64:
build-debian-aarch64:
stage: build
image: registry.videolan.org:5000/dav1d-debian-unstable-aarch64:20181122182457
tags:
- aarch64
- debian
script:
- env CFLAGS='-Werror' meson build --buildtype release
- meson build --buildtype release --werror
- ninja -C build
- cd build && meson test -v
build-debian-aarch64-clang-5:
stage: build
image: registry.videolan.org:5000/dav1d-debian-unstable-aarch64:20181122182457
tags:
- aarch64
- clang5
- debian
script:
- env CC=clang-5.0 CFLAGS='-integrated-as' meson build --buildtype release
@ -90,18 +91,18 @@ build-macos:
tags:
- macos
script:
- env CFLAGS='-Werror' meson build --buildtype release -Ddefault_library=both
- meson build --buildtype release -Ddefault_library=both --werror
- ninja -C build
- cd build && meson test -v
build-debian-werror:
image: dav1d-debian-aarch64:201810240631
image: registry.videolan.org:5000/dav1d-debian-unstable-aarch64:20181122182457
stage: build
tags:
- aarch64
- debian
script:
- env CC='clang-7' CFLAGS='-Werror' meson build -Dbuild_tests=false
- env CC='clang-7' meson build --buildtype debug --werror
- ninja -C build
test-debian:
@ -122,3 +123,66 @@ test-debian:
- meson build --buildtype release -Dtestdata_tests=true
- ninja -C build
- cd build && time meson test -v
test-debian-asan:
image: registry.videolan.org:5000/dav1d-debian-unstable:20181114201132
stage: test
tags:
- debian
- amd64
cache:
key: testdata.git
paths:
- cache/dav1d-test-data.git/
variables:
ASAN_OPTIONS: 'detect_leaks=0'
script:
- test -d cache || mkdir cache
- test -d cache/dav1d-test-data.git && GIT_DIR=cache/dav1d-test-data.git git fetch --refmap=refs/heads/master:refs/heads/master origin master
- test -d cache/dav1d-test-data.git || git clone --bare https://code.videolan.org/videolan/dav1d-test-data.git cache/dav1d-test-data.git
- git clone cache/dav1d-test-data.git tests/dav1d-test-data
- meson build --buildtype debugoptimized -Dtestdata_tests=true -Db_sanitize=address -Dbuild_asm=false
- ninja -C build
- cd build && time meson test -v --setup=sanitizer
test-debian-msan:
image: registry.videolan.org:5000/dav1d-debian-unstable:20181114201132
stage: test
tags:
- debian
- amd64
cache:
key: testdata.git
paths:
- cache/dav1d-test-data.git/
variables:
MSAN_OPTIONS: 'exitcode=1'
script:
- test -d cache || mkdir cache
- test -d cache/dav1d-test-data.git && GIT_DIR=cache/dav1d-test-data.git git fetch --refmap=refs/heads/master:refs/heads/master origin master
- test -d cache/dav1d-test-data.git || git clone --bare https://code.videolan.org/videolan/dav1d-test-data.git cache/dav1d-test-data.git
- git clone cache/dav1d-test-data.git tests/dav1d-test-data
- env CC=clang meson build --buildtype debugoptimized -Dtestdata_tests=true -Db_sanitize=memory -Db_lundef=false -Dbuild_asm=false
- ninja -C build
- cd build && time meson test -v --setup=sanitizer
test-debian-ubsan:
image: registry.videolan.org:5000/dav1d-debian-unstable:20181114201132
stage: test
tags:
- debian
- amd64
cache:
key: testdata.git
paths:
- cache/dav1d-test-data.git/
variables:
UBSAN_OPTIONS: 'print_stacktrace=1:halt_on_error=1'
script:
- test -d cache || mkdir cache
- test -d cache/dav1d-test-data.git && GIT_DIR=cache/dav1d-test-data.git git fetch --refmap=refs/heads/master:refs/heads/master origin master
- test -d cache/dav1d-test-data.git || git clone --bare https://code.videolan.org/videolan/dav1d-test-data.git cache/dav1d-test-data.git
- git clone cache/dav1d-test-data.git tests/dav1d-test-data
- env CC=clang meson build --buildtype debugoptimized -Dtestdata_tests=true -Db_sanitize=undefined -Db_lundef=false -Dbuild_asm=false
- ninja -C build
- cd build && time meson test -v --setup=sanitizer

third_party/dav1d/doc/Doxyfile.in (new file)

@ -0,0 +1,19 @@
PROJECT_NAME = dav1d
OUTPUT_DIRECTORY = @DOXYGEN_OUTPUT@
STRIP_FROM_PATH = @DOXYGEN_STRIP@
OUTPUT_LANGUAGE = English
TAB_SIZE = 4
EXTRACT_ALL = YES
OPTIMIZE_OUTPUT_FOR_C = YES
DOXYFILE_ENCODING = UTF-8
TYPEDEF_HIDES_STRUCT = YES
QUIET = YES
WARNINGS = YES
WARN_IF_UNDOCUMENTED = YES
INPUT = @DOXYGEN_INPUT@
FILE_PATTERNS = *.h
GENERATE_HTML = YES
GENERATE_LATEX = NO

third_party/dav1d/doc/meson.build (new file)

@ -0,0 +1,42 @@
# Copyright © 2018, VideoLAN and dav1d authors
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
doxygen = find_program('doxygen', required: false)
if doxygen.found()
conf_data = configuration_data()
conf_data.set('DOXYGEN_INPUT', join_paths(meson.source_root(), 'include/dav1d'))
conf_data.set('DOXYGEN_STRIP', join_paths(meson.source_root(), 'include'))
conf_data.set('DOXYGEN_OUTPUT', meson.current_build_dir())
doxyfile = configure_file(input: 'Doxyfile.in',
output: 'Doxyfile',
configuration: conf_data)
custom_target('doc',
build_by_default: false,
command: [doxygen, doxyfile],
output: ['html']
)
endif

@ -25,8 +25,11 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __COMMON_H__
#define __COMMON_H__
#ifndef __DAV1D_COMMON_H__
#define __DAV1D_COMMON_H__
#include <stddef.h>
#include <stdint.h>
#ifndef DAV1D_API
#if defined _WIN32
@ -40,4 +43,19 @@
#endif
#endif
#endif // __COMMON_H__
/**
* Input packet metadata, copied from the input data used to decode each
* image into the matching structure of the output image returned to the
* user. Since these are metadata fields, they can be used for purposes
* other than the documented ones; they are passed from input data to
* output picture without being used internally.
*/
typedef struct Dav1dDataProps {
int64_t timestamp; ///< container timestamp of input data, INT64_MIN if unknown (default)
int64_t duration; ///< container duration of input data, 0 if unknown (default)
int64_t offset; ///< stream offset of input data, -1 if unknown (default)
size_t size; ///< packet size, default Dav1dData.sz
} Dav1dDataProps;
#endif // __DAV1D_COMMON_H__
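
As an illustrative caller-side sketch (not from the patch itself): values set on the input Dav1dData's `m` member come back on the `m` member of every picture decoded from that data. dav1d_data_create() and dav1d_send_data() are the entry points declared in data.h and dav1d.h further down in this patch; the packet pointer, size and timestamp are assumed to come from the caller's demuxer.

#include <errno.h>
#include <string.h>
#include "dav1d/dav1d.h"

static int make_tagged_data(Dav1dData *data, const uint8_t *pkt,
                            size_t pkt_sz, int64_t pts) {
    uint8_t *buf = dav1d_data_create(data, pkt_sz); // also sets data->m.size
    if (!buf) return -ENOMEM;
    memcpy(buf, pkt, pkt_sz);
    data->m.timestamp = pts; // stays INT64_MIN if left untouched
    return 0;
}
// Once this data is consumed via dav1d_send_data(), every Dav1dPicture
// produced from it carries the same values in its `m` member.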

@ -37,6 +37,7 @@ typedef struct Dav1dData {
const uint8_t *data; ///< data pointer
size_t sz; ///< data size
struct Dav1dRef *ref; ///< allocation origin
Dav1dDataProps m;
} Dav1dData;
/**
@ -45,7 +46,7 @@ typedef struct Dav1dData {
* @param data Input context.
* @param sz Size of the data that should be allocated.
*
* @return Pointer to the allocated bufferon success. NULL on error.
* @return Pointer to the allocated buffer on success. NULL on error.
*/
DAV1D_API uint8_t * dav1d_data_create(Dav1dData *data, size_t sz);

@ -41,10 +41,16 @@ extern "C" {
typedef struct Dav1dContext Dav1dContext;
typedef struct Dav1dRef Dav1dRef;
#define DAV1D_MAX_FRAME_THREADS 256
#define DAV1D_MAX_TILE_THREADS 64
typedef struct Dav1dSettings {
int n_frame_threads;
int n_tile_threads;
Dav1dPicAllocator allocator;
int apply_grain;
int operating_point; ///< select an operating point for scalable AV1 bitstreams (0 - 31)
int all_layers; ///< output all spatial layers of a scalable AV1 bitstream
} Dav1dSettings;
/**
@ -73,6 +79,22 @@ DAV1D_API void dav1d_default_settings(Dav1dSettings *s);
*/
DAV1D_API int dav1d_open(Dav1dContext **c_out, const Dav1dSettings *s);
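
As a usage sketch (not part of the patch), the new settings members are filled in by dav1d_default_settings() and can be overridden before dav1d_open(); the values below are illustrative, not the library defaults.

Dav1dSettings s;
dav1d_default_settings(&s);
s.n_frame_threads = 4;
s.n_tile_threads  = 2;
s.apply_grain     = 1;  // synthesize film grain when writing out pictures
s.operating_point = 0;  // 0-31, operating point to decode in scalable streams
s.all_layers      = 1;  // output all spatial layers of a scalable bitstream

Dav1dContext *c = NULL;
if (dav1d_open(&c, &s) < 0)
    abort();  // placeholder error handling
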
/**
* Parse a Sequence Header OBU from bitstream data.
*
* @param out Output Sequence Header.
* @param buf The data to be parsed.
* @param sz Size of the data.
*
* @return 0 on success, or < 0 (a negative errno code) on error.
*
* @note It is safe to feed this function data containing OBUs other than a
* Sequence Header, as they will simply be ignored. If there is more than
* one Sequence Header OBU present, only the last will be returned.
*/
DAV1D_API int dav1d_parse_sequence_header(Dav1dSequenceHeader *out,
const uint8_t *buf, const size_t sz);
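
A short probing sketch (not from the patch): buf/buf_sz are assumed to be a caller-provided byte buffer holding bitstream data.

Dav1dSequenceHeader seq;
if (dav1d_parse_sequence_header(&seq, buf, buf_sz) < 0) {
    // no valid Sequence Header OBU found in buf
} else {
    // e.g. negotiate output: seq.max_width x seq.max_height,
    // pixel layout seq.layout, bitdepth 8 + 2 * seq.hbd
}
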
/**
* Feed bitstream data to the decoder.
*
@ -106,6 +128,39 @@ DAV1D_API int dav1d_send_data(Dav1dContext *c, Dav1dData *in);
*
* @note To drain buffered frames from the decoder (i.e. on end of stream),
* call this function until it returns -EAGAIN.
*
* @code{.c}
* Dav1dData data = { 0 };
* Dav1dPicture p = { 0 };
* int res;
*
* read_data(&data);
* do {
* res = dav1d_send_data(c, &data);
* // Keep going even if the function can't consume the current data
* //   packet. It eventually will after one or more frames have been
* //   returned in this loop.
* if (res < 0 && res != -EAGAIN)
* free_and_abort();
* res = dav1d_get_picture(c, &p);
* if (res < 0) {
* if (res != -EAGAIN)
* free_and_abort();
* } else
* output_and_unref_picture(&p);
* // Stay in the loop as long as there's data to consume.
* } while (data.sz || read_data(&data) == SUCCESS);
*
* // Handle EOS by draining all buffered frames.
* do {
* res = dav1d_get_picture(c, &p);
* if (res < 0) {
* if (res != -EAGAIN)
* free_and_abort();
* } else
* output_and_unref_picture(&p);
* } while (res == 0);
* @endcode
*/
DAV1D_API int dav1d_get_picture(Dav1dContext *c, Dav1dPicture *out);
@ -117,9 +172,14 @@ DAV1D_API int dav1d_get_picture(Dav1dContext *c, Dav1dPicture *out);
DAV1D_API void dav1d_close(Dav1dContext **c_out);
/**
* Flush all delayed frames in decoder, to be used when seeking.
* Flush all delayed frames in decoder and clear internal decoder state,
* to be used when seeking.
*
* @param c Input decoder instance.
*
* @note Decoding will start only after a valid sequence header OBU is
* delivered to dav1d_send_data().
*
*/
DAV1D_API void dav1d_flush(Dav1dContext *c);
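
A seek, in sketch form (not from the patch): drop everything buffered in the decoder, then resume feeding packets from a keyframe. `pending` is assumed to be the Dav1dData the caller was in the middle of sending; seek_to_keyframe() is a hypothetical demuxer-side helper.

dav1d_flush(c);                         // discard delayed frames and internal state
dav1d_data_unref(&pending);             // drop any packet we were still trying to send
seek_to_keyframe(demuxer, target_pts);  // hypothetical demuxer-side helper
// then continue with the dav1d_send_data()/dav1d_get_picture() loop as above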

@ -0,0 +1,385 @@
/*
* Copyright © 2018, VideoLAN and dav1d authors
* Copyright © 2018, Two Orioles, LLC
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __DAV1D_HEADERS_H__
#define __DAV1D_HEADERS_H__
// Constants from Section 3. "Symbols and abbreviated terms"
#define DAV1D_MAX_CDEF_STRENGTHS 8
#define DAV1D_MAX_OPERATING_POINTS 32
#define DAV1D_MAX_TILE_COLS 64
#define DAV1D_MAX_TILE_ROWS 64
#define DAV1D_MAX_SEGMENTS 8
#define DAV1D_NUM_REF_FRAMES 8
#define DAV1D_PRIMARY_REF_NONE 7
#define DAV1D_REFS_PER_FRAME 7
#define DAV1D_TOTAL_REFS_PER_FRAME (DAV1D_REFS_PER_FRAME + 1)
enum Dav1dTxfmMode {
DAV1D_TX_4X4_ONLY,
DAV1D_TX_LARGEST,
DAV1D_TX_SWITCHABLE,
DAV1D_N_TX_MODES,
};
enum Dav1dFilterMode {
DAV1D_FILTER_8TAP_REGULAR,
DAV1D_FILTER_8TAP_SMOOTH,
DAV1D_FILTER_8TAP_SHARP,
DAV1D_N_SWITCHABLE_FILTERS,
DAV1D_FILTER_BILINEAR = DAV1D_N_SWITCHABLE_FILTERS,
DAV1D_N_FILTERS,
DAV1D_FILTER_SWITCHABLE = DAV1D_N_FILTERS,
};
enum Dav1dAdaptiveBoolean {
DAV1D_OFF = 0,
DAV1D_ON = 1,
DAV1D_ADAPTIVE = 2,
};
enum Dav1dRestorationType {
DAV1D_RESTORATION_NONE,
DAV1D_RESTORATION_SWITCHABLE,
DAV1D_RESTORATION_WIENER,
DAV1D_RESTORATION_SGRPROJ,
};
enum Dav1dWarpedMotionType {
DAV1D_WM_TYPE_IDENTITY,
DAV1D_WM_TYPE_TRANSLATION,
DAV1D_WM_TYPE_ROT_ZOOM,
DAV1D_WM_TYPE_AFFINE,
};
typedef struct Dav1dWarpedMotionParams {
enum Dav1dWarpedMotionType type;
int32_t matrix[6];
union {
struct {
int16_t alpha, beta, gamma, delta;
};
int16_t abcd[4];
};
} Dav1dWarpedMotionParams;
enum Dav1dPixelLayout {
DAV1D_PIXEL_LAYOUT_I400, ///< monochrome
DAV1D_PIXEL_LAYOUT_I420, ///< 4:2:0 planar
DAV1D_PIXEL_LAYOUT_I422, ///< 4:2:2 planar
DAV1D_PIXEL_LAYOUT_I444, ///< 4:4:4 planar
};
enum Dav1dFrameType {
DAV1D_FRAME_TYPE_KEY = 0, ///< Key Intra frame
DAV1D_FRAME_TYPE_INTER = 1, ///< Inter frame
DAV1D_FRAME_TYPE_INTRA = 2, ///< Non key Intra frame
DAV1D_FRAME_TYPE_SWITCH = 3, ///< Switch Inter frame
};
enum Dav1dColorPrimaries {
DAV1D_COLOR_PRI_BT709 = 1,
DAV1D_COLOR_PRI_UNKNOWN = 2,
DAV1D_COLOR_PRI_BT470M = 4,
DAV1D_COLOR_PRI_BT470BG = 5,
DAV1D_COLOR_PRI_BT601 = 6,
DAV1D_COLOR_PRI_SMPTE240 = 7,
DAV1D_COLOR_PRI_FILM = 8,
DAV1D_COLOR_PRI_BT2020 = 9,
DAV1D_COLOR_PRI_XYZ = 10,
DAV1D_COLOR_PRI_SMPTE431 = 11,
DAV1D_COLOR_PRI_SMPTE432 = 12,
DAV1D_COLOR_PRI_EBU3213 = 22,
};
enum Dav1dTransferCharacteristics {
DAV1D_TRC_BT709 = 1,
DAV1D_TRC_UNKNOWN = 2,
DAV1D_TRC_BT470M = 4,
DAV1D_TRC_BT470BG = 5,
DAV1D_TRC_BT601 = 6,
DAV1D_TRC_SMPTE240 = 7,
DAV1D_TRC_LINEAR = 8,
DAV1D_TRC_LOG100 = 9, ///< logarithmic (100:1 range)
DAV1D_TRC_LOG100_SQRT10 = 10, ///< logarithmic (100*sqrt(10):1 range)
DAV1D_TRC_IEC61966 = 11,
DAV1D_TRC_BT1361 = 12,
DAV1D_TRC_SRGB = 13,
DAV1D_TRC_BT2020_10BIT = 14,
DAV1D_TRC_BT2020_12BIT = 15,
DAV1D_TRC_SMPTE2084 = 16, ///< PQ
DAV1D_TRC_SMPTE428 = 17,
DAV1D_TRC_HLG = 18, ///< hybrid log/gamma (BT.2100 / ARIB STD-B67)
};
enum Dav1dMatrixCoefficients {
DAV1D_MC_IDENTITY = 0,
DAV1D_MC_BT709 = 1,
DAV1D_MC_UNKNOWN = 2,
DAV1D_MC_FCC = 4,
DAV1D_MC_BT470BG = 5,
DAV1D_MC_BT601 = 6,
DAV1D_MC_SMPTE240 = 7,
DAV1D_MC_SMPTE_YCGCO = 8,
DAV1D_MC_BT2020_NCL = 9,
DAV1D_MC_BT2020_CL = 10,
DAV1D_MC_SMPTE2085 = 11,
DAV1D_MC_CHROMAT_NCL = 12, ///< Chromaticity-derived
DAV1D_MC_CHROMAT_CL = 13,
DAV1D_MC_ICTCP = 14,
};
enum Dav1dChromaSamplePosition {
DAV1D_CHR_UNKNOWN = 0,
DAV1D_CHR_VERTICAL = 1, ///< Horizontally co-located with luma(0, 0)
///< sample, between two vertical samples
DAV1D_CHR_COLOCATED = 2, ///< Co-located with luma(0, 0) sample
};
typedef struct Dav1dSequenceHeader {
/**
* Stream profile, 0 for 8-10 bits/component 4:2:0 or monochrome;
* 1 for 8-10 bits/component 4:4:4; 2 for 4:2:2 at any bits/component,
* or 12 bits/component at any chroma subsampling.
*/
int profile;
/**
* Maximum dimensions for this stream. In non-scalable streams, these
* are often the actual dimensions of the stream, although that is not
* a normative requirement.
*/
int max_width, max_height;
enum Dav1dPixelLayout layout; ///< format of the picture
enum Dav1dColorPrimaries pri; ///< color primaries (av1)
enum Dav1dTransferCharacteristics trc; ///< transfer characteristics (av1)
enum Dav1dMatrixCoefficients mtrx; ///< matrix coefficients (av1)
enum Dav1dChromaSamplePosition chr; ///< chroma sample position (av1)
/**
* Pixel data uses JPEG pixel range ([0,255] for 8bits) instead of
* MPEG pixel range ([16,235] for 8bits luma, [16,240] for 8bits chroma).
*/
int color_range;
int num_operating_points;
struct Dav1dSequenceHeaderOperatingPoint {
int major_level, minor_level;
int initial_display_delay;
int idc;
int tier;
int decoder_model_param_present;
int decoder_buffer_delay;
int encoder_buffer_delay;
int low_delay_mode;
int display_model_param_present;
} operating_points[DAV1D_MAX_OPERATING_POINTS];
int still_picture;
int reduced_still_picture_header;
int timing_info_present;
int num_units_in_tick;
int time_scale;
int equal_picture_interval;
int num_ticks_per_picture;
int decoder_model_info_present;
int encoder_decoder_buffer_delay_length;
int num_units_in_decoding_tick;
int buffer_removal_delay_length;
int frame_presentation_delay_length;
int display_model_info_present;
int width_n_bits, height_n_bits;
int frame_id_numbers_present;
int delta_frame_id_n_bits;
int frame_id_n_bits;
int sb128;
int filter_intra;
int intra_edge_filter;
int inter_intra;
int masked_compound;
int warped_motion;
int dual_filter;
int order_hint;
int jnt_comp;
int ref_frame_mvs;
enum Dav1dAdaptiveBoolean screen_content_tools;
enum Dav1dAdaptiveBoolean force_integer_mv;
int order_hint_n_bits;
int super_res;
int cdef;
int restoration;
/**
* 0, 1 and 2 mean 8, 10 or 12 bits/component, respectively. This is not
* exactly the same as 'hbd' from the spec; the spec's hbd distinguishes
* between 8 (0) and 10-12 (1) bits/component, and another element
* (twelve_bit) to distinguish between 10 and 12 bits/component. To get
* the spec's hbd, use !!our_hbd, and to get twelve_bit, use hbd == 2.
*/
int hbd;
int ss_hor, ss_ver, monochrome;
int color_description_present;
int separate_uv_delta_q;
int film_grain_present;
} Dav1dSequenceHeader;
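
A small sketch (not from the patch) restating the hbd mapping described above as code, with seq_hdr pointing at a Dav1dSequenceHeader:

const int bitdepth   = 8 + 2 * seq_hdr->hbd;  // 8, 10 or 12 bits/component
const int spec_hbd   = !!seq_hdr->hbd;        // the spec's high_bitdepth flag
const int twelve_bit = seq_hdr->hbd == 2;     // the spec's twelve_bit flag
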
typedef struct Dav1dSegmentationData {
int delta_q;
int delta_lf_y_v, delta_lf_y_h, delta_lf_u, delta_lf_v;
int ref;
int skip;
int globalmv;
} Dav1dSegmentationData;
typedef struct Dav1dSegmentationDataSet {
Dav1dSegmentationData d[DAV1D_MAX_SEGMENTS];
int preskip;
int last_active_segid;
} Dav1dSegmentationDataSet;
typedef struct Dav1dLoopfilterModeRefDeltas {
int mode_delta[2 /* is_zeromv */];
int ref_delta[DAV1D_TOTAL_REFS_PER_FRAME];
} Dav1dLoopfilterModeRefDeltas;
typedef struct Dav1dFilmGrainData {
uint16_t seed;
int num_y_points;
uint8_t y_points[14][2 /* value, scaling */];
int chroma_scaling_from_luma;
int num_uv_points[2];
uint8_t uv_points[2][10][2 /* value, scaling */];
int scaling_shift;
int ar_coeff_lag;
int8_t ar_coeffs_y[24];
int8_t ar_coeffs_uv[2][25];
int ar_coeff_shift;
int grain_scale_shift;
int uv_mult[2];
int uv_luma_mult[2];
int uv_offset[2];
int overlap_flag;
int clip_to_restricted_range;
} Dav1dFilmGrainData;
typedef struct Dav1dFrameHeader {
enum Dav1dFrameType frame_type; ///< type of the picture
int width[2 /* { coded_width, superresolution_upscaled_width } */], height;
int frame_offset; ///< frame number
struct {
int present, update;
Dav1dFilmGrainData data;
} film_grain; ///< film grain parameters
int temporal_id, spatial_id; ///< spatial and temporal id of the frame for SVC
int show_existing_frame;
int existing_frame_idx;
int frame_id;
int frame_presentation_delay;
int show_frame;
int showable_frame;
int error_resilient_mode;
int disable_cdf_update;
int allow_screen_content_tools;
int force_integer_mv;
int frame_size_override;
int primary_ref_frame;
int buffer_removal_time_present;
struct Dav1dFrameHeaderOperatingPoint {
int buffer_removal_time;
} operating_points[DAV1D_MAX_OPERATING_POINTS];
int refresh_frame_flags;
int render_width, render_height;
struct {
int width_scale_denominator;
int enabled;
} super_res;
int have_render_size;
int allow_intrabc;
int frame_ref_short_signaling;
int refidx[DAV1D_REFS_PER_FRAME];
int hp;
enum Dav1dFilterMode subpel_filter_mode;
int switchable_motion_mode;
int use_ref_frame_mvs;
int refresh_context;
struct {
int uniform;
unsigned n_bytes;
int min_log2_cols, max_log2_cols, log2_cols, cols;
int min_log2_rows, max_log2_rows, log2_rows, rows;
uint16_t col_start_sb[DAV1D_MAX_TILE_COLS + 1];
uint16_t row_start_sb[DAV1D_MAX_TILE_ROWS + 1];
int update;
} tiling;
struct {
int yac;
int ydc_delta;
int udc_delta, uac_delta, vdc_delta, vac_delta;
int qm, qm_y, qm_u, qm_v;
} quant;
struct {
int enabled, update_map, temporal, update_data;
Dav1dSegmentationDataSet seg_data;
int lossless[DAV1D_MAX_SEGMENTS], qidx[DAV1D_MAX_SEGMENTS];
} segmentation;
struct {
struct {
int present;
int res_log2;
} q;
struct {
int present;
int res_log2;
int multi;
} lf;
} delta;
int all_lossless;
struct {
int level_y[2 /* dir */];
int level_u, level_v;
int mode_ref_delta_enabled;
int mode_ref_delta_update;
Dav1dLoopfilterModeRefDeltas mode_ref_deltas;
int sharpness;
} loopfilter;
struct {
int damping;
int n_bits;
int y_strength[DAV1D_MAX_CDEF_STRENGTHS];
int uv_strength[DAV1D_MAX_CDEF_STRENGTHS];
} cdef;
struct {
enum Dav1dRestorationType type[3 /* plane */];
int unit_size[2 /* y, uv */];
} restoration;
enum Dav1dTxfmMode txfm_mode;
int switchable_comp_refs;
int skip_mode_allowed, skip_mode_enabled, skip_mode_refs[2];
int warp_motion;
int reduced_txtp_set;
Dav1dWarpedMotionParams gmv[DAV1D_REFS_PER_FRAME];
} Dav1dFrameHeader;
#endif /* __DAV1D_HEADERS_H__ */

@ -32,100 +32,18 @@
#include <stdint.h>
#include "common.h"
enum Dav1dPixelLayout {
DAV1D_PIXEL_LAYOUT_I400, ///< monochrome
DAV1D_PIXEL_LAYOUT_I420, ///< 4:2:0 planar
DAV1D_PIXEL_LAYOUT_I422, ///< 4:2:2 planar
DAV1D_PIXEL_LAYOUT_I444, ///< 4:4:4 planar
};
enum Dav1dFrameType {
DAV1D_FRAME_TYPE_KEY = 0, ///< Key Intra frame
DAV1D_FRAME_TYPE_INTER = 1, ///< Inter frame
DAV1D_FRAME_TYPE_INTRA = 2, ///< Non key Intra frame
DAV1D_FRAME_TYPE_SWITCH = 3, ///< Switch Inter frame
};
enum Dav1dColorPrimaries {
DAV1D_COLOR_PRI_BT709 = 1,
DAV1D_COLOR_PRI_UNKNOWN = 2,
DAV1D_COLOR_PRI_BT470M = 4,
DAV1D_COLOR_PRI_BT470BG = 5,
DAV1D_COLOR_PRI_BT601 = 6,
DAV1D_COLOR_PRI_SMPTE240 = 7,
DAV1D_COLOR_PRI_FILM = 8,
DAV1D_COLOR_PRI_BT2020 = 9,
DAV1D_COLOR_PRI_XYZ = 10,
DAV1D_COLOR_PRI_SMPTE431 = 11,
DAV1D_COLOR_PRI_SMPTE432 = 12,
DAV1D_COLOR_PRI_EBU3213 = 22,
};
enum Dav1dTransferCharacteristics {
DAV1D_TRC_BT709 = 1,
DAV1D_TRC_UNKNOWN = 2,
DAV1D_TRC_BT470M = 4,
DAV1D_TRC_BT470BG = 5,
DAV1D_TRC_BT601 = 6,
DAV1D_TRC_SMPTE240 = 7,
DAV1D_TRC_LINEAR = 8,
DAV1D_TRC_LOG100 = 9, ///< logarithmic (100:1 range)
DAV1D_TRC_LOG100_SQRT10 = 10, ///< lograithmic (100*sqrt(10):1 range)
DAV1D_TRC_IEC61966 = 11,
DAV1D_TRC_BT1361 = 12,
DAV1D_TRC_SRGB = 13,
DAV1D_TRC_BT2020_10BIT = 14,
DAV1D_TRC_BT2020_12BIT = 15,
DAV1D_TRC_SMPTE2084 = 16, ///< PQ
DAV1D_TRC_SMPTE428 = 17,
DAV1D_TRC_HLG = 18, ///< hybrid log/gamma (BT.2100 / ARIB STD-B67)
};
enum Dav1dMatrixCoefficients {
DAV1D_MC_IDENTITY = 0,
DAV1D_MC_BT709 = 1,
DAV1D_MC_UNKNOWN = 2,
DAV1D_MC_FCC = 4,
DAV1D_MC_BT470BG = 5,
DAV1D_MC_BT601 = 6,
DAV1D_MC_SMPTE240 = 7,
DAV1D_MC_SMPTE_YCGCO = 8,
DAV1D_MC_BT2020_NCL = 9,
DAV1D_MC_BT2020_CL = 10,
DAV1D_MC_SMPTE2085 = 11,
DAV1D_MC_CHROMAT_NCL = 12, ///< Chromaticity-derived
DAV1D_MC_CHROMAT_CL = 13,
DAV1D_MC_ICTCP = 14,
};
enum Dav1dChromaSamplePosition {
DAV1D_CHR_UNKNOWN = 0,
DAV1D_CHR_VERTICAL = 1, ///< Horizontally co-located with luma(0, 0)
///< sample, between two vertical samples
DAV1D_CHR_COLOCATED = 2, ///< Co-located with luma(0, 0) sample
};
#include "headers.h"
typedef struct Dav1dPictureParameters {
int w; ///< width (in pixels)
int h; ///< height (in pixels)
enum Dav1dPixelLayout layout; ///< format of the picture
enum Dav1dFrameType type; ///< type of the picture
int bpc; ///< bits per pixel component (8 or 10)
enum Dav1dColorPrimaries pri; ///< color primaries (av1)
enum Dav1dTransferCharacteristics trc; ///< transfer characteristics (av1)
enum Dav1dMatrixCoefficients mtrx; ///< matrix coefficients (av1)
enum Dav1dChromaSamplePosition chr; ///< chroma sample position (av1)
/**
* Pixel data uses JPEG pixel range ([0,255] for 8bits) instead of
* MPEG pixel range ([16,235] for 8bits luma, [16,240] for 8bits chroma).
*/
int fullrange;
} Dav1dPictureParameters;
typedef struct Dav1dPicture {
int poc; ///< frame number
Dav1dSequenceHeader *seq_hdr;
Dav1dFrameHeader *frame_hdr;
/**
* Pointers to planar image data (Y is [0], U is [1], V is [2]). The data
@ -135,7 +53,6 @@ typedef struct Dav1dPicture {
* zero'ed out.
*/
void *data[3];
struct Dav1dRef *ref; ///< allocation origin
/**
* Number of bytes between 2 lines in data[] for luma [0] or chroma [1].
@ -143,6 +60,8 @@ typedef struct Dav1dPicture {
ptrdiff_t stride[2];
Dav1dPictureParameters p;
Dav1dDataProps m;
struct Dav1dRef *frame_hdr_ref, *seq_hdr_ref, *ref; ///< allocation origins
void *allocator_data; ///< pointer managed by the allocator
} Dav1dPicture;
@ -152,7 +71,7 @@ typedef struct Dav1dPicAllocator {
/**
* Allocate the picture buffer based on the Dav1dPictureParameters.
*
* The data[0], data[1] and data[2] must be 32 bits aligned and with a
* The data[0], data[1] and data[2] must be 32 byte aligned and with a
* pixel width/height multiple of 128 pixels.
* data[1] and data[2] must share the same stride[1].
*
@ -170,14 +89,10 @@ typedef struct Dav1dPicAllocator {
/**
* Release the picture buffer.
*
* @param buf The buffer that was returned by
* alloc_picture_callback().
* @param allocator_tag The Dav1dPicture.allocator_data that was filled by
* alloc_picture_callback()
* @param cookie Custom pointer passed to all calls.
* @param pic The picture that was filled by alloc_picture_callback().
* @param cookie Custom pointer passed to all calls.
*/
void (*release_picture_callback)(uint8_t *buf, void *allocator_data,
void *cookie);
void (*release_picture_callback)(Dav1dPicture *pic, void *cookie);
} Dav1dPicAllocator;
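
For illustration, a minimal malloc-based allocator sketch (not from this patch). It assumes the alloc callback mirrors the (Dav1dPicture *, void *) shape of the release callback shown above, and it follows the documented constraints: 32-byte aligned planes, dimensions padded to a multiple of 128 pixels, and a shared chroma stride.

#include <errno.h>
#include <stdint.h>
#include <stdlib.h>
#include "dav1d/picture.h"

static int my_alloc_picture(Dav1dPicture *p, void *cookie) {
    (void) cookie;
    const int hbd        = p->p.bpc > 8;            // 2 bytes per pixel above 8 bpc
    const int aligned_w  = (p->p.w + 127) & ~127;   // pad to a multiple of 128
    const int aligned_h  = (p->p.h + 127) & ~127;
    const int has_chroma = p->p.layout != DAV1D_PIXEL_LAYOUT_I400;
    const int ss_ver     = p->p.layout == DAV1D_PIXEL_LAYOUT_I420;
    const int ss_hor     = p->p.layout != DAV1D_PIXEL_LAYOUT_I444;

    p->stride[0] = (ptrdiff_t) aligned_w << hbd;
    p->stride[1] = has_chroma ? (ptrdiff_t) (aligned_w >> ss_hor) << hbd : 0;
    const size_t y_sz  = (size_t) p->stride[0] * aligned_h;
    const size_t uv_sz = (size_t) p->stride[1] * (aligned_h >> ss_ver);

    uint8_t *buf = aligned_alloc(32, y_sz + 2 * uv_sz);  // 32-byte aligned planes
    if (!buf) return -ENOMEM;
    p->data[0] = buf;
    p->data[1] = has_chroma ? buf + y_sz         : NULL;
    p->data[2] = has_chroma ? buf + y_sz + uv_sz : NULL;
    p->allocator_data = buf;   // handed back to the release callback below
    return 0;
}

static void my_release_picture(Dav1dPicture *p, void *cookie) {
    (void) cookie;
    free(p->allocator_data);
}
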
/**

@ -324,6 +324,8 @@ endif
subdir('include')
subdir('doc')
subdir('src')
subdir('tools')

@ -0,0 +1,627 @@
/*
* Copyright © 2018, VideoLAN and dav1d authors
* Copyright © 2018, Martin Storsjo
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "src/arm/asm.S"
// void dav1d_wiener_filter_h_neon(int16_t *dst, const pixel (*left)[4],
// const pixel *src, ptrdiff_t stride,
// const int16_t fh[7], const intptr_t w,
// int h, enum LrEdgeFlags edges);
function wiener_filter_h_neon, export=1
mov w8, w5
ld1 {v0.8h}, [x4]
mov w9, #(1 << 14) - (1 << 2)
dup v30.8h, w9
movi v31.8h, #8, lsl #8
// Calculate mid_stride
add w10, w5, #7
bic w10, w10, #7
lsl w10, w10, #1
// Clear the last unused element of v0, to allow filtering a single
// pixel with one plain mul+addv.
ins v0.h[7], wzr
// Set up pointers for reading/writing alternate rows
add x12, x0, x10
lsl w10, w10, #1
add x13, x2, x3
lsl x3, x3, #1
// Subtract the width from mid_stride
sub x10, x10, w5, uxtw #1
// For w >= 8, we read (w+5)&~7+8 pixels, for w < 8 we read 16 pixels.
cmp w5, #8
add w11, w5, #13
bic w11, w11, #7
b.ge 1f
mov w11, #16
1:
sub x3, x3, w11, uxtw
// Set up the src pointers to include the left edge, for LR_HAVE_LEFT, left == NULL
tst w7, #1 // LR_HAVE_LEFT
b.eq 2f
// LR_HAVE_LEFT
cbnz x1, 0f
// left == NULL
sub x2, x2, #3
sub x13, x13, #3
b 1f
0: // LR_HAVE_LEFT, left != NULL
2: // !LR_HAVE_LEFT, increase the stride.
// For this case we don't read the left 3 pixels from the src pointer,
// but shift it as if we had done that.
add x3, x3, #3
1: // Loop vertically
ld1 {v3.16b}, [x2], #16
ld1 {v5.16b}, [x13], #16
tst w7, #1 // LR_HAVE_LEFT
b.eq 0f
cbz x1, 2f
// LR_HAVE_LEFT, left != NULL
ld1 {v2.s}[3], [x1], #4
// Move x2/x13 back to account for the last 3 bytes we loaded earlier,
// which we'll shift out.
sub x2, x2, #3
sub x13, x13, #3
ld1 {v4.s}[3], [x1], #4
ext v3.16b, v2.16b, v3.16b, #13
ext v5.16b, v4.16b, v5.16b, #13
b 2f
0:
// !LR_HAVE_LEFT, fill v2 with the leftmost byte
// and shift v3 to have 3x the first byte at the front.
dup v2.16b, v3.b[0]
dup v4.16b, v5.b[0]
// Move x2 back to account for the last 3 bytes we loaded before,
// which we shifted out.
sub x2, x2, #3
sub x13, x13, #3
ext v3.16b, v2.16b, v3.16b, #13
ext v5.16b, v4.16b, v5.16b, #13
2:
uxtl v2.8h, v3.8b
uxtl2 v3.8h, v3.16b
uxtl v4.8h, v5.8b
uxtl2 v5.8h, v5.16b
tst w7, #2 // LR_HAVE_RIGHT
b.ne 4f
// If we'll need to pad the right edge, load that byte to pad with
// here since we can find it pretty easily from here.
sub w9, w5, #14
ldr b28, [x2, w9, sxtw]
ldr b29, [x13, w9, sxtw]
// Fill v28/v29 with the right padding pixel
dup v28.8b, v28.b[0]
dup v29.8b, v29.b[0]
uxtl v28.8h, v28.8b
uxtl v29.8h, v29.8b
3: // !LR_HAVE_RIGHT
// If we'll have to pad the right edge we need to quit early here.
cmp w5, #11
b.ge 4f // If w >= 11, all used input pixels are valid
cmp w5, #7
b.ge 5f // If w >= 7, we can filter 4 pixels
b 6f
4: // Loop horizontally
.macro filter wd
// Interleaving the mul/mla chains actually hurts performance
// significantly on Cortex A53, thus keeping mul/mla tightly
// chained like this.
ext v16.16b, v2.16b, v3.16b, #2
ext v17.16b, v2.16b, v3.16b, #4
ext v18.16b, v2.16b, v3.16b, #6
ext v19.16b, v2.16b, v3.16b, #8
ext v20.16b, v2.16b, v3.16b, #10
ext v21.16b, v2.16b, v3.16b, #12
mul v6\wd, v2\wd, v0.h[0]
mla v6\wd, v16\wd, v0.h[1]
mla v6\wd, v17\wd, v0.h[2]
mla v6\wd, v18\wd, v0.h[3]
mla v6\wd, v19\wd, v0.h[4]
mla v6\wd, v20\wd, v0.h[5]
mla v6\wd, v21\wd, v0.h[6]
ext v22.16b, v4.16b, v5.16b, #2
ext v23.16b, v4.16b, v5.16b, #4
ext v24.16b, v4.16b, v5.16b, #6
ext v25.16b, v4.16b, v5.16b, #8
ext v26.16b, v4.16b, v5.16b, #10
ext v27.16b, v4.16b, v5.16b, #12
mul v7\wd, v4\wd, v0.h[0]
mla v7\wd, v22\wd, v0.h[1]
mla v7\wd, v23\wd, v0.h[2]
mla v7\wd, v24\wd, v0.h[3]
mla v7\wd, v25\wd, v0.h[4]
mla v7\wd, v26\wd, v0.h[5]
mla v7\wd, v27\wd, v0.h[6]
shl v18\wd, v18\wd, #7
shl v24\wd, v24\wd, #7
sub v18\wd, v18\wd, v30\wd
sub v24\wd, v24\wd, v30\wd
sqadd v6\wd, v6\wd, v18\wd
sqadd v7\wd, v7\wd, v24\wd
sshr v6\wd, v6\wd, #3
sshr v7\wd, v7\wd, #3
add v6\wd, v6\wd, v31\wd
add v7\wd, v7\wd, v31\wd
.endm
filter .8h
st1 {v6.8h}, [x0], #16
st1 {v7.8h}, [x12], #16
subs w5, w5, #8
b.le 9f
tst w7, #2 // LR_HAVE_RIGHT
mov v2.16b, v3.16b
mov v4.16b, v5.16b
ld1 {v3.8b}, [x2], #8
ld1 {v5.8b}, [x13], #8
uxtl v3.8h, v3.8b
uxtl v5.8h, v5.8b
b.ne 4b // If we don't need to pad, just keep filtering.
b 3b // If we need to pad, check how many pixels we have left.
5: // Filter 4 pixels, 7 <= w < 11
filter .4h
st1 {v6.4h}, [x0], #8
st1 {v7.4h}, [x12], #8
subs w5, w5, #4 // 3 <= w < 7
ext v2.16b, v2.16b, v3.16b, #8
ext v3.16b, v3.16b, v3.16b, #8
ext v4.16b, v4.16b, v5.16b, #8
ext v5.16b, v5.16b, v5.16b, #8
6: // Pad the right edge and filter the last few pixels.
// w < 7, w+3 pixels valid in v2-v3
cmp w5, #5
b.lt 7f
b.gt 8f
// w == 5, 8 pixels valid in v2, v3 invalid
mov v3.16b, v28.16b
mov v5.16b, v29.16b
b 88f
7: // 1 <= w < 5, 4-7 pixels valid in v2
sub w9, w5, #1
// w9 = (pixels valid - 4)
adr x11, L(variable_shift_tbl)
ldrh w9, [x11, w9, uxtw #1]
sub x11, x11, w9, uxth
mov v3.16b, v28.16b
mov v5.16b, v29.16b
br x11
// Shift v2 right, shifting out invalid pixels,
// shift v2 left to the original offset, shifting in padding pixels.
44: // 4 pixels valid
ext v2.16b, v2.16b, v2.16b, #8
ext v2.16b, v2.16b, v3.16b, #8
ext v4.16b, v4.16b, v4.16b, #8
ext v4.16b, v4.16b, v5.16b, #8
b 88f
55: // 5 pixels valid
ext v2.16b, v2.16b, v2.16b, #10
ext v2.16b, v2.16b, v3.16b, #6
ext v4.16b, v4.16b, v4.16b, #10
ext v4.16b, v4.16b, v5.16b, #6
b 88f
66: // 6 pixels valid
ext v2.16b, v2.16b, v2.16b, #12
ext v2.16b, v2.16b, v3.16b, #4
ext v4.16b, v4.16b, v4.16b, #12
ext v4.16b, v4.16b, v5.16b, #4
b 88f
77: // 7 pixels valid
ext v2.16b, v2.16b, v2.16b, #14
ext v2.16b, v2.16b, v3.16b, #2
ext v4.16b, v4.16b, v4.16b, #14
ext v4.16b, v4.16b, v5.16b, #2
b 88f
L(variable_shift_tbl):
.hword L(variable_shift_tbl) - 44b
.hword L(variable_shift_tbl) - 55b
.hword L(variable_shift_tbl) - 66b
.hword L(variable_shift_tbl) - 77b
8: // w > 5, w == 6, 9 pixels valid in v2-v3, 1 pixel valid in v3
ins v28.h[0], v3.h[0]
ins v29.h[0], v5.h[0]
mov v3.16b, v28.16b
mov v5.16b, v29.16b
88:
// w < 7, v2-v3 padded properly
cmp w5, #4
b.lt 888f
// w >= 4, filter 4 pixels
filter .4h
st1 {v6.4h}, [x0], #8
st1 {v7.4h}, [x12], #8
subs w5, w5, #4 // 0 <= w < 4
ext v2.16b, v2.16b, v3.16b, #8
ext v4.16b, v4.16b, v5.16b, #8
b.eq 9f
888: // 1 <= w < 4, filter 1 pixel at a time
mul v6.8h, v2.8h, v0.8h
mul v7.8h, v4.8h, v0.8h
addv h6, v6.8h
addv h7, v7.8h
dup v16.4h, v2.h[3]
dup v17.4h, v4.h[3]
shl v16.4h, v16.4h, #7
shl v17.4h, v17.4h, #7
sub v16.4h, v16.4h, v30.4h
sub v17.4h, v17.4h, v30.4h
sqadd v6.4h, v6.4h, v16.4h
sqadd v7.4h, v7.4h, v17.4h
sshr v6.4h, v6.4h, #3
sshr v7.4h, v7.4h, #3
add v6.4h, v6.4h, v31.4h
add v7.4h, v7.4h, v31.4h
st1 {v6.h}[0], [x0], #2
st1 {v7.h}[0], [x12], #2
subs w5, w5, #1
ext v2.16b, v2.16b, v3.16b, #2
ext v4.16b, v4.16b, v5.16b, #2
b.gt 888b
9:
subs w6, w6, #2
b.le 0f
// Jump to the next row and loop horizontally
add x0, x0, x10
add x12, x12, x10
add x2, x2, x3
add x13, x13, x3
mov w5, w8
b 1b
0:
ret
.purgem filter
endfunc
// void dav1d_wiener_filter_v_neon(pixel *dst, ptrdiff_t stride,
// const int16_t *mid, int w, int h,
// const int16_t fv[7], enum LrEdgeFlags edges,
// ptrdiff_t mid_stride);
function wiener_filter_v_neon, export=1
mov w8, w4
ld1 {v0.8h}, [x5]
mov w9, #128
dup v1.8h, w9
add v1.8h, v1.8h, v0.8h
// Calculate the number of rows to move back when looping vertically
mov w11, w4
tst w6, #4 // LR_HAVE_TOP
b.eq 0f
sub x2, x2, x7, lsl #1
add w11, w11, #2
0:
tst w6, #8 // LR_HAVE_BOTTOM
b.eq 1f
add w11, w11, #2
1: // Start of horizontal loop; start one vertical filter slice.
// Load rows into v16-v19 and pad properly.
tst w6, #4 // LR_HAVE_TOP
ld1 {v16.8h}, [x2], x7
b.eq 2f
// LR_HAVE_TOP
ld1 {v18.8h}, [x2], x7
mov v17.16b, v16.16b
ld1 {v19.8h}, [x2], x7
b 3f
2: // !LR_HAVE_TOP
mov v17.16b, v16.16b
mov v18.16b, v16.16b
mov v19.16b, v16.16b
3:
cmp w4, #4
b.lt 5f
// Start filtering normally; fill in v20-v22 with unique rows.
ld1 {v20.8h}, [x2], x7
ld1 {v21.8h}, [x2], x7
ld1 {v22.8h}, [x2], x7
4:
.macro filter compare
subs w4, w4, #1
// Interleaving the mul/mla chains actually hurts performance
// significantly on Cortex A53, thus keeping mul/mla tightly
// chained like this.
smull v2.4s, v16.4h, v0.h[0]
smlal v2.4s, v17.4h, v0.h[1]
smlal v2.4s, v18.4h, v0.h[2]
smlal v2.4s, v19.4h, v1.h[3]
smlal v2.4s, v20.4h, v0.h[4]
smlal v2.4s, v21.4h, v0.h[5]
smlal v2.4s, v22.4h, v0.h[6]
smull2 v3.4s, v16.8h, v0.h[0]
smlal2 v3.4s, v17.8h, v0.h[1]
smlal2 v3.4s, v18.8h, v0.h[2]
smlal2 v3.4s, v19.8h, v1.h[3]
smlal2 v3.4s, v20.8h, v0.h[4]
smlal2 v3.4s, v21.8h, v0.h[5]
smlal2 v3.4s, v22.8h, v0.h[6]
sqrshrun v2.4h, v2.4s, #11
sqrshrun2 v2.8h, v3.4s, #11
sqxtun v2.8b, v2.8h
st1 {v2.8b}, [x0], x1
.if \compare
cmp w4, #4
.else
b.le 9f
.endif
mov v16.16b, v17.16b
mov v17.16b, v18.16b
mov v18.16b, v19.16b
mov v19.16b, v20.16b
mov v20.16b, v21.16b
mov v21.16b, v22.16b
.endm
filter 1
b.lt 7f
ld1 {v22.8h}, [x2], x7
b 4b
5: // Less than 4 rows in total; not all of v20-v21 are filled yet.
tst w6, #8 // LR_HAVE_BOTTOM
b.eq 6f
// LR_HAVE_BOTTOM
cmp w4, #2
// We load at least 2 rows in all cases.
ld1 {v20.8h}, [x2], x7
ld1 {v21.8h}, [x2], x7
b.gt 53f // 3 rows in total
b.eq 52f // 2 rows in total
51: // 1 row in total, v19 already loaded, load edge into v20-v22.
mov v22.16b, v21.16b
b 8f
52: // 2 rows in total, v19 already loaded, load v20 with content data
// and 2 rows of edge.
ld1 {v22.8h}, [x2], x7
mov v23.16b, v22.16b
b 8f
53:
// 3 rows in total, v19 already loaded, load v20 and v21 with content
// and 2 rows of edge.
ld1 {v22.8h}, [x2], x7
ld1 {v23.8h}, [x2], x7
mov v24.16b, v23.16b
b 8f
6:
// !LR_HAVE_BOTTOM
cmp w4, #2
b.gt 63f // 3 rows in total
b.eq 62f // 2 rows in total
61: // 1 row in total, v19 already loaded, pad that into v20-v22.
mov v20.16b, v19.16b
mov v21.16b, v19.16b
mov v22.16b, v19.16b
b 8f
62: // 2 rows in total, v19 already loaded, load v20 and pad that into v20-v23.
ld1 {v20.8h}, [x2], x7
mov v21.16b, v20.16b
mov v22.16b, v20.16b
mov v23.16b, v20.16b
b 8f
63:
// 3 rows in total, v19 already loaded, load v20 and v21 and pad v21 into v22-v24.
ld1 {v20.8h}, [x2], x7
ld1 {v21.8h}, [x2], x7
mov v22.16b, v21.16b
mov v23.16b, v21.16b
mov v24.16b, v21.16b
b 8f
7:
// All registers up to v21 are filled already, 3 valid rows left.
// < 4 valid rows left; fill in padding and filter the last
// few rows.
tst w6, #8 // LR_HAVE_BOTTOM
b.eq 71f
// LR_HAVE_BOTTOM; load 2 rows of edge.
ld1 {v22.8h}, [x2], x7
ld1 {v23.8h}, [x2], x7
mov v24.16b, v23.16b
b 8f
71:
// !LR_HAVE_BOTTOM, pad 3 rows
mov v22.16b, v21.16b
mov v23.16b, v21.16b
mov v24.16b, v21.16b
8: // At this point, all registers up to v22-v24 are loaded with
// edge/padding (depending on how many rows are left).
filter 0 // This branches to 9f when done
mov v22.16b, v23.16b
mov v23.16b, v24.16b
b 8b
9: // End of one vertical slice.
subs w3, w3, #8
b.le 0f
// Move pointers back up to the top and loop horizontally.
msub x0, x1, x8, x0
msub x2, x7, x11, x2
add x0, x0, #8
add x2, x2, #16
mov w4, w8
b 1b
0:
ret
.purgem filter
endfunc
// void dav1d_copy_narrow_neon(pixel *dst, ptrdiff_t stride,
// const pixel *src, int w, int h);
function copy_narrow_neon, export=1
adr x5, L(copy_narrow_tbl)
ldrh w6, [x5, w3, uxtw #1]
sub x5, x5, w6, uxth
br x5
10:
add x7, x0, x1
lsl x1, x1, #1
18:
cmp w4, #8
b.lt 110f
subs w4, w4, #8
ld1 {v0.8b}, [x2], #8
st1 {v0.b}[0], [x0], x1
st1 {v0.b}[1], [x7], x1
st1 {v0.b}[2], [x0], x1
st1 {v0.b}[3], [x7], x1
st1 {v0.b}[4], [x0], x1
st1 {v0.b}[5], [x7], x1
st1 {v0.b}[6], [x0], x1
st1 {v0.b}[7], [x7], x1
b.le 0f
b 18b
110:
asr x1, x1, #1
11:
subs w4, w4, #1
ld1 {v0.b}[0], [x2], #1
st1 {v0.b}[0], [x0], x1
b.ge 11b
0:
ret
20:
add x7, x0, x1
lsl x1, x1, #1
24:
cmp w4, #4
b.lt 210f
subs w4, w4, #4
ld1 {v0.4h}, [x2], #8
st1 {v0.h}[0], [x0], x1
st1 {v0.h}[1], [x7], x1
st1 {v0.h}[2], [x0], x1
st1 {v0.h}[3], [x7], x1
b.le 0f
b 24b
210:
asr x1, x1, #1
22:
subs w4, w4, #1
ld1 {v0.h}[0], [x2], #2
st1 {v0.h}[0], [x0], x1
b.ge 22b
0:
ret
30:
ldrh w5, [x2]
ldrb w6, [x2, #2]
add x2, x2, #3
subs w4, w4, #1
strh w5, [x0]
strb w6, [x0, #2]
add x0, x0, x1
b.gt 30b
ret
40:
add x7, x0, x1
lsl x1, x1, #1
42:
cmp w4, #2
b.lt 41f
subs w4, w4, #2
ld1 {v0.2s}, [x2], #8
st1 {v0.s}[0], [x0], x1
st1 {v0.s}[1], [x7], x1
b.le 0f
b 42b
41:
ld1 {v0.s}[0], [x2]
st1 {v0.s}[0], [x0]
0:
ret
50:
ldr w5, [x2]
ldrb w6, [x2, #4]
add x2, x2, #5
subs w4, w4, #1
str w5, [x0]
strb w6, [x0, #4]
add x0, x0, x1
b.gt 50b
ret
60:
ldr w5, [x2]
ldrh w6, [x2, #4]
add x2, x2, #6
subs w4, w4, #1
str w5, [x0]
strh w6, [x0, #4]
add x0, x0, x1
b.gt 60b
ret
70:
ldr w5, [x2]
ldrh w6, [x2, #4]
ldrb w7, [x2, #6]
add x2, x2, #7
subs w4, w4, #1
str w5, [x0]
strh w6, [x0, #4]
strb w7, [x0, #6]
add x0, x0, x1
b.gt 70b
ret
L(copy_narrow_tbl):
.hword 0
.hword L(copy_narrow_tbl) - 10b
.hword L(copy_narrow_tbl) - 20b
.hword L(copy_narrow_tbl) - 30b
.hword L(copy_narrow_tbl) - 40b
.hword L(copy_narrow_tbl) - 50b
.hword L(copy_narrow_tbl) - 60b
.hword L(copy_narrow_tbl) - 70b
endfunc

File diff suppressed because it is too large.

@ -54,6 +54,14 @@
#endif
#endif
#if !defined(PIC)
#if defined(__PIC__)
#define PIC __PIC__
#elif defined(__pic__)
#define PIC __pic__
#endif
#endif
#ifndef PRIVATE_PREFIX
#define PRIVATE_PREFIX dav1d_
#endif
@ -121,4 +129,6 @@ EXTERN\name:
#define L(x) .L ## x
#endif
#define X(x) CONCAT(EXTERN, x)
#endif /* __DAV1D_SRC_ARM_ASM_S__ */

@ -0,0 +1,106 @@
/*
* Copyright © 2018, VideoLAN and dav1d authors
* Copyright © 2018, Two Orioles, LLC
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "src/cpu.h"
#include "src/looprestoration.h"
#include "common/attributes.h"
#include "common/intops.h"
#include "src/tables.h"
#if BITDEPTH == 8 && ARCH_AARCH64
// This calculates things slightly differently than the reference C version.
// This version calculates roughly this:
// int16_t sum = 0;
// for (int i = 0; i < 7; i++)
// sum += src[idx] * fh[i];
// int16_t sum2 = (src[x] << 7) - (1 << (BITDEPTH + 6)) + rounding_off_h;
// sum = iclip(sum + sum2, INT16_MIN, INT16_MAX) >> round_bits_h;
// sum += 2048;
void dav1d_wiener_filter_h_neon(int16_t *dst, const pixel (*left)[4],
const pixel *src, ptrdiff_t stride,
const int16_t fh[7], const intptr_t w,
int h, enum LrEdgeFlags edges);
// This calculates things slightly differently than the reference C version.
// This version calculates roughly this:
// fv[3] += 128;
// int32_t sum = 0;
// for (int i = 0; i < 7; i++)
// sum += mid[idx] * fv[i];
// sum = (sum + rounding_off_v) >> round_bits_v;
// This function assumes that the width is a multiple of 8.
void dav1d_wiener_filter_v_neon(pixel *dst, ptrdiff_t stride,
const int16_t *mid, int w, int h,
const int16_t fv[7], enum LrEdgeFlags edges,
ptrdiff_t mid_stride);
void dav1d_copy_narrow_neon(pixel *dst, ptrdiff_t stride,
const pixel *src, int w, int h);
static void wiener_filter_neon(pixel *const dst, const ptrdiff_t dst_stride,
const pixel (*const left)[4],
const pixel *lpf, const ptrdiff_t lpf_stride,
const int w, const int h, const int16_t fh[7],
const int16_t fv[7], const enum LrEdgeFlags edges)
{
ALIGN_STK_32(int16_t, mid, 68 * 384,);
int mid_stride = (w + 7) & ~7;
// Horizontal filter
dav1d_wiener_filter_h_neon(&mid[2 * mid_stride], left, dst, dst_stride,
fh, w, h, edges);
if (edges & LR_HAVE_TOP)
dav1d_wiener_filter_h_neon(mid, NULL, lpf, lpf_stride,
fh, w, 2, edges);
if (edges & LR_HAVE_BOTTOM)
dav1d_wiener_filter_h_neon(&mid[(2 + h) * mid_stride], NULL,
lpf + 6 * PXSTRIDE(lpf_stride), lpf_stride,
fh, w, 2, edges);
// Vertical filter
if (w >= 8)
dav1d_wiener_filter_v_neon(dst, dst_stride, &mid[2*mid_stride],
w & ~7, h, fv, edges, mid_stride * sizeof(*mid));
if (w & 7) {
// For uneven widths, do a full 8 pixel wide filtering into a temp
// buffer and copy out the narrow slice of pixels separately into dest.
ALIGN_STK_16(pixel, tmp, 64 * 8,);
dav1d_wiener_filter_v_neon(tmp, w & 7, &mid[2*mid_stride + (w & ~7)],
w & 7, h, fv, edges, mid_stride * sizeof(*mid));
dav1d_copy_narrow_neon(dst + (w & ~7), dst_stride, tmp, w & 7, h);
}
}
#endif
void bitfn(dav1d_loop_restoration_dsp_init_arm)(Dav1dLoopRestorationDSPContext *const c) {
const unsigned flags = dav1d_get_cpu_flags();
if (!(flags & DAV1D_ARM_CPU_FLAG_NEON)) return;
#if BITDEPTH == 8 && ARCH_AARCH64
c->wiener = wiener_filter_neon;
#endif
}

@ -30,16 +30,66 @@
#include "src/mc.h"
#include "src/cpu.h"
decl_mc_fn(dav1d_put_8tap_regular_8bpc_neon);
decl_mc_fn(dav1d_put_8tap_regular_smooth_8bpc_neon);
decl_mc_fn(dav1d_put_8tap_regular_sharp_8bpc_neon);
decl_mc_fn(dav1d_put_8tap_smooth_8bpc_neon);
decl_mc_fn(dav1d_put_8tap_smooth_regular_8bpc_neon);
decl_mc_fn(dav1d_put_8tap_smooth_sharp_8bpc_neon);
decl_mc_fn(dav1d_put_8tap_sharp_8bpc_neon);
decl_mc_fn(dav1d_put_8tap_sharp_regular_8bpc_neon);
decl_mc_fn(dav1d_put_8tap_sharp_smooth_8bpc_neon);
decl_mc_fn(dav1d_put_bilin_8bpc_neon);
decl_mct_fn(dav1d_prep_8tap_regular_8bpc_neon);
decl_mct_fn(dav1d_prep_8tap_regular_smooth_8bpc_neon);
decl_mct_fn(dav1d_prep_8tap_regular_sharp_8bpc_neon);
decl_mct_fn(dav1d_prep_8tap_smooth_8bpc_neon);
decl_mct_fn(dav1d_prep_8tap_smooth_regular_8bpc_neon);
decl_mct_fn(dav1d_prep_8tap_smooth_sharp_8bpc_neon);
decl_mct_fn(dav1d_prep_8tap_sharp_8bpc_neon);
decl_mct_fn(dav1d_prep_8tap_sharp_regular_8bpc_neon);
decl_mct_fn(dav1d_prep_8tap_sharp_smooth_8bpc_neon);
decl_mct_fn(dav1d_prep_bilin_8bpc_neon);
decl_avg_fn(dav1d_avg_8bpc_neon);
decl_w_avg_fn(dav1d_w_avg_8bpc_neon);
decl_mask_fn(dav1d_mask_8bpc_neon);
void bitfn(dav1d_mc_dsp_init_arm)(Dav1dMCDSPContext *const c) {
#define init_mc_fn(type, name, suffix) \
c->mc[type] = dav1d_put_##name##_8bpc_##suffix
#define init_mct_fn(type, name, suffix) \
c->mct[type] = dav1d_prep_##name##_8bpc_##suffix
const unsigned flags = dav1d_get_cpu_flags();
if (!(flags & DAV1D_ARM_CPU_FLAG_NEON)) return;
#if BITDEPTH == 8
#if ARCH_AARCH64
init_mc_fn (FILTER_2D_8TAP_REGULAR, 8tap_regular, neon);
init_mc_fn (FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth, neon);
init_mc_fn (FILTER_2D_8TAP_REGULAR_SHARP, 8tap_regular_sharp, neon);
init_mc_fn (FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular, neon);
init_mc_fn (FILTER_2D_8TAP_SMOOTH, 8tap_smooth, neon);
init_mc_fn (FILTER_2D_8TAP_SMOOTH_SHARP, 8tap_smooth_sharp, neon);
init_mc_fn (FILTER_2D_8TAP_SHARP_REGULAR, 8tap_sharp_regular, neon);
init_mc_fn (FILTER_2D_8TAP_SHARP_SMOOTH, 8tap_sharp_smooth, neon);
init_mc_fn (FILTER_2D_8TAP_SHARP, 8tap_sharp, neon);
init_mc_fn (FILTER_2D_BILINEAR, bilin, neon);
init_mct_fn(FILTER_2D_8TAP_REGULAR, 8tap_regular, neon);
init_mct_fn(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth, neon);
init_mct_fn(FILTER_2D_8TAP_REGULAR_SHARP, 8tap_regular_sharp, neon);
init_mct_fn(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular, neon);
init_mct_fn(FILTER_2D_8TAP_SMOOTH, 8tap_smooth, neon);
init_mct_fn(FILTER_2D_8TAP_SMOOTH_SHARP, 8tap_smooth_sharp, neon);
init_mct_fn(FILTER_2D_8TAP_SHARP_REGULAR, 8tap_sharp_regular, neon);
init_mct_fn(FILTER_2D_8TAP_SHARP_SMOOTH, 8tap_sharp_smooth, neon);
init_mct_fn(FILTER_2D_8TAP_SHARP, 8tap_sharp, neon);
init_mct_fn(FILTER_2D_BILINEAR, bilin, neon);
#endif
c->avg = dav1d_avg_8bpc_neon;
c->w_avg = dav1d_w_avg_8bpc_neon;
c->mask = dav1d_mask_8bpc_neon;

@ -88,8 +88,8 @@ void bytefn(dav1d_cdef_brow)(Dav1dFrameContext *const f,
pixel *ptrs[3] = { p[0], p[1], p[2] };
const int sbsz = 16;
const int sb64w = f->sb128w << 1;
const int damping = f->frame_hdr.cdef.damping + BITDEPTH - 8;
const enum Dav1dPixelLayout layout = f->cur.p.p.layout;
const int damping = f->frame_hdr->cdef.damping + BITDEPTH - 8;
const enum Dav1dPixelLayout layout = f->cur.p.layout;
const int uv_idx = DAV1D_PIXEL_LAYOUT_I444 - layout;
const int has_chroma = layout != DAV1D_PIXEL_LAYOUT_I400;
const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420;
@ -106,7 +106,7 @@ void bytefn(dav1d_cdef_brow)(Dav1dFrameContext *const f,
if (edges & HAVE_BOTTOM) {
// backup pre-filter data for next iteration
backup2lines(f->lf.cdef_line_ptr[!tf], ptrs, f->cur.p.stride,
backup2lines(f->lf.cdef_line_ptr[!tf], ptrs, f->cur.stride,
8, f->bw * 4, layout);
}
@ -119,15 +119,15 @@ void bytefn(dav1d_cdef_brow)(Dav1dFrameContext *const f,
const int sb64_idx = ((by & sbsz) >> 3) + (sbx & 1);
const int cdef_idx = lflvl[sb128x].cdef_idx[sb64_idx];
if (cdef_idx == -1 ||
(!f->frame_hdr.cdef.y_strength[cdef_idx] &&
!f->frame_hdr.cdef.uv_strength[cdef_idx]))
(!f->frame_hdr->cdef.y_strength[cdef_idx] &&
!f->frame_hdr->cdef.uv_strength[cdef_idx]))
{
last_skip = 1;
goto next_sb;
}
const int y_lvl = f->frame_hdr.cdef.y_strength[cdef_idx];
const int uv_lvl = f->frame_hdr.cdef.uv_strength[cdef_idx];
const int y_lvl = f->frame_hdr->cdef.y_strength[cdef_idx];
const int uv_lvl = f->frame_hdr->cdef.uv_strength[cdef_idx];
pixel *bptrs[3] = { iptrs[0], iptrs[1], iptrs[2] };
for (int bx = sbx * sbsz; bx < imin((sbx + 1) * sbsz, f->bw);
bx += 2, edges |= HAVE_LEFT)
@ -148,11 +148,11 @@ void bytefn(dav1d_cdef_brow)(Dav1dFrameContext *const f,
if (last_skip && edges & HAVE_LEFT) {
// we didn't backup the prefilter data because it wasn't
// there, so do it here instead
backup2x8(lr_bak[bit], bptrs, f->cur.p.stride, 0, layout);
backup2x8(lr_bak[bit], bptrs, f->cur.stride, 0, layout);
}
if (edges & HAVE_RIGHT) {
// backup pre-filter data for next iteration
backup2x8(lr_bak[!bit], bptrs, f->cur.p.stride, 8, layout);
backup2x8(lr_bak[!bit], bptrs, f->cur.stride, 8, layout);
}
// the actual filter
@ -165,10 +165,10 @@ void bytefn(dav1d_cdef_brow)(Dav1dFrameContext *const f,
uv_sec_lvl += uv_sec_lvl == 3;
uv_sec_lvl <<= BITDEPTH - 8;
unsigned variance;
const int dir = dsp->cdef.dir(bptrs[0], f->cur.p.stride[0],
const int dir = dsp->cdef.dir(bptrs[0], f->cur.stride[0],
&variance);
if (y_lvl) {
dsp->cdef.fb[0](bptrs[0], f->cur.p.stride[0], lr_bak[bit][0],
dsp->cdef.fb[0](bptrs[0], f->cur.stride[0], lr_bak[bit][0],
(pixel *const [2]) {
&f->lf.cdef_line_ptr[tf][0][0][bx * 4],
&f->lf.cdef_line_ptr[tf][0][1][bx * 4],
@ -179,10 +179,10 @@ void bytefn(dav1d_cdef_brow)(Dav1dFrameContext *const f,
}
if (uv_lvl && has_chroma) {
const int uvdir =
f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I422 ? dir :
f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I422 ? dir :
((uint8_t[]) { 7, 0, 2, 4, 5, 6, 6, 6 })[dir];
for (int pl = 1; pl <= 2; pl++) {
dsp->cdef.fb[uv_idx](bptrs[pl], f->cur.p.stride[1],
dsp->cdef.fb[uv_idx](bptrs[pl], f->cur.stride[1],
lr_bak[bit][pl],
(pixel *const [2]) {
&f->lf.cdef_line_ptr[tf][pl][0][bx * 4 >> ss_hor],
@ -209,9 +209,9 @@ void bytefn(dav1d_cdef_brow)(Dav1dFrameContext *const f,
iptrs[2] += sbsz * 4 >> ss_hor;
}
ptrs[0] += 8 * PXSTRIDE(f->cur.p.stride[0]);
ptrs[1] += 8 * PXSTRIDE(f->cur.p.stride[1]) >> ss_ver;
ptrs[2] += 8 * PXSTRIDE(f->cur.p.stride[1]) >> ss_ver;
ptrs[0] += 8 * PXSTRIDE(f->cur.stride[0]);
ptrs[1] += 8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver;
ptrs[2] += 8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver;
f->lf.top_pre_cdef_toggle ^= 1;
}
}

@ -4072,7 +4072,7 @@ void dav1d_init_states(CdfThreadContext *const cdf, const int qidx) {
dav1d_cdf_thread_ref(cdf, &cdf_init[qcat]);
}
void dav1d_update_tile_cdf(const Av1FrameHeader *const hdr,
void dav1d_update_tile_cdf(const Dav1dFrameHeader *const hdr,
CdfContext *const dst,
const CdfContext *const src)
{
@ -4138,7 +4138,7 @@ void dav1d_update_tile_cdf(const Av1FrameHeader *const hdr,
update_cdf_4d(N_TX_SIZES, 2, 41 /*42*/, 4, coef.base_tok);
update_bit_2d(2, 3, coef.dc_sign);
update_cdf_4d(4, 2, 21, 4, coef.br_tok);
update_cdf_2d(3, NUM_SEGMENTS, m.seg_id);
update_cdf_2d(3, DAV1D_MAX_SEGMENTS, m.seg_id);
update_cdf_1d(8, m.cfl_sign);
update_cdf_2d(6, 16, m.cfl_alpha);
update_bit_0d(m.restore_wiener);
@ -4171,7 +4171,7 @@ void dav1d_update_tile_cdf(const Av1FrameHeader *const hdr,
update_bit_1d(3, m.skip_mode);
update_cdf_2d(4, N_INTRA_PRED_MODES, m.y_mode);
update_cdf_3d(2, 8, N_SWITCHABLE_FILTERS, m.filter);
update_cdf_3d(2, 8, DAV1D_N_SWITCHABLE_FILTERS, m.filter);
update_bit_1d(6, m.newmv_mode);
update_bit_1d(2, m.globalmv_mode);
update_bit_1d(6, m.refmv_mode);

@ -40,7 +40,7 @@ typedef struct CdfModeContext {
uint16_t filter_intra[5 + 1];
uint16_t uv_mode[2][N_INTRA_PRED_MODES][N_UV_INTRA_PRED_MODES + 1];
uint16_t angle_delta[8][8];
uint16_t filter[2][8][N_SWITCHABLE_FILTERS + 1];
uint16_t filter[2][8][DAV1D_N_SWITCHABLE_FILTERS + 1];
uint16_t newmv_mode[6][2];
uint16_t globalmv_mode[2][2];
uint16_t refmv_mode[6][2];
@ -68,7 +68,7 @@ typedef struct CdfModeContext {
uint16_t skip_mode[3][2];
uint16_t partition[N_BL_LEVELS][4][N_PARTITIONS + 1];
uint16_t seg_pred[3][2];
uint16_t seg_id[3][NUM_SEGMENTS + 1];
uint16_t seg_id[3][DAV1D_MAX_SEGMENTS + 1];
uint16_t cfl_sign[8 + 1];
uint16_t cfl_alpha[6][16 + 1];
uint16_t restore_wiener[2];
@ -132,7 +132,7 @@ typedef struct CdfThreadContext {
} CdfThreadContext;
void dav1d_init_states(CdfThreadContext *cdf, int qidx);
void dav1d_update_tile_cdf(const Av1FrameHeader *hdr, CdfContext *dst,
void dav1d_update_tile_cdf(const Dav1dFrameHeader *hdr, CdfContext *dst,
const CdfContext *src);
void dav1d_cdf_thread_alloc(CdfThreadContext *cdf, struct thread_data *t);

@ -30,6 +30,8 @@
#include "config.h"
#include "dav1d/common.h"
#if ARCH_AARCH64 || ARCH_ARM
#include "src/arm/cpu.h"
#elif ARCH_X86
@ -37,6 +39,6 @@
#endif
unsigned dav1d_get_cpu_flags(void);
void dav1d_set_cpu_flags_mask(const unsigned mask);
DAV1D_API void dav1d_set_cpu_flags_mask(const unsigned mask);
#endif /* __DAV1D_SRC_CPU_H__ */

@ -28,6 +28,7 @@
#include "config.h"
#include <errno.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
@ -44,7 +45,10 @@ uint8_t * dav1d_data_create(Dav1dData *const buf, const size_t sz) {
buf->ref = dav1d_ref_create(sz);
if (!buf->ref) return NULL;
buf->data = buf->ref->const_data;
buf->sz = sz;
buf->sz = buf->m.size = sz;
buf->m.timestamp = INT64_MIN;
buf->m.duration = 0;
buf->m.offset = -1;
return buf->ref->data;
}
@ -60,7 +64,10 @@ int dav1d_data_wrap(Dav1dData *const buf, const uint8_t *const ptr, const size_t
buf->ref = dav1d_ref_wrap(ptr, free_callback, user_data);
if (!buf->ref) return -ENOMEM;
buf->data = ptr;
buf->sz = sz;
buf->sz = buf->m.size = sz;
buf->m.timestamp = INT64_MIN;
buf->m.duration = 0;
buf->m.offset = -1;
return 0;
}
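A minimal caller-side sketch of the wrapped-data path above (editorial illustration, not part of the upstream diff; the helper and buffer names are hypothetical and it assumes the usual dav1d/data.h declarations):
// Editor's sketch: wrapping a caller-owned buffer; dav1d_data_wrap() now also
// fills in default packet metadata.
static void my_free(const uint8_t *data, void *user_data) {
    (void)user_data;
    free((void *)data);   // hypothetical: the buffer was malloc'd by the caller
}
static int wrap_example(Dav1dData *buf, uint8_t *obu_bytes, size_t obu_size) {
    const int res = dav1d_data_wrap(buf, obu_bytes, obu_size, my_free, NULL);
    if (res < 0) return res;   // -ENOMEM if the wrapping ref could not be created
    // at this point buf->sz == buf->m.size == obu_size, buf->m.timestamp ==
    // INT64_MIN, buf->m.duration == 0 and buf->m.offset == -1
    return 0;
}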

File diff suppressed because it is too large


@ -91,45 +91,48 @@ static inline int get_partition_ctx(const BlockContext *const a,
}
static inline unsigned cdf_element_prob(const uint16_t *const cdf, const int e) {
return (e > 0 ? cdf[e - 1] : 32768) - cdf[e];
assert(e > 0);
return cdf[e - 1] - cdf[e];
}
static inline unsigned gather_left_partition_prob(const uint16_t *const in,
const enum BlockLevel bl)
{
unsigned out = 32768;
out -= cdf_element_prob(in, PARTITION_H);
unsigned out = 0;
out += cdf_element_prob(in, PARTITION_H);
if (bl != BL_128X128)
out -= cdf_element_prob(in, PARTITION_H4);
out -= cdf_element_prob(in, PARTITION_SPLIT);
out -= cdf_element_prob(in, PARTITION_T_TOP_SPLIT);
out -= cdf_element_prob(in, PARTITION_T_BOTTOM_SPLIT);
out -= cdf_element_prob(in, PARTITION_T_LEFT_SPLIT);
return 32768 - out;
out += cdf_element_prob(in, PARTITION_H4);
// Exploit the fact that cdfs for PARTITION_SPLIT, PARTITION_T_TOP_SPLIT,
// PARTITION_T_BOTTOM_SPLIT and PARTITION_T_LEFT_SPLIT are neighbors.
out += in[PARTITION_SPLIT - 1] - in[PARTITION_T_LEFT_SPLIT];
return out;
}
static inline unsigned gather_top_partition_prob(const uint16_t *const in,
const enum BlockLevel bl)
{
unsigned out = 32768;
out -= cdf_element_prob(in, PARTITION_V);
unsigned out = 0;
if (bl != BL_128X128)
out -= cdf_element_prob(in, PARTITION_V4);
out -= cdf_element_prob(in, PARTITION_SPLIT);
out -= cdf_element_prob(in, PARTITION_T_TOP_SPLIT);
out -= cdf_element_prob(in, PARTITION_T_LEFT_SPLIT);
out -= cdf_element_prob(in, PARTITION_T_RIGHT_SPLIT);
return 32768 - out;
out += cdf_element_prob(in, PARTITION_V4);
// Exploit the fact that cdfs for PARTITION_T_LEFT_SPLIT and PARTITION_T_RIGHT_SPLIT,
// and PARTITION_V, PARTITION_SPLIT and PARTITION_T_TOP_SPLIT are neighbors.
out += in[PARTITION_T_LEFT_SPLIT - 1] - in[PARTITION_T_RIGHT_SPLIT];
out += in[PARTITION_V - 1] - in[PARTITION_T_TOP_SPLIT];
return out;
}
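As the old cdf_element_prob() above shows, cdf[e] holds the probability mass remaining after element e in 1/32768 units, so the probability of a run of consecutive elements telescopes into a single subtraction. A minimal sketch of the identity the two "neighbors" shortcuts rely on (editorial illustration, not part of the diff):
// sum of cdf_element_prob(cdf, e) for e = first .. last (first >= 1), i.e.
// (cdf[first-1] - cdf[first]) + ... + (cdf[last-1] - cdf[last])
static unsigned run_prob(const uint16_t *const cdf, const int first, const int last) {
    return cdf[first - 1] - cdf[last];
}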
static inline enum TxfmTypeSet get_ext_txtp_set(const enum RectTxfmSize tx,
const int inter,
const Av1FrameHeader *const hdr,
const Dav1dFrameHeader *const hdr,
const int seg_id)
{
if (hdr->segmentation.lossless[seg_id]) {
assert(tx == (int) TX_4X4);
return TXTP_SET_LOSSLESS;
if (!hdr->segmentation.qidx[seg_id]) {
if (hdr->segmentation.lossless[seg_id]) {
assert(tx == (int) TX_4X4);
return TXTP_SET_LOSSLESS;
} else {
return TXTP_SET_DCT;
}
}
const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[tx];
@ -153,7 +156,7 @@ static inline enum TxfmTypeSet get_ext_txtp_set(const enum RectTxfmSize tx,
static inline enum TxfmType get_uv_intra_txtp(const enum IntraPredMode uv_mode,
const enum RectTxfmSize tx,
const Av1FrameHeader *const hdr,
const Dav1dFrameHeader *const hdr,
const int seg_id)
{
if (hdr->segmentation.lossless[seg_id]) {
@ -168,7 +171,7 @@ static inline enum TxfmType get_uv_intra_txtp(const enum IntraPredMode uv_mode,
static inline enum TxfmType get_uv_inter_txtp(const TxfmInfo *const uvt_dim,
const enum TxfmType ytxtp,
const Av1FrameHeader *const hdr,
const Dav1dFrameHeader *const hdr,
const int seg_id)
{
if (hdr->segmentation.lossless[seg_id]) {
@ -194,18 +197,18 @@ static inline int get_filter_ctx(const BlockContext *const a,
const int yb4, const int xb4)
{
const int a_filter = (a->ref[0][xb4] == ref || a->ref[1][xb4] == ref) ?
a->filter[dir][xb4] : N_SWITCHABLE_FILTERS;
a->filter[dir][xb4] : DAV1D_N_SWITCHABLE_FILTERS;
const int l_filter = (l->ref[0][yb4] == ref || l->ref[1][yb4] == ref) ?
l->filter[dir][yb4] : N_SWITCHABLE_FILTERS;
l->filter[dir][yb4] : DAV1D_N_SWITCHABLE_FILTERS;
if (a_filter == l_filter) {
return comp * 4 + a_filter;
} else if (a_filter == N_SWITCHABLE_FILTERS) {
} else if (a_filter == DAV1D_N_SWITCHABLE_FILTERS) {
return comp * 4 + l_filter;
} else if (l_filter == N_SWITCHABLE_FILTERS) {
} else if (l_filter == DAV1D_N_SWITCHABLE_FILTERS) {
return comp * 4 + a_filter;
} else {
return comp * 4 + N_SWITCHABLE_FILTERS;
return comp * 4 + DAV1D_N_SWITCHABLE_FILTERS;
}
}
@ -716,18 +719,18 @@ static inline int get_br_ctx(const uint8_t *const levels,
return mag + 14;
}
static inline mv get_gmv_2d(const WarpedMotionParams *const gmv,
static inline mv get_gmv_2d(const Dav1dWarpedMotionParams *const gmv,
const int bx4, const int by4,
const int bw4, const int bh4,
const Av1FrameHeader *const hdr)
const Dav1dFrameHeader *const hdr)
{
switch (gmv->type) {
case WM_TYPE_ROT_ZOOM:
case DAV1D_WM_TYPE_ROT_ZOOM:
assert(gmv->matrix[5] == gmv->matrix[2]);
assert(gmv->matrix[4] == -gmv->matrix[3]);
// fall-through
default:
case WM_TYPE_AFFINE: {
case DAV1D_WM_TYPE_AFFINE: {
const int x = bx4 * 4 + bw4 * 2 - 1;
const int y = by4 * 4 + bh4 * 2 - 1;
const int xc = (gmv->matrix[2] - (1 << 16)) * x +
@ -741,12 +744,12 @@ static inline mv get_gmv_2d(const WarpedMotionParams *const gmv,
.x = apply_sign(((abs(xc) + round) >> shift) << !hdr->hp, xc),
};
}
case WM_TYPE_TRANSLATION:
case DAV1D_WM_TYPE_TRANSLATION:
return (mv) {
.y = gmv->matrix[0] >> 13,
.x = gmv->matrix[1] >> 13,
};
case WM_TYPE_IDENTITY:
case DAV1D_WM_TYPE_IDENTITY:
return (mv) { .x = 0, .y = 0 };
}
}

third_party/dav1d/src/film_grain.h (vendored, new file, 39 lines)

@ -0,0 +1,39 @@
/*
* Copyright © 2018, VideoLAN and dav1d authors
* Copyright © 2018, Two Orioles, LLC
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __DAV1D_SRC_FILM_GRAIN_H__
#define __DAV1D_SRC_FILM_GRAIN_H__
#include "dav1d/dav1d.h"
void dav1d_apply_grain_8bpc(Dav1dPicture *const out,
const Dav1dPicture *const in);
void dav1d_apply_grain_10bpc(Dav1dPicture *const out,
const Dav1dPicture *const in);
#endif /* __DAV1D_SRC_FILM_GRAIN_H__ */

third_party/dav1d/src/film_grain_tmpl.c (vendored, new file, 512 lines)

@ -0,0 +1,512 @@
/*
* Copyright © 2018, Niklas Haas
* Copyright © 2018, VideoLAN and dav1d authors
* Copyright © 2018, Two Orioles, LLC
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#include <assert.h>
#include <stdint.h>
#include "common.h"
#include "common/intops.h"
#include "common/bitdepth.h"
#include "tables.h"
#include "film_grain.h"
#if BITDEPTH == 8
typedef int8_t entry;
#else
typedef int16_t entry;
#endif
enum {
GRAIN_WIDTH = 82,
GRAIN_HEIGHT = 73,
SUB_GRAIN_WIDTH = 44,
SUB_GRAIN_HEIGHT = 38,
SUB_GRAIN_OFFSET = 6,
BLOCK_SIZE = 32,
SCALING_SIZE = 1 << BITDEPTH,
};
static inline int get_random_number(const int bits, unsigned *state) {
const int r = *state;
unsigned bit = ((r >> 0) ^ (r >> 1) ^ (r >> 3) ^ (r >> 12)) & 1;
*state = (r >> 1) | (bit << 15);
return (*state >> (16 - bits)) & ((1 << bits) - 1);
}
static inline int round2(const int x, const int shift) {
return (x + ((1 << shift) >> 1)) >> shift;
}
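get_random_number() above is the 16-bit shift-register PRNG used by the film grain synthesis (feedback taken from bits 0, 1, 3 and 12 of the state); a small usage sketch with a made-up seed (editorial, not part of the diff):
static void prng_demo(void) {
    unsigned state = 0x1234;                      // hypothetical seed value
    const int a = get_random_number(11, &state);  // 11-bit draw, as used for grain values
    const int b = get_random_number(8, &state);   // 8-bit draw, as used for block offsets
    (void)a; (void)b;                             // each call also advances `state`
}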
enum {
GRAIN_CENTER = 128 << (BITDEPTH - 8),
GRAIN_MIN = -GRAIN_CENTER,
GRAIN_MAX = (256 << (BITDEPTH - 8)) - 1 - GRAIN_CENTER,
};
static void generate_grain_y(const Dav1dPicture *const in,
entry buf[GRAIN_HEIGHT][GRAIN_WIDTH])
{
const Dav1dFilmGrainData *data = &in->frame_hdr->film_grain.data;
unsigned seed = data->seed;
const int shift = 12 - BITDEPTH + data->grain_scale_shift;
for (int y = 0; y < GRAIN_HEIGHT; y++) {
for (int x = 0; x < GRAIN_WIDTH; x++) {
const int value = get_random_number(11, &seed);
buf[y][x] = round2(dav1d_gaussian_sequence[ value ], shift);
}
}
const int ar_pad = 3;
const int ar_lag = data->ar_coeff_lag;
for (int y = ar_pad; y < GRAIN_HEIGHT; y++) {
for (int x = ar_pad; x < GRAIN_WIDTH - ar_pad; x++) {
const int8_t *coeff = data->ar_coeffs_y;
int sum = 0;
for (int dy = -ar_lag; dy <= 0; dy++) {
for (int dx = -ar_lag; dx <= ar_lag; dx++) {
if (!dx && !dy)
break;
sum += *(coeff++) * buf[y + dy][x + dx];
}
}
int grain = buf[y][x] + round2(sum, data->ar_coeff_shift);
buf[y][x] = iclip(grain, GRAIN_MIN, GRAIN_MAX);
}
}
}
static void generate_grain_uv(const Dav1dPicture *const in, int uv,
entry buf[GRAIN_HEIGHT][GRAIN_WIDTH],
entry buf_y[GRAIN_HEIGHT][GRAIN_WIDTH])
{
const Dav1dFilmGrainData *data = &in->frame_hdr->film_grain.data;
unsigned seed = data->seed ^ (uv ? 0x49d8 : 0xb524);
const int shift = 12 - BITDEPTH + data->grain_scale_shift;
const int subx = in->p.layout != DAV1D_PIXEL_LAYOUT_I444;
const int suby = in->p.layout == DAV1D_PIXEL_LAYOUT_I420;
const int chromaW = subx ? SUB_GRAIN_WIDTH : GRAIN_WIDTH;
const int chromaH = suby ? SUB_GRAIN_HEIGHT : GRAIN_HEIGHT;
for (int y = 0; y < chromaH; y++) {
for (int x = 0; x < chromaW; x++) {
const int value = get_random_number(11, &seed);
buf[y][x] = round2(dav1d_gaussian_sequence[ value ], shift);
}
}
const int ar_pad = 3;
const int ar_lag = data->ar_coeff_lag;
for (int y = ar_pad; y < chromaH; y++) {
for (int x = ar_pad; x < chromaW - ar_pad; x++) {
const int8_t *coeff = data->ar_coeffs_uv[uv];
int sum = 0;
for (int dy = -ar_lag; dy <= 0; dy++) {
for (int dx = -ar_lag; dx <= ar_lag; dx++) {
// For the final (current) pixel, we need to add in the
// contribution from the luma grain texture
if (!dx && !dy) {
if (!data->num_y_points)
break;
int luma = 0;
const int lumaX = ((x - ar_pad) << subx) + ar_pad;
const int lumaY = ((y - ar_pad) << suby) + ar_pad;
for (int i = 0; i <= suby; i++) {
for (int j = 0; j <= subx; j++) {
luma += buf_y[lumaY + i][lumaX + j];
}
}
luma = round2(luma, subx + suby);
sum += luma * (*coeff);
break;
}
sum += *(coeff++) * buf[y + dy][x + dx];
}
}
const int grain = buf[y][x] + round2(sum, data->ar_coeff_shift);
buf[y][x] = iclip(grain, GRAIN_MIN, GRAIN_MAX);
}
}
}
static void generate_scaling(const uint8_t points[][2], int num,
uint8_t scaling[SCALING_SIZE])
{
const int shift_x = BITDEPTH - 8;
// Fill up the preceding entries with the initial value
for (int i = 0; i < points[0][0] << shift_x; i++)
scaling[i] = points[0][1];
// Linearly interpolate the values in the middle
for (int i = 0; i < num - 1; i++) {
const int bx = points[i][0] << shift_x;
const int by = points[i][1];
const int ex = points[i+1][0] << shift_x;
const int ey = points[i+1][1];
const int dx = ex - bx;
const int dy = ey - by;
const int delta = dy * ((0xFFFF + (dx >> 1))) / dx;
for (int x = 0; x < dx; x++) {
const int v = by + ((x * delta + 0x8000) >> 16);
scaling[bx + x] = v;
}
}
// Fill up the remaining entries with the final value
for (int i = points[num - 1][0] << shift_x; i < SCALING_SIZE; i++)
scaling[i] = points[num - 1][1];
}
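generate_scaling() builds a piecewise-linear lookup table: constant before the first point, linear ramps between points, constant after the last point. A tiny sketch with made-up points, assuming BITDEPTH == 8 (editorial, not part of the diff):
static void scaling_demo(void) {
    const uint8_t pts[2][2] = { { 0, 20 }, { 255, 60 } };  // hypothetical (value, scaling) pairs
    uint8_t lut[SCALING_SIZE];
    generate_scaling(pts, 2, lut);
    // lut[0] == 20, lut[255] == 60, and everything in between is a single
    // linear ramp; additional points would simply add more ramp segments
    (void)lut;
}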
// samples from the correct block of a grain LUT, while taking into account the
// offsets provided by the offsets cache
static inline entry sample_lut(entry grain_lut[GRAIN_HEIGHT][GRAIN_WIDTH],
int offsets[2][2], int subx, int suby,
int bx, int by, int x, int y)
{
const int randval = offsets[bx][by];
const int offx = 3 + (2 >> subx) * (3 + (randval >> 4));
const int offy = 3 + (2 >> suby) * (3 + (randval & 0xF));
return grain_lut[offy + y + (BLOCK_SIZE >> suby) * by]
[offx + x + (BLOCK_SIZE >> subx) * bx];
}
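Editorial note on the offsets decoded above (not part of the diff): each 8-bit offset packs the x offset in its high nibble and the y offset in its low nibble, so in the unsubsampled case the block origin reduces to
//   offx = 9 + 2 * (randval >> 4),   offy = 9 + 2 * (randval & 0xF)
// which keeps every access of a 32x32 block (including the neighbouring-block
// lookups used for overlap) inside the 82x73 grain LUT.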
static void apply_to_row_y(Dav1dPicture *const out, const Dav1dPicture *const in,
entry grain_lut[GRAIN_HEIGHT][GRAIN_WIDTH],
uint8_t scaling[SCALING_SIZE], int row_num)
{
const Dav1dFilmGrainData *const data = &out->frame_hdr->film_grain.data;
const int rows = 1 + (data->overlap_flag && row_num > 0);
int min_value, max_value;
if (data->clip_to_restricted_range) {
min_value = 16 << (BITDEPTH - 8);
max_value = 235 << (BITDEPTH - 8);
} else {
min_value = 0;
max_value = (1 << BITDEPTH) - 1;
}
// seed[0] contains the current row, seed[1] contains the previous
unsigned seed[2];
for (int i = 0; i < rows; i++) {
seed[i] = data->seed;
seed[i] ^= (((row_num - i) * 37 + 178) & 0xFF) << 8;
seed[i] ^= (((row_num - i) * 173 + 105) & 0xFF);
}
const ptrdiff_t stride = out->stride[0];
assert(stride % (BLOCK_SIZE * sizeof(pixel)) == 0);
assert(stride == in->stride[0]);
pixel *const src_row = (pixel *) in->data[0] + PXSTRIDE(stride) * row_num * BLOCK_SIZE;
pixel *const dst_row = (pixel *) out->data[0] + PXSTRIDE(stride) * row_num * BLOCK_SIZE;
int offsets[2 /* col offset */][2 /* row offset */];
// process this row in BLOCK_SIZE^2 blocks
const int bh = imin(out->p.h - row_num * BLOCK_SIZE, BLOCK_SIZE);
for (int bx = 0; bx < out->p.w; bx += BLOCK_SIZE) {
const int bw = imin(BLOCK_SIZE, out->p.w - bx);
if (data->overlap_flag && bx) {
// shift previous offsets left
for (int i = 0; i < rows; i++)
offsets[1][i] = offsets[0][i];
}
// update current offsets
for (int i = 0; i < rows; i++)
offsets[0][i] = get_random_number(8, &seed[i]);
// x/y block offsets to compensate for overlapped regions
const int ystart = data->overlap_flag && row_num ? 2 : 0;
const int xstart = data->overlap_flag && bx ? 2 : 0;
static const int w[2][2] = { { 27, 17 }, { 17, 27 } };
#define add_noise_y(x, y, grain) \
pixel *src = src_row + (y) * PXSTRIDE(stride) + (bx + (x)); \
pixel *dst = dst_row + (y) * PXSTRIDE(stride) + (bx + (x)); \
int noise = round2(scaling[ *src ] * (grain), data->scaling_shift); \
*dst = iclip(*src + noise, min_value, max_value);
for (int y = ystart; y < bh; y++) {
// Non-overlapped image region (straightforward)
for (int x = xstart; x < bw; x++) {
int grain = sample_lut(grain_lut, offsets, 0, 0, 0, 0, x, y);
add_noise_y(x, y, grain);
}
// Special case for overlapped column
for (int x = 0; x < xstart; x++) {
int grain = sample_lut(grain_lut, offsets, 0, 0, 0, 0, x, y);
int old = sample_lut(grain_lut, offsets, 0, 0, 1, 0, x, y);
grain = round2(old * w[x][0] + grain * w[x][1], 5);
grain = iclip(grain, GRAIN_MIN, GRAIN_MAX);
add_noise_y(x, y, grain);
}
}
for (int y = 0; y < ystart; y++) {
// Special case for overlapped row (sans corner)
for (int x = xstart; x < bw; x++) {
int grain = sample_lut(grain_lut, offsets, 0, 0, 0, 0, x, y);
int old = sample_lut(grain_lut, offsets, 0, 0, 0, 1, x, y);
grain = round2(old * w[y][0] + grain * w[y][1], 5);
grain = iclip(grain, GRAIN_MIN, GRAIN_MAX);
add_noise_y(x, y, grain);
}
// Special case for doubly-overlapped corner
for (int x = 0; x < xstart; x++) {
// Blend the top pixel with the top left block
int top = sample_lut(grain_lut, offsets, 0, 0, 0, 1, x, y);
int old = sample_lut(grain_lut, offsets, 0, 0, 1, 1, x, y);
top = round2(old * w[x][0] + top * w[x][1], 5);
top = iclip(top, GRAIN_MIN, GRAIN_MAX);
// Blend the current pixel with the left block
int grain = sample_lut(grain_lut, offsets, 0, 0, 0, 0, x, y);
old = sample_lut(grain_lut, offsets, 0, 0, 1, 0, x, y);
grain = round2(old * w[x][0] + grain * w[x][1], 5);
grain = iclip(grain, GRAIN_MIN, GRAIN_MAX);
// Mix the two rows together and apply grain
grain = round2(top * w[y][0] + grain * w[y][1], 5);
grain = iclip(grain, GRAIN_MIN, GRAIN_MAX);
add_noise_y(x, y, grain);
}
}
}
}
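The overlapped-column and overlapped-row cases above both reduce to a fixed 5-bit weighted blend of the neighbouring block's grain with the current block's grain; a small sketch of that step (editorial, not part of the diff; 27/17 are the luma weights from `w[]` above):
static int blend_luma_overlap(const int old_grain, const int cur_grain,
                              const int pos /* 0 or 1 within the 2-sample overlap */)
{
    static const int w[2][2] = { { 27, 17 }, { 17, 27 } };
    // 27 + 17 > 32, so the blend can leave the grain range and is re-clipped
    const int g = round2(old_grain * w[pos][0] + cur_grain * w[pos][1], 5);
    return iclip(g, GRAIN_MIN, GRAIN_MAX);
}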
static void apply_to_row_uv(Dav1dPicture *const out, const Dav1dPicture *const in,
entry grain_lut[GRAIN_HEIGHT][GRAIN_WIDTH],
uint8_t scaling[SCALING_SIZE], int uv, int row_num)
{
const Dav1dFilmGrainData *const data = &out->frame_hdr->film_grain.data;
const int rows = 1 + (data->overlap_flag && row_num > 0);
int min_value, max_value;
if (data->clip_to_restricted_range) {
min_value = 16 << (BITDEPTH - 8);
if (out->seq_hdr->mtrx == DAV1D_MC_IDENTITY) {
max_value = 235 << (BITDEPTH - 8);
} else {
max_value = 240 << (BITDEPTH - 8);
}
} else {
min_value = 0;
max_value = (1 << BITDEPTH) - 1;
}
const int sx = in->p.layout != DAV1D_PIXEL_LAYOUT_I444;
const int sy = in->p.layout == DAV1D_PIXEL_LAYOUT_I420;
// seed[0] contains the current row, seed[1] contains the previous
unsigned seed[2];
for (int i = 0; i < rows; i++) {
seed[i] = data->seed;
seed[i] ^= (((row_num - i) * 37 + 178) & 0xFF) << 8;
seed[i] ^= (((row_num - i) * 173 + 105) & 0xFF);
}
const ptrdiff_t stride = out->stride[1];
assert(stride % (BLOCK_SIZE * sizeof(pixel)) == 0);
assert(stride == in->stride[1]);
const int by = row_num * (BLOCK_SIZE >> sy);
pixel *const dst_row = (pixel *) out->data[1 + uv] + PXSTRIDE(stride) * by;
pixel *const src_row = (pixel *) in->data[1 + uv] + PXSTRIDE(stride) * by;
pixel *const luma_row = (pixel *) out->data[0] + PXSTRIDE(out->stride[0]) * row_num * BLOCK_SIZE;
int offsets[2 /* col offset */][2 /* row offset */];
// process this row in BLOCK_SIZE^2 blocks (subsampled)
const int bh = (imin(out->p.h - row_num * BLOCK_SIZE, BLOCK_SIZE) + sy) >> sy;
for (int bx = 0; bx < (out->p.w + sx) >> sx; bx += BLOCK_SIZE >> sx) {
const int bw = (imin(BLOCK_SIZE, out->p.w - (bx << sx)) + sx) >> sx;
if (data->overlap_flag && bx) {
// shift previous offsets left
for (int i = 0; i < rows; i++)
offsets[1][i] = offsets[0][i];
}
// update current offsets
for (int i = 0; i < rows; i++)
offsets[0][i] = get_random_number(8, &seed[i]);
// x/y block offsets to compensate for overlapped regions
const int ystart = data->overlap_flag && row_num ? (2 >> sy) : 0;
const int xstart = data->overlap_flag && bx ? (2 >> sx) : 0;
static const int w[2 /* sub */][2 /* off */][2] = {
{ { 27, 17 }, { 17, 27 } },
{ { 23, 22 } },
};
#define add_noise_uv(x, y, grain) \
const int lx = (bx + x) << sx; \
const int ly = y << sy; \
pixel *luma = luma_row + ly * PXSTRIDE(out->stride[0]) + lx; \
pixel avg = luma[0]; \
if (sx && lx + 1 < out->p.w) \
avg = (avg + luma[1] + 1) >> 1; \
\
pixel *src = src_row + (y) * PXSTRIDE(stride) + (bx + (x)); \
pixel *dst = dst_row + (y) * PXSTRIDE(stride) + (bx + (x)); \
int val = avg; \
if (!data->chroma_scaling_from_luma) { \
int combined = avg * data->uv_luma_mult[uv] + \
*src * data->uv_mult[uv]; \
val = iclip_pixel( (combined >> 6) + \
(data->uv_offset[uv] * (1 << (BITDEPTH - 8))) ); \
} \
\
int noise = round2(scaling[ val ] * (grain), data->scaling_shift); \
*dst = iclip(*src + noise, min_value, max_value);
for (int y = ystart; y < bh; y++) {
// Non-overlapped image region (straightforward)
for (int x = xstart; x < bw; x++) {
int grain = sample_lut(grain_lut, offsets, sx, sy, 0, 0, x, y);
add_noise_uv(x, y, grain);
}
// Special case for overlapped column
for (int x = 0; x < xstart; x++) {
int grain = sample_lut(grain_lut, offsets, sx, sy, 0, 0, x, y);
int old = sample_lut(grain_lut, offsets, sx, sy, 1, 0, x, y);
grain = (old * w[sx][x][0] + grain * w[sx][x][1] + 16) >> 5;
grain = iclip(grain, GRAIN_MIN, GRAIN_MAX);
add_noise_uv(x, y, grain);
}
}
for (int y = 0; y < ystart; y++) {
// Special case for overlapped row (sans corner)
for (int x = xstart; x < bw; x++) {
int grain = sample_lut(grain_lut, offsets, sx, sy, 0, 0, x, y);
int old = sample_lut(grain_lut, offsets, sx, sy, 0, 1, x, y);
grain = (old * w[sy][y][0] + grain * w[sy][y][1] + 16) >> 5;
grain = iclip(grain, GRAIN_MIN, GRAIN_MAX);
add_noise_uv(x, y, grain);
}
// Special case for doubly-overlapped corner
for (int x = 0; x < xstart; x++) {
// Blend the top pixel with the top left block
int top = sample_lut(grain_lut, offsets, sx, sy, 0, 1, x, y);
int old = sample_lut(grain_lut, offsets, sx, sy, 1, 1, x, y);
top = (old * w[sx][x][0] + top * w[sx][x][1] + 16) >> 5;
top = iclip(top, GRAIN_MIN, GRAIN_MAX);
// Blend the current pixel with the left block
int grain = sample_lut(grain_lut, offsets, sx, sy, 0, 0, x, y);
old = sample_lut(grain_lut, offsets, sx, sy, 1, 0, x, y);
grain = (old * w[sx][x][0] + grain * w[sx][x][1] + 16) >> 5;
grain = iclip(grain, GRAIN_MIN, GRAIN_MAX);
// Mix the two rows together and apply to image
grain = (top * w[sy][y][0] + grain * w[sy][y][1] + 16) >> 5;
grain = iclip(grain, GRAIN_MIN, GRAIN_MAX);
add_noise_uv(x, y, grain);
}
}
}
}
void bitfn(dav1d_apply_grain)(Dav1dPicture *const out,
const Dav1dPicture *const in)
{
const Dav1dFilmGrainData *const data = &out->frame_hdr->film_grain.data;
entry grain_lut[3][GRAIN_HEIGHT][GRAIN_WIDTH];
uint8_t scaling[3][SCALING_SIZE];
// Generate grain LUTs as needed
generate_grain_y(out, grain_lut[0]); // always needed
if (data->num_uv_points[0] || data->chroma_scaling_from_luma)
generate_grain_uv(out, 0, grain_lut[1], grain_lut[0]);
if (data->num_uv_points[1] || data->chroma_scaling_from_luma)
generate_grain_uv(out, 1, grain_lut[2], grain_lut[0]);
// Generate scaling LUTs as needed
if (data->num_y_points)
generate_scaling(data->y_points, data->num_y_points, scaling[0]);
if (data->num_uv_points[0])
generate_scaling(data->uv_points[0], data->num_uv_points[0], scaling[1]);
if (data->num_uv_points[1])
generate_scaling(data->uv_points[1], data->num_uv_points[1], scaling[2]);
// Synthesize grain for the affected planes
int rows = (out->p.h + 31) >> 5;
for (int row = 0; row < rows; row++) {
if (data->num_y_points)
apply_to_row_y(out, in, grain_lut[0], scaling[0], row);
if (data->chroma_scaling_from_luma) {
apply_to_row_uv(out, in, grain_lut[1], scaling[0], 0, row);
apply_to_row_uv(out, in, grain_lut[2], scaling[0], 1, row);
} else {
if (data->num_uv_points[0])
apply_to_row_uv(out, in, grain_lut[1], scaling[1], 0, row);
if (data->num_uv_points[1])
apply_to_row_uv(out, in, grain_lut[2], scaling[2], 1, row);
}
}
// Copy over the non-modified planes
// TODO: eliminate in favor of per-plane refs
if (!data->num_y_points) {
assert(out->stride[0] == in->stride[0]);
memcpy(out->data[0], in->data[0], out->p.h * out->stride[0]);
}
if (in->p.layout != DAV1D_PIXEL_LAYOUT_I400) {
for (int i = 0; i < 2; i++) {
if (!data->num_uv_points[i] && !data->chroma_scaling_from_luma) {
const int suby = in->p.layout == DAV1D_PIXEL_LAYOUT_I420;
assert(out->stride[1] == in->stride[1]);
memcpy(out->data[1+i], in->data[1+i],
(out->p.h >> suby) * out->stride[1]);
}
}
}
}


@ -75,10 +75,12 @@ struct Dav1dContext {
Dav1dData data;
int start, end;
} tile[256];
int n_tile_data, have_seq_hdr, have_frame_hdr;
int n_tile_data;
int n_tiles;
Av1SequenceHeader seq_hdr; // FIXME make ref?
Av1FrameHeader frame_hdr; // FIXME make ref?
Dav1dRef *seq_hdr_ref;
Dav1dSequenceHeader *seq_hdr;
Dav1dRef *frame_hdr_ref;
Dav1dFrameHeader *frame_hdr;
// decoded output picture queue
Dav1dData in;
@ -86,19 +88,18 @@ struct Dav1dContext {
struct {
Dav1dThreadPicture *out_delayed;
unsigned next;
// dummy is a pointer to prevent compiler errors about atomic_load()
// not taking const arguments; the const attribute is not taken
// from pointers
atomic_int flush_mem, *flush;
} frame_thread;
// reference/entropy state
struct {
Dav1dThreadPicture p;
Dav1dRef *segmap;
Av1SegmentationDataSet seg_data;
Dav1dRef *refmvs;
unsigned refpoc[7];
WarpedMotionParams gmv[7];
Av1LoopfilterModeRefDeltas lf_mode_ref_deltas;
Av1FilmGrainData film_grain;
uint8_t qidx;
} refs[8];
CdfThreadContext cdf[8];
@ -114,12 +115,20 @@ struct Dav1dContext {
} intra_edge;
Dav1dPicAllocator allocator;
int apply_grain;
int operating_point;
unsigned operating_point_idc;
int all_layers;
};
struct Dav1dFrameContext {
Av1SequenceHeader seq_hdr;
Av1FrameHeader frame_hdr;
Dav1dThreadPicture refp[7], cur;
Dav1dRef *seq_hdr_ref;
Dav1dSequenceHeader *seq_hdr;
Dav1dRef *frame_hdr_ref;
Dav1dFrameHeader *frame_hdr;
Dav1dThreadPicture refp[7];
Dav1dPicture cur; // during block coding / reconstruction
Dav1dThreadPicture sr_cur; // after super-resolution upscaling
Dav1dRef *mvs_ref;
refmvs *mvs, *ref_mvs[7];
Dav1dRef *ref_mvs_ref[7];
@ -127,6 +136,7 @@ struct Dav1dFrameContext {
uint8_t *cur_segmap;
const uint8_t *prev_segmap;
unsigned refpoc[7], refrefpoc[7][7];
uint8_t gmv_warp_allowed[7];
CdfThreadContext in_cdf, out_cdf;
struct {
Dav1dData data;
@ -139,6 +149,7 @@ struct Dav1dFrameContext {
int scale; // if no scaling, this is 0
int step;
} svc[7][2 /* x, y */];
int resize_step[2 /* y, uv */], resize_start[2 /* y, uv */];
const Dav1dContext *c;
Dav1dTileContext *tc;
@ -157,8 +168,8 @@ struct Dav1dFrameContext {
int ipred_edge_sz;
pixel *ipred_edge[3];
ptrdiff_t b4_stride;
int w4, h4, bw, bh, sb128w, sb128h, sbh, sb_shift, sb_step;
uint16_t dq[NUM_SEGMENTS][3 /* plane */][2 /* dc/ac */];
int w4, h4, bw, bh, sb128w, sb128h, sbh, sb_shift, sb_step, sr_sb128w;
uint16_t dq[DAV1D_MAX_SEGMENTS][3 /* plane */][2 /* dc/ac */];
const uint8_t *qm[2 /* is_1d */][N_RECT_TX_SIZES][3 /* plane */];
BlockContext *a;
int a_sz /* w*tile_rows */;
@ -188,8 +199,9 @@ struct Dav1dFrameContext {
struct {
uint8_t (*level)[4];
Av1Filter *mask;
Av1Restoration *lr_mask;
int top_pre_cdef_toggle;
int mask_sz /* w*h */, line_sz /* w */, re_sz /* h */;
int mask_sz /* w*h */, lr_mask_sz, line_sz /* w */, lr_line_sz, re_sz /* h */;
Av1FilterLUT lim_lut;
int last_sharpness;
uint8_t lvl[8 /* seg_id */][4 /* dir */][8 /* ref */][2 /* is_gmv */];
@ -201,7 +213,7 @@ struct Dav1dFrameContext {
// in-loop filter per-frame state keeping
int tile_row; // for carry-over at tile row edges
pixel *p[3];
pixel *p[3], *sr_p[3];
Av1Filter *mask_ptr, *prev_mask_ptr;
} lf;
@ -212,7 +224,7 @@ struct Dav1dFrameContext {
pthread_cond_t cond, icond;
int tasks_left, num_tasks;
int (*task_idx_to_sby_and_tile_idx)[2];
int titsati_sz, titsati_init[2];
int titsati_sz, titsati_init[3];
} tile_thread;
};
@ -235,7 +247,7 @@ struct Dav1dTileState {
coef *cf;
} frame_thread;
uint16_t dqmem[NUM_SEGMENTS][3 /* plane */][2 /* dc/ac */];
uint16_t dqmem[DAV1D_MAX_SEGMENTS][3 /* plane */][2 /* dc/ac */];
const uint16_t (*dq)[3][2];
int last_qidx;
@ -259,7 +271,7 @@ struct Dav1dTileContext {
uint16_t pal[3 /* plane */][8 /* palette_idx */];
uint8_t pal_sz_uv[2 /* a/l */][32 /* bx4/by4 */];
uint8_t txtp_map[32 * 32]; // inter-only
WarpedMotionParams warpmv;
Dav1dWarpedMotionParams warpmv;
union {
void *mem;
uint8_t *pal_idx;


@ -28,6 +28,7 @@
#include "config.h"
#include <assert.h>
#include <stdlib.h>
#include "src/intra_edge.h"
#include "src/levels.h"


@ -28,6 +28,8 @@
#ifndef __DAV1D_SRC_IPRED_H__
#define __DAV1D_SRC_IPRED_H__
#include <stddef.h>
#include "common/bitdepth.h"
#include "src/levels.h"


@ -28,6 +28,8 @@
#ifndef __DAV1D_SRC_ITX_H__
#define __DAV1D_SRC_ITX_H__
#include <stddef.h>
#include "common/bitdepth.h"
#include "src/levels.h"

File diff suppressed because it is too large


@ -40,7 +40,7 @@
#include "src/itx_1d.c"
typedef void (*itx_1d_fn)(const coef *in, ptrdiff_t in_s,
coef *out, ptrdiff_t out_s);
coef *out, ptrdiff_t out_s, const int range);
static void NOINLINE
inv_txfm_add_c(pixel *dst, const ptrdiff_t stride,
@ -54,6 +54,9 @@ inv_txfm_add_c(pixel *dst, const ptrdiff_t stride,
// Maximum value for h and w is 64
coef tmp[4096 /* w * h */], out[64 /* h */], in_mem[64 /* w */];
const int is_rect2 = w * 2 == h || h * 2 == w;
const int row_clip_max = (1 << (BITDEPTH + 8 - 1)) - 1;
const int col_clip_max = (1 << (imax(BITDEPTH + 6, 16) - 1)) -1;
const int col_clip_min = -col_clip_max - 1;
if (w != sw) memset(&in_mem[sw], 0, (w - sw) * sizeof(*in_mem));
const int rnd1 = (1 << shift1) >> 1;
@ -64,18 +67,19 @@ inv_txfm_add_c(pixel *dst, const ptrdiff_t stride,
if (is_rect2)
in_mem[j] = (in_mem[j] * 2896 + 2048) >> 12;
}
first_1d_fn(in_mem, 1, &tmp[i * w], 1);
first_1d_fn(in_mem, 1, &tmp[i * w], 1, row_clip_max);
} else {
first_1d_fn(&coeff[i], sh, &tmp[i * w], 1);
first_1d_fn(&coeff[i], sh, &tmp[i * w], 1, row_clip_max);
}
for (j = 0; j < w; j++)
tmp[i * w + j] = (tmp[i * w + j] + (rnd1)) >> shift1;
tmp[i * w + j] = iclip((tmp[i * w + j] + (rnd1)) >> shift1,
col_clip_min, col_clip_max);
}
if (h != sh) memset(&tmp[sh * w], 0, w * (h - sh) * sizeof(*tmp));
const int rnd2 = (1 << shift2) >> 1;
for (i = 0; i < w; i++) {
second_1d_fn(&tmp[i], w, out, 1);
second_1d_fn(&tmp[i], w, out, 1, col_clip_max);
for (j = 0; j < h; j++)
dst[i + j * PXSTRIDE(stride)] =
iclip_pixel(dst[i + j * PXSTRIDE(stride)] +
@ -145,15 +149,18 @@ inv_txfm_fn64(64, 64, 2, 4)
static void inv_txfm_add_wht_wht_4x4_c(pixel *dst, const ptrdiff_t stride,
coef *const coeff, const int eob)
{
int i, j;
const int col_clip_max = (1 << (imax(BITDEPTH + 6, 16) - 1)) -1;
const int col_clip_min = -col_clip_max - 1;
coef tmp[4 * 4], out[4];
for (i = 0; i < 4; i++)
for (int i = 0; i < 4; i++)
inv_wht4_1d(&coeff[i], 4, &tmp[i * 4], 1, 0);
for (int k = 0; k < 4 * 4; k++)
tmp[k] = iclip(tmp[k], col_clip_min, col_clip_max);
for (i = 0; i < 4; i++) {
for (int i = 0; i < 4; i++) {
inv_wht4_1d(&tmp[i], 4, out, 1, 1);
for (j = 0; j < 4; j++)
for (int j = 0; j < 4; j++)
dst[i + j * PXSTRIDE(stride)] =
iclip_pixel(dst[i + j * PXSTRIDE(stride)] + out[j]);
}
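Editorial note (not part of the diff) working out the new clip bounds for the two configured bit depths:
//   8 bpc:  row_clip_max = (1 << 15) - 1 = 32767,   col_clip_max = 32767
//  10 bpc:  row_clip_max = (1 << 17) - 1 = 131071,  col_clip_max = 32767
// i.e. the first (row) pass keeps BITDEPTH + 8 signed bits of headroom and
// the second (column) pass clamps intermediates to at least 16 signed bits.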


@ -28,7 +28,9 @@
#ifndef __DAV1D_SRC_LEVELS_H__
#define __DAV1D_SRC_LEVELS_H__
#include "dav1d/picture.h"
#include <stdint.h>
#include "dav1d/headers.h"
enum ObuType {
OBU_SEQ_HDR = 1,
@ -41,10 +43,6 @@ enum ObuType {
OBU_PADDING = 15,
};
// Constants from Section 3. "Symbols and abbreviated terms"
#define MAX_TILE_COLS 64
#define MAX_TILE_ROWS 64
enum TxfmSize {
TX_4X4,
TX_8X8,
@ -63,13 +61,6 @@ enum BlockLevel {
N_BL_LEVELS,
};
enum TxfmMode {
TX_4X4_ONLY,
TX_LARGEST,
TX_SWITCHABLE,
N_TX_MODES,
};
enum RectTxfmSize {
RTX_4X8 = N_TX_SIZES,
RTX_8X4,
@ -203,16 +194,6 @@ enum BlockSize {
N_BS_SIZES,
};
enum FilterMode {
FILTER_8TAP_REGULAR,
FILTER_8TAP_SMOOTH,
FILTER_8TAP_SHARP,
N_SWITCHABLE_FILTERS,
FILTER_BILINEAR = N_SWITCHABLE_FILTERS,
N_FILTERS,
FILTER_SWITCHABLE = N_FILTERS,
};
enum Filter2d { // order is horizontal, vertical
FILTER_2D_8TAP_REGULAR,
FILTER_2D_8TAP_REGULAR_SMOOTH,
@ -269,242 +250,16 @@ enum InterIntraType {
INTER_INTRA_WEDGE,
};
enum AdaptiveBoolean {
OFF = 0,
ON = 1,
ADAPTIVE = 2,
};
enum RestorationType {
RESTORATION_NONE,
RESTORATION_SWITCHABLE,
RESTORATION_WIENER,
RESTORATION_SGRPROJ,
};
typedef struct mv {
int16_t y, x;
} mv;
enum WarpedMotionType {
WM_TYPE_IDENTITY,
WM_TYPE_TRANSLATION,
WM_TYPE_ROT_ZOOM,
WM_TYPE_AFFINE,
};
typedef struct WarpedMotionParams {
enum WarpedMotionType type;
int32_t matrix[6];
union {
struct {
int16_t alpha, beta, gamma, delta;
};
int16_t abcd[4];
};
} WarpedMotionParams;
enum MotionMode {
MM_TRANSLATION,
MM_OBMC,
MM_WARP,
};
typedef struct Av1SequenceHeader {
int profile;
int still_picture;
int reduced_still_picture_header;
int timing_info_present;
int num_units_in_tick;
int time_scale;
int equal_picture_interval;
int num_ticks_per_picture;
int decoder_model_info_present;
int encoder_decoder_buffer_delay_length;
int num_units_in_decoding_tick;
int buffer_removal_delay_length;
int frame_presentation_delay_length;
int display_model_info_present;
int num_operating_points;
struct Av1SequenceHeaderOperatingPoint {
int idc;
int major_level, minor_level;
int tier;
int decoder_model_param_present;
int decoder_buffer_delay;
int encoder_buffer_delay;
int low_delay_mode;
int display_model_param_present;
int initial_display_delay;
} operating_points[32];
int max_width, max_height, width_n_bits, height_n_bits;
int frame_id_numbers_present;
int delta_frame_id_n_bits;
int frame_id_n_bits;
int sb128;
int filter_intra;
int intra_edge_filter;
int inter_intra;
int masked_compound;
int warped_motion;
int dual_filter;
int order_hint;
int jnt_comp;
int ref_frame_mvs;
enum AdaptiveBoolean screen_content_tools;
enum AdaptiveBoolean force_integer_mv;
int order_hint_n_bits;
int super_res;
int cdef;
int restoration;
int bpc;
int hbd;
int color_description_present;
enum Dav1dPixelLayout layout;
enum Dav1dColorPrimaries pri;
enum Dav1dTransferCharacteristics trc;
enum Dav1dMatrixCoefficients mtrx;
enum Dav1dChromaSamplePosition chr;
int color_range;
int separate_uv_delta_q;
int film_grain_present;
} Av1SequenceHeader;
#define NUM_SEGMENTS 8
typedef struct Av1SegmentationData {
int delta_q;
int delta_lf_y_v, delta_lf_y_h, delta_lf_u, delta_lf_v;
int ref;
int skip;
int globalmv;
} Av1SegmentationData;
typedef struct Av1SegmentationDataSet {
Av1SegmentationData d[NUM_SEGMENTS];
int preskip;
int last_active_segid;
} Av1SegmentationDataSet;
typedef struct Av1LoopfilterModeRefDeltas {
int mode_delta[2];
int ref_delta[8];
} Av1LoopfilterModeRefDeltas;
typedef struct Av1FilmGrainData {
int num_y_points;
uint8_t y_points[14][2 /* value, scaling */];
int chroma_scaling_from_luma;
int num_uv_points[2];
uint8_t uv_points[2][10][2 /* value, scaling */];
int scaling_shift;
int ar_coeff_lag;
int8_t ar_coeffs_y[24];
int8_t ar_coeffs_uv[2][25];
int ar_coeff_shift;
int grain_scale_shift;
int uv_mult[2];
int uv_luma_mult[2];
int uv_offset[2];
int overlap_flag;
int clip_to_restricted_range;
} Av1FilmGrainData;
typedef struct Av1FrameHeader {
int show_existing_frame;
int existing_frame_idx;
int frame_id;
int frame_presentation_delay;
enum Dav1dFrameType frame_type;
int show_frame;
int showable_frame;
int error_resilient_mode;
int disable_cdf_update;
int allow_screen_content_tools;
int force_integer_mv;
int frame_size_override;
#define PRIMARY_REF_NONE 7
int primary_ref_frame;
int buffer_removal_time_present;
struct Av1FrameHeaderOperatingPoint {
int buffer_removal_time;
} operating_points[32];
int frame_offset;
int refresh_frame_flags;
int width, height;
int render_width, render_height;
int super_res;
int have_render_size;
int allow_intrabc;
int frame_ref_short_signaling;
int refidx[7];
int hp;
enum FilterMode subpel_filter_mode;
int switchable_motion_mode;
int use_ref_frame_mvs;
int refresh_context;
struct {
int uniform;
unsigned n_bytes;
int min_log2_cols, max_log2_cols, log2_cols, cols;
int min_log2_rows, max_log2_rows, log2_rows, rows;
uint16_t col_start_sb[MAX_TILE_COLS + 1];
uint16_t row_start_sb[MAX_TILE_ROWS + 1];
int update;
} tiling;
struct {
int yac;
int ydc_delta;
int udc_delta, uac_delta, vdc_delta, vac_delta;
int qm, qm_y, qm_u, qm_v;
} quant;
struct {
int enabled, update_map, temporal, update_data;
Av1SegmentationDataSet seg_data;
int lossless[NUM_SEGMENTS], qidx[NUM_SEGMENTS];
} segmentation;
struct {
struct {
int present;
int res_log2;
} q;
struct {
int present;
int res_log2;
int multi;
} lf;
} delta;
int all_lossless;
struct {
int level_y[2];
int level_u, level_v;
int mode_ref_delta_enabled;
int mode_ref_delta_update;
Av1LoopfilterModeRefDeltas mode_ref_deltas;
int sharpness;
} loopfilter;
struct {
int damping;
int n_bits;
int y_strength[8];
int uv_strength[8];
} cdef;
struct {
enum RestorationType type[3];
int unit_size[2];
} restoration;
enum TxfmMode txfm_mode;
int switchable_comp_refs;
int skip_mode_allowed, skip_mode_enabled, skip_mode_refs[2];
int warp_motion;
int reduced_txtp_set;
WarpedMotionParams gmv[7];
struct {
int present, update, seed;
Av1FilmGrainData data;
} film_grain;
} Av1FrameHeader;
#define QINDEX_RANGE 256
typedef struct Av1Block {


@ -178,13 +178,13 @@ void bytefn(dav1d_loopfilter_sbrow)(const Dav1dFrameContext *const f,
int x, have_left;
// Don't filter outside the frame
const int have_top = sby > 0;
const int is_sb64 = !f->seq_hdr.sb128;
const int is_sb64 = !f->seq_hdr->sb128;
const int starty4 = (sby & is_sb64) << 4;
const int sbsz = 32 >> is_sb64;
const int sbl2 = 5 - is_sb64;
const int halign = (f->bh + 31) & ~31;
const int ss_ver = f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
const int ss_hor = f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
const int vmask = 16 >> ss_ver, hmask = 16 >> ss_hor;
const unsigned vmax = 1U << vmask, hmax = 1U << hmask;
const unsigned endy4 = starty4 + imin(f->h4 - sby * sbsz, sbsz);
@ -194,7 +194,7 @@ void bytefn(dav1d_loopfilter_sbrow)(const Dav1dFrameContext *const f,
const uint8_t *lpf_y = &f->lf.tx_lpf_right_edge[0][sby << sbl2];
const uint8_t *lpf_uv = &f->lf.tx_lpf_right_edge[1][sby << (sbl2 - ss_ver)];
for (int tile_col = 1;; tile_col++) {
x = f->frame_hdr.tiling.col_start_sb[tile_col];
x = f->frame_hdr->tiling.col_start_sb[tile_col];
if ((x << sbl2) >= f->bw) break;
const int bx4 = x & is_sb64 ? 16 : 0, cbx4 = bx4 >> ss_hor;
x >>= is_sb64;
@ -211,7 +211,7 @@ void bytefn(dav1d_loopfilter_sbrow)(const Dav1dFrameContext *const f,
y_hmask[imin(idx, lpf_y[y - starty4])][sidx] |= smask;
}
if (f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I400) {
if (f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400) {
uint16_t (*const uv_hmask)[2] = lflvl[x].filter_uv[0][cbx4];
for (unsigned y = starty4 >> ss_ver, uv_mask = 1 << y; y < uv_endy4;
y++, uv_mask <<= 1)
@ -247,7 +247,7 @@ void bytefn(dav1d_loopfilter_sbrow)(const Dav1dFrameContext *const f,
y_vmask[imin(idx, a->tx_lpf_y[i])][sidx] |= smask;
}
if (f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I400) {
if (f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400) {
const unsigned cw = (w + ss_hor) >> ss_hor;
uint16_t (*const uv_vmask)[2] = lflvl[x].filter_uv[1][starty4 >> ss_ver];
for (unsigned uv_mask = 1, i = 0; i < cw; uv_mask <<= 1, i++) {
@ -268,18 +268,18 @@ void bytefn(dav1d_loopfilter_sbrow)(const Dav1dFrameContext *const f,
x++, have_left = 1, ptr += 128, level_ptr += 32)
{
filter_plane_cols_y(f, have_left, level_ptr, f->b4_stride,
lflvl[x].filter_y[0], ptr, f->cur.p.stride[0],
lflvl[x].filter_y[0], ptr, f->cur.stride[0],
imin(32, f->w4 - x * 32), starty4, endy4);
}
level_ptr = f->lf.level + f->b4_stride * sby * sbsz;
for (ptr = p[0], x = 0; x < f->sb128w; x++, ptr += 128, level_ptr += 32) {
filter_plane_rows_y(f, have_top, level_ptr, f->b4_stride,
lflvl[x].filter_y[1], ptr, f->cur.p.stride[0],
lflvl[x].filter_y[1], ptr, f->cur.stride[0],
imin(32, f->w4 - x * 32), starty4, endy4);
}
if (!f->frame_hdr.loopfilter.level_u && !f->frame_hdr.loopfilter.level_v)
if (!f->frame_hdr->loopfilter.level_u && !f->frame_hdr->loopfilter.level_v)
return;
ptrdiff_t uv_off;
@ -289,7 +289,7 @@ void bytefn(dav1d_loopfilter_sbrow)(const Dav1dFrameContext *const f,
{
filter_plane_cols_uv(f, have_left, level_ptr, f->b4_stride,
lflvl[x].filter_uv[0],
&p[1][uv_off], &p[2][uv_off], f->cur.p.stride[1],
&p[1][uv_off], &p[2][uv_off], f->cur.stride[1],
(imin(32, f->w4 - x * 32) + ss_hor) >> ss_hor,
starty4 >> ss_ver, uv_endy4, ss_ver);
}
@ -300,7 +300,7 @@ void bytefn(dav1d_loopfilter_sbrow)(const Dav1dFrameContext *const f,
{
filter_plane_rows_uv(f, have_top, level_ptr, f->b4_stride,
lflvl[x].filter_uv[1],
&p[1][uv_off], &p[2][uv_off], f->cur.p.stride[1],
&p[1][uv_off], &p[2][uv_off], f->cur.stride[1],
(imin(32, f->w4 - x * 32) + ss_hor) >> ss_hor,
starty4 >> ss_ver, uv_endy4, ss_hor);
}


@ -287,7 +287,7 @@ static inline void mask_edges_chroma(uint16_t (*const masks)[32][2][2],
void dav1d_create_lf_mask_intra(Av1Filter *const lflvl,
uint8_t (*const level_cache)[4],
const ptrdiff_t b4_stride,
const Av1FrameHeader *const hdr,
const Dav1dFrameHeader *const hdr,
const uint8_t (*filter_level)[8][2],
const int bx, const int by,
const int iw, const int ih,
@ -351,7 +351,7 @@ void dav1d_create_lf_mask_intra(Av1Filter *const lflvl,
void dav1d_create_lf_mask_inter(Av1Filter *const lflvl,
uint8_t (*const level_cache)[4],
const ptrdiff_t b4_stride,
const Av1FrameHeader *const hdr,
const Dav1dFrameHeader *const hdr,
const uint8_t (*filter_level)[8][2],
const int bx, const int by,
const int iw, const int ih,
@ -435,7 +435,7 @@ void dav1d_calc_eih(Av1FilterLUT *const lim_lut, const int filter_sharpness) {
static inline void calc_lf_value(uint8_t (*const lflvl_values)[2],
const int is_chroma, const int base_lvl,
const int lf_delta, const int seg_delta,
const Av1LoopfilterModeRefDeltas *const mr_delta)
const Dav1dLoopfilterModeRefDeltas *const mr_delta)
{
const int base = iclip(iclip(base_lvl + lf_delta, 0, 63) + seg_delta, 0, 63);
@ -458,7 +458,7 @@ static inline void calc_lf_value(uint8_t (*const lflvl_values)[2],
}
void dav1d_calc_lf_values(uint8_t (*const lflvl_values)[4][8][2],
const Av1FrameHeader *const hdr,
const Dav1dFrameHeader *const hdr,
const int8_t lf_delta[4])
{
const int n_seg = hdr->segmentation.enabled ? 8 : 1;
@ -468,11 +468,11 @@ void dav1d_calc_lf_values(uint8_t (*const lflvl_values)[4][8][2],
return;
}
const Av1LoopfilterModeRefDeltas *const mr_deltas =
const Dav1dLoopfilterModeRefDeltas *const mr_deltas =
hdr->loopfilter.mode_ref_delta_enabled ?
&hdr->loopfilter.mode_ref_deltas : NULL;
for (int s = 0; s < n_seg; s++) {
const Av1SegmentationData *const segd =
const Dav1dSegmentationData *const segd =
hdr->segmentation.enabled ? &hdr->segmentation.seg_data.d[s] : NULL;
calc_lf_value(lflvl_values[s][0], 0, hdr->loopfilter.level_y[0],


@ -40,26 +40,30 @@ typedef struct Av1FilterLUT {
} Av1FilterLUT;
typedef struct Av1RestorationUnit {
enum RestorationType type;
enum Dav1dRestorationType type;
int16_t filter_h[3];
int16_t filter_v[3];
uint8_t sgr_idx;
int16_t sgr_weights[2];
} Av1RestorationUnit;
// each struct describes one 128x128 area (1 or 4 SBs)
// each struct describes one 128x128 area (1 or 4 SBs), pre-superres-scaling
typedef struct Av1Filter {
// each bit is 1 col
uint16_t filter_y[2 /* 0=col, 1=row */][32][3][2];
uint16_t filter_uv[2 /* 0=col, 1=row */][32][2][2];
int8_t cdef_idx[4]; // -1 means "unset"
uint16_t noskip_mask[32][2];
Av1RestorationUnit lr[3][4];
} Av1Filter;
// each struct describes one 128x128 area (1 or 4 SBs), post-superres-scaling
typedef struct Av1Restoration {
Av1RestorationUnit lr[3][4];
} Av1Restoration;
void dav1d_create_lf_mask_intra(Av1Filter *lflvl, uint8_t (*level_cache)[4],
const ptrdiff_t b4_stride,
const Av1FrameHeader *hdr,
const Dav1dFrameHeader *hdr,
const uint8_t (*level)[8][2], int bx, int by,
int iw, int ih, enum BlockSize bs,
enum RectTxfmSize ytx, enum RectTxfmSize uvtx,
@ -67,7 +71,7 @@ void dav1d_create_lf_mask_intra(Av1Filter *lflvl, uint8_t (*level_cache)[4],
uint8_t *ly, uint8_t *auv, uint8_t *luv);
void dav1d_create_lf_mask_inter(Av1Filter *lflvl, uint8_t (*level_cache)[4],
const ptrdiff_t b4_stride,
const Av1FrameHeader *hdr,
const Dav1dFrameHeader *hdr,
const uint8_t (*level)[8][2], int bx, int by,
int iw, int ih, int skip_inter,
enum BlockSize bs, const uint16_t *tx_mask,
@ -75,7 +79,7 @@ void dav1d_create_lf_mask_inter(Av1Filter *lflvl, uint8_t (*level_cache)[4],
enum Dav1dPixelLayout layout, uint8_t *ay,
uint8_t *ly, uint8_t *auv, uint8_t *luv);
void dav1d_calc_eih(Av1FilterLUT *lim_lut, int filter_sharpness);
void dav1d_calc_lf_values(uint8_t (*values)[4][8][2], const Av1FrameHeader *hdr,
void dav1d_calc_lf_values(uint8_t (*values)[4][8][2], const Dav1dFrameHeader *hdr,
const int8_t lf_delta[4]);
#endif /* __DAV1D_SRC_LF_MASK_H__ */


@ -43,6 +43,7 @@
#include "src/ref.h"
#include "src/thread_task.h"
#include "src/wedge.h"
#include "src/film_grain.h"
static void init_internal(void) {
dav1d_init_wedge_masks();
@ -57,9 +58,12 @@ const char *dav1d_version(void) {
void dav1d_default_settings(Dav1dSettings *const s) {
s->n_frame_threads = 1;
s->n_tile_threads = 1;
s->apply_grain = 1;
s->allocator.cookie = NULL;
s->allocator.alloc_picture_callback = default_picture_allocator;
s->allocator.release_picture_callback = default_picture_release;
s->operating_point = 0;
s->all_layers = 1; // just until the tests are adjusted
}
int dav1d_open(Dav1dContext **const c_out,
@ -71,19 +75,26 @@ int dav1d_open(Dav1dContext **const c_out,
validate_input_or_ret(c_out != NULL, -EINVAL);
validate_input_or_ret(s != NULL, -EINVAL);
validate_input_or_ret(s->n_tile_threads >= 1 &&
s->n_tile_threads <= 64, -EINVAL);
s->n_tile_threads <= DAV1D_MAX_TILE_THREADS, -EINVAL);
validate_input_or_ret(s->n_frame_threads >= 1 &&
s->n_frame_threads <= 256, -EINVAL);
s->n_frame_threads <= DAV1D_MAX_FRAME_THREADS, -EINVAL);
validate_input_or_ret(s->allocator.alloc_picture_callback != NULL,
-EINVAL);
validate_input_or_ret(s->allocator.release_picture_callback != NULL,
-EINVAL);
validate_input_or_ret(s->operating_point >= 0 &&
s->operating_point <= 31, -EINVAL);
Dav1dContext *const c = *c_out = dav1d_alloc_aligned(sizeof(*c), 32);
if (!c) goto error;
memset(c, 0, sizeof(*c));
c->allocator = s->allocator;
c->apply_grain = s->apply_grain;
c->operating_point = s->operating_point;
c->all_layers = s->all_layers;
c->frame_thread.flush = &c->frame_thread.flush_mem;
atomic_init(c->frame_thread.flush, 0);
c->n_fc = s->n_frame_threads;
c->fc = dav1d_alloc_aligned(sizeof(*c->fc) * s->n_frame_threads, 32);
if (!c->fc) goto error;
@ -157,6 +168,54 @@ error:
return -ENOMEM;
}
static void dummy_free(const uint8_t *const data, void *const user_data) {
assert(data && !user_data);
}
int dav1d_parse_sequence_header(Dav1dSequenceHeader *const out,
const uint8_t *const ptr, const size_t sz)
{
Dav1dData buf = { 0 };
int res;
validate_input_or_ret(out != NULL, -EINVAL);
Dav1dSettings s;
dav1d_default_settings(&s);
Dav1dContext *c;
res = dav1d_open(&c, &s);
if (res < 0) return res;
if (ptr) {
res = dav1d_data_wrap(&buf, ptr, sz, dummy_free, NULL);
if (res < 0) goto error;
}
while (buf.sz > 0) {
res = dav1d_parse_obus(c, &buf, 1);
if (res < 0) goto error;
assert((size_t)res <= buf.sz);
buf.sz -= res;
buf.data += res;
}
if (!c->seq_hdr) {
res = -EINVAL;
goto error;
}
memcpy(out, c->seq_hdr, sizeof(*out));
res = 0;
error:
dav1d_data_unref(&buf);
dav1d_close(&c);
return res;
}
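A hedged caller-side sketch of the new dav1d_parse_sequence_header() entry point (editorial, not part of the diff; it assumes Dav1dSequenceHeader keeps the fields of the old Av1SequenceHeader shown further down):
static int probe_stream(const uint8_t *const buf, const size_t buf_size) {
    Dav1dSequenceHeader seq;
    const int res = dav1d_parse_sequence_header(&seq, buf, buf_size);
    if (res < 0)
        return res;   // -EINVAL if the buffer contained no sequence header
    // seq.max_width, seq.max_height, seq.layout, seq.bpc etc. can now be used
    // to size buffers or pick an operating point before dav1d_open()
    return 0;
}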
int dav1d_send_data(Dav1dContext *const c, Dav1dData *const in)
{
validate_input_or_ret(c != NULL, -EINVAL);
@ -170,6 +229,52 @@ int dav1d_send_data(Dav1dContext *const c, Dav1dData *const in)
return 0;
}
static int output_image(Dav1dContext *const c, Dav1dPicture *const out,
Dav1dPicture *const in)
{
const Dav1dFilmGrainData *fgdata = &in->frame_hdr->film_grain.data;
int has_grain = fgdata->num_y_points || fgdata->num_uv_points[0] ||
fgdata->num_uv_points[1];
// skip lower spatial layers
if (c->operating_point_idc && !c->all_layers) {
const int max_spatial_id = ulog2(c->operating_point_idc >> 8);
if (max_spatial_id > in->frame_hdr->spatial_id) {
dav1d_picture_unref(in);
return 0;
}
}
// If there is nothing to be done, skip the allocation/copy
if (!c->apply_grain || !has_grain) {
dav1d_picture_move_ref(out, in);
return 0;
}
// Apply film grain to a new copy of the image to avoid corrupting refs
int res = dav1d_picture_alloc_copy(out, in->p.w, in);
if (res < 0)
return res;
switch (out->p.bpc) {
#if CONFIG_8BPC
case 8:
dav1d_apply_grain_8bpc(out, in);
break;
#endif
#if CONFIG_10BPC
case 10:
dav1d_apply_grain_10bpc(out, in);
break;
#endif
default:
assert(0);
}
dav1d_picture_unref(in);
return 0;
}
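Editorial note on the spatial-layer check above (not part of the diff): operating_point_idc follows the AV1 bitstream layout, with bits 0..7 flagging the temporal layers and bits 8..11 the spatial layers of the selected operating point, e.g.:
//   operating_point_idc = 0x303  ->  idc >> 8 = 0x3  ->  ulog2(0x3) = 1
// so max_spatial_id == 1 and, with all_layers == 0, frames with spatial_id 0
// (the lower layer) are dropped and only the top spatial layer is returned.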
int dav1d_get_picture(Dav1dContext *const c, Dav1dPicture *const out)
{
int res;
@ -197,21 +302,20 @@ int dav1d_get_picture(Dav1dContext *const c, Dav1dPicture *const out)
if (++c->frame_thread.next == c->n_fc)
c->frame_thread.next = 0;
if (out_delayed->p.data[0]) {
if (out_delayed->visible && !out_delayed->flushed) {
dav1d_picture_ref(out, &out_delayed->p);
}
const unsigned progress = atomic_load_explicit(&out_delayed->progress[1],
memory_order_relaxed);
if (out_delayed->visible && progress != FRAME_ERROR)
dav1d_picture_ref(&c->out, &out_delayed->p);
dav1d_thread_picture_unref(out_delayed);
if (out->data[0]) {
return 0;
}
// else continue
if (c->out.data[0])
return output_image(c, out, &c->out);
}
} while (++flush_count < c->n_fc);
return -EAGAIN;
}
while (in->sz > 0) {
if ((res = dav1d_parse_obus(c, in)) < 0) {
if ((res = dav1d_parse_obus(c, in, 0)) < 0) {
dav1d_data_unref(in);
return res;
}
@ -220,16 +324,12 @@ int dav1d_get_picture(Dav1dContext *const c, Dav1dPicture *const out)
in->sz -= res;
in->data += res;
if (!in->sz) dav1d_data_unref(in);
if (c->out.data[0]) {
dav1d_picture_move_ref(out, &c->out);
return 0;
}
if (c->out.data[0])
break;
}
if (c->out.data[0]) {
dav1d_picture_move_ref(out, &c->out);
return 0;
}
if (c->out.data[0])
return output_image(c, out, &c->out);
return -EAGAIN;
}
@ -239,8 +339,39 @@ void dav1d_flush(Dav1dContext *const c) {
if (c->n_fc == 1) return;
for (unsigned n = 0; n < c->n_fc; n++)
c->frame_thread.out_delayed[n].flushed = 1;
// mark each currently-running frame as flushing, so that we
// exit out as quickly as the running thread checks this flag
atomic_store(c->frame_thread.flush, 1);
for (unsigned n = 0, next = c->frame_thread.next; n < c->n_fc; n++, next++) {
if (next == c->n_fc) next = 0;
Dav1dFrameContext *const f = &c->fc[next];
pthread_mutex_lock(&f->frame_thread.td.lock);
if (f->n_tile_data > 0) {
while (f->n_tile_data > 0)
pthread_cond_wait(&f->frame_thread.td.cond,
&f->frame_thread.td.lock);
assert(!f->cur.data[0]);
}
pthread_mutex_unlock(&f->frame_thread.td.lock);
Dav1dThreadPicture *const out_delayed = &c->frame_thread.out_delayed[next];
if (out_delayed->p.data[0])
dav1d_thread_picture_unref(out_delayed);
}
atomic_store(c->frame_thread.flush, 0);
for (int i = 0; i < 8; i++) {
if (c->refs[i].p.p.data[0])
dav1d_thread_picture_unref(&c->refs[i].p);
dav1d_ref_dec(&c->refs[i].segmap);
dav1d_ref_dec(&c->refs[i].refmvs);
if (c->cdf[i].cdf)
dav1d_cdf_thread_unref(&c->cdf[i]);
}
c->frame_hdr = NULL;
c->seq_hdr = NULL;
dav1d_ref_dec(&c->seq_hdr_ref);
c->frame_thread.next = 0;
}
void dav1d_close(Dav1dContext **const c_out) {
@ -249,6 +380,7 @@ void dav1d_close(Dav1dContext **const c_out) {
Dav1dContext *const c = *c_out;
if (!c) return;
dav1d_flush(c);
for (unsigned n = 0; n < c->n_fc; n++) {
Dav1dFrameContext *const f = &c->fc[n];
@ -259,22 +391,6 @@ void dav1d_close(Dav1dContext **const c_out) {
pthread_cond_signal(&f->frame_thread.td.cond);
pthread_mutex_unlock(&f->frame_thread.td.lock);
pthread_join(f->frame_thread.td.thread, NULL);
// free references from dav1d_submit_frame() usually freed by
// dav1d_decode_frame
for (int i = 0; i < 7; i++) {
if (f->refp[i].p.data[0])
dav1d_thread_picture_unref(&f->refp[i]);
dav1d_ref_dec(&f->ref_mvs_ref[i]);
}
dav1d_thread_picture_unref(&f->cur);
dav1d_cdf_thread_unref(&f->in_cdf);
if (f->frame_hdr.refresh_context)
dav1d_cdf_thread_unref(&f->out_cdf);
dav1d_ref_dec(&f->cur_segmap_ref);
dav1d_ref_dec(&f->prev_segmap_ref);
dav1d_ref_dec(&f->mvs_ref);
for (int i = 0; i < f->n_tile_data; i++)
dav1d_data_unref(&f->tile[i].data);
freep(&f->frame_thread.b);
dav1d_freep_aligned(&f->frame_thread.pal_idx);
dav1d_freep_aligned(&f->frame_thread.cf);
@ -324,6 +440,7 @@ void dav1d_close(Dav1dContext **const c_out) {
dav1d_free_aligned(f->ipred_edge[0]);
free(f->a);
free(f->lf.mask);
free(f->lf.lr_mask);
free(f->lf.level);
free(f->lf.tx_lpf_right_edge[0]);
av1_free_ref_mv_common(f->libaom_cm);
@ -348,5 +465,8 @@ void dav1d_close(Dav1dContext **const c_out) {
dav1d_ref_dec(&c->refs[n].refmvs);
dav1d_ref_dec(&c->refs[n].segmap);
}
dav1d_ref_dec(&c->seq_hdr_ref);
dav1d_ref_dec(&c->frame_hdr_ref);
dav1d_freep_aligned(c_out);
}


@ -74,6 +74,8 @@ typedef struct Dav1dLoopRestorationDSPContext {
void dav1d_loop_restoration_dsp_init_8bpc(Dav1dLoopRestorationDSPContext *c);
void dav1d_loop_restoration_dsp_init_10bpc(Dav1dLoopRestorationDSPContext *c);
void dav1d_loop_restoration_dsp_init_arm_8bpc(Dav1dLoopRestorationDSPContext *c);
void dav1d_loop_restoration_dsp_init_arm_10bpc(Dav1dLoopRestorationDSPContext *c);
void dav1d_loop_restoration_dsp_init_x86_8bpc(Dav1dLoopRestorationDSPContext *c);
void dav1d_loop_restoration_dsp_init_x86_10bpc(Dav1dLoopRestorationDSPContext *c);


@ -573,7 +573,11 @@ void bitfn(dav1d_loop_restoration_dsp_init)(Dav1dLoopRestorationDSPContext *cons
c->wiener = wiener_c;
c->selfguided = selfguided_c;
#if HAVE_ASM && ARCH_X86 && BITDEPTH == 8
#if HAVE_ASM
#if ARCH_AARCH64 || ARCH_ARM
bitfn(dav1d_loop_restoration_dsp_init_arm)(c);
#elif ARCH_X86
bitfn(dav1d_loop_restoration_dsp_init_x86)(c);
#endif
#endif
}


@ -33,7 +33,6 @@
#include "src/lr_apply.h"
enum LrRestorePlanes {
LR_RESTORE_Y = 1 << 0,
LR_RESTORE_U = 1 << 1,
@ -44,13 +43,14 @@ enum LrRestorePlanes {
// contain at most 2 stripes. Each stripe requires 4 rows of pixels (2 above
// and 2 below); the final 4 rows are used to swap the bottom of the last
// stripe with the top of the next super block row.
static void backup_lpf(pixel *dst, ptrdiff_t dst_stride,
const pixel *src, ptrdiff_t src_stride,
static void backup_lpf(const Dav1dFrameContext *const f,
pixel *dst, const ptrdiff_t dst_stride,
const pixel *src, const ptrdiff_t src_stride,
const int ss_ver, const int sb128,
int row, const int row_h, const int w)
int row, const int row_h, const int src_w, const int ss_hor)
{
src_stride = PXSTRIDE(src_stride);
dst_stride = PXSTRIDE(dst_stride);
const int dst_w = f->frame_hdr->super_res.enabled ?
(f->frame_hdr->width[1] + ss_hor) >> ss_hor : src_w;
// The first stripe of the frame is shorter by 8 luma pixel rows.
int stripe_h = (64 - 8 * !row) >> ss_ver;
@ -59,23 +59,40 @@ static void backup_lpf(pixel *dst, ptrdiff_t dst_stride,
const int top = 4 << sb128;
// Copy the top part of the stored loop filtered pixels from the
// previous sb row needed above the first stripe of this sb row.
pixel_copy(&dst[dst_stride * 0], &dst[dst_stride * top], w);
pixel_copy(&dst[dst_stride * 1], &dst[dst_stride * (top + 1)], w);
pixel_copy(&dst[dst_stride * 2], &dst[dst_stride * (top + 2)], w);
pixel_copy(&dst[dst_stride * 3], &dst[dst_stride * (top + 3)], w);
pixel_copy(&dst[PXSTRIDE(dst_stride) * 0],
&dst[PXSTRIDE(dst_stride) * top], dst_w);
pixel_copy(&dst[PXSTRIDE(dst_stride) * 1],
&dst[PXSTRIDE(dst_stride) * (top + 1)], dst_w);
pixel_copy(&dst[PXSTRIDE(dst_stride) * 2],
&dst[PXSTRIDE(dst_stride) * (top + 2)], dst_w);
pixel_copy(&dst[PXSTRIDE(dst_stride) * 3],
&dst[PXSTRIDE(dst_stride) * (top + 3)], dst_w);
}
dst += 4 * dst_stride;
src += (stripe_h - 2) * src_stride;
dst += 4 * PXSTRIDE(dst_stride);
src += (stripe_h - 2) * PXSTRIDE(src_stride);
for (; row + stripe_h <= row_h; row += stripe_h) {
for (int i = 0; i < 4; i++) {
pixel_copy(dst, src, w);
dst += dst_stride;
src += src_stride;
if (f->frame_hdr->super_res.enabled) {
while (row + stripe_h <= row_h) {
f->dsp->mc.resize(dst, dst_stride, src, src_stride,
dst_w, src_w, 4, f->resize_step[ss_hor],
f->resize_start[ss_hor]);
row += stripe_h; // unmodified stripe_h for the 1st stripe
stripe_h = 64 >> ss_ver;
src += stripe_h * PXSTRIDE(src_stride);
dst += 4 * PXSTRIDE(dst_stride);
}
} else {
while (row + stripe_h <= row_h) {
for (int i = 0; i < 4; i++) {
pixel_copy(dst, src, src_w);
dst += PXSTRIDE(dst_stride);
src += PXSTRIDE(src_stride);
}
row += stripe_h; // unmodified stripe_h for the 1st stripe
stripe_h = 64 >> ss_ver;
src += (stripe_h - 4) * PXSTRIDE(src_stride);
}
stripe_h = 64 >> ss_ver;
src += (stripe_h - 4) * src_stride;
}
}
@ -83,47 +100,47 @@ void bytefn(dav1d_lr_copy_lpf)(Dav1dFrameContext *const f,
/*const*/ pixel *const src[3], const int sby)
{
const ptrdiff_t offset = 8 * !!sby;
const ptrdiff_t *const src_stride = f->cur.p.stride;
const ptrdiff_t *const src_stride = f->cur.stride;
const ptrdiff_t lr_stride = ((f->sr_cur.p.p.w + 31) & ~31) * sizeof(pixel);
// TODO Also check block level restore type to reduce copying.
const int restore_planes =
((f->frame_hdr.restoration.type[0] != RESTORATION_NONE) << 0) +
((f->frame_hdr.restoration.type[1] != RESTORATION_NONE) << 1) +
((f->frame_hdr.restoration.type[2] != RESTORATION_NONE) << 2);
((f->frame_hdr->restoration.type[0] != DAV1D_RESTORATION_NONE) << 0) +
((f->frame_hdr->restoration.type[1] != DAV1D_RESTORATION_NONE) << 1) +
((f->frame_hdr->restoration.type[2] != DAV1D_RESTORATION_NONE) << 2);
if (restore_planes & LR_RESTORE_Y) {
const int h = f->bh << 2;
const int w = f->bw << 2;
const int row_h = imin((sby + 1) << (6 + f->seq_hdr.sb128), h);
const int y_stripe = (sby << (6 + f->seq_hdr.sb128)) - offset;
backup_lpf(f->lf.lr_lpf_line_ptr[0], sizeof(pixel) * f->b4_stride * 4,
const int row_h = imin((sby + 1) << (6 + f->seq_hdr->sb128), h - 4);
const int y_stripe = (sby << (6 + f->seq_hdr->sb128)) - offset;
backup_lpf(f, f->lf.lr_lpf_line_ptr[0], lr_stride,
src[0] - offset * PXSTRIDE(src_stride[0]), src_stride[0],
0, f->seq_hdr.sb128, y_stripe, row_h, w);
0, f->seq_hdr->sb128, y_stripe, row_h, w, 0);
}
if (restore_planes & (LR_RESTORE_U | LR_RESTORE_V)) {
const int ss_ver = f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
const int ss_hor = f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
const int ss_ver = f->sr_cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
const int ss_hor = f->sr_cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
const int h = f->bh << (2 - ss_ver);
const int w = f->bw << (2 - ss_hor);
const int row_h = imin((sby + 1) << ((6 - ss_ver) + f->seq_hdr.sb128), h);
const int row_h = imin((sby + 1) << ((6 - ss_ver) + f->seq_hdr->sb128), h - 4);
const ptrdiff_t offset_uv = offset >> ss_ver;
const int y_stripe =
(sby << ((6 - ss_ver) + f->seq_hdr.sb128)) - offset_uv;
(sby << ((6 - ss_ver) + f->seq_hdr->sb128)) - offset_uv;
if (restore_planes & LR_RESTORE_U) {
backup_lpf(f->lf.lr_lpf_line_ptr[1], sizeof(pixel) * f->b4_stride * 4,
backup_lpf(f, f->lf.lr_lpf_line_ptr[1], lr_stride,
src[1] - offset_uv * PXSTRIDE(src_stride[1]), src_stride[1],
ss_ver, f->seq_hdr.sb128, y_stripe, row_h, w);
ss_ver, f->seq_hdr->sb128, y_stripe, row_h, w, ss_hor);
}
if (restore_planes & LR_RESTORE_V) {
backup_lpf(f->lf.lr_lpf_line_ptr[2], sizeof(pixel) * f->b4_stride * 4,
backup_lpf(f, f->lf.lr_lpf_line_ptr[2], lr_stride,
src[2] - offset_uv * PXSTRIDE(src_stride[1]), src_stride[1],
ss_ver, f->seq_hdr.sb128, y_stripe, row_h, w);
ss_ver, f->seq_hdr->sb128, y_stripe, row_h, w, ss_hor);
}
}
}
static void lr_stripe(const Dav1dFrameContext *const f, pixel *p,
const pixel (*left)[4], int x, int y,
const int plane, const int unit_w, const int row_h,
@ -131,18 +148,18 @@ static void lr_stripe(const Dav1dFrameContext *const f, pixel *p,
{
const Dav1dDSPContext *const dsp = f->dsp;
const int chroma = !!plane;
const int ss_ver = chroma & (f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420);
const int ss_ver = chroma & (f->sr_cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420);
const int sbrow_has_bottom = (edges & LR_HAVE_BOTTOM);
const pixel *lpf = f->lf.lr_lpf_line_ptr[plane] + x;
const ptrdiff_t p_stride = f->cur.p.stride[chroma];
const ptrdiff_t lpf_stride = sizeof(pixel) * f->b4_stride * 4;
const ptrdiff_t p_stride = f->sr_cur.p.stride[chroma];
const ptrdiff_t lpf_stride = sizeof(pixel) * ((f->sr_cur.p.p.w + 31) & ~31);
// The first stripe of the frame is shorter by 8 luma pixel rows.
int stripe_h = imin((64 - 8 * !y) >> ss_ver, row_h - y);
// FIXME [8] might be easier for SIMD
int16_t filterh[7], filterv[7];
if (lr->type == RESTORATION_WIENER) {
if (lr->type == DAV1D_RESTORATION_WIENER) {
filterh[0] = filterh[6] = lr->filter_h[0];
filterh[1] = filterh[5] = lr->filter_h[1];
filterh[2] = filterh[4] = lr->filter_h[2];
@ -161,11 +178,11 @@ static void lr_stripe(const Dav1dFrameContext *const f, pixel *p,
} else {
edges |= LR_HAVE_BOTTOM;
}
if (lr->type == RESTORATION_WIENER) {
if (lr->type == DAV1D_RESTORATION_WIENER) {
dsp->lr.wiener(p, p_stride, left, lpf, lpf_stride, unit_w, stripe_h,
filterh, filterv, edges);
} else {
assert(lr->type == RESTORATION_SGRPROJ);
assert(lr->type == DAV1D_RESTORATION_SGRPROJ);
dsp->lr.selfguided(p, p_stride, left, lpf, lpf_stride, unit_w, stripe_h,
lr->sgr_idx, lr->sgr_weights, edges);
}
@ -192,11 +209,11 @@ static void lr_sbrow(const Dav1dFrameContext *const f, pixel *p, const int y,
const int w, const int h, const int row_h, const int plane)
{
const int chroma = !!plane;
const int ss_ver = chroma & (f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420);
const int ss_hor = chroma & (f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444);
const ptrdiff_t p_stride = f->cur.p.stride[chroma];
const int ss_ver = chroma & (f->sr_cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420);
const int ss_hor = chroma & (f->sr_cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444);
const ptrdiff_t p_stride = f->sr_cur.p.stride[chroma];
const int unit_size_log2 = f->frame_hdr.restoration.unit_size[!!plane];
const int unit_size_log2 = f->frame_hdr->restoration.unit_size[!!plane];
const int unit_size = 1 << unit_size_log2;
const int half_unit_size = unit_size >> 1;
const int max_unit_size = unit_size + half_unit_size;
@ -211,42 +228,38 @@ static void lr_sbrow(const Dav1dFrameContext *const f, pixel *p, const int y,
// with a 4:2:0 chroma subsampling, do we store the filter information at
// the AV1Filter unit located at (128,128) or (256,256)
// TODO Support chroma subsampling.
const int shift_ver = 7 - ss_ver;
const int shift_hor = 7 - ss_hor;
int ruy = (row_y >> unit_size_log2);
// Merge last restoration unit if its height is < half_unit_size
if (ruy > 0) ruy -= (ruy << unit_size_log2) + half_unit_size > h;
pixel pre_lr_border[2][128 + 8 /* maximum sbrow height is 128 + 8 rows offset */][4];
int unit_w = unit_size, bit = 0;
enum LrEdgeFlags edges = (y > 0 ? LR_HAVE_TOP : 0) |
enum LrEdgeFlags edges = (y > 0 ? LR_HAVE_TOP : 0) | LR_HAVE_RIGHT |
(row_h < h ? LR_HAVE_BOTTOM : 0);
for (int x = 0, rux = 0; x < w; x+= unit_w, rux++, edges |= LR_HAVE_LEFT, bit ^= 1) {
// TODO Clean up this if statement.
int aligned_unit_pos = row_y & ~(unit_size - 1);
if (aligned_unit_pos && aligned_unit_pos + half_unit_size > h)
aligned_unit_pos -= unit_size;
aligned_unit_pos <<= ss_ver;
const int sb_idx = (aligned_unit_pos >> 7) * f->sr_sb128w;
const int unit_idx = ((aligned_unit_pos >> 6) & 1) << 1;
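// Presumably: aligned_unit_pos is mapped back to luma rows (<< ss_ver), so
// aligned_unit_pos >> 7 is the 128x128-superblock row and sb_idx the row-major
// index into lr_mask; unit_idx picks the top/bottom 64-row half and u_idx
// (below) adds the left/right half, i.e. one of the four 64x64 quadrants.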
for (int x = 0; x < w; x += unit_w, edges |= LR_HAVE_LEFT, bit ^= 1) {
if (x + max_unit_size > w) {
unit_w = w - x;
edges &= ~LR_HAVE_RIGHT;
} else {
edges |= LR_HAVE_RIGHT;
}
// Based on the position of the restoration unit, find the corresponding
// AV1Filter unit.
const int unit_idx = ((ruy & 16) >> 3) + ((rux & 16) >> 4);
const int u_idx = unit_idx + ((x >> (shift_hor - 1)) & 1);
const Av1RestorationUnit *const lr =
&f->lf.mask[(((ruy << (unit_size_log2)) >> shift_ver) * f->sb128w) +
(x >> shift_hor)].lr[plane][unit_idx];
&f->lf.lr_mask[sb_idx + (x >> shift_hor)].lr[plane][u_idx];
// FIXME Don't backup if the next restoration unit is RESTORE_NONE
// This also requires not restoring in the same conditions.
if (edges & LR_HAVE_RIGHT) {
backup4xU(pre_lr_border[bit], p + unit_w - 4, p_stride, row_h - y);
}
if (lr->type != RESTORATION_NONE) {
if (lr->type != DAV1D_RESTORATION_NONE) {
lr_stripe(f, p, pre_lr_border[!bit], x, y, plane, unit_w, row_h, lr, edges);
}
p += unit_w;
@ -257,30 +270,30 @@ void bytefn(dav1d_lr_sbrow)(Dav1dFrameContext *const f, pixel *const dst[3],
const int sby)
{
const ptrdiff_t offset_y = 8 * !!sby;
const ptrdiff_t *const dst_stride = f->cur.p.stride;
const ptrdiff_t *const dst_stride = f->sr_cur.p.stride;
const int restore_planes =
((f->frame_hdr.restoration.type[0] != RESTORATION_NONE) << 0) +
((f->frame_hdr.restoration.type[1] != RESTORATION_NONE) << 1) +
((f->frame_hdr.restoration.type[2] != RESTORATION_NONE) << 2);
((f->frame_hdr->restoration.type[0] != DAV1D_RESTORATION_NONE) << 0) +
((f->frame_hdr->restoration.type[1] != DAV1D_RESTORATION_NONE) << 1) +
((f->frame_hdr->restoration.type[2] != DAV1D_RESTORATION_NONE) << 2);
if (restore_planes & LR_RESTORE_Y) {
const int h = f->cur.p.p.h;
const int w = f->cur.p.p.w;
const int row_h = imin((sby + 1) << (6 + f->seq_hdr.sb128), h);
const int y_stripe = (sby << (6 + f->seq_hdr.sb128)) - offset_y;
const int h = f->sr_cur.p.p.h;
const int w = f->sr_cur.p.p.w;
const int row_h = imin((sby + 1) << (6 + f->seq_hdr->sb128), h);
const int y_stripe = (sby << (6 + f->seq_hdr->sb128)) - offset_y;
lr_sbrow(f, dst[0] - offset_y * PXSTRIDE(dst_stride[0]), y_stripe, w,
h, row_h, 0);
}
if (restore_planes & (LR_RESTORE_U | LR_RESTORE_V)) {
const int ss_ver = f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
const int ss_hor = f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
const int h = (f->cur.p.p.h + ss_ver) >> ss_ver;
const int w = (f->cur.p.p.w + ss_hor) >> ss_hor;
const int row_h = imin((sby + 1) << ((6 - ss_ver) + f->seq_hdr.sb128), h);
const int ss_ver = f->sr_cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
const int ss_hor = f->sr_cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
const int h = (f->sr_cur.p.p.h + ss_ver) >> ss_ver;
const int w = (f->sr_cur.p.p.w + ss_hor) >> ss_hor;
const int row_h = imin((sby + 1) << ((6 - ss_ver) + f->seq_hdr->sb128), h);
const ptrdiff_t offset_uv = offset_y >> ss_ver;
const int y_stripe =
(sby << ((6 - ss_ver) + f->seq_hdr.sb128)) - offset_uv;
(sby << ((6 - ss_ver) + f->seq_hdr->sb128)) - offset_uv;
if (restore_planes & LR_RESTORE_U)
lr_sbrow(f, dst[1] - offset_uv * PXSTRIDE(dst_stride[1]), y_stripe,
w, h, row_h, 1);

View File

@ -105,6 +105,12 @@ void (name)(intptr_t bw, intptr_t bh, intptr_t iw, intptr_t ih, intptr_t x, intp
pixel *dst, ptrdiff_t dst_stride, const pixel *src, ptrdiff_t src_stride)
typedef decl_emu_edge_fn(*emu_edge_fn);
#define decl_resize_fn(name) \
void (name)(pixel *dst, ptrdiff_t dst_stride, \
const pixel *src, ptrdiff_t src_stride, \
int dst_w, int src_w, int h, int dx, int mx)
typedef decl_resize_fn(*resize_fn);
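// dst_w/src_w are the output and input row widths in pixels; judging from the
// C implementation (resize_c), dx is the per-output-pixel source step and mx
// the initial source position, both 14-bit fixed-point fractions whose top
// bits select one of 64 filter phases.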
typedef struct Dav1dMCDSPContext {
mc_fn mc[N_2D_FILTERS];
mc_scaled_fn mc_scaled[N_2D_FILTERS];
@ -120,6 +126,7 @@ typedef struct Dav1dMCDSPContext {
warp8x8_fn warp8x8;
warp8x8t_fn warp8x8t;
emu_edge_fn emu_edge;
resize_fn resize;
} Dav1dMCDSPContext;
void dav1d_mc_dsp_init_8bpc(Dav1dMCDSPContext *c);

View File

@ -72,11 +72,11 @@ prep_c(coef *tmp, const pixel *src, const ptrdiff_t src_stride,
F[6] * src[x + +3 * stride] + \
F[7] * src[x + +4 * stride])
#define FILTER_8TAP_RND(src, x, F, stride, sh) \
#define DAV1D_FILTER_8TAP_RND(src, x, F, stride, sh) \
((FILTER_8TAP(src, x, F, stride) + ((1 << sh) >> 1)) >> sh)
#define FILTER_8TAP_CLIP(src, x, F, stride, sh) \
iclip_pixel(FILTER_8TAP_RND(src, x, F, stride, sh))
#define DAV1D_FILTER_8TAP_CLIP(src, x, F, stride, sh) \
iclip_pixel(DAV1D_FILTER_8TAP_RND(src, x, F, stride, sh))
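// The ((1 << sh) >> 1) term rounds to nearest before shifting. The filter
// coefficients evidently sum to 64 (the unfiltered fallback below scales by
// << 4 to match), so the horizontal pass (sh == 2) leaves the intermediate
// scaled by 16 and the second pass (sh == 10, or 6 for a single pass) brings
// it back to pixel range.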
#define GET_H_FILTER(mx) \
const int8_t *const fh = !(mx) ? NULL : w > 4 ? \
@ -110,7 +110,7 @@ put_8tap_c(pixel *dst, ptrdiff_t dst_stride,
src -= src_stride * 3;
do {
for (int x = 0; x < w; x++)
mid_ptr[x] = FILTER_8TAP_RND(src, x, fh, 1, 2);
mid_ptr[x] = DAV1D_FILTER_8TAP_RND(src, x, fh, 1, 2);
mid_ptr += 128;
src += src_stride;
@ -119,7 +119,7 @@ put_8tap_c(pixel *dst, ptrdiff_t dst_stride,
mid_ptr = mid + 128 * 3;
do {
for (int x = 0; x < w; x++)
dst[x] = FILTER_8TAP_CLIP(mid_ptr, x, fv, 128, 10);
dst[x] = DAV1D_FILTER_8TAP_CLIP(mid_ptr, x, fv, 128, 10);
mid_ptr += 128;
dst += dst_stride;
@ -127,7 +127,7 @@ put_8tap_c(pixel *dst, ptrdiff_t dst_stride,
} else {
do {
for (int x = 0; x < w; x++) {
const int px = FILTER_8TAP_RND(src, x, fh, 1, 2);
const int px = DAV1D_FILTER_8TAP_RND(src, x, fh, 1, 2);
dst[x] = iclip_pixel((px + 8) >> 4);
}
@ -138,7 +138,7 @@ put_8tap_c(pixel *dst, ptrdiff_t dst_stride,
} else if (fv) {
do {
for (int x = 0; x < w; x++)
dst[x] = FILTER_8TAP_CLIP(src, x, fv, src_stride, 6);
dst[x] = DAV1D_FILTER_8TAP_CLIP(src, x, fv, src_stride, 6);
dst += dst_stride;
src += src_stride;
@ -164,7 +164,7 @@ put_8tap_scaled_c(pixel *dst, const ptrdiff_t dst_stride,
for (x = 0; x < w; x++) {
GET_H_FILTER(imx >> 6);
mid_ptr[x] = fh ? FILTER_8TAP_RND(src, ioff, fh, 1, 2) : src[ioff] << 4;
mid_ptr[x] = fh ? DAV1D_FILTER_8TAP_RND(src, ioff, fh, 1, 2) : src[ioff] << 4;
imx += dx;
ioff += imx >> 10;
imx &= 0x3ff;
@ -180,7 +180,7 @@ put_8tap_scaled_c(pixel *dst, const ptrdiff_t dst_stride,
GET_V_FILTER(my >> 6);
for (x = 0; x < w; x++)
dst[x] = fv ? FILTER_8TAP_CLIP(mid_ptr, x, fv, 128, 10) :
dst[x] = fv ? DAV1D_FILTER_8TAP_CLIP(mid_ptr, x, fv, 128, 10) :
iclip_pixel((mid_ptr[x] + 8) >> 4);
my += dy;
@ -206,7 +206,7 @@ prep_8tap_c(coef *tmp, const pixel *src, ptrdiff_t src_stride,
src -= src_stride * 3;
do {
for (int x = 0; x < w; x++)
mid_ptr[x] = FILTER_8TAP_RND(src, x, fh, 1, 2);
mid_ptr[x] = DAV1D_FILTER_8TAP_RND(src, x, fh, 1, 2);
mid_ptr += 128;
src += src_stride;
@ -215,7 +215,7 @@ prep_8tap_c(coef *tmp, const pixel *src, ptrdiff_t src_stride,
mid_ptr = mid + 128 * 3;
do {
for (int x = 0; x < w; x++)
tmp[x] = FILTER_8TAP_RND(mid_ptr, x, fv, 128, 6);
tmp[x] = DAV1D_FILTER_8TAP_RND(mid_ptr, x, fv, 128, 6);
mid_ptr += 128;
tmp += w;
@ -223,7 +223,7 @@ prep_8tap_c(coef *tmp, const pixel *src, ptrdiff_t src_stride,
} else {
do {
for (int x = 0; x < w; x++)
tmp[x] = FILTER_8TAP_RND(src, x, fh, 1, 2);
tmp[x] = DAV1D_FILTER_8TAP_RND(src, x, fh, 1, 2);
tmp += w;
src += src_stride;
@ -232,7 +232,7 @@ prep_8tap_c(coef *tmp, const pixel *src, ptrdiff_t src_stride,
} else if (fv) {
do {
for (int x = 0; x < w; x++)
tmp[x] = FILTER_8TAP_RND(src, x, fv, src_stride, 2);
tmp[x] = DAV1D_FILTER_8TAP_RND(src, x, fv, src_stride, 2);
tmp += w;
src += src_stride;
@ -257,7 +257,7 @@ prep_8tap_scaled_c(coef *tmp, const pixel *src, ptrdiff_t src_stride,
for (x = 0; x < w; x++) {
GET_H_FILTER(imx >> 6);
mid_ptr[x] = fh ? FILTER_8TAP_RND(src, ioff, fh, 1, 2) : src[ioff] << 4;
mid_ptr[x] = fh ? DAV1D_FILTER_8TAP_RND(src, ioff, fh, 1, 2) : src[ioff] << 4;
imx += dx;
ioff += imx >> 10;
imx &= 0x3ff;
@ -273,7 +273,7 @@ prep_8tap_scaled_c(coef *tmp, const pixel *src, ptrdiff_t src_stride,
GET_V_FILTER(my >> 6);
for (x = 0; x < w; x++)
tmp[x] = fv ? FILTER_8TAP_RND(mid_ptr, x, fv, 128, 6) : mid_ptr[x];
tmp[x] = fv ? DAV1D_FILTER_8TAP_RND(mid_ptr, x, fv, 128, 6) : mid_ptr[x];
my += dy;
mid_ptr += (my >> 10) * 128;
@ -324,15 +324,15 @@ static void prep_8tap_##type##_scaled_c(coef *const tmp, \
type_h | (type_v << 2)); \
}
filter_fns(regular, FILTER_8TAP_REGULAR, FILTER_8TAP_REGULAR)
filter_fns(regular_sharp, FILTER_8TAP_REGULAR, FILTER_8TAP_SHARP)
filter_fns(regular_smooth, FILTER_8TAP_REGULAR, FILTER_8TAP_SMOOTH)
filter_fns(smooth, FILTER_8TAP_SMOOTH, FILTER_8TAP_SMOOTH)
filter_fns(smooth_regular, FILTER_8TAP_SMOOTH, FILTER_8TAP_REGULAR)
filter_fns(smooth_sharp, FILTER_8TAP_SMOOTH, FILTER_8TAP_SHARP)
filter_fns(sharp, FILTER_8TAP_SHARP, FILTER_8TAP_SHARP)
filter_fns(sharp_regular, FILTER_8TAP_SHARP, FILTER_8TAP_REGULAR)
filter_fns(sharp_smooth, FILTER_8TAP_SHARP, FILTER_8TAP_SMOOTH)
filter_fns(regular, DAV1D_FILTER_8TAP_REGULAR, DAV1D_FILTER_8TAP_REGULAR)
filter_fns(regular_sharp, DAV1D_FILTER_8TAP_REGULAR, DAV1D_FILTER_8TAP_SHARP)
filter_fns(regular_smooth, DAV1D_FILTER_8TAP_REGULAR, DAV1D_FILTER_8TAP_SMOOTH)
filter_fns(smooth, DAV1D_FILTER_8TAP_SMOOTH, DAV1D_FILTER_8TAP_SMOOTH)
filter_fns(smooth_regular, DAV1D_FILTER_8TAP_SMOOTH, DAV1D_FILTER_8TAP_REGULAR)
filter_fns(smooth_sharp, DAV1D_FILTER_8TAP_SMOOTH, DAV1D_FILTER_8TAP_SHARP)
filter_fns(sharp, DAV1D_FILTER_8TAP_SHARP, DAV1D_FILTER_8TAP_SHARP)
filter_fns(sharp_regular, DAV1D_FILTER_8TAP_SHARP, DAV1D_FILTER_8TAP_REGULAR)
filter_fns(sharp_smooth, DAV1D_FILTER_8TAP_SHARP, DAV1D_FILTER_8TAP_SMOOTH)
#define FILTER_BILIN(src, x, mxy, stride) \
(16 * src[x] + ((mxy) * (src[x + stride] - src[x])))
@ -782,6 +782,34 @@ static void emu_edge_c(const intptr_t bw, const intptr_t bh,
}
}
static void resize_c(pixel *dst, const ptrdiff_t dst_stride,
const pixel *src, const ptrdiff_t src_stride,
const int dst_w, const int src_w, int h,
const int dx, const int mx0)
{
do {
int mx = mx0, src_x = -1;
for (int x = 0; x < dst_w; x++) {
const int16_t *const F = dav1d_resize_filter[mx >> 8];
dst[x] = iclip_pixel((F[0] * src[iclip(src_x - 3, 0, src_w - 1)] +
F[1] * src[iclip(src_x - 2, 0, src_w - 1)] +
F[2] * src[iclip(src_x - 1, 0, src_w - 1)] +
F[3] * src[iclip(src_x + 0, 0, src_w - 1)] +
F[4] * src[iclip(src_x + 1, 0, src_w - 1)] +
F[5] * src[iclip(src_x + 2, 0, src_w - 1)] +
F[6] * src[iclip(src_x + 3, 0, src_w - 1)] +
F[7] * src[iclip(src_x + 4, 0, src_w - 1)] +
64) >> 7);
mx += dx;
src_x += mx >> 14;
mx &= 0x3fff;
}
dst += PXSTRIDE(dst_stride);
src += PXSTRIDE(src_stride);
} while (--h);
}
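// resize_c() walks each destination row in 14-bit fixed point: mx >> 8 selects
// one of 64 8-tap phases from dav1d_resize_filter, src_x advances by whole
// source pixels as the fraction overflows, and the taps are clamped to
// [0, src_w - 1] at the frame edges. The +64 / >> 7 rounding implies the
// coefficients sum to 128.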
void bitfn(dav1d_mc_dsp_init)(Dav1dMCDSPContext *const c) {
#define init_mc_fns(type, name) do { \
c->mc [type] = put_##name##_c; \
@ -813,6 +841,7 @@ void bitfn(dav1d_mc_dsp_init)(Dav1dMCDSPContext *const c) {
c->warp8x8 = warp_affine_8x8_c;
c->warp8x8t = warp_affine_8x8t_c;
c->emu_edge = emu_edge_c;
c->resize = resize_c;
#if HAVE_ASM
#if ARCH_AARCH64 || ARCH_ARM

View File

@ -62,7 +62,8 @@ libdav1d_tmpl_sources = files(
'cdef_tmpl.c',
'lr_apply_tmpl.c',
'looprestoration_tmpl.c',
'recon_tmpl.c'
'recon_tmpl.c',
'film_grain_tmpl.c',
)
# libdav1d entrypoint source files
@ -83,10 +84,12 @@ if is_asm_enabled
'arm/cpu.c',
)
libdav1d_tmpl_sources += files(
'arm/looprestoration_init_tmpl.c',
'arm/mc_init_tmpl.c',
)
if host_machine.cpu_family() == 'aarch64'
libdav1d_sources += files(
'arm/64/looprestoration.S',
'arm/64/mc.S',
)
elif host_machine.cpu_family().startswith('arm')
@ -118,6 +121,7 @@ if is_asm_enabled
'x86/loopfilter.asm',
'x86/looprestoration.asm',
'x86/mc.asm',
'x86/mc_ssse3.asm',
)
# Compile the ASM sources with NASM

View File

@ -41,10 +41,10 @@
#include "src/levels.h"
#include "src/obu.h"
#include "src/ref.h"
#include "src/warpmv.h"
#include "src/thread_task.h"
static int parse_seq_hdr(Dav1dContext *const c, GetBits *const gb,
Av1SequenceHeader *const hdr)
Dav1dSequenceHeader *const hdr)
{
#define DEBUG_SEQ_HDR 0
@ -105,7 +105,7 @@ static int parse_seq_hdr(Dav1dContext *const c, GetBits *const gb,
hdr->display_model_info_present = dav1d_get_bits(gb, 1);
hdr->num_operating_points = dav1d_get_bits(gb, 5) + 1;
for (int i = 0; i < hdr->num_operating_points; i++) {
struct Av1SequenceHeaderOperatingPoint *const op =
struct Dav1dSequenceHeaderOperatingPoint *const op =
&hdr->operating_points[i];
op->idc = dav1d_get_bits(gb, 12);
op->major_level = 2 + dav1d_get_bits(gb, 3);
@ -126,6 +126,10 @@ static int parse_seq_hdr(Dav1dContext *const c, GetBits *const gb,
op->initial_display_delay = dav1d_get_bits(gb, 4) + 1;
}
}
if (c->operating_point < hdr->num_operating_points)
c->operating_point_idc = hdr->operating_points[c->operating_point].idc;
else
c->operating_point_idc = hdr->operating_points[0].idc;
#if DEBUG_SEQ_HDR
printf("SEQHDR: post-operating-points: off=%ld\n",
dav1d_get_bits_pos(gb) - init_bit_pos);
@ -163,8 +167,8 @@ static int parse_seq_hdr(Dav1dContext *const c, GetBits *const gb,
hdr->jnt_comp = 0;
hdr->ref_frame_mvs = 0;
hdr->order_hint_n_bits = 0;
hdr->screen_content_tools = ADAPTIVE;
hdr->force_integer_mv = ADAPTIVE;
hdr->screen_content_tools = DAV1D_ADAPTIVE;
hdr->force_integer_mv = DAV1D_ADAPTIVE;
} else {
hdr->inter_intra = dav1d_get_bits(gb, 1);
hdr->masked_compound = dav1d_get_bits(gb, 1);
@ -179,13 +183,13 @@ static int parse_seq_hdr(Dav1dContext *const c, GetBits *const gb,
hdr->ref_frame_mvs = 0;
hdr->order_hint_n_bits = 0;
}
hdr->screen_content_tools = dav1d_get_bits(gb, 1) ? ADAPTIVE : dav1d_get_bits(gb, 1);
hdr->screen_content_tools = dav1d_get_bits(gb, 1) ? DAV1D_ADAPTIVE : dav1d_get_bits(gb, 1);
#if DEBUG_SEQ_HDR
printf("SEQHDR: post-screentools: off=%ld\n",
dav1d_get_bits_pos(gb) - init_bit_pos);
#endif
hdr->force_integer_mv = hdr->screen_content_tools ?
dav1d_get_bits(gb, 1) ? ADAPTIVE : dav1d_get_bits(gb, 1) : 2;
dav1d_get_bits(gb, 1) ? DAV1D_ADAPTIVE : dav1d_get_bits(gb, 1) : 2;
if (hdr->order_hint)
hdr->order_hint_n_bits = dav1d_get_bits(gb, 3) + 1;
}
@ -197,10 +201,9 @@ static int parse_seq_hdr(Dav1dContext *const c, GetBits *const gb,
dav1d_get_bits_pos(gb) - init_bit_pos);
#endif
const int hbd = dav1d_get_bits(gb, 1);
hdr->bpc = hdr->profile == 2 && hbd ? 10U + 2 * dav1d_get_bits(gb, 1) : 8U + 2 * hbd;
hdr->hbd = hdr->bpc > 8;
const int monochrome = hdr->profile != 1 ? dav1d_get_bits(gb, 1) : 0;
hdr->hbd = dav1d_get_bits(gb, 1);
if (hdr->profile == 2 && hdr->hbd) hdr->hbd += dav1d_get_bits(gb, 1);
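// hbd now carries the bit depth directly: 0, 1 and 2 presumably correspond to
// 8, 10 and 12 bits per component (the extra bit is only coded for profile 2).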
hdr->monochrome = hdr->profile != 1 ? dav1d_get_bits(gb, 1) : 0;
hdr->color_description_present = dav1d_get_bits(gb, 1);
if (hdr->color_description_present) {
hdr->pri = dav1d_get_bits(gb, 8);
@ -211,9 +214,10 @@ static int parse_seq_hdr(Dav1dContext *const c, GetBits *const gb,
hdr->trc = DAV1D_TRC_UNKNOWN;
hdr->mtrx = DAV1D_MC_UNKNOWN;
}
if (monochrome) {
if (hdr->monochrome) {
hdr->color_range = dav1d_get_bits(gb, 1);
hdr->layout = DAV1D_PIXEL_LAYOUT_I400;
hdr->ss_hor = hdr->ss_ver = 0;
hdr->chr = DAV1D_CHR_UNKNOWN;
hdr->separate_uv_delta_q = 0;
} else if (hdr->pri == DAV1D_COLOR_PRI_BT709 &&
@ -221,26 +225,35 @@ static int parse_seq_hdr(Dav1dContext *const c, GetBits *const gb,
hdr->mtrx == DAV1D_MC_IDENTITY)
{
hdr->layout = DAV1D_PIXEL_LAYOUT_I444;
hdr->ss_hor = hdr->ss_ver = 1;
hdr->color_range = 1;
if (hdr->profile != 1 && !(hdr->profile == 2 && hdr->bpc == 12))
if (hdr->profile != 1 && !(hdr->profile == 2 && hdr->hbd == 2))
goto error;
} else {
hdr->color_range = dav1d_get_bits(gb, 1);
switch (hdr->profile) {
case 0: hdr->layout = DAV1D_PIXEL_LAYOUT_I420; break;
case 1: hdr->layout = DAV1D_PIXEL_LAYOUT_I444; break;
case 0: hdr->layout = DAV1D_PIXEL_LAYOUT_I420;
hdr->ss_hor = hdr->ss_ver = 1;
break;
case 1: hdr->layout = DAV1D_PIXEL_LAYOUT_I444;
hdr->ss_hor = hdr->ss_ver = 0;
break;
case 2:
if (hdr->bpc == 12) {
hdr->layout = dav1d_get_bits(gb, 1) ?
dav1d_get_bits(gb, 1) ? DAV1D_PIXEL_LAYOUT_I420 :
DAV1D_PIXEL_LAYOUT_I422 :
DAV1D_PIXEL_LAYOUT_I444;
} else
hdr->layout = DAV1D_PIXEL_LAYOUT_I422;
if (hdr->hbd == 2) {
hdr->ss_hor = dav1d_get_bits(gb, 1);
hdr->ss_ver = hdr->ss_hor && dav1d_get_bits(gb, 1);
} else {
hdr->ss_hor = 1;
hdr->ss_ver = 0;
}
hdr->layout = hdr->ss_hor ?
hdr->ss_ver ? DAV1D_PIXEL_LAYOUT_I420 :
DAV1D_PIXEL_LAYOUT_I422 :
DAV1D_PIXEL_LAYOUT_I444;
break;
}
if (hdr->layout == DAV1D_PIXEL_LAYOUT_I420)
hdr->chr = dav1d_get_bits(gb, 2);
hdr->chr = hdr->ss_hor == 1 && hdr->ss_ver == 1 ?
dav1d_get_bits(gb, 2) : DAV1D_CHR_UNKNOWN;
hdr->separate_uv_delta_q = dav1d_get_bits(gb, 1);
}
#if DEBUG_SEQ_HDR
@ -270,39 +283,54 @@ error:
static int read_frame_size(Dav1dContext *const c, GetBits *const gb,
const int use_ref)
{
const Av1SequenceHeader *const seqhdr = &c->seq_hdr;
Av1FrameHeader *const hdr = &c->frame_hdr;
const Dav1dSequenceHeader *const seqhdr = c->seq_hdr;
Dav1dFrameHeader *const hdr = c->frame_hdr;
if (use_ref) {
for (int i = 0; i < 7; i++) {
if (dav1d_get_bits(gb, 1)) {
Dav1dThreadPicture *const ref =
&c->refs[c->frame_hdr.refidx[i]].p;
&c->refs[c->frame_hdr->refidx[i]].p;
if (!ref->p.data[0]) return -1;
// FIXME render_* may be wrong
hdr->render_width = hdr->width = ref->p.p.w;
hdr->render_width = hdr->width[1] = ref->p.p.w;
hdr->render_height = hdr->height = ref->p.p.h;
hdr->super_res = 0; // FIXME probably wrong
hdr->super_res.enabled = seqhdr->super_res && dav1d_get_bits(gb, 1);
if (hdr->super_res.enabled) {
const int d = hdr->super_res.width_scale_denominator =
9 + dav1d_get_bits(gb, 3);
hdr->width[0] = imax((hdr->width[1] * 8 + (d >> 1)) / d,
imin(16, hdr->width[1]));
} else {
hdr->super_res.width_scale_denominator = 8;
hdr->width[0] = hdr->width[1];
}
return 0;
}
}
}
if (hdr->frame_size_override) {
hdr->width = dav1d_get_bits(gb, seqhdr->width_n_bits) + 1;
hdr->width[1] = dav1d_get_bits(gb, seqhdr->width_n_bits) + 1;
hdr->height = dav1d_get_bits(gb, seqhdr->height_n_bits) + 1;
} else {
hdr->width = seqhdr->max_width;
hdr->width[1] = seqhdr->max_width;
hdr->height = seqhdr->max_height;
}
hdr->super_res = seqhdr->super_res && dav1d_get_bits(gb, 1);
if (hdr->super_res) return -1; // FIXME
hdr->super_res.enabled = seqhdr->super_res && dav1d_get_bits(gb, 1);
if (hdr->super_res.enabled) {
const int d = hdr->super_res.width_scale_denominator = 9 + dav1d_get_bits(gb, 3);
hdr->width[0] = imax((hdr->width[1] * 8 + (d >> 1)) / d, imin(16, hdr->width[1]));
} else {
hdr->super_res.width_scale_denominator = 8;
hdr->width[0] = hdr->width[1];
}
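// Worked example: with width[1] == 1920 and a coded denominator d == 16,
// width[0] == imax((1920 * 8 + 8) / 16, imin(16, 1920)) == 960, i.e. the frame
// is decoded at half width and later upscaled back to 1920.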
hdr->have_render_size = dav1d_get_bits(gb, 1);
if (hdr->have_render_size) {
hdr->render_width = dav1d_get_bits(gb, 16) + 1;
hdr->render_height = dav1d_get_bits(gb, 16) + 1;
} else {
hdr->render_width = hdr->width;
hdr->render_width = hdr->width[1];
hdr->render_height = hdr->height;
}
return 0;
@ -314,7 +342,7 @@ static inline int tile_log2(int sz, int tgt) {
return k;
}
static const Av1LoopfilterModeRefDeltas default_mode_ref_deltas = {
static const Dav1dLoopfilterModeRefDeltas default_mode_ref_deltas = {
.mode_delta = { 0, 0 },
.ref_delta = { 1, 0, 0, 0, -1, 0, -1, -1 },
};
@ -325,8 +353,8 @@ static int parse_frame_hdr(Dav1dContext *const c, GetBits *const gb) {
#if DEBUG_FRAME_HDR
const uint8_t *const init_ptr = gb->ptr;
#endif
const Av1SequenceHeader *const seqhdr = &c->seq_hdr;
Av1FrameHeader *const hdr = &c->frame_hdr;
const Dav1dSequenceHeader *const seqhdr = c->seq_hdr;
Dav1dFrameHeader *const hdr = c->frame_hdr;
int res;
hdr->show_existing_frame =
@ -360,10 +388,10 @@ static int parse_frame_hdr(Dav1dContext *const c, GetBits *const gb) {
(gb->ptr - init_ptr) * 8 - gb->bits_left);
#endif
hdr->disable_cdf_update = dav1d_get_bits(gb, 1);
hdr->allow_screen_content_tools = seqhdr->screen_content_tools == ADAPTIVE ?
hdr->allow_screen_content_tools = seqhdr->screen_content_tools == DAV1D_ADAPTIVE ?
dav1d_get_bits(gb, 1) : seqhdr->screen_content_tools;
if (hdr->allow_screen_content_tools)
hdr->force_integer_mv = seqhdr->force_integer_mv == ADAPTIVE ?
hdr->force_integer_mv = seqhdr->force_integer_mv == DAV1D_ADAPTIVE ?
dav1d_get_bits(gb, 1) : seqhdr->force_integer_mv;
else
hdr->force_integer_mv = 0;
@ -383,18 +411,18 @@ static int parse_frame_hdr(Dav1dContext *const c, GetBits *const gb) {
hdr->frame_offset = seqhdr->order_hint ?
dav1d_get_bits(gb, seqhdr->order_hint_n_bits) : 0;
hdr->primary_ref_frame = !hdr->error_resilient_mode && hdr->frame_type & 1 ?
dav1d_get_bits(gb, 3) : PRIMARY_REF_NONE;
dav1d_get_bits(gb, 3) : DAV1D_PRIMARY_REF_NONE;
if (seqhdr->decoder_model_info_present) {
hdr->buffer_removal_time_present = dav1d_get_bits(gb, 1);
if (hdr->buffer_removal_time_present) {
for (int i = 0; i < c->seq_hdr.num_operating_points; i++) {
const struct Av1SequenceHeaderOperatingPoint *const seqop = &seqhdr->operating_points[i];
struct Av1FrameHeaderOperatingPoint *const op = &hdr->operating_points[i];
for (int i = 0; i < c->seq_hdr->num_operating_points; i++) {
const struct Dav1dSequenceHeaderOperatingPoint *const seqop = &seqhdr->operating_points[i];
struct Dav1dFrameHeaderOperatingPoint *const op = &hdr->operating_points[i];
if (seqop->decoder_model_param_present) {
int in_temporal_layer = (seqop->idc >> 0 /* FIXME: temporal_id */ ) & 1;
int in_spatial_layer = (seqop->idc >> (0 /* FIXME: spatial_id */ + 8)) & 1;
if (!seqop->idc || in_temporal_layer || in_spatial_layer)
int in_temporal_layer = (seqop->idc >> hdr->temporal_id) & 1;
int in_spatial_layer = (seqop->idc >> (hdr->spatial_id + 8)) & 1;
if (!seqop->idc || (in_temporal_layer && in_spatial_layer))
op->buffer_removal_time = dav1d_get_bits(gb, seqhdr->buffer_removal_delay_length);
}
}
@ -411,7 +439,7 @@ static int parse_frame_hdr(Dav1dContext *const c, GetBits *const gb) {
dav1d_get_bits(gb, seqhdr->order_hint_n_bits);
if ((res = read_frame_size(c, gb, 0)) < 0) goto error;
hdr->allow_intrabc = hdr->allow_screen_content_tools &&
/* FIXME: no superres scaling && */ dav1d_get_bits(gb, 1);
!hdr->super_res.enabled && dav1d_get_bits(gb, 1);
hdr->use_ref_frame_mvs = 0;
} else {
hdr->allow_intrabc = 0;
@ -422,9 +450,99 @@ static int parse_frame_hdr(Dav1dContext *const c, GetBits *const gb) {
dav1d_get_bits(gb, seqhdr->order_hint_n_bits);
hdr->frame_ref_short_signaling =
seqhdr->order_hint && dav1d_get_bits(gb, 1);
if (hdr->frame_ref_short_signaling) goto error; // FIXME
if (hdr->frame_ref_short_signaling) { // FIXME: Nearly verbatim copy from section 7.8
hdr->refidx[0] = dav1d_get_bits(gb, 3);
hdr->refidx[1] = hdr->refidx[2] = -1;
hdr->refidx[3] = dav1d_get_bits(gb, 3);
hdr->refidx[4] = hdr->refidx[5] = hdr->refidx[6] = -1;
int shifted_frame_offset[8];
const int current_frame_offset = 1 << (seqhdr->order_hint_n_bits - 1);
for (int i = 0; i < 8; i++) {
if (!c->refs[i].p.p.frame_hdr) goto error;
shifted_frame_offset[i] = current_frame_offset +
get_poc_diff(seqhdr->order_hint_n_bits,
c->refs[i].p.p.frame_hdr->frame_offset,
hdr->frame_offset);
}
int used_frame[8] = { 0 };
used_frame[hdr->refidx[0]] = 1;
used_frame[hdr->refidx[3]] = 1;
int latest_frame_offset = -1;
for (int i = 0; i < 8; i++) {
int hint = shifted_frame_offset[i];
if (!used_frame[i] && hint >= current_frame_offset &&
hint >= latest_frame_offset)
{
hdr->refidx[6] = i;
latest_frame_offset = hint;
}
}
if (latest_frame_offset != -1)
used_frame[hdr->refidx[6]] = 1;
int earliest_frame_offset = INT_MAX;
for (int i = 0; i < 8; i++) {
int hint = shifted_frame_offset[i];
if (!used_frame[i] && hint >= current_frame_offset &&
hint < earliest_frame_offset)
{
hdr->refidx[4] = i;
earliest_frame_offset = hint;
}
}
if (earliest_frame_offset != INT_MAX)
used_frame[hdr->refidx[4]] = 1;
earliest_frame_offset = INT_MAX;
for (int i = 0; i < 8; i++) {
int hint = shifted_frame_offset[i];
if (!used_frame[i] && hint >= current_frame_offset &&
(hint < earliest_frame_offset))
{
hdr->refidx[5] = i;
earliest_frame_offset = hint;
}
}
if (earliest_frame_offset != INT_MAX)
used_frame[hdr->refidx[5]] = 1;
for (int i = 1; i < 7; i++) {
if (hdr->refidx[i] < 0) {
latest_frame_offset = -1;
for (int j = 0; j < 8; j++) {
int hint = shifted_frame_offset[j];
if (!used_frame[j] && hint < current_frame_offset &&
hint >= latest_frame_offset)
{
hdr->refidx[i] = j;
latest_frame_offset = hint;
}
}
if (latest_frame_offset != -1)
used_frame[hdr->refidx[i]] = 1;
}
}
earliest_frame_offset = INT_MAX;
int ref = -1;
for (int i = 0; i < 8; i++) {
int hint = shifted_frame_offset[i];
if (hint < earliest_frame_offset) {
ref = i;
earliest_frame_offset = hint;
}
}
for (int i = 0; i < 7; i++) {
if (hdr->refidx[i] < 0)
hdr->refidx[i] = ref;
}
}
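// Summary of the derivation above (section 7.8): only LAST (refidx[0]) and
// GOLDEN (refidx[3]) are coded explicitly; ALTREF takes the most distant
// forward reference, BWDREF and ALTREF2 the two nearest forward references,
// the remaining slots are filled with the most recent unused past references,
// and anything still unset falls back to the overall oldest frame.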
for (int i = 0; i < 7; i++) {
hdr->refidx[i] = dav1d_get_bits(gb, 3);
if (!hdr->frame_ref_short_signaling)
hdr->refidx[i] = dav1d_get_bits(gb, 3);
if (seqhdr->frame_id_numbers_present)
dav1d_get_bits(gb, seqhdr->delta_frame_id_n_bits);
}
@ -432,7 +550,7 @@ static int parse_frame_hdr(Dav1dContext *const c, GetBits *const gb) {
hdr->frame_size_override;
if ((res = read_frame_size(c, gb, use_ref)) < 0) goto error;
hdr->hp = !hdr->force_integer_mv && dav1d_get_bits(gb, 1);
hdr->subpel_filter_mode = dav1d_get_bits(gb, 1) ? FILTER_SWITCHABLE :
hdr->subpel_filter_mode = dav1d_get_bits(gb, 1) ? DAV1D_FILTER_SWITCHABLE :
dav1d_get_bits(gb, 2);
hdr->switchable_motion_mode = dav1d_get_bits(gb, 1);
hdr->use_ref_frame_mvs = !hdr->error_resilient_mode &&
@ -455,13 +573,13 @@ static int parse_frame_hdr(Dav1dContext *const c, GetBits *const gb) {
hdr->tiling.uniform = dav1d_get_bits(gb, 1);
const int sbsz_min1 = (64 << seqhdr->sb128) - 1;
int sbsz_log2 = 6 + seqhdr->sb128;
int sbw = (hdr->width + sbsz_min1) >> sbsz_log2;
int sbw = (hdr->width[0] + sbsz_min1) >> sbsz_log2;
int sbh = (hdr->height + sbsz_min1) >> sbsz_log2;
int max_tile_width_sb = 4096 >> sbsz_log2;
int max_tile_area_sb = 4096 * 2304 >> (2 * sbsz_log2);
hdr->tiling.min_log2_cols = tile_log2(max_tile_width_sb, sbw);
hdr->tiling.max_log2_cols = tile_log2(1, imin(sbw, MAX_TILE_COLS));
hdr->tiling.max_log2_rows = tile_log2(1, imin(sbh, MAX_TILE_ROWS));
hdr->tiling.max_log2_cols = tile_log2(1, imin(sbw, DAV1D_MAX_TILE_COLS));
hdr->tiling.max_log2_rows = tile_log2(1, imin(sbh, DAV1D_MAX_TILE_ROWS));
int min_log2_tiles = imax(tile_log2(max_tile_area_sb, sbw * sbh),
hdr->tiling.min_log2_cols);
if (hdr->tiling.uniform) {
@ -485,7 +603,7 @@ static int parse_frame_hdr(Dav1dContext *const c, GetBits *const gb) {
} else {
hdr->tiling.cols = 0;
int widest_tile = 0, max_tile_area_sb = sbw * sbh;
for (int sbx = 0; sbx < sbw && hdr->tiling.cols < MAX_TILE_COLS; hdr->tiling.cols++) {
for (int sbx = 0; sbx < sbw && hdr->tiling.cols < DAV1D_MAX_TILE_COLS; hdr->tiling.cols++) {
const int tile_width_sb = imin(sbw - sbx, max_tile_width_sb);
const int tile_w = (tile_width_sb > 1) ?
1 + dav1d_get_uniform(gb, tile_width_sb) :
@ -499,7 +617,7 @@ static int parse_frame_hdr(Dav1dContext *const c, GetBits *const gb) {
int max_tile_height_sb = imax(max_tile_area_sb / widest_tile, 1);
hdr->tiling.rows = 0;
for (int sby = 0; sby < sbh && hdr->tiling.rows < MAX_TILE_ROWS; hdr->tiling.rows++) {
for (int sby = 0; sby < sbh && hdr->tiling.rows < DAV1D_MAX_TILE_ROWS; hdr->tiling.rows++) {
const int tile_height_sb = imin(sbh - sby, max_tile_height_sb);
const int tile_h = (tile_height_sb > 1) ?
1 + dav1d_get_uniform(gb, tile_height_sb) :
@ -528,7 +646,7 @@ static int parse_frame_hdr(Dav1dContext *const c, GetBits *const gb) {
// quant data
hdr->quant.yac = dav1d_get_bits(gb, 8);
hdr->quant.ydc_delta = dav1d_get_bits(gb, 1) ? dav1d_get_sbits(gb, 6) : 0;
if (seqhdr->layout != DAV1D_PIXEL_LAYOUT_I400) {
if (!seqhdr->monochrome) {
// If the sequence header says that delta_q might be different
// for U, V, we must check whether it actually is for this
// frame.
@ -563,7 +681,7 @@ static int parse_frame_hdr(Dav1dContext *const c, GetBits *const gb) {
// segmentation data
hdr->segmentation.enabled = dav1d_get_bits(gb, 1);
if (hdr->segmentation.enabled) {
if (hdr->primary_ref_frame == PRIMARY_REF_NONE) {
if (hdr->primary_ref_frame == DAV1D_PRIMARY_REF_NONE) {
hdr->segmentation.update_map = 1;
hdr->segmentation.temporal = 0;
hdr->segmentation.update_data = 1;
@ -577,8 +695,8 @@ static int parse_frame_hdr(Dav1dContext *const c, GetBits *const gb) {
if (hdr->segmentation.update_data) {
hdr->segmentation.seg_data.preskip = 0;
hdr->segmentation.seg_data.last_active_segid = -1;
for (int i = 0; i < NUM_SEGMENTS; i++) {
Av1SegmentationData *const seg =
for (int i = 0; i < DAV1D_MAX_SEGMENTS; i++) {
Dav1dSegmentationData *const seg =
&hdr->segmentation.seg_data.d[i];
if (dav1d_get_bits(gb, 1)) {
seg->delta_q = dav1d_get_sbits(gb, 8);
@ -629,13 +747,15 @@ static int parse_frame_hdr(Dav1dContext *const c, GetBits *const gb) {
} else {
// segmentation.update_data was false so we should copy
// segmentation data from the reference frame.
assert(hdr->primary_ref_frame != PRIMARY_REF_NONE);
assert(hdr->primary_ref_frame != DAV1D_PRIMARY_REF_NONE);
const int pri_ref = hdr->refidx[hdr->primary_ref_frame];
hdr->segmentation.seg_data = c->refs[pri_ref].seg_data;
if (!c->refs[pri_ref].p.p.frame_hdr) return -EINVAL;
hdr->segmentation.seg_data =
c->refs[pri_ref].p.p.frame_hdr->segmentation.seg_data;
}
} else {
memset(&hdr->segmentation.seg_data, 0, sizeof(Av1SegmentationDataSet));
for (int i = 0; i < NUM_SEGMENTS; i++)
memset(&hdr->segmentation.seg_data, 0, sizeof(Dav1dSegmentationDataSet));
for (int i = 0; i < DAV1D_MAX_SEGMENTS; i++)
hdr->segmentation.seg_data.d[i].ref = -1;
}
#if DEBUG_FRAME_HDR
@ -659,7 +779,7 @@ static int parse_frame_hdr(Dav1dContext *const c, GetBits *const gb) {
const int delta_lossless = !hdr->quant.ydc_delta && !hdr->quant.udc_delta &&
!hdr->quant.uac_delta && !hdr->quant.vdc_delta && !hdr->quant.vac_delta;
hdr->all_lossless = 1;
for (int i = 0; i < NUM_SEGMENTS; i++) {
for (int i = 0; i < DAV1D_MAX_SEGMENTS; i++) {
hdr->segmentation.qidx[i] = hdr->segmentation.enabled ?
iclip_u8(hdr->quant.yac + hdr->segmentation.seg_data.d[i].delta_q) :
hdr->quant.yac;
@ -679,7 +799,7 @@ static int parse_frame_hdr(Dav1dContext *const c, GetBits *const gb) {
} else {
hdr->loopfilter.level_y[0] = dav1d_get_bits(gb, 6);
hdr->loopfilter.level_y[1] = dav1d_get_bits(gb, 6);
if (seqhdr->layout != DAV1D_PIXEL_LAYOUT_I400 &&
if (!seqhdr->monochrome &&
(hdr->loopfilter.level_y[0] || hdr->loopfilter.level_y[1]))
{
hdr->loopfilter.level_u = dav1d_get_bits(gb, 6);
@ -687,11 +807,13 @@ static int parse_frame_hdr(Dav1dContext *const c, GetBits *const gb) {
}
hdr->loopfilter.sharpness = dav1d_get_bits(gb, 3);
if (hdr->primary_ref_frame == PRIMARY_REF_NONE) {
if (hdr->primary_ref_frame == DAV1D_PRIMARY_REF_NONE) {
hdr->loopfilter.mode_ref_deltas = default_mode_ref_deltas;
} else {
const int ref = hdr->refidx[hdr->primary_ref_frame];
hdr->loopfilter.mode_ref_deltas = c->refs[ref].lf_mode_ref_deltas;
if (!c->refs[ref].p.p.frame_hdr) return -EINVAL;
hdr->loopfilter.mode_ref_deltas =
c->refs[ref].p.p.frame_hdr->loopfilter.mode_ref_deltas;
}
hdr->loopfilter.mode_ref_delta_enabled = dav1d_get_bits(gb, 1);
if (hdr->loopfilter.mode_ref_delta_enabled) {
@ -719,7 +841,7 @@ static int parse_frame_hdr(Dav1dContext *const c, GetBits *const gb) {
hdr->cdef.n_bits = dav1d_get_bits(gb, 2);
for (int i = 0; i < (1 << hdr->cdef.n_bits); i++) {
hdr->cdef.y_strength[i] = dav1d_get_bits(gb, 6);
if (seqhdr->layout != DAV1D_PIXEL_LAYOUT_I400)
if (!seqhdr->monochrome)
hdr->cdef.uv_strength[i] = dav1d_get_bits(gb, 6);
}
} else {
@ -733,14 +855,16 @@ static int parse_frame_hdr(Dav1dContext *const c, GetBits *const gb) {
#endif
// restoration
if (!hdr->all_lossless && seqhdr->restoration && !hdr->allow_intrabc) {
if ((!hdr->all_lossless || hdr->super_res.enabled) &&
seqhdr->restoration && !hdr->allow_intrabc)
{
hdr->restoration.type[0] = dav1d_get_bits(gb, 2);
if (seqhdr->layout != DAV1D_PIXEL_LAYOUT_I400) {
if (!seqhdr->monochrome) {
hdr->restoration.type[1] = dav1d_get_bits(gb, 2);
hdr->restoration.type[2] = dav1d_get_bits(gb, 2);
} else {
hdr->restoration.type[1] =
hdr->restoration.type[2] = RESTORATION_NONE;
hdr->restoration.type[2] = DAV1D_RESTORATION_NONE;
}
if (hdr->restoration.type[0] || hdr->restoration.type[1] ||
@ -755,7 +879,7 @@ static int parse_frame_hdr(Dav1dContext *const c, GetBits *const gb) {
}
hdr->restoration.unit_size[1] = hdr->restoration.unit_size[0];
if ((hdr->restoration.type[1] || hdr->restoration.type[2]) &&
seqhdr->layout == DAV1D_PIXEL_LAYOUT_I420)
seqhdr->ss_hor == 1 && seqhdr->ss_ver == 1)
{
hdr->restoration.unit_size[1] -= dav1d_get_bits(gb, 1);
}
@ -763,17 +887,17 @@ static int parse_frame_hdr(Dav1dContext *const c, GetBits *const gb) {
hdr->restoration.unit_size[0] = 8;
}
} else {
hdr->restoration.type[0] = RESTORATION_NONE;
hdr->restoration.type[1] = RESTORATION_NONE;
hdr->restoration.type[2] = RESTORATION_NONE;
hdr->restoration.type[0] = DAV1D_RESTORATION_NONE;
hdr->restoration.type[1] = DAV1D_RESTORATION_NONE;
hdr->restoration.type[2] = DAV1D_RESTORATION_NONE;
}
#if DEBUG_FRAME_HDR
printf("HDR: post-restoration: off=%ld\n",
(gb->ptr - init_ptr) * 8 - gb->bits_left);
#endif
hdr->txfm_mode = hdr->all_lossless ? TX_4X4_ONLY :
dav1d_get_bits(gb, 1) ? TX_SWITCHABLE : TX_LARGEST;
hdr->txfm_mode = hdr->all_lossless ? DAV1D_TX_4X4_ONLY :
dav1d_get_bits(gb, 1) ? DAV1D_TX_SWITCHABLE : DAV1D_TX_LARGEST;
#if DEBUG_FRAME_HDR
printf("HDR: post-txfmmode: off=%ld\n",
(gb->ptr - init_ptr) * 8 - gb->bits_left);
@ -790,7 +914,8 @@ static int parse_frame_hdr(Dav1dContext *const c, GetBits *const gb) {
int off_after = -1;
int off_before_idx[2], off_after_idx;
for (int i = 0; i < 7; i++) {
const unsigned refpoc = c->refs[hdr->refidx[i]].p.p.poc;
if (!c->refs[hdr->refidx[i]].p.p.data[0]) return -EINVAL;
const unsigned refpoc = c->refs[hdr->refidx[i]].p.p.frame_hdr->frame_offset;
const int diff = get_poc_diff(seqhdr->order_hint_n_bits, refpoc, poc);
if (diff > 0) {
@ -854,21 +979,26 @@ static int parse_frame_hdr(Dav1dContext *const c, GetBits *const gb) {
if (hdr->frame_type & 1) {
for (int i = 0; i < 7; i++) {
hdr->gmv[i].type = !dav1d_get_bits(gb, 1) ? WM_TYPE_IDENTITY :
dav1d_get_bits(gb, 1) ? WM_TYPE_ROT_ZOOM :
dav1d_get_bits(gb, 1) ? WM_TYPE_TRANSLATION :
WM_TYPE_AFFINE;
hdr->gmv[i].type = !dav1d_get_bits(gb, 1) ? DAV1D_WM_TYPE_IDENTITY :
dav1d_get_bits(gb, 1) ? DAV1D_WM_TYPE_ROT_ZOOM :
dav1d_get_bits(gb, 1) ? DAV1D_WM_TYPE_TRANSLATION :
DAV1D_WM_TYPE_AFFINE;
if (hdr->gmv[i].type == WM_TYPE_IDENTITY) continue;
if (hdr->gmv[i].type == DAV1D_WM_TYPE_IDENTITY) continue;
const WarpedMotionParams *const ref_gmv =
hdr->primary_ref_frame == PRIMARY_REF_NONE ? &dav1d_default_wm_params :
&c->refs[hdr->refidx[hdr->primary_ref_frame]].gmv[i];
const Dav1dWarpedMotionParams *ref_gmv;
if (hdr->primary_ref_frame == DAV1D_PRIMARY_REF_NONE) {
ref_gmv = &dav1d_default_wm_params;
} else {
const int pri_ref = hdr->refidx[hdr->primary_ref_frame];
if (!c->refs[pri_ref].p.p.frame_hdr) return -EINVAL;
ref_gmv = &c->refs[pri_ref].p.p.frame_hdr->gmv[i];
}
int32_t *const mat = hdr->gmv[i].matrix;
const int32_t *const ref_mat = ref_gmv->matrix;
int bits, shift;
if (hdr->gmv[i].type >= WM_TYPE_ROT_ZOOM) {
if (hdr->gmv[i].type >= DAV1D_WM_TYPE_ROT_ZOOM) {
mat[2] = (1 << 16) + 2 *
dav1d_get_bits_subexp(gb, (ref_mat[2] - (1 << 16)) >> 1, 12);
mat[3] = 2 * dav1d_get_bits_subexp(gb, ref_mat[3] >> 1, 12);
@ -880,7 +1010,7 @@ static int parse_frame_hdr(Dav1dContext *const c, GetBits *const gb) {
shift = 13 + !hdr->hp;
}
if (hdr->gmv[i].type == WM_TYPE_AFFINE) {
if (hdr->gmv[i].type == DAV1D_WM_TYPE_AFFINE) {
mat[4] = 2 * dav1d_get_bits_subexp(gb, ref_mat[4] >> 1, 12);
mat[5] = (1 << 16) + 2 *
dav1d_get_bits_subexp(gb, (ref_mat[5] - (1 << 16)) >> 1, 12);
@ -891,9 +1021,6 @@ static int parse_frame_hdr(Dav1dContext *const c, GetBits *const gb) {
mat[0] = dav1d_get_bits_subexp(gb, ref_mat[0] >> shift, bits) * (1 << shift);
mat[1] = dav1d_get_bits_subexp(gb, ref_mat[1] >> shift, bits) * (1 << shift);
if (dav1d_get_shear_params(&hdr->gmv[i]))
hdr->gmv[i].type = WM_TYPE_TRANSLATION;
}
}
#if DEBUG_FRAME_HDR
@ -905,7 +1032,7 @@ static int parse_frame_hdr(Dav1dContext *const c, GetBits *const gb) {
(hdr->show_frame || hdr->showable_frame) &&
dav1d_get_bits(gb, 1);
if (hdr->film_grain.present) {
hdr->film_grain.seed = dav1d_get_bits(gb, 16);
const unsigned seed = dav1d_get_bits(gb, 16);
hdr->film_grain.update = hdr->frame_type != DAV1D_FRAME_TYPE_INTER || dav1d_get_bits(gb, 1);
if (!hdr->film_grain.update) {
const int refidx = dav1d_get_bits(gb, 3);
@ -913,10 +1040,12 @@ static int parse_frame_hdr(Dav1dContext *const c, GetBits *const gb) {
for (i = 0; i < 7; i++)
if (hdr->refidx[i] == refidx)
break;
if (i == 7) goto error;
hdr->film_grain.data = c->refs[refidx].film_grain;
if (i == 7 || !c->refs[refidx].p.p.frame_hdr) goto error;
hdr->film_grain.data = c->refs[refidx].p.p.frame_hdr->film_grain.data;
hdr->film_grain.data.seed = seed;
} else {
Av1FilmGrainData *const fgd = &hdr->film_grain.data;
Dav1dFilmGrainData *const fgd = &hdr->film_grain.data;
fgd->seed = seed;
fgd->num_y_points = dav1d_get_bits(gb, 4);
if (fgd->num_y_points > 14) goto error;
@ -928,10 +1057,9 @@ static int parse_frame_hdr(Dav1dContext *const c, GetBits *const gb) {
}
fgd->chroma_scaling_from_luma =
seqhdr->layout != DAV1D_PIXEL_LAYOUT_I400 && dav1d_get_bits(gb, 1);
if (seqhdr->layout == DAV1D_PIXEL_LAYOUT_I400 ||
fgd->chroma_scaling_from_luma ||
(seqhdr->layout == DAV1D_PIXEL_LAYOUT_I420 && !fgd->num_y_points))
!seqhdr->monochrome && dav1d_get_bits(gb, 1);
if (seqhdr->monochrome || fgd->chroma_scaling_from_luma ||
(seqhdr->ss_ver == 1 && seqhdr->ss_hor == 1 && !fgd->num_y_points))
{
fgd->num_uv_points[0] = fgd->num_uv_points[1] = 0;
} else for (int pl = 0; pl < 2; pl++) {
@ -945,7 +1073,7 @@ static int parse_frame_hdr(Dav1dContext *const c, GetBits *const gb) {
}
}
if (seqhdr->layout == DAV1D_PIXEL_LAYOUT_I420 &&
if (seqhdr->ss_hor == 1 && seqhdr->ss_ver == 1 &&
!!fgd->num_uv_points[0] != !!fgd->num_uv_points[1])
{
goto error;
@ -967,9 +1095,9 @@ static int parse_frame_hdr(Dav1dContext *const c, GetBits *const gb) {
fgd->grain_scale_shift = dav1d_get_bits(gb, 2);
for (int pl = 0; pl < 2; pl++)
if (fgd->num_uv_points[pl]) {
fgd->uv_mult[pl] = dav1d_get_bits(gb, 8);
fgd->uv_luma_mult[pl] = dav1d_get_bits(gb, 8);
fgd->uv_offset[pl] = dav1d_get_bits(gb, 9);
fgd->uv_mult[pl] = dav1d_get_bits(gb, 8) - 128;
fgd->uv_luma_mult[pl] = dav1d_get_bits(gb, 8) - 128;
fgd->uv_offset[pl] = dav1d_get_bits(gb, 9) - 256;
}
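// uv_mult, uv_luma_mult and uv_offset above are now stored as signed values:
// [-128, 127] for the multipliers and [-256, 255] for the offset, instead of
// the raw coded range.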
fgd->overlap_flag = dav1d_get_bits(gb, 1);
fgd->clip_to_restricted_range = dav1d_get_bits(gb, 1);
@ -991,13 +1119,13 @@ error:
static void parse_tile_hdr(Dav1dContext *const c, GetBits *const gb) {
int have_tile_pos = 0;
const int n_tiles = c->frame_hdr.tiling.cols * c->frame_hdr.tiling.rows;
const int n_tiles = c->frame_hdr->tiling.cols * c->frame_hdr->tiling.rows;
if (n_tiles > 1)
have_tile_pos = dav1d_get_bits(gb, 1);
if (have_tile_pos) {
const int n_bits = c->frame_hdr.tiling.log2_cols +
c->frame_hdr.tiling.log2_rows;
const int n_bits = c->frame_hdr->tiling.log2_cols +
c->frame_hdr->tiling.log2_rows;
c->tile[c->n_tile_data].start = dav1d_get_bits(gb, n_bits);
c->tile[c->n_tile_data].end = dav1d_get_bits(gb, n_bits);
} else {
@ -1031,7 +1159,7 @@ check_for_overrun(GetBits *const gb, unsigned init_bit_pos, unsigned obu_len)
return 0;
}
int dav1d_parse_obus(Dav1dContext *const c, Dav1dData *const in) {
int dav1d_parse_obus(Dav1dContext *const c, Dav1dData *const in, int global) {
GetBits gb;
int res;
@ -1043,9 +1171,11 @@ int dav1d_parse_obus(Dav1dContext *const c, Dav1dData *const in) {
const int has_extension = dav1d_get_bits(&gb, 1);
const int has_length_field = dav1d_get_bits(&gb, 1);
dav1d_get_bits(&gb, 1); // reserved
int temporal_id = 0, spatial_id = 0;
if (has_extension) {
dav1d_get_bits(&gb, 3); // temporal_layer_id
dav1d_get_bits(&gb, 2); // enhancement_layer_id
temporal_id = dav1d_get_bits(&gb, 3);
spatial_id = dav1d_get_bits(&gb, 2);
dav1d_get_bits(&gb, 3); // reserved
}
@ -1083,19 +1213,35 @@ int dav1d_parse_obus(Dav1dContext *const c, Dav1dData *const in) {
// rest of the OBU.
if (len > in->sz - init_byte_pos) goto error;
// skip obu not belonging to the selected temporal/spatial layer
if (type != OBU_SEQ_HDR && type != OBU_TD &&
has_extension && c->operating_point_idc != 0)
{
const int in_temporal_layer = (c->operating_point_idc >> temporal_id) & 1;
const int in_spatial_layer = (c->operating_point_idc >> (spatial_id + 8)) & 1;
if (!in_temporal_layer || !in_spatial_layer)
return len + init_byte_pos;
}
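// operating_point_idc is a 12-bit mask: bits 0-7 flag the temporal layers and
// bits 8-11 the spatial layers belonging to the selected operating point, so
// an OBU whose layer bit is unset is skipped here.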
switch (type) {
case OBU_SEQ_HDR: {
Av1SequenceHeader hdr, *const hdr_ptr = c->have_seq_hdr ? &hdr : &c->seq_hdr;
memset(hdr_ptr, 0, sizeof(*hdr_ptr));
c->have_frame_hdr = 0;
if ((res = parse_seq_hdr(c, &gb, hdr_ptr)) < 0)
Dav1dRef *ref = dav1d_ref_create(sizeof(Dav1dSequenceHeader));
if (!ref) return -ENOMEM;
Dav1dSequenceHeader *seq_hdr = ref->data;
memset(seq_hdr, 0, sizeof(*seq_hdr));
c->frame_hdr = NULL;
if ((res = parse_seq_hdr(c, &gb, seq_hdr)) < 0) {
dav1d_ref_dec(&ref);
return res;
if (check_for_overrun(&gb, init_bit_pos, len))
}
if (check_for_overrun(&gb, init_bit_pos, len)) {
dav1d_ref_dec(&ref);
return -EINVAL;
}
// If we have read a sequence header which is different from
// the old one, this is a new video sequence and can't use any
// previous state. Free that state.
if (c->have_seq_hdr && memcmp(&hdr, &c->seq_hdr, sizeof(hdr))) {
if (c->seq_hdr && memcmp(seq_hdr, c->seq_hdr, sizeof(*seq_hdr))) {
for (int i = 0; i < 8; i++) {
if (c->refs[i].p.p.data[0])
dav1d_thread_picture_unref(&c->refs[i].p);
@ -1104,21 +1250,33 @@ int dav1d_parse_obus(Dav1dContext *const c, Dav1dData *const in) {
if (c->cdf[i].cdf)
dav1d_cdf_thread_unref(&c->cdf[i]);
}
c->seq_hdr = hdr;
}
c->have_seq_hdr = 1;
dav1d_ref_dec(&c->seq_hdr_ref);
c->seq_hdr_ref = ref;
c->seq_hdr = seq_hdr;
break;
}
case OBU_REDUNDANT_FRAME_HDR:
if (c->have_frame_hdr) break;
if (c->frame_hdr) break;
// fall-through
case OBU_FRAME:
case OBU_FRAME_HDR:
c->have_frame_hdr = 0;
if (!c->have_seq_hdr) goto error;
if ((res = parse_frame_hdr(c, &gb)) < 0)
if (global) break;
if (!c->seq_hdr) goto error;
if (!c->frame_hdr_ref) {
c->frame_hdr_ref = dav1d_ref_create(sizeof(Dav1dFrameHeader));
if (!c->frame_hdr_ref) return -ENOMEM;
}
// ensure that the reference is writable
assert(dav1d_ref_is_writable(c->frame_hdr_ref));
c->frame_hdr = c->frame_hdr_ref->data;
memset(c->frame_hdr, 0, sizeof(*c->frame_hdr));
c->frame_hdr->temporal_id = temporal_id;
c->frame_hdr->spatial_id = spatial_id;
if ((res = parse_frame_hdr(c, &gb)) < 0) {
c->frame_hdr = NULL;
return res;
c->have_frame_hdr = 1;
}
for (int n = 0; n < c->n_tile_data; n++)
dav1d_data_unref(&c->tile[n].data);
c->n_tile_data = 0;
@ -1127,13 +1285,18 @@ int dav1d_parse_obus(Dav1dContext *const c, Dav1dData *const in) {
// This is actually a frame header OBU so read the
// trailing bit and check for overrun.
dav1d_get_bits(&gb, 1);
if (check_for_overrun(&gb, init_bit_pos, len))
if (check_for_overrun(&gb, init_bit_pos, len)) {
c->frame_hdr = NULL;
return -EINVAL;
}
break;
}
// OBU_FRAMEs shouldn't be signalled with show_existing_frame
if (c->frame_hdr.show_existing_frame) goto error;
if (c->frame_hdr->show_existing_frame) {
c->frame_hdr = NULL;
goto error;
}
// This is the frame header at the start of a frame OBU.
// There's no trailing bit at the end to skip, but we do need
@ -1141,7 +1304,8 @@ int dav1d_parse_obus(Dav1dContext *const c, Dav1dData *const in) {
dav1d_bytealign_get_bits(&gb);
// fall-through
case OBU_TILE_GRP: {
if (!c->have_frame_hdr) goto error;
if (global) break;
if (!c->frame_hdr) goto error;
if (c->n_tile_data >= 256) goto error;
parse_tile_hdr(c, &gb);
// Align to the next byte boundary and check for overrun.
@ -1156,6 +1320,7 @@ int dav1d_parse_obus(Dav1dContext *const c, Dav1dData *const in) {
assert(pkt_bytelen >= (bit_pos >> 3));
dav1d_ref_inc(in->ref);
c->tile[c->n_tile_data].data.ref = in->ref;
c->tile[c->n_tile_data].data.m = in->m;
c->tile[c->n_tile_data].data.data = in->data + (bit_pos >> 3);
c->tile[c->n_tile_data].data.sz = pkt_bytelen - (bit_pos >> 3);
// ensure tile groups are in order and sane, see 6.10.1
@ -1183,72 +1348,67 @@ int dav1d_parse_obus(Dav1dContext *const c, Dav1dData *const in) {
return -EINVAL;
}
if (c->have_seq_hdr && c->have_frame_hdr &&
c->n_tiles == c->frame_hdr.tiling.cols * c->frame_hdr.tiling.rows)
{
if (!c->n_tile_data)
return -EINVAL;
if ((res = dav1d_submit_frame(c)) < 0)
return res;
assert(!c->n_tile_data);
c->have_frame_hdr = 0;
c->n_tiles = 0;
} else if (c->have_seq_hdr && c->have_frame_hdr &&
c->frame_hdr.show_existing_frame)
{
if (c->n_fc == 1) {
dav1d_picture_ref(&c->out,
&c->refs[c->frame_hdr.existing_frame_idx].p.p);
} else {
// need to append this to the frame output queue
const unsigned next = c->frame_thread.next++;
if (c->frame_thread.next == c->n_fc)
c->frame_thread.next = 0;
if (c->seq_hdr && c->frame_hdr) {
if (c->frame_hdr->show_existing_frame) {
if (!c->refs[c->frame_hdr->existing_frame_idx].p.p.data[0]) return -EINVAL;
if (c->n_fc == 1) {
dav1d_picture_ref(&c->out,
&c->refs[c->frame_hdr->existing_frame_idx].p.p);
c->out.m = in->m;
} else {
// need to append this to the frame output queue
const unsigned next = c->frame_thread.next++;
if (c->frame_thread.next == c->n_fc)
c->frame_thread.next = 0;
Dav1dFrameContext *const f = &c->fc[next];
pthread_mutex_lock(&f->frame_thread.td.lock);
while (f->n_tile_data > 0)
pthread_cond_wait(&f->frame_thread.td.cond,
&f->frame_thread.td.lock);
Dav1dThreadPicture *const out_delayed =
&c->frame_thread.out_delayed[next];
if (out_delayed->p.data[0]) {
if (out_delayed->visible && !out_delayed->flushed)
dav1d_picture_ref(&c->out, &out_delayed->p);
dav1d_thread_picture_unref(out_delayed);
Dav1dFrameContext *const f = &c->fc[next];
pthread_mutex_lock(&f->frame_thread.td.lock);
while (f->n_tile_data > 0)
pthread_cond_wait(&f->frame_thread.td.cond,
&f->frame_thread.td.lock);
Dav1dThreadPicture *const out_delayed =
&c->frame_thread.out_delayed[next];
if (out_delayed->p.data[0]) {
const unsigned progress = atomic_load_explicit(&out_delayed->progress[1],
memory_order_relaxed);
if (out_delayed->visible && progress != FRAME_ERROR)
dav1d_picture_ref(&c->out, &out_delayed->p);
dav1d_thread_picture_unref(out_delayed);
}
dav1d_thread_picture_ref(out_delayed,
&c->refs[c->frame_hdr->existing_frame_idx].p);
out_delayed->visible = 1;
out_delayed->p.m = in->m;
pthread_mutex_unlock(&f->frame_thread.td.lock);
}
dav1d_thread_picture_ref(out_delayed,
&c->refs[c->frame_hdr.existing_frame_idx].p);
out_delayed->visible = 1;
out_delayed->flushed = 0;
pthread_mutex_unlock(&f->frame_thread.td.lock);
}
c->have_frame_hdr = 0;
if (c->refs[c->frame_hdr.existing_frame_idx].p.p.p.type == DAV1D_FRAME_TYPE_KEY) {
const int r = c->frame_hdr.existing_frame_idx;
for (int i = 0; i < 8; i++) {
if (i == c->frame_hdr.existing_frame_idx) continue;
if (c->refs[c->frame_hdr->existing_frame_idx].p.p.frame_hdr->frame_type == DAV1D_FRAME_TYPE_KEY) {
const int r = c->frame_hdr->existing_frame_idx;
for (int i = 0; i < 8; i++) {
if (i == c->frame_hdr->existing_frame_idx) continue;
if (c->refs[i].p.p.data[0])
dav1d_thread_picture_unref(&c->refs[i].p);
dav1d_thread_picture_ref(&c->refs[i].p, &c->refs[r].p);
if (c->refs[i].p.p.data[0])
dav1d_thread_picture_unref(&c->refs[i].p);
dav1d_thread_picture_ref(&c->refs[i].p, &c->refs[r].p);
if (c->cdf[i].cdf) dav1d_cdf_thread_unref(&c->cdf[i]);
dav1d_init_states(&c->cdf[i], c->refs[r].qidx);
if (c->cdf[i].cdf) dav1d_cdf_thread_unref(&c->cdf[i]);
dav1d_init_states(&c->cdf[i], c->refs[r].p.p.frame_hdr->quant.yac);
c->refs[i].lf_mode_ref_deltas = c->refs[r].lf_mode_ref_deltas;
c->refs[i].seg_data = c->refs[r].seg_data;
for (int j = 0; j < 7; j++)
c->refs[i].gmv[j] = dav1d_default_wm_params;
c->refs[i].film_grain = c->refs[r].film_grain;
dav1d_ref_dec(&c->refs[i].segmap);
c->refs[i].segmap = c->refs[r].segmap;
if (c->refs[r].segmap)
dav1d_ref_inc(c->refs[r].segmap);
dav1d_ref_dec(&c->refs[i].refmvs);
c->refs[i].qidx = c->refs[r].qidx;
dav1d_ref_dec(&c->refs[i].segmap);
c->refs[i].segmap = c->refs[r].segmap;
if (c->refs[r].segmap)
dav1d_ref_inc(c->refs[r].segmap);
dav1d_ref_dec(&c->refs[i].refmvs);
}
}
c->frame_hdr = NULL;
} else if (c->n_tiles == c->frame_hdr->tiling.cols * c->frame_hdr->tiling.rows) {
if (!c->n_tile_data)
return -EINVAL;
if ((res = dav1d_submit_frame(c)) < 0)
return res;
assert(!c->n_tile_data);
c->frame_hdr = NULL;
c->n_tiles = 0;
}
}

View File

@ -31,6 +31,6 @@
#include "dav1d/data.h"
#include "src/internal.h"
int dav1d_parse_obus(Dav1dContext *c, Dav1dData *in);
int dav1d_parse_obus(Dav1dContext *c, Dav1dData *in, int global);
#endif /* __DAV1D_SRC_OBU_H__ */

View File

@ -29,6 +29,7 @@
#include <assert.h>
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@ -74,29 +75,24 @@ int default_picture_allocator(Dav1dPicture *const p, void *cookie) {
return 0;
}
void default_picture_release(uint8_t *const data, void *const allocator_data,
void *cookie)
{
void default_picture_release(Dav1dPicture *const p, void *cookie) {
assert(cookie == NULL);
#ifndef NDEBUG /* safety check */
assert(allocator_data == data);
assert(p->allocator_data == p->data[0]);
#endif
dav1d_free_aligned(data);
dav1d_free_aligned(p->data[0]);
}
struct pic_ctx_context {
Dav1dPicAllocator allocator;
void *allocator_data;
uint8_t *data;
Dav1dPicture pic;
void *extra_ptr; /* MUST BE AT THE END */
};
static void free_buffer(const uint8_t *data, void *user_data)
{
static void free_buffer(const uint8_t *const data, void *const user_data) {
struct pic_ctx_context *pic_ctx = user_data;
pic_ctx->allocator.release_picture_callback(pic_ctx->data,
pic_ctx->allocator_data,
pic_ctx->allocator.release_picture_callback(&pic_ctx->pic,
pic_ctx->allocator.cookie);
free(pic_ctx);
}
@ -121,10 +117,9 @@ static int picture_alloc_with_edges(Dav1dPicture *const p,
p->p.w = w;
p->p.h = h;
p->p.pri = DAV1D_COLOR_PRI_UNKNOWN;
p->p.trc = DAV1D_TRC_UNKNOWN;
p->p.mtrx = DAV1D_MC_UNKNOWN;
p->p.chr = DAV1D_CHR_UNKNOWN;
p->m.timestamp = INT64_MIN;
p->m.duration = 0;
p->m.offset = -1;
p->p.layout = layout;
p->p.bpc = bpc;
int res = p_allocator->alloc_picture_callback(p, p_allocator->cookie);
@ -134,12 +129,10 @@ static int picture_alloc_with_edges(Dav1dPicture *const p,
}
pic_ctx->allocator = *p_allocator;
pic_ctx->allocator_data = p->allocator_data;
pic_ctx->data = p->data[0];
pic_ctx->pic = *p;
if (!(p->ref = dav1d_ref_wrap(p->data[0], free_buffer, pic_ctx))) {
p_allocator->release_picture_callback(p->data[0], p->allocator_data,
p_allocator->cookie);
p_allocator->release_picture_callback(p, p_allocator->cookie);
fprintf(stderr, "Failed to wrap picture: %s\n", strerror(errno));
return -ENOMEM;
}
@ -165,7 +158,6 @@ int dav1d_thread_picture_alloc(Dav1dThreadPicture *const p,
if (res) return res;
p->visible = visible;
p->flushed = 0;
if (t) {
atomic_init(&p->progress[0], 0);
atomic_init(&p->progress[1], 0);
@ -173,6 +165,29 @@ int dav1d_thread_picture_alloc(Dav1dThreadPicture *const p,
return res;
}
int dav1d_picture_alloc_copy(Dav1dPicture *const dst, const int w,
const Dav1dPicture *const src)
{
struct pic_ctx_context *const pic_ctx = src->ref->user_data;
const int res = picture_alloc_with_edges(dst, w, src->p.h, src->p.layout,
src->p.bpc, &pic_ctx->allocator,
0, NULL);
if (!res) {
dst->p = src->p;
dst->m = src->m;
dst->p.w = w;
dst->frame_hdr = src->frame_hdr;
dst->frame_hdr_ref = src->frame_hdr_ref;
if (dst->frame_hdr_ref) dav1d_ref_inc(dst->frame_hdr_ref);
dst->seq_hdr = src->seq_hdr;
dst->seq_hdr_ref = src->seq_hdr_ref;
if (dst->seq_hdr_ref) dav1d_ref_inc(dst->seq_hdr_ref);
}
return res;
}
void dav1d_picture_ref(Dav1dPicture *const dst, const Dav1dPicture *const src) {
validate_input(dst != NULL);
validate_input(dst->data[0] == NULL);
@ -181,6 +196,8 @@ void dav1d_picture_ref(Dav1dPicture *const dst, const Dav1dPicture *const src) {
if (src->ref) {
validate_input(src->data[0] != NULL);
dav1d_ref_inc(src->ref);
if (src->frame_hdr_ref) dav1d_ref_inc(src->frame_hdr_ref);
if (src->seq_hdr_ref) dav1d_ref_inc(src->seq_hdr_ref);
}
*dst = *src;
}
@ -204,7 +221,6 @@ void dav1d_thread_picture_ref(Dav1dThreadPicture *dst,
dst->t = src->t;
dst->visible = src->visible;
dst->progress = src->progress;
dst->flushed = src->flushed;
}
void dav1d_picture_unref(Dav1dPicture *const p) {
@ -213,6 +229,8 @@ void dav1d_picture_unref(Dav1dPicture *const p) {
if (p->ref) {
validate_input(p->data[0] != NULL);
dav1d_ref_dec(&p->ref);
dav1d_ref_dec(&p->seq_hdr_ref);
dav1d_ref_dec(&p->frame_hdr_ref);
}
memset(p, 0, sizeof(*p));
}
@ -260,8 +278,10 @@ void dav1d_thread_picture_signal(const Dav1dThreadPicture *const p,
return;
pthread_mutex_lock(&p->t->lock);
if (plane_type != PLANE_TYPE_Y) atomic_store(&p->progress[0], y);
if (plane_type != PLANE_TYPE_BLOCK) atomic_store(&p->progress[1], y);
if (plane_type != PLANE_TYPE_Y)
atomic_store(&p->progress[0], y);
if (plane_type != PLANE_TYPE_BLOCK)
atomic_store(&p->progress[1], y);
pthread_cond_broadcast(&p->t->cond);
pthread_mutex_unlock(&p->t->lock);
}


@ -44,7 +44,7 @@ enum PlaneType {
typedef struct Dav1dThreadPicture {
Dav1dPicture p;
int visible, flushed;
int visible;
struct thread_data *t;
// [0] block data (including segmentation map and motion vectors)
// [1] pixel data
@ -59,6 +59,16 @@ int dav1d_thread_picture_alloc(Dav1dThreadPicture *p, int w, int h,
struct thread_data *t, int visible,
Dav1dPicAllocator *);
/**
* Allocate a picture with identical metadata to an existing picture.
* The width is a separate argument so this function can be used for
* super-res, where the width changes, but everything else is the same.
* For the more typical use case of allocating a new image of the same
* dimensions, use src->p.w as width.
*/
int dav1d_picture_alloc_copy(Dav1dPicture *dst, const int w,
const Dav1dPicture *src);
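/* Illustrative sketch (not part of the patch): one way this helper could be
 * used for super-res, assuming a fully decoded picture `cur` and a
 * hypothetical post-upscale width `upscaled_w`:
 *
 *     Dav1dPicture sr = { 0 };
 *     if (!dav1d_picture_alloc_copy(&sr, upscaled_w, &cur)) {
 *         // upscale cur into sr here, then drop the reference when done
 *         dav1d_picture_unref(&sr);
 *     }
 */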
/**
* Create a copy of a picture.
*/
@ -98,6 +108,6 @@ void dav1d_thread_picture_signal(const Dav1dThreadPicture *p, int y,
enum PlaneType plane_type);
int default_picture_allocator(Dav1dPicture *, void *cookie);
void default_picture_release(uint8_t *, void *allocator_data, void *cookie);
void default_picture_release(Dav1dPicture *, void *cookie);
#endif /* __DAV1D_SRC_PICTURE_H__ */


@ -32,7 +32,7 @@
#include "src/levels.h"
#define DEBUG_BLOCK_INFO 0 && \
f->frame_hdr.frame_offset == 2 && t->by >= 0 && t->by < 4 && \
f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
t->bx >= 8 && t->bx < 12
#define DEBUG_B_PIXELS 0


@ -72,7 +72,7 @@ static int decode_coefs(Dav1dTileContext *const t,
if (dbg) printf("Start: r=%d\n", ts->msac.rng);
// does this block have any non-zero coefficients
const int sctx = get_coef_skip_ctx(t_dim, bs, a, l, chroma, f->cur.p.p.layout);
const int sctx = get_coef_skip_ctx(t_dim, bs, a, l, chroma, f->cur.p.layout);
const int all_skip =
msac_decode_bool_adapt(&ts->msac, ts->cdf.coef.skip[t_dim->ctx][sctx]);
if (dbg)
@ -80,7 +80,7 @@ static int decode_coefs(Dav1dTileContext *const t,
t_dim->ctx, sctx, all_skip, ts->msac.rng);
if (all_skip) {
*res_ctx = 0x40;
*txtp = f->frame_hdr.segmentation.lossless[b->seg_id] ? WHT_WHT :
*txtp = f->frame_hdr->segmentation.lossless[b->seg_id] ? WHT_WHT :
DCT_DCT;
return -1;
}
@ -88,14 +88,14 @@ static int decode_coefs(Dav1dTileContext *const t,
// transform type (chroma: derived, luma: explicitly coded)
if (chroma) {
if (intra) {
*txtp = get_uv_intra_txtp(b->uv_mode, tx, &f->frame_hdr, b->seg_id);
*txtp = get_uv_intra_txtp(b->uv_mode, tx, f->frame_hdr, b->seg_id);
} else {
const enum TxfmType y_txtp = *txtp;
*txtp = get_uv_inter_txtp(t_dim, y_txtp, &f->frame_hdr, b->seg_id);
*txtp = get_uv_inter_txtp(t_dim, y_txtp, f->frame_hdr, b->seg_id);
}
} else {
const enum TxfmTypeSet set = get_ext_txtp_set(tx, !intra,
&f->frame_hdr, b->seg_id);
f->frame_hdr, b->seg_id);
const unsigned set_cnt = dav1d_tx_type_count[set];
unsigned idx;
if (set_cnt == 1) {
@ -289,7 +289,7 @@ static void read_coef_tree(Dav1dTileContext *const t,
t->by += txsh;
if (txh >= txw && t->by < f->bh) {
if (dst)
dst += 4 * txsh * PXSTRIDE(f->cur.p.stride[0]);
dst += 4 * txsh * PXSTRIDE(f->cur.stride[0]);
read_coef_tree(t, bs, b, sub, depth + 1, tx_split,
x_off * 2 + 0, y_off * 2 + 1, dst);
t->bx += txsw;
@ -349,9 +349,9 @@ static void read_coef_tree(Dav1dTileContext *const t,
if (eob >= 0) {
if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
coef_dump(cf, imin(t_dim->h, 8) * 4, imin(t_dim->w, 8) * 4, 3, "dq");
dsp->itx.itxfm_add[ytx][txtp](dst, f->cur.p.stride[0], cf, eob);
dsp->itx.itxfm_add[ytx][txtp](dst, f->cur.stride[0], cf, eob);
if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
hex_dump(dst, f->cur.p.stride[0], t_dim->w * 4, t_dim->h * 4, "recon");
hex_dump(dst, f->cur.stride[0], t_dim->w * 4, t_dim->h * 4, "recon");
}
}
}
@ -361,14 +361,14 @@ void bytefn(dav1d_read_coef_blocks)(Dav1dTileContext *const t,
const enum BlockSize bs, const Av1Block *const b)
{
const Dav1dFrameContext *const f = t->f;
const int ss_ver = f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
const int ss_hor = f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
const int bx4 = t->bx & 31, by4 = t->by & 31;
const int cbx4 = bx4 >> ss_hor, cby4 = by4 >> ss_ver;
const uint8_t *const b_dim = dav1d_block_dimensions[bs];
const int bw4 = b_dim[0], bh4 = b_dim[1];
const int cbw4 = (bw4 + 1) >> ss_hor, cbh4 = (bh4 + 1) >> ss_ver;
const int has_chroma = f->seq_hdr.layout != DAV1D_PIXEL_LAYOUT_I400 &&
const int cbw4 = (bw4 + ss_hor) >> ss_hor, cbh4 = (bh4 + ss_ver) >> ss_ver;
const int has_chroma = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400 &&
(bw4 > ss_hor || t->bx & 1) &&
(bh4 > ss_ver || t->by & 1);
@ -501,27 +501,27 @@ static int mc(Dav1dTileContext *const t,
{
assert((dst8 != NULL) ^ (dst16 != NULL));
const Dav1dFrameContext *const f = t->f;
const int ss_ver = !!pl && f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
const int ss_hor = !!pl && f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
const int ss_ver = !!pl && f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
const int ss_hor = !!pl && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
const int h_mul = 4 >> ss_hor, v_mul = 4 >> ss_ver;
const int mvx = mv.x, mvy = mv.y;
const int mx = mvx & (15 >> !ss_hor), my = mvy & (15 >> !ss_ver);
ptrdiff_t ref_stride = refp->p.stride[!!pl];
const pixel *ref;
if (refp->p.p.w == f->cur.p.p.w && refp->p.p.h == f->cur.p.p.h) {
if (refp->p.p.w == f->cur.p.w && refp->p.p.h == f->cur.p.h) {
const int dx = bx * h_mul + (mvx >> (3 + ss_hor));
const int dy = by * v_mul + (mvy >> (3 + ss_ver));
int w, h;
if (refp != &f->cur) { // i.e. not for intrabc
if (refp->p.data[0] != f->cur.data[0]) { // i.e. not for intrabc
if (dav1d_thread_picture_wait(refp, dy + bh4 * v_mul + !!my * 4,
PLANE_TYPE_Y + !!pl))
{
return -1;
}
w = (f->cur.p.p.w + ss_hor) >> ss_hor;
h = (f->cur.p.p.h + ss_ver) >> ss_ver;
w = (f->cur.p.w + ss_hor) >> ss_hor;
h = (f->cur.p.h + ss_ver) >> ss_ver;
} else {
w = f->bw * 4 >> ss_hor;
h = f->bh * 4 >> ss_ver;
@ -548,7 +548,7 @@ static int mc(Dav1dTileContext *const t,
bh4 * v_mul, mx << !ss_hor, my << !ss_ver);
}
} else {
assert(refp != &f->cur);
assert(refp != &f->sr_cur);
int orig_pos_y = (by * v_mul << 4) + mvy * (1 << !ss_ver);
int orig_pos_x = (bx * h_mul << 4) + mvx * (1 << !ss_hor);
@ -567,8 +567,13 @@ static int mc(Dav1dTileContext *const t,
const int bottom =
((pos_y + (bh4 * v_mul - 1) * f->svc[refidx][1].step) >> 10) + 1;
if (dav1d_thread_picture_wait(refp, bottom, PLANE_TYPE_Y + !!pl))
if (dav1d_thread_picture_wait(refp, bottom + 4, PLANE_TYPE_Y + !!pl))
return -1;
if (DEBUG_BLOCK_INFO)
printf("Off %dx%d [%d,%d,%d], size %dx%d [%d,%d]\n",
left, top, orig_pos_x, f->svc[refidx][0].scale, refidx,
right-left, bottom-top,
f->svc[refidx][0].step, f->svc[refidx][1].step);
const int w = (refp->p.p.w + ss_hor) >> ss_hor;
const int h = (refp->p.p.h + ss_ver) >> ss_ver;
@ -579,6 +584,7 @@ static int mc(Dav1dTileContext *const t,
refp->p.data[pl], ref_stride);
ref = &t->emu_edge[320 * 3 + 3];
ref_stride = 320 * sizeof(pixel);
if (DEBUG_BLOCK_INFO) printf("Emu\n");
} else {
ref = ((pixel *) refp->p.data[pl]) + PXSTRIDE(ref_stride) * top + left;
}
@ -610,8 +616,8 @@ static int obmc(Dav1dTileContext *const t,
const Dav1dFrameContext *const f = t->f;
const refmvs *const r = &f->mvs[t->by * f->b4_stride + t->bx];
pixel *const lap = t->scratch.lap;
const int ss_ver = !!pl && f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
const int ss_hor = !!pl && f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
const int ss_ver = !!pl && f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
const int ss_hor = !!pl && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
const int h_mul = 4 >> ss_hor, v_mul = 4 >> ss_ver;
int res;
@ -668,13 +674,13 @@ static int warp_affine(Dav1dTileContext *const t,
pixel *dst8, coef *dst16, const ptrdiff_t dstride,
const uint8_t *const b_dim, const int pl,
const Dav1dThreadPicture *const refp,
const WarpedMotionParams *const wmp)
const Dav1dWarpedMotionParams *const wmp)
{
assert((dst8 != NULL) ^ (dst16 != NULL));
const Dav1dFrameContext *const f = t->f;
const Dav1dDSPContext *const dsp = f->dsp;
const int ss_ver = !!pl && f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
const int ss_hor = !!pl && f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
const int ss_ver = !!pl && f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
const int ss_hor = !!pl && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
const int h_mul = 4 >> ss_hor, v_mul = 4 >> ss_ver;
assert(!((b_dim[0] * h_mul) & 7) && !((b_dim[1] * v_mul) & 7));
const int32_t *const mat = wmp->matrix;
@ -735,14 +741,14 @@ void bytefn(dav1d_recon_b_intra)(Dav1dTileContext *const t, const enum BlockSize
const Dav1dFrameContext *const f = t->f;
const Dav1dDSPContext *const dsp = f->dsp;
const int bx4 = t->bx & 31, by4 = t->by & 31;
const int ss_ver = f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
const int ss_hor = f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
const int cbx4 = bx4 >> ss_hor, cby4 = by4 >> ss_ver;
const uint8_t *const b_dim = dav1d_block_dimensions[bs];
const int bw4 = b_dim[0], bh4 = b_dim[1];
const int w4 = imin(bw4, f->bw - t->bx), h4 = imin(bh4, f->bh - t->by);
const int cw4 = (w4 + ss_hor) >> ss_hor, ch4 = (h4 + ss_ver) >> ss_ver;
const int has_chroma = f->seq_hdr.layout != DAV1D_PIXEL_LAYOUT_I400 &&
const int has_chroma = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400 &&
(bw4 > ss_hor || t->bx & 1) &&
(bh4 > ss_ver || t->by & 1);
const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[b->tx];
@ -753,13 +759,13 @@ void bytefn(dav1d_recon_b_intra)(Dav1dTileContext *const t, const enum BlockSize
pixel *const edge = edge_buf + 128;
const int cbw4 = (bw4 + ss_hor) >> ss_hor, cbh4 = (bh4 + ss_ver) >> ss_ver;
const int intra_edge_filter_flag = f->seq_hdr.intra_edge_filter << 10;
const int intra_edge_filter_flag = f->seq_hdr->intra_edge_filter << 10;
for (int init_y = 0; init_y < h4; init_y += 16) {
for (int init_x = 0; init_x < w4; init_x += 16) {
if (b->pal_sz[0]) {
pixel *dst = ((pixel *) f->cur.p.data[0]) +
4 * (t->by * PXSTRIDE(f->cur.p.stride[0]) + t->bx);
pixel *dst = ((pixel *) f->cur.data[0]) +
4 * (t->by * PXSTRIDE(f->cur.stride[0]) + t->bx);
const uint8_t *pal_idx;
if (f->frame_thread.pass) {
pal_idx = ts->frame_thread.pal_idx;
@ -770,10 +776,10 @@ void bytefn(dav1d_recon_b_intra)(Dav1dTileContext *const t, const enum BlockSize
const uint16_t *const pal = f->frame_thread.pass ?
f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
((t->bx >> 1) + (t->by & 1))][0] : t->pal[0];
f->dsp->ipred.pal_pred(dst, f->cur.p.stride[0], pal,
f->dsp->ipred.pal_pred(dst, f->cur.stride[0], pal,
pal_idx, bw4 * 4, bh4 * 4);
if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
hex_dump(dst, PXSTRIDE(f->cur.p.stride[0]),
hex_dump(dst, PXSTRIDE(f->cur.stride[0]),
bw4 * 4, bh4 * 4, "y-pal-pred");
}
@ -790,8 +796,8 @@ void bytefn(dav1d_recon_b_intra)(Dav1dTileContext *const t, const enum BlockSize
for (y = init_y, t->by += init_y; y < sub_h4;
y += t_dim->h, t->by += t_dim->h)
{
pixel *dst = ((pixel *) f->cur.p.data[0]) +
4 * (t->by * PXSTRIDE(f->cur.p.stride[0]) +
pixel *dst = ((pixel *) f->cur.data[0]) +
4 * (t->by * PXSTRIDE(f->cur.stride[0]) +
t->bx + init_x);
for (x = init_x, t->bx += init_x; x < sub_w4;
x += t_dim->w, t->bx += t_dim->w)
@ -818,10 +824,10 @@ void bytefn(dav1d_recon_b_intra)(Dav1dTileContext *const t, const enum BlockSize
ts->tiling.col_end,
ts->tiling.row_end,
edge_flags, dst,
f->cur.p.stride[0], top_sb_edge,
f->cur.stride[0], top_sb_edge,
b->y_mode, &angle,
t_dim->w, t_dim->h, edge);
dsp->ipred.intra_pred[m](dst, f->cur.p.stride[0], edge,
dsp->ipred.intra_pred[m](dst, f->cur.stride[0], edge,
t_dim->w * 4, t_dim->h * 4,
angle | intra_flags,
4 * f->bw - 4 * t->bx,
@ -833,7 +839,7 @@ void bytefn(dav1d_recon_b_intra)(Dav1dTileContext *const t, const enum BlockSize
hex_dump(edge, 0, 1, 1, "tl");
hex_dump(edge + 1, t_dim->w * 4,
t_dim->w * 4, 2, "t");
hex_dump(dst, f->cur.p.stride[0],
hex_dump(dst, f->cur.stride[0],
t_dim->w * 4, t_dim->h * 4, "y-intra-pred");
}
@ -875,10 +881,10 @@ void bytefn(dav1d_recon_b_intra)(Dav1dTileContext *const t, const enum BlockSize
imin(t_dim->w, 8) * 4, 3, "dq");
dsp->itx.itxfm_add[b->tx]
[txtp](dst,
f->cur.p.stride[0],
f->cur.stride[0],
cf, eob);
if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
hex_dump(dst, f->cur.p.stride[0],
hex_dump(dst, f->cur.stride[0],
t_dim->w * 4, t_dim->h * 4, "recon");
}
} else if (!f->frame_thread.pass) {
@ -896,24 +902,24 @@ void bytefn(dav1d_recon_b_intra)(Dav1dTileContext *const t, const enum BlockSize
if (!has_chroma) continue;
const ptrdiff_t stride = f->cur.p.stride[1];
const ptrdiff_t stride = f->cur.stride[1];
if (b->uv_mode == CFL_PRED) {
assert(!init_x && !init_y);
int16_t *const ac = t->scratch.ac;
pixel *y_src = ((pixel *) f->cur.p.data[0]) + 4 * (t->bx & ~ss_hor) +
4 * (t->by & ~ss_ver) * PXSTRIDE(f->cur.p.stride[0]);
pixel *y_src = ((pixel *) f->cur.data[0]) + 4 * (t->bx & ~ss_hor) +
4 * (t->by & ~ss_ver) * PXSTRIDE(f->cur.stride[0]);
const ptrdiff_t uv_off = 4 * ((t->bx >> ss_hor) +
(t->by >> ss_ver) * PXSTRIDE(stride));
pixel *const uv_dst[2] = { ((pixel *) f->cur.p.data[1]) + uv_off,
((pixel *) f->cur.p.data[2]) + uv_off };
pixel *const uv_dst[2] = { ((pixel *) f->cur.data[1]) + uv_off,
((pixel *) f->cur.data[2]) + uv_off };
const int furthest_r =
((cw4 << ss_hor) + t_dim->w - 1) & ~(t_dim->w - 1);
const int furthest_b =
((ch4 << ss_ver) + t_dim->h - 1) & ~(t_dim->h - 1);
dsp->ipred.cfl_ac[f->cur.p.p.layout - 1](ac, y_src, f->cur.p.stride[0],
dsp->ipred.cfl_ac[f->cur.p.layout - 1](ac, y_src, f->cur.stride[0],
cbw4 - (furthest_r >> ss_hor),
cbh4 - (furthest_b >> ss_ver),
cbw4 * 4, cbh4 * 4);
@ -950,7 +956,7 @@ void bytefn(dav1d_recon_b_intra)(Dav1dTileContext *const t, const enum BlockSize
}
} else if (b->pal_sz[1]) {
ptrdiff_t uv_dstoff = 4 * ((t->bx >> ss_hor) +
(t->by >> ss_ver) * PXSTRIDE(f->cur.p.stride[1]));
(t->by >> ss_ver) * PXSTRIDE(f->cur.stride[1]));
const uint8_t *pal_idx;
if (f->frame_thread.pass) {
pal_idx = ts->frame_thread.pal_idx;
@ -961,21 +967,21 @@ void bytefn(dav1d_recon_b_intra)(Dav1dTileContext *const t, const enum BlockSize
const uint16_t *const pal_u = f->frame_thread.pass ?
f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
((t->bx >> 1) + (t->by & 1))][1] : t->pal[1];
f->dsp->ipred.pal_pred(((pixel *) f->cur.p.data[1]) + uv_dstoff,
f->cur.p.stride[1], pal_u,
f->dsp->ipred.pal_pred(((pixel *) f->cur.data[1]) + uv_dstoff,
f->cur.stride[1], pal_u,
pal_idx, cbw4 * 4, cbh4 * 4);
const uint16_t *const pal_v = f->frame_thread.pass ?
f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
((t->bx >> 1) + (t->by & 1))][2] : t->pal[2];
f->dsp->ipred.pal_pred(((pixel *) f->cur.p.data[2]) + uv_dstoff,
f->cur.p.stride[1], pal_v,
f->dsp->ipred.pal_pred(((pixel *) f->cur.data[2]) + uv_dstoff,
f->cur.stride[1], pal_v,
pal_idx, cbw4 * 4, cbh4 * 4);
if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
hex_dump(((pixel *) f->cur.p.data[1]) + uv_dstoff,
PXSTRIDE(f->cur.p.stride[1]),
hex_dump(((pixel *) f->cur.data[1]) + uv_dstoff,
PXSTRIDE(f->cur.stride[1]),
cbw4 * 4, cbh4 * 4, "u-pal-pred");
hex_dump(((pixel *) f->cur.p.data[2]) + uv_dstoff,
PXSTRIDE(f->cur.p.stride[1]),
hex_dump(((pixel *) f->cur.data[2]) + uv_dstoff,
PXSTRIDE(f->cur.stride[1]),
cbw4 * 4, cbh4 * 4, "v-pal-pred");
}
}
@ -984,17 +990,17 @@ void bytefn(dav1d_recon_b_intra)(Dav1dTileContext *const t, const enum BlockSize
sm_uv_flag(&t->l, cby4);
const int uv_sb_has_tr =
((init_x + 16) >> ss_hor) < cw4 ? 1 : init_y ? 0 :
intra_edge_flags & (EDGE_I420_TOP_HAS_RIGHT >> (f->cur.p.p.layout - 1));
intra_edge_flags & (EDGE_I420_TOP_HAS_RIGHT >> (f->cur.p.layout - 1));
const int uv_sb_has_bl =
init_x ? 0 : ((init_y + 16) >> ss_ver) < ch4 ? 1 :
intra_edge_flags & (EDGE_I420_LEFT_HAS_BOTTOM >> (f->cur.p.p.layout - 1));
intra_edge_flags & (EDGE_I420_LEFT_HAS_BOTTOM >> (f->cur.p.layout - 1));
const int sub_ch4 = imin(ch4, (init_y + 16) >> ss_ver);
const int sub_cw4 = imin(cw4, (init_x + 16) >> ss_hor);
for (int pl = 0; pl < 2; pl++) {
for (y = init_y >> ss_ver, t->by += init_y; y < sub_ch4;
y += uv_t_dim->h, t->by += uv_t_dim->h << ss_ver)
{
pixel *dst = ((pixel *) f->cur.p.data[1 + pl]) +
pixel *dst = ((pixel *) f->cur.data[1 + pl]) +
4 * ((t->by >> ss_ver) * PXSTRIDE(stride) +
((t->bx + init_x) >> ss_hor));
for (x = init_x >> ss_hor, t->bx += init_x; x < sub_cw4;
@ -1127,57 +1133,56 @@ int bytefn(dav1d_recon_b_inter)(Dav1dTileContext *const t, const enum BlockSize
const Dav1dFrameContext *const f = t->f;
const Dav1dDSPContext *const dsp = f->dsp;
const int bx4 = t->bx & 31, by4 = t->by & 31;
const int ss_ver = f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
const int ss_hor = f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
const int cbx4 = bx4 >> ss_hor, cby4 = by4 >> ss_ver;
const uint8_t *const b_dim = dav1d_block_dimensions[bs];
const int bw4 = b_dim[0], bh4 = b_dim[1];
const int w4 = imin(bw4, f->bw - t->bx), h4 = imin(bh4, f->bh - t->by);
const int has_chroma = f->seq_hdr.layout != DAV1D_PIXEL_LAYOUT_I400 &&
const int has_chroma = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400 &&
(bw4 > ss_hor || t->bx & 1) &&
(bh4 > ss_ver || t->by & 1);
const int chr_layout_idx = f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I400 ? 0 :
DAV1D_PIXEL_LAYOUT_I444 - f->cur.p.p.layout;
const int chr_layout_idx = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I400 ? 0 :
DAV1D_PIXEL_LAYOUT_I444 - f->cur.p.layout;
int res;
// prediction
const int cbh4 = (bh4 + ss_ver) >> ss_ver, cbw4 = (bw4 + ss_hor) >> ss_hor;
pixel *dst = ((pixel *) f->cur.p.data[0]) +
4 * (t->by * PXSTRIDE(f->cur.p.stride[0]) + t->bx);
pixel *dst = ((pixel *) f->cur.data[0]) +
4 * (t->by * PXSTRIDE(f->cur.stride[0]) + t->bx);
const ptrdiff_t uvdstoff =
4 * ((t->bx >> ss_hor) + (t->by >> ss_ver) * PXSTRIDE(f->cur.p.stride[1]));
if (!(f->frame_hdr.frame_type & 1)) {
4 * ((t->bx >> ss_hor) + (t->by >> ss_ver) * PXSTRIDE(f->cur.stride[1]));
if (!(f->frame_hdr->frame_type & 1)) {
// intrabc
res = mc(t, dst, NULL, f->cur.p.stride[0],
bw4, bh4, t->bx, t->by, 0, b->mv[0], &f->cur, -1, FILTER_2D_BILINEAR);
assert(!f->frame_hdr->super_res.enabled);
res = mc(t, dst, NULL, f->cur.stride[0], bw4, bh4, t->bx, t->by, 0,
b->mv[0], &f->sr_cur, 0 /* unused */, FILTER_2D_BILINEAR);
if (res) return res;
if (has_chroma) for (int pl = 1; pl < 3; pl++) {
res = mc(t, ((pixel *)f->cur.p.data[pl]) + uvdstoff, NULL, f->cur.p.stride[1],
res = mc(t, ((pixel *)f->cur.data[pl]) + uvdstoff, NULL, f->cur.stride[1],
bw4 << (bw4 == ss_hor), bh4 << (bh4 == ss_ver),
t->bx & ~ss_hor, t->by & ~ss_ver,
pl, b->mv[0], &f->cur, -1, FILTER_2D_BILINEAR);
t->bx & ~ss_hor, t->by & ~ss_ver, pl, b->mv[0],
&f->sr_cur, 0 /* unused */, FILTER_2D_BILINEAR);
if (res) return res;
}
} else if (b->comp_type == COMP_INTER_NONE) {
const Dav1dThreadPicture *const refp = &f->refp[b->ref[0]];
const enum Filter2d filter_2d = b->filter2d;
if (imin(bw4, bh4) > 1 && !f->frame_hdr.force_integer_mv &&
((b->inter_mode == GLOBALMV &&
f->frame_hdr.gmv[b->ref[0]].type > WM_TYPE_TRANSLATION) ||
(b->motion_mode == MM_WARP &&
t->warpmv.type > WM_TYPE_TRANSLATION)))
if (imin(bw4, bh4) > 1 &&
((b->inter_mode == GLOBALMV && f->gmv_warp_allowed[b->ref[0]]) ||
(b->motion_mode == MM_WARP && t->warpmv.type > DAV1D_WM_TYPE_TRANSLATION)))
{
res = warp_affine(t, dst, NULL, f->cur.p.stride[0], b_dim, 0, refp,
res = warp_affine(t, dst, NULL, f->cur.stride[0], b_dim, 0, refp,
b->motion_mode == MM_WARP ? &t->warpmv :
&f->frame_hdr.gmv[b->ref[0]]);
&f->frame_hdr->gmv[b->ref[0]]);
if (res) return res;
} else {
res = mc(t, dst, NULL, f->cur.p.stride[0],
res = mc(t, dst, NULL, f->cur.stride[0],
bw4, bh4, t->bx, t->by, 0, b->mv[0], refp, b->ref[0], filter_2d);
if (res) return res;
if (b->motion_mode == MM_OBMC) {
res = obmc(t, dst, f->cur.p.stride[0], b_dim, 0, bx4, by4, w4, h4);
res = obmc(t, dst, f->cur.stride[0], b_dim, 0, bx4, by4, w4, h4);
if (res) return res;
}
}
@ -1197,7 +1202,7 @@ int bytefn(dav1d_recon_b_inter)(Dav1dTileContext *const t, const enum BlockSize
m = bytefn(dav1d_prepare_intra_edges)(t->bx, t->bx > ts->tiling.col_start,
t->by, t->by > ts->tiling.row_start,
ts->tiling.col_end, ts->tiling.row_end,
0, dst, f->cur.p.stride[0], top_sb_edge,
0, dst, f->cur.stride[0], top_sb_edge,
m, &angle, bw4, bh4, tl_edge);
dsp->ipred.intra_pred[m](tmp, 4 * bw4 * sizeof(pixel),
tl_edge, bw4 * 4, bh4 * 4, 0, 0, 0);
@ -1205,7 +1210,7 @@ int bytefn(dav1d_recon_b_inter)(Dav1dTileContext *const t, const enum BlockSize
b->interintra_type == INTER_INTRA_BLEND ?
dav1d_ii_masks[bs][0][b->interintra_mode] :
dav1d_wedge_masks[bs][0][0][b->wedge_idx];
dsp->mc.blend(dst, f->cur.p.stride[0], tmp,
dsp->mc.blend(dst, f->cur.stride[0], tmp,
bw4 * 4, bh4 * 4, ii_mask);
}
@ -1229,8 +1234,8 @@ int bytefn(dav1d_recon_b_inter)(Dav1dTileContext *const t, const enum BlockSize
int h_off = 0, v_off = 0;
if (bw4 == 1 && bh4 == ss_ver) {
for (int pl = 0; pl < 2; pl++) {
res = mc(t, ((pixel *) f->cur.p.data[1 + pl]) + uvdstoff,
NULL, f->cur.p.stride[1],
res = mc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff,
NULL, f->cur.stride[1],
bw4, bh4, t->bx - 1, t->by - 1, 1 + pl,
r[-(f->b4_stride + 1)].mv[0],
&f->refp[r[-(f->b4_stride + 1)].ref[0] - 1],
@ -1239,15 +1244,15 @@ int bytefn(dav1d_recon_b_inter)(Dav1dTileContext *const t, const enum BlockSize
f->frame_thread.b[((t->by - 1) * f->b4_stride) + t->bx - 1].filter2d);
if (res) return res;
}
v_off = 2 * PXSTRIDE(f->cur.p.stride[1]);
v_off = 2 * PXSTRIDE(f->cur.stride[1]);
h_off = 2;
}
if (bw4 == 1) {
const enum Filter2d left_filter_2d =
dav1d_filter_2d[t->l.filter[1][by4]][t->l.filter[0][by4]];
for (int pl = 0; pl < 2; pl++) {
res = mc(t, ((pixel *) f->cur.p.data[1 + pl]) + uvdstoff + v_off, NULL,
f->cur.p.stride[1], bw4, bh4, t->bx - 1,
res = mc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff + v_off, NULL,
f->cur.stride[1], bw4, bh4, t->bx - 1,
t->by, 1 + pl, r[-1].mv[0], &f->refp[r[-1].ref[0] - 1],
r[-1].ref[0] - 1,
f->frame_thread.pass != 2 ? left_filter_2d :
@ -1260,8 +1265,8 @@ int bytefn(dav1d_recon_b_inter)(Dav1dTileContext *const t, const enum BlockSize
const enum Filter2d top_filter_2d =
dav1d_filter_2d[t->a->filter[1][bx4]][t->a->filter[0][bx4]];
for (int pl = 0; pl < 2; pl++) {
res = mc(t, ((pixel *) f->cur.p.data[1 + pl]) + uvdstoff + h_off, NULL,
f->cur.p.stride[1], bw4, bh4, t->bx, t->by - 1,
res = mc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff + h_off, NULL,
f->cur.stride[1], bw4, bh4, t->bx, t->by - 1,
1 + pl, r[-f->b4_stride].mv[0],
&f->refp[r[-f->b4_stride].ref[0] - 1],
r[-f->b4_stride].ref[0] - 1,
@ -1269,39 +1274,37 @@ int bytefn(dav1d_recon_b_inter)(Dav1dTileContext *const t, const enum BlockSize
f->frame_thread.b[((t->by - 1) * f->b4_stride) + t->bx].filter2d);
if (res) return res;
}
v_off = 2 * PXSTRIDE(f->cur.p.stride[1]);
v_off = 2 * PXSTRIDE(f->cur.stride[1]);
}
for (int pl = 0; pl < 2; pl++) {
res = mc(t, ((pixel *) f->cur.p.data[1 + pl]) + uvdstoff + h_off + v_off, NULL, f->cur.p.stride[1],
res = mc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff + h_off + v_off, NULL, f->cur.stride[1],
bw4, bh4, t->bx, t->by, 1 + pl, b->mv[0],
refp, b->ref[0], filter_2d);
if (res) return res;
}
} else {
if (imin(cbw4, cbh4) > 1 && !f->frame_hdr.force_integer_mv &&
((b->inter_mode == GLOBALMV &&
f->frame_hdr.gmv[b->ref[0]].type > WM_TYPE_TRANSLATION) ||
(b->motion_mode == MM_WARP &&
t->warpmv.type > WM_TYPE_TRANSLATION)))
if (imin(cbw4, cbh4) > 1 &&
((b->inter_mode == GLOBALMV && f->gmv_warp_allowed[b->ref[0]]) ||
(b->motion_mode == MM_WARP && t->warpmv.type > DAV1D_WM_TYPE_TRANSLATION)))
{
for (int pl = 0; pl < 2; pl++) {
res = warp_affine(t, ((pixel *) f->cur.p.data[1 + pl]) + uvdstoff, NULL,
f->cur.p.stride[1], b_dim, 1 + pl, refp,
res = warp_affine(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff, NULL,
f->cur.stride[1], b_dim, 1 + pl, refp,
b->motion_mode == MM_WARP ? &t->warpmv :
&f->frame_hdr.gmv[b->ref[0]]);
&f->frame_hdr->gmv[b->ref[0]]);
if (res) return res;
}
} else {
for (int pl = 0; pl < 2; pl++) {
res = mc(t, ((pixel *) f->cur.p.data[1 + pl]) + uvdstoff,
NULL, f->cur.p.stride[1],
res = mc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff,
NULL, f->cur.stride[1],
bw4 << (bw4 == ss_hor), bh4 << (bh4 == ss_ver),
t->bx & ~ss_hor, t->by & ~ss_ver,
1 + pl, b->mv[0], refp, b->ref[0], filter_2d);
if (res) return res;
if (b->motion_mode == MM_OBMC) {
res = obmc(t, ((pixel *) f->cur.p.data[1 + pl]) + uvdstoff,
f->cur.p.stride[1], b_dim, 1 + pl, bx4, by4, w4, h4);
res = obmc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff,
f->cur.stride[1], b_dim, 1 + pl, bx4, by4, w4, h4);
if (res) return res;
}
}
@ -1317,12 +1320,13 @@ int bytefn(dav1d_recon_b_inter)(Dav1dTileContext *const t, const enum BlockSize
for (int pl = 0; pl < 2; pl++) {
pixel *const tmp = t->scratch.interintra;
pixel tl_edge_px[65], *const tl_edge = &tl_edge_px[32];
ALIGN_STK_32(pixel, tl_edge_px, 65,);
pixel *const tl_edge = &tl_edge_px[32];
enum IntraPredMode m =
b->interintra_mode == II_SMOOTH_PRED ?
SMOOTH_PRED : b->interintra_mode;
int angle = 0;
pixel *const uvdst = ((pixel *) f->cur.p.data[1 + pl]) + uvdstoff;
pixel *const uvdst = ((pixel *) f->cur.data[1 + pl]) + uvdstoff;
const pixel *top_sb_edge = NULL;
if (!(t->by & (f->sb_step - 1))) {
top_sb_edge = f->ipred_edge[pl + 1];
@ -1337,12 +1341,12 @@ int bytefn(dav1d_recon_b_inter)(Dav1dTileContext *const t, const enum BlockSize
(ts->tiling.row_start >> ss_ver),
ts->tiling.col_end >> ss_hor,
ts->tiling.row_end >> ss_ver,
0, uvdst, f->cur.p.stride[1],
0, uvdst, f->cur.stride[1],
top_sb_edge, m,
&angle, cbw4, cbh4, tl_edge);
dsp->ipred.intra_pred[m](tmp, cbw4 * 4 * sizeof(pixel),
tl_edge, cbw4 * 4, cbh4 * 4, 0, 0, 0);
dsp->mc.blend(uvdst, f->cur.p.stride[1], tmp,
dsp->mc.blend(uvdst, f->cur.stride[1], tmp,
cbw4 * 4, cbh4 * 4, ii_mask);
}
}
@ -1361,37 +1365,37 @@ int bytefn(dav1d_recon_b_inter)(Dav1dTileContext *const t, const enum BlockSize
for (int i = 0; i < 2; i++) {
const Dav1dThreadPicture *const refp = &f->refp[b->ref[i]];
if (b->inter_mode == GLOBALMV_GLOBALMV && !f->frame_hdr.force_integer_mv &&
f->frame_hdr.gmv[b->ref[i]].type > WM_TYPE_TRANSLATION)
{
if (b->inter_mode == GLOBALMV_GLOBALMV && f->gmv_warp_allowed[b->ref[i]]) {
res = warp_affine(t, NULL, tmp[i], bw4 * 4, b_dim, 0, refp,
&f->frame_hdr.gmv[b->ref[i]]);
&f->frame_hdr->gmv[b->ref[i]]);
if (res) return res;
} else {
res = mc(t, NULL, tmp[i], 0, bw4, bh4, t->bx, t->by, 0,
b->mv[i], refp, b->ref[i], filter_2d);
if (DEBUG_BLOCK_INFO)
coef_dump(tmp[i], bw4*4, bh4*4, 3, "med");
if (res) return res;
}
}
switch (b->comp_type) {
case COMP_INTER_AVG:
dsp->mc.avg(dst, f->cur.p.stride[0], tmp[0], tmp[1],
dsp->mc.avg(dst, f->cur.stride[0], tmp[0], tmp[1],
bw4 * 4, bh4 * 4);
break;
case COMP_INTER_WEIGHTED_AVG:
jnt_weight = f->jnt_weights[b->ref[0]][b->ref[1]];
dsp->mc.w_avg(dst, f->cur.p.stride[0], tmp[0], tmp[1],
dsp->mc.w_avg(dst, f->cur.stride[0], tmp[0], tmp[1],
bw4 * 4, bh4 * 4, jnt_weight);
break;
case COMP_INTER_SEG:
dsp->mc.w_mask[chr_layout_idx](dst, f->cur.p.stride[0],
dsp->mc.w_mask[chr_layout_idx](dst, f->cur.stride[0],
tmp[b->mask_sign], tmp[!b->mask_sign],
bw4 * 4, bh4 * 4, seg_mask, b->mask_sign);
mask = seg_mask;
break;
case COMP_INTER_WEDGE:
mask = dav1d_wedge_masks[bs][0][0][b->wedge_idx];
dsp->mc.mask(dst, f->cur.p.stride[0],
dsp->mc.mask(dst, f->cur.stride[0],
tmp[b->mask_sign], tmp[!b->mask_sign],
bw4 * 4, bh4 * 4, mask);
if (has_chroma)
@ -1404,11 +1408,10 @@ int bytefn(dav1d_recon_b_inter)(Dav1dTileContext *const t, const enum BlockSize
for (int i = 0; i < 2; i++) {
const Dav1dThreadPicture *const refp = &f->refp[b->ref[i]];
if (b->inter_mode == GLOBALMV_GLOBALMV &&
imin(cbw4, cbh4) > 1 && !f->frame_hdr.force_integer_mv &&
f->frame_hdr.gmv[b->ref[i]].type > WM_TYPE_TRANSLATION)
imin(cbw4, cbh4) > 1 && f->gmv_warp_allowed[b->ref[i]])
{
res = warp_affine(t, NULL, tmp[i], bw4 * 2, b_dim, 1 + pl,
refp, &f->frame_hdr.gmv[b->ref[i]]);
refp, &f->frame_hdr->gmv[b->ref[i]]);
if (res) return res;
} else {
res = mc(t, NULL, tmp[i], 0, bw4, bh4, t->bx, t->by,
@ -1416,19 +1419,19 @@ int bytefn(dav1d_recon_b_inter)(Dav1dTileContext *const t, const enum BlockSize
if (res) return res;
}
}
pixel *const uvdst = ((pixel *) f->cur.p.data[1 + pl]) + uvdstoff;
pixel *const uvdst = ((pixel *) f->cur.data[1 + pl]) + uvdstoff;
switch (b->comp_type) {
case COMP_INTER_AVG:
dsp->mc.avg(uvdst, f->cur.p.stride[1], tmp[0], tmp[1],
dsp->mc.avg(uvdst, f->cur.stride[1], tmp[0], tmp[1],
bw4 * 4 >> ss_hor, bh4 * 4 >> ss_ver);
break;
case COMP_INTER_WEIGHTED_AVG:
dsp->mc.w_avg(uvdst, f->cur.p.stride[1], tmp[0], tmp[1],
dsp->mc.w_avg(uvdst, f->cur.stride[1], tmp[0], tmp[1],
bw4 * 4 >> ss_hor, bh4 * 4 >> ss_ver, jnt_weight);
break;
case COMP_INTER_WEDGE:
case COMP_INTER_SEG:
dsp->mc.mask(uvdst, f->cur.p.stride[1],
dsp->mc.mask(uvdst, f->cur.stride[1],
tmp[b->mask_sign], tmp[!b->mask_sign],
bw4 * 4 >> ss_hor, bh4 * 4 >> ss_ver, mask);
break;
@ -1437,11 +1440,11 @@ int bytefn(dav1d_recon_b_inter)(Dav1dTileContext *const t, const enum BlockSize
}
if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
hex_dump(dst, f->cur.p.stride[0], b_dim[0] * 4, b_dim[1] * 4, "y-pred");
hex_dump(dst, f->cur.stride[0], b_dim[0] * 4, b_dim[1] * 4, "y-pred");
if (has_chroma) {
hex_dump(&((pixel *) f->cur.p.data[1])[uvdstoff], f->cur.p.stride[1],
hex_dump(&((pixel *) f->cur.data[1])[uvdstoff], f->cur.stride[1],
cbw4 * 4, cbh4 * 4, "u-pred");
hex_dump(&((pixel *) f->cur.p.data[2])[uvdstoff], f->cur.p.stride[1],
hex_dump(&((pixel *) f->cur.data[2])[uvdstoff], f->cur.stride[1],
cbw4 * 4, cbh4 * 4, "v-pred");
}
}
@ -1473,7 +1476,7 @@ int bytefn(dav1d_recon_b_inter)(Dav1dTileContext *const t, const enum BlockSize
for (int init_x = 0; init_x < bw4; init_x += 16) {
// coefficient coding & inverse transforms
int y_off = !!init_y, y;
dst += PXSTRIDE(f->cur.p.stride[0]) * 4 * init_y;
dst += PXSTRIDE(f->cur.stride[0]) * 4 * init_y;
for (y = init_y, t->by += init_y; y < imin(h4, init_y + 16);
y += ytx->h, y_off++)
{
@ -1485,17 +1488,17 @@ int bytefn(dav1d_recon_b_inter)(Dav1dTileContext *const t, const enum BlockSize
x_off, y_off, &dst[x * 4]);
t->bx += ytx->w;
}
dst += PXSTRIDE(f->cur.p.stride[0]) * 4 * ytx->h;
dst += PXSTRIDE(f->cur.stride[0]) * 4 * ytx->h;
t->bx -= x;
t->by += ytx->h;
}
dst -= PXSTRIDE(f->cur.p.stride[0]) * 4 * y;
dst -= PXSTRIDE(f->cur.stride[0]) * 4 * y;
t->by -= y;
// chroma coefs and inverse transform
if (has_chroma) for (int pl = 0; pl < 2; pl++) {
pixel *uvdst = ((pixel *) f->cur.p.data[1 + pl]) + uvdstoff +
(PXSTRIDE(f->cur.p.stride[1]) * init_y * 4 >> ss_ver);
pixel *uvdst = ((pixel *) f->cur.data[1 + pl]) + uvdstoff +
(PXSTRIDE(f->cur.stride[1]) * init_y * 4 >> ss_ver);
for (y = init_y >> ss_ver, t->by += init_y;
y < imin(ch4, (init_y + 16) >> ss_ver); y += uvtx->h)
{
@ -1544,15 +1547,15 @@ int bytefn(dav1d_recon_b_inter)(Dav1dTileContext *const t, const enum BlockSize
coef_dump(cf, uvtx->h * 4, uvtx->w * 4, 3, "dq");
dsp->itx.itxfm_add[b->uvtx]
[txtp](&uvdst[4 * x],
f->cur.p.stride[1],
f->cur.stride[1],
cf, eob);
if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
hex_dump(&uvdst[4 * x], f->cur.p.stride[1],
hex_dump(&uvdst[4 * x], f->cur.stride[1],
uvtx->w * 4, uvtx->h * 4, "recon");
}
t->bx += uvtx->w << ss_hor;
}
uvdst += PXSTRIDE(f->cur.p.stride[1]) * 4 * uvtx->h;
uvdst += PXSTRIDE(f->cur.stride[1]) * 4 * uvtx->h;
t->bx -= x << ss_hor;
t->by += uvtx->h << ss_ver;
}
@ -1564,29 +1567,29 @@ int bytefn(dav1d_recon_b_inter)(Dav1dTileContext *const t, const enum BlockSize
}
void bytefn(dav1d_filter_sbrow)(Dav1dFrameContext *const f, const int sby) {
const int ss_ver = f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
const int sbsz = f->sb_step, sbh = f->sbh;
if (f->frame_hdr.loopfilter.level_y[0] ||
f->frame_hdr.loopfilter.level_y[1])
if (f->frame_hdr->loopfilter.level_y[0] ||
f->frame_hdr->loopfilter.level_y[1])
{
int start_of_tile_row = 0;
if (f->frame_hdr.tiling.row_start_sb[f->lf.tile_row] == sby)
if (f->frame_hdr->tiling.row_start_sb[f->lf.tile_row] == sby)
start_of_tile_row = f->lf.tile_row++;
bytefn(dav1d_loopfilter_sbrow)(f, f->lf.p, f->lf.mask_ptr, sby,
start_of_tile_row);
}
if (f->seq_hdr.restoration) {
if (f->seq_hdr->restoration) {
// Store loop filtered pixels required by loop restoration
bytefn(dav1d_lr_copy_lpf)(f, f->lf.p, sby);
}
if (f->seq_hdr.cdef) {
if (f->seq_hdr->cdef) {
if (sby) {
const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
pixel *p_up[3] = {
f->lf.p[0] - 8 * PXSTRIDE(f->cur.p.stride[0]),
f->lf.p[1] - (8 * PXSTRIDE(f->cur.p.stride[1]) >> ss_ver),
f->lf.p[2] - (8 * PXSTRIDE(f->cur.p.stride[1]) >> ss_ver),
f->lf.p[0] - 8 * PXSTRIDE(f->cur.stride[0]),
f->lf.p[1] - (8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver),
f->lf.p[2] - (8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver),
};
bytefn(dav1d_cdef_brow)(f, p_up, f->lf.prev_mask_ptr,
sby * sbsz - 2, sby * sbsz);
@ -1595,15 +1598,39 @@ void bytefn(dav1d_filter_sbrow)(Dav1dFrameContext *const f, const int sby) {
bytefn(dav1d_cdef_brow)(f, f->lf.p, f->lf.mask_ptr, sby * sbsz,
imin(sby * sbsz + n_blks, f->bh));
}
if (f->seq_hdr.restoration) {
bytefn(dav1d_lr_sbrow)(f, f->lf.p, sby);
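// Super-res: horizontally upscale the just-filtered superblock row from the
// coded resolution (f->cur) into the full-width output picture (f->sr_cur).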
if (f->frame_hdr->super_res.enabled) {
const int has_chroma = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400;
for (int pl = 0; pl < 1 + 2 * has_chroma; pl++) {
const int ss_ver = pl && f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
const int h_start = 8 * !!sby >> ss_ver;
const ptrdiff_t dst_stride = f->sr_cur.p.stride[!!pl];
pixel *dst = f->lf.sr_p[pl] - h_start * PXSTRIDE(dst_stride);
const ptrdiff_t src_stride = f->cur.stride[!!pl];
const pixel *src = f->lf.p[pl] - h_start * PXSTRIDE(src_stride);
const int h_end = 4 * (sbsz - 2 * (sby + 1 < sbh)) >> ss_ver;
const int ss_hor = pl && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
const int dst_w = (f->sr_cur.p.p.w + ss_hor) >> ss_hor;
const int src_w = (4 * f->bw + ss_hor) >> ss_hor;
const int img_h = (f->cur.p.h - sbsz * 4 * sby + ss_ver) >> ss_ver;
f->dsp->mc.resize(dst, dst_stride, src, src_stride, dst_w, src_w,
imin(img_h, h_end) + h_start, f->resize_step[!!pl],
f->resize_start[!!pl]);
}
}
if (f->seq_hdr->restoration) {
bytefn(dav1d_lr_sbrow)(f, f->lf.sr_p, sby);
}
f->lf.p[0] += sbsz * 4 * PXSTRIDE(f->cur.p.stride[0]);
f->lf.p[1] += sbsz * 4 * PXSTRIDE(f->cur.p.stride[1]) >> ss_ver;
f->lf.p[2] += sbsz * 4 * PXSTRIDE(f->cur.p.stride[1]) >> ss_ver;
const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
f->lf.p[0] += sbsz * 4 * PXSTRIDE(f->cur.stride[0]);
f->lf.p[1] += sbsz * 4 * PXSTRIDE(f->cur.stride[1]) >> ss_ver;
f->lf.p[2] += sbsz * 4 * PXSTRIDE(f->cur.stride[1]) >> ss_ver;
f->lf.sr_p[0] += sbsz * 4 * PXSTRIDE(f->sr_cur.p.stride[0]);
f->lf.sr_p[1] += sbsz * 4 * PXSTRIDE(f->sr_cur.p.stride[1]) >> ss_ver;
f->lf.sr_p[2] += sbsz * 4 * PXSTRIDE(f->sr_cur.p.stride[1]) >> ss_ver;
f->lf.prev_mask_ptr = f->lf.mask_ptr;
if ((sby & 1) || f->seq_hdr.sb128) {
if ((sby & 1) || f->seq_hdr->sb128) {
f->lf.mask_ptr += f->sb128w;
}
}
@ -1616,20 +1643,20 @@ void bytefn(dav1d_backup_ipred_edge)(Dav1dTileContext *const t) {
const int x_off = ts->tiling.col_start;
const pixel *const y =
((const pixel *) f->cur.p.data[0]) + x_off * 4 +
((t->by + f->sb_step) * 4 - 1) * PXSTRIDE(f->cur.p.stride[0]);
((const pixel *) f->cur.data[0]) + x_off * 4 +
((t->by + f->sb_step) * 4 - 1) * PXSTRIDE(f->cur.stride[0]);
pixel_copy(&f->ipred_edge[0][sby_off + x_off * 4], y,
4 * (ts->tiling.col_end - x_off));
if (f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I400) {
const int ss_ver = f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
const int ss_hor = f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
if (f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400) {
const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
const ptrdiff_t uv_off = (x_off * 4 >> ss_hor) +
(((t->by + f->sb_step) * 4 >> ss_ver) - 1) * PXSTRIDE(f->cur.p.stride[1]);
(((t->by + f->sb_step) * 4 >> ss_ver) - 1) * PXSTRIDE(f->cur.stride[1]);
for (int pl = 1; pl <= 2; pl++)
pixel_copy(&f->ipred_edge[pl][sby_off + (x_off * 4 >> ss_hor)],
&((const pixel *) f->cur.p.data[pl])[uv_off],
&((const pixel *) f->cur.data[pl])[uv_off],
4 * (ts->tiling.col_end - x_off) >> ss_hor);
}
}


@ -84,3 +84,7 @@ void dav1d_ref_dec(Dav1dRef **const pref) {
}
*pref = NULL;
}
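// Returns non-zero when the caller holds the only reference to the buffer,
// i.e. the underlying data may safely be modified in place.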
int dav1d_ref_is_writable(Dav1dRef *const ref) {
return atomic_load(&ref->ref_cnt) == 1;
}


@ -48,4 +48,6 @@ Dav1dRef *dav1d_ref_wrap(const uint8_t *ptr,
void dav1d_ref_inc(Dav1dRef *ref);
void dav1d_ref_dec(Dav1dRef **ref);
int dav1d_ref_is_writable(Dav1dRef *ref);
#endif /* __DAV1D_SRC_REF_H__ */


@ -301,15 +301,15 @@ static INLINE int is_global_mv_block(const MB_MODE_INFO *const mbmi,
const BLOCK_SIZE bsize = mbmi->sb_type;
const int block_size_allowed =
AOMMIN(block_size_wide[bsize], block_size_high[bsize]) >= 8;
return (mode == GLOBALMV || mode == GLOBAL_GLOBALMV) && type > TRANSLATION &&
block_size_allowed;
return block_size_allowed && type > TRANSLATION &&
(mode == GLOBALMV || mode == GLOBAL_GLOBALMV);
}
typedef struct {
TransformationType wmtype;
int32_t wmmat[6];
int16_t alpha, beta, gamma, delta;
} WarpedMotionParams;
} Dav1dWarpedMotionParams;
#define REF_FRAMES_LOG2 3
#define REF_FRAMES (1 << REF_FRAMES_LOG2)
@ -381,7 +381,7 @@ typedef struct AV1Common {
// External BufferPool passed from outside.
BufferPool buffer_pool;
WarpedMotionParams global_motion[TOTAL_REFS_PER_FRAME];
Dav1dWarpedMotionParams global_motion[TOTAL_REFS_PER_FRAME];
struct {
BLOCK_SIZE sb_size;
int enable_order_hint;
@ -501,7 +501,7 @@ static INLINE int block_center_y(int mi_row, BLOCK_SIZE bs) {
// allow_hp is zero, the bottom bit will always be zero. If CONFIG_AMVR and
// is_integer is true, the bottom three bits will be zero (so the motion vector
// represents an integer)
static INLINE int_mv gm_get_motion_vector(const WarpedMotionParams *gm,
static INLINE int_mv gm_get_motion_vector(const Dav1dWarpedMotionParams *gm,
int allow_hp, BLOCK_SIZE bsize,
int mi_col, int mi_row,
int is_integer) {
@ -836,7 +836,7 @@ static void add_ref_mv_candidate(
const MB_MODE_INFO *const candidate, const MV_REFERENCE_FRAME rf[2],
uint8_t *refmv_count, uint8_t *ref_match_count, uint8_t *newmv_count,
CANDIDATE_MV *ref_mv_stack, int_mv *gm_mv_candidates,
const WarpedMotionParams *gm_params, int col, int weight) {
const Dav1dWarpedMotionParams *gm_params, int col, int weight) {
if (!is_inter_block(candidate)) return; // for intrabc
int index = 0, ref;
assert(weight % 2 == 0);
@ -1989,7 +1989,7 @@ int av1_init_ref_mv_common(AV1_COMMON *cm,
const unsigned cur_poc,
const unsigned ref_poc[7],
const unsigned ref_ref_poc[7][7],
const WarpedMotionParams gmv[7],
const Dav1dWarpedMotionParams gmv[7],
const int allow_hp,
const int force_int_mv,
const int allow_ref_frame_mvs,
@ -2003,7 +2003,7 @@ int av1_init_ref_mv_common(AV1_COMMON *cm,
const unsigned cur_poc,
const unsigned ref_poc[7],
const unsigned ref_ref_poc[7][7],
const WarpedMotionParams gmv[7],
const Dav1dWarpedMotionParams gmv[7],
const int allow_hp,
const int force_int_mv,
const int allow_ref_frame_mvs,


@ -45,7 +45,7 @@ int av1_init_ref_mv_common(AV1_COMMON *cm,
unsigned cur_poc,
const unsigned ref_poc[7],
const unsigned ref_ref_poc[7][7],
const WarpedMotionParams gmv[7],
const Dav1dWarpedMotionParams gmv[7],
int allow_hp, int force_int_mv,
int allow_ref_frame_mvs, int order_hint);
@ -156,7 +156,7 @@ static inline void splat_intraref(refmvs *r, const ptrdiff_t stride,
} while (--bh4);
}
static inline void fix_mv_precision(const Av1FrameHeader *const hdr,
static inline void fix_mv_precision(const Dav1dFrameHeader *const hdr,
mv *const mv)
{
if (hdr->force_integer_mv) {


@ -418,35 +418,35 @@ const uint8_t /* enum TxClass */ dav1d_tx_type_class[N_TX_TYPES_PLUS_LL] = {
[WHT_WHT] = TX_CLASS_2D,
};
const uint8_t /* enum Filter2d */ dav1d_filter_2d[N_FILTERS][N_FILTERS] = {
[FILTER_8TAP_REGULAR] = {
[FILTER_8TAP_REGULAR] = FILTER_2D_8TAP_REGULAR,
[FILTER_8TAP_SHARP] = FILTER_2D_8TAP_REGULAR_SHARP,
[FILTER_8TAP_SMOOTH] = FILTER_2D_8TAP_REGULAR_SMOOTH,
}, [FILTER_8TAP_SHARP] = {
[FILTER_8TAP_REGULAR] = FILTER_2D_8TAP_SHARP_REGULAR,
[FILTER_8TAP_SHARP] = FILTER_2D_8TAP_SHARP,
[FILTER_8TAP_SMOOTH] = FILTER_2D_8TAP_SHARP_SMOOTH,
}, [FILTER_8TAP_SMOOTH] = {
[FILTER_8TAP_REGULAR] = FILTER_2D_8TAP_SMOOTH_REGULAR,
[FILTER_8TAP_SHARP] = FILTER_2D_8TAP_SMOOTH_SHARP,
[FILTER_8TAP_SMOOTH] = FILTER_2D_8TAP_SMOOTH,
}, [FILTER_BILINEAR] = {
[FILTER_BILINEAR] = FILTER_2D_BILINEAR,
const uint8_t /* enum Filter2d */ dav1d_filter_2d[DAV1D_N_FILTERS][DAV1D_N_FILTERS] = {
[DAV1D_FILTER_8TAP_REGULAR] = {
[DAV1D_FILTER_8TAP_REGULAR] = FILTER_2D_8TAP_REGULAR,
[DAV1D_FILTER_8TAP_SHARP] = FILTER_2D_8TAP_REGULAR_SHARP,
[DAV1D_FILTER_8TAP_SMOOTH] = FILTER_2D_8TAP_REGULAR_SMOOTH,
}, [DAV1D_FILTER_8TAP_SHARP] = {
[DAV1D_FILTER_8TAP_REGULAR] = FILTER_2D_8TAP_SHARP_REGULAR,
[DAV1D_FILTER_8TAP_SHARP] = FILTER_2D_8TAP_SHARP,
[DAV1D_FILTER_8TAP_SMOOTH] = FILTER_2D_8TAP_SHARP_SMOOTH,
}, [DAV1D_FILTER_8TAP_SMOOTH] = {
[DAV1D_FILTER_8TAP_REGULAR] = FILTER_2D_8TAP_SMOOTH_REGULAR,
[DAV1D_FILTER_8TAP_SHARP] = FILTER_2D_8TAP_SMOOTH_SHARP,
[DAV1D_FILTER_8TAP_SMOOTH] = FILTER_2D_8TAP_SMOOTH,
}, [DAV1D_FILTER_BILINEAR] = {
[DAV1D_FILTER_BILINEAR] = FILTER_2D_BILINEAR,
}
};
const uint8_t /* enum FilterMode */ dav1d_filter_dir[N_2D_FILTERS][2] = {
[FILTER_2D_8TAP_REGULAR] = { FILTER_8TAP_REGULAR, FILTER_8TAP_REGULAR },
[FILTER_2D_8TAP_REGULAR_SMOOTH] = { FILTER_8TAP_SMOOTH, FILTER_8TAP_REGULAR },
[FILTER_2D_8TAP_REGULAR_SHARP] = { FILTER_8TAP_SHARP, FILTER_8TAP_REGULAR },
[FILTER_2D_8TAP_SHARP_REGULAR] = { FILTER_8TAP_REGULAR, FILTER_8TAP_SHARP },
[FILTER_2D_8TAP_SHARP_SMOOTH] = { FILTER_8TAP_SMOOTH, FILTER_8TAP_SHARP },
[FILTER_2D_8TAP_SHARP] = { FILTER_8TAP_SHARP, FILTER_8TAP_SHARP },
[FILTER_2D_8TAP_SMOOTH_REGULAR] = { FILTER_8TAP_REGULAR, FILTER_8TAP_SMOOTH },
[FILTER_2D_8TAP_SMOOTH] = { FILTER_8TAP_SMOOTH, FILTER_8TAP_SMOOTH },
[FILTER_2D_8TAP_SMOOTH_SHARP] = { FILTER_8TAP_SHARP, FILTER_8TAP_SMOOTH },
[FILTER_2D_BILINEAR] = { FILTER_BILINEAR, FILTER_BILINEAR },
const uint8_t /* enum Dav1dFilterMode */ dav1d_filter_dir[N_2D_FILTERS][2] = {
[FILTER_2D_8TAP_REGULAR] = { DAV1D_FILTER_8TAP_REGULAR, DAV1D_FILTER_8TAP_REGULAR },
[FILTER_2D_8TAP_REGULAR_SMOOTH] = { DAV1D_FILTER_8TAP_SMOOTH, DAV1D_FILTER_8TAP_REGULAR },
[FILTER_2D_8TAP_REGULAR_SHARP] = { DAV1D_FILTER_8TAP_SHARP, DAV1D_FILTER_8TAP_REGULAR },
[FILTER_2D_8TAP_SHARP_REGULAR] = { DAV1D_FILTER_8TAP_REGULAR, DAV1D_FILTER_8TAP_SHARP },
[FILTER_2D_8TAP_SHARP_SMOOTH] = { DAV1D_FILTER_8TAP_SMOOTH, DAV1D_FILTER_8TAP_SHARP },
[FILTER_2D_8TAP_SHARP] = { DAV1D_FILTER_8TAP_SHARP, DAV1D_FILTER_8TAP_SHARP },
[FILTER_2D_8TAP_SMOOTH_REGULAR] = { DAV1D_FILTER_8TAP_REGULAR, DAV1D_FILTER_8TAP_SMOOTH },
[FILTER_2D_8TAP_SMOOTH] = { DAV1D_FILTER_8TAP_SMOOTH, DAV1D_FILTER_8TAP_SMOOTH },
[FILTER_2D_8TAP_SMOOTH_SHARP] = { DAV1D_FILTER_8TAP_SHARP, DAV1D_FILTER_8TAP_SMOOTH },
[FILTER_2D_BILINEAR] = { DAV1D_FILTER_BILINEAR, DAV1D_FILTER_BILINEAR },
};
const uint8_t dav1d_filter_mode_to_y_mode[5] = {
@ -481,8 +481,8 @@ const uint8_t dav1d_wedge_ctx_lut[N_BS_SIZES] = {
[BS_8x8] = 0,
};
const WarpedMotionParams dav1d_default_wm_params = {
.type = WM_TYPE_IDENTITY,
const Dav1dWarpedMotionParams dav1d_default_wm_params = {
.type = DAV1D_WM_TYPE_IDENTITY,
.matrix = {
0, 0, 1 << 16,
0, 0, 1 << 16,
@ -524,7 +524,7 @@ const int dav1d_sgr_x_by_xplus1[256] = {
};
const int8_t ALIGN(dav1d_mc_subpel_filters[5][15][8], 8) = {
[FILTER_8TAP_REGULAR] = {
[DAV1D_FILTER_8TAP_REGULAR] = {
{ 0, 1, -3, 63, 4, -1, 0, 0 },
{ 0, 1, -5, 61, 9, -2, 0, 0 },
{ 0, 1, -6, 58, 14, -4, 1, 0 },
@ -540,7 +540,7 @@ const int8_t ALIGN(dav1d_mc_subpel_filters[5][15][8], 8) = {
{ 0, 1, -4, 14, 58, -6, 1, 0 },
{ 0, 0, -2, 9, 61, -5, 1, 0 },
{ 0, 0, -1, 4, 63, -3, 1, 0 }
}, [FILTER_8TAP_SMOOTH] = {
}, [DAV1D_FILTER_8TAP_SMOOTH] = {
{ 0, 1, 14, 31, 17, 1, 0, 0 },
{ 0, 0, 13, 31, 18, 2, 0, 0 },
{ 0, 0, 11, 31, 20, 2, 0, 0 },
@ -556,7 +556,7 @@ const int8_t ALIGN(dav1d_mc_subpel_filters[5][15][8], 8) = {
{ 0, 0, 2, 20, 31, 11, 0, 0 },
{ 0, 0, 2, 18, 31, 13, 0, 0 },
{ 0, 0, 1, 17, 31, 14, 1, 0 }
}, [FILTER_8TAP_SHARP] = {
}, [DAV1D_FILTER_8TAP_SHARP] = {
{ -1, 1, -3, 63, 4, -1, 1, 0 },
{ -1, 3, -6, 62, 8, -3, 2, -1 },
{ -1, 4, -9, 60, 13, -5, 3, -1 },
@ -573,7 +573,7 @@ const int8_t ALIGN(dav1d_mc_subpel_filters[5][15][8], 8) = {
{ -1, 2, -3, 8, 62, -6, 3, -1 },
{ 0, 1, -1, 4, 63, -3, 1, -1 }
/* width <= 4 */
}, [3 + FILTER_8TAP_REGULAR] = {
}, [3 + DAV1D_FILTER_8TAP_REGULAR] = {
{ 0, 0, -2, 63, 4, -1, 0, 0 },
{ 0, 0, -4, 61, 9, -2, 0, 0 },
{ 0, 0, -5, 58, 14, -3, 0, 0 },
@ -589,7 +589,7 @@ const int8_t ALIGN(dav1d_mc_subpel_filters[5][15][8], 8) = {
{ 0, 0, -3, 14, 58, -5, 0, 0 },
{ 0, 0, -2, 9, 61, -4, 0, 0 },
{ 0, 0, -1, 4, 63, -2, 0, 0 }
}, [3 + FILTER_8TAP_SMOOTH] = {
}, [3 + DAV1D_FILTER_8TAP_SMOOTH] = {
{ 0, 0, 15, 31, 17, 1, 0, 0 },
{ 0, 0, 13, 31, 18, 2, 0, 0 },
{ 0, 0, 11, 31, 20, 2, 0, 0 },
@ -712,6 +712,41 @@ const int8_t ALIGN(dav1d_mc_warp_filter[193][8], 8) = {
{ 0, 0, 2, -1, 0, 0, 127, 0 }
};
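// 8-tap coefficients for the super-res resize pass, one row per subpel phase
// (64 phases).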
const int16_t dav1d_resize_filter[64][8] = {
{ 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 0, -1, 128, 2, -1, 0, 0 },
{ 0, 1, -3, 127, 4, -2, 1, 0 }, { 0, 1, -4, 127, 6, -3, 1, 0 },
{ 0, 2, -6, 126, 8, -3, 1, 0 }, { 0, 2, -7, 125, 11, -4, 1, 0 },
{ -1, 2, -8, 125, 13, -5, 2, 0 }, { -1, 3, -9, 124, 15, -6, 2, 0 },
{ -1, 3, -10, 123, 18, -6, 2, -1 }, { -1, 3, -11, 122, 20, -7, 3, -1 },
{ -1, 4, -12, 121, 22, -8, 3, -1 }, { -1, 4, -13, 120, 25, -9, 3, -1 },
{ -1, 4, -14, 118, 28, -9, 3, -1 }, { -1, 4, -15, 117, 30, -10, 4, -1 },
{ -1, 5, -16, 116, 32, -11, 4, -1 }, { -1, 5, -16, 114, 35, -12, 4, -1 },
{ -1, 5, -17, 112, 38, -12, 4, -1 }, { -1, 5, -18, 111, 40, -13, 5, -1 },
{ -1, 5, -18, 109, 43, -14, 5, -1 }, { -1, 6, -19, 107, 45, -14, 5, -1 },
{ -1, 6, -19, 105, 48, -15, 5, -1 }, { -1, 6, -19, 103, 51, -16, 5, -1 },
{ -1, 6, -20, 101, 53, -16, 6, -1 }, { -1, 6, -20, 99, 56, -17, 6, -1 },
{ -1, 6, -20, 97, 58, -17, 6, -1 }, { -1, 6, -20, 95, 61, -18, 6, -1 },
{ -2, 7, -20, 93, 64, -18, 6, -2 }, { -2, 7, -20, 91, 66, -19, 6, -1 },
{ -2, 7, -20, 88, 69, -19, 6, -1 }, { -2, 7, -20, 86, 71, -19, 6, -1 },
{ -2, 7, -20, 84, 74, -20, 7, -2 }, { -2, 7, -20, 81, 76, -20, 7, -1 },
{ -2, 7, -20, 79, 79, -20, 7, -2 }, { -1, 7, -20, 76, 81, -20, 7, -2 },
{ -2, 7, -20, 74, 84, -20, 7, -2 }, { -1, 6, -19, 71, 86, -20, 7, -2 },
{ -1, 6, -19, 69, 88, -20, 7, -2 }, { -1, 6, -19, 66, 91, -20, 7, -2 },
{ -2, 6, -18, 64, 93, -20, 7, -2 }, { -1, 6, -18, 61, 95, -20, 6, -1 },
{ -1, 6, -17, 58, 97, -20, 6, -1 }, { -1, 6, -17, 56, 99, -20, 6, -1 },
{ -1, 6, -16, 53, 101, -20, 6, -1 }, { -1, 5, -16, 51, 103, -19, 6, -1 },
{ -1, 5, -15, 48, 105, -19, 6, -1 }, { -1, 5, -14, 45, 107, -19, 6, -1 },
{ -1, 5, -14, 43, 109, -18, 5, -1 }, { -1, 5, -13, 40, 111, -18, 5, -1 },
{ -1, 4, -12, 38, 112, -17, 5, -1 }, { -1, 4, -12, 35, 114, -16, 5, -1 },
{ -1, 4, -11, 32, 116, -16, 5, -1 }, { -1, 4, -10, 30, 117, -15, 4, -1 },
{ -1, 3, -9, 28, 118, -14, 4, -1 }, { -1, 3, -9, 25, 120, -13, 4, -1 },
{ -1, 3, -8, 22, 121, -12, 4, -1 }, { -1, 3, -7, 20, 122, -11, 3, -1 },
{ -1, 2, -6, 18, 123, -10, 3, -1 }, { 0, 2, -6, 15, 124, -9, 3, -1 },
{ 0, 2, -5, 13, 125, -8, 2, -1 }, { 0, 1, -4, 11, 125, -7, 2, 0 },
{ 0, 1, -3, 8, 126, -6, 2, 0 }, { 0, 1, -3, 6, 127, -4, 1, 0 },
{ 0, 1, -2, 4, 127, -3, 1, 0 }, { 0, 0, -1, 2, 128, -1, 0, 0 },
};
const uint8_t dav1d_sm_weights[128] = {
// Unused, because we always offset by bs, which is at least 2.
0, 0,
@ -837,3 +872,194 @@ const uint8_t ALIGN(dav1d_obmc_masks[64], 32) = {
31, 29, 28, 26, 24, 23, 21, 20, 19, 17, 16, 14, 13, 12, 11, 9,
8, 7, 6, 5, 4, 4, 3, 2, 0, 0, 0, 0, 0, 0, 0, 0,
};
// Taken from the spec. Range is [-2048, 2047], mean is 0 and stddev is 512
const int16_t dav1d_gaussian_sequence[2048] = {
56, 568, -180, 172, 124, -84, 172, -64, -900, 24, 820,
224, 1248, 996, 272, -8, -916, -388, -732, -104, -188, 800,
112, -652, -320, -376, 140, -252, 492, -168, 44, -788, 588,
-584, 500, -228, 12, 680, 272, -476, 972, -100, 652, 368,
432, -196, -720, -192, 1000, -332, 652, -136, -552, -604, -4,
192, -220, -136, 1000, -52, 372, -96, -624, 124, -24, 396,
540, -12, -104, 640, 464, 244, -208, -84, 368, -528, -740,
248, -968, -848, 608, 376, -60, -292, -40, -156, 252, -292,
248, 224, -280, 400, -244, 244, -60, 76, -80, 212, 532,
340, 128, -36, 824, -352, -60, -264, -96, -612, 416, -704,
220, -204, 640, -160, 1220, -408, 900, 336, 20, -336, -96,
-792, 304, 48, -28, -1232, -1172, -448, 104, -292, -520, 244,
60, -948, 0, -708, 268, 108, 356, -548, 488, -344, -136,
488, -196, -224, 656, -236, -1128, 60, 4, 140, 276, -676,
-376, 168, -108, 464, 8, 564, 64, 240, 308, -300, -400,
-456, -136, 56, 120, -408, -116, 436, 504, -232, 328, 844,
-164, -84, 784, -168, 232, -224, 348, -376, 128, 568, 96,
-1244, -288, 276, 848, 832, -360, 656, 464, -384, -332, -356,
728, -388, 160, -192, 468, 296, 224, 140, -776, -100, 280,
4, 196, 44, -36, -648, 932, 16, 1428, 28, 528, 808,
772, 20, 268, 88, -332, -284, 124, -384, -448, 208, -228,
-1044, -328, 660, 380, -148, -300, 588, 240, 540, 28, 136,
-88, -436, 256, 296, -1000, 1400, 0, -48, 1056, -136, 264,
-528, -1108, 632, -484, -592, -344, 796, 124, -668, -768, 388,
1296, -232, -188, -200, -288, -4, 308, 100, -168, 256, -500,
204, -508, 648, -136, 372, -272, -120, -1004, -552, -548, -384,
548, -296, 428, -108, -8, -912, -324, -224, -88, -112, -220,
-100, 996, -796, 548, 360, -216, 180, 428, -200, -212, 148,
96, 148, 284, 216, -412, -320, 120, -300, -384, -604, -572,
-332, -8, -180, -176, 696, 116, -88, 628, 76, 44, -516,
240, -208, -40, 100, -592, 344, -308, -452, -228, 20, 916,
-1752, -136, -340, -804, 140, 40, 512, 340, 248, 184, -492,
896, -156, 932, -628, 328, -688, -448, -616, -752, -100, 560,
-1020, 180, -800, -64, 76, 576, 1068, 396, 660, 552, -108,
-28, 320, -628, 312, -92, -92, -472, 268, 16, 560, 516,
-672, -52, 492, -100, 260, 384, 284, 292, 304, -148, 88,
-152, 1012, 1064, -228, 164, -376, -684, 592, -392, 156, 196,
-524, -64, -884, 160, -176, 636, 648, 404, -396, -436, 864,
424, -728, 988, -604, 904, -592, 296, -224, 536, -176, -920,
436, -48, 1176, -884, 416, -776, -824, -884, 524, -548, -564,
-68, -164, -96, 692, 364, -692, -1012, -68, 260, -480, 876,
-1116, 452, -332, -352, 892, -1088, 1220, -676, 12, -292, 244,
496, 372, -32, 280, 200, 112, -440, -96, 24, -644, -184,
56, -432, 224, -980, 272, -260, 144, -436, 420, 356, 364,
-528, 76, 172, -744, -368, 404, -752, -416, 684, -688, 72,
540, 416, 92, 444, 480, -72, -1416, 164, -1172, -68, 24,
424, 264, 1040, 128, -912, -524, -356, 64, 876, -12, 4,
-88, 532, 272, -524, 320, 276, -508, 940, 24, -400, -120,
756, 60, 236, -412, 100, 376, -484, 400, -100, -740, -108,
-260, 328, -268, 224, -200, -416, 184, -604, -564, -20, 296,
60, 892, -888, 60, 164, 68, -760, 216, -296, 904, -336,
-28, 404, -356, -568, -208, -1480, -512, 296, 328, -360, -164,
-1560, -776, 1156, -428, 164, -504, -112, 120, -216, -148, -264,
308, 32, 64, -72, 72, 116, 176, -64, -272, 460, -536,
-784, -280, 348, 108, -752, -132, 524, -540, -776, 116, -296,
-1196, -288, -560, 1040, -472, 116, -848, -1116, 116, 636, 696,
284, -176, 1016, 204, -864, -648, -248, 356, 972, -584, -204,
264, 880, 528, -24, -184, 116, 448, -144, 828, 524, 212,
-212, 52, 12, 200, 268, -488, -404, -880, 824, -672, -40,
908, -248, 500, 716, -576, 492, -576, 16, 720, -108, 384,
124, 344, 280, 576, -500, 252, 104, -308, 196, -188, -8,
1268, 296, 1032, -1196, 436, 316, 372, -432, -200, -660, 704,
-224, 596, -132, 268, 32, -452, 884, 104, -1008, 424, -1348,
-280, 4, -1168, 368, 476, 696, 300, -8, 24, 180, -592,
-196, 388, 304, 500, 724, -160, 244, -84, 272, -256, -420,
320, 208, -144, -156, 156, 364, 452, 28, 540, 316, 220,
-644, -248, 464, 72, 360, 32, -388, 496, -680, -48, 208,
-116, -408, 60, -604, -392, 548, -840, 784, -460, 656, -544,
-388, -264, 908, -800, -628, -612, -568, 572, -220, 164, 288,
-16, -308, 308, -112, -636, -760, 280, -668, 432, 364, 240,
-196, 604, 340, 384, 196, 592, -44, -500, 432, -580, -132,
636, -76, 392, 4, -412, 540, 508, 328, -356, -36, 16,
-220, -64, -248, -60, 24, -192, 368, 1040, 92, -24, -1044,
-32, 40, 104, 148, 192, -136, -520, 56, -816, -224, 732,
392, 356, 212, -80, -424, -1008, -324, 588, -1496, 576, 460,
-816, -848, 56, -580, -92, -1372, -112, -496, 200, 364, 52,
-140, 48, -48, -60, 84, 72, 40, 132, -356, -268, -104,
-284, -404, 732, -520, 164, -304, -540, 120, 328, -76, -460,
756, 388, 588, 236, -436, -72, -176, -404, -316, -148, 716,
-604, 404, -72, -88, -888, -68, 944, 88, -220, -344, 960,
472, 460, -232, 704, 120, 832, -228, 692, -508, 132, -476,
844, -748, -364, -44, 1116, -1104, -1056, 76, 428, 552, -692,
60, 356, 96, -384, -188, -612, -576, 736, 508, 892, 352,
-1132, 504, -24, -352, 324, 332, -600, -312, 292, 508, -144,
-8, 484, 48, 284, -260, -240, 256, -100, -292, -204, -44,
472, -204, 908, -188, -1000, -256, 92, 1164, -392, 564, 356,
652, -28, -884, 256, 484, -192, 760, -176, 376, -524, -452,
-436, 860, -736, 212, 124, 504, -476, 468, 76, -472, 552,
-692, -944, -620, 740, -240, 400, 132, 20, 192, -196, 264,
-668, -1012, -60, 296, -316, -828, 76, -156, 284, -768, -448,
-832, 148, 248, 652, 616, 1236, 288, -328, -400, -124, 588,
220, 520, -696, 1032, 768, -740, -92, -272, 296, 448, -464,
412, -200, 392, 440, -200, 264, -152, -260, 320, 1032, 216,
320, -8, -64, 156, -1016, 1084, 1172, 536, 484, -432, 132,
372, -52, -256, 84, 116, -352, 48, 116, 304, -384, 412,
924, -300, 528, 628, 180, 648, 44, -980, -220, 1320, 48,
332, 748, 524, -268, -720, 540, -276, 564, -344, -208, -196,
436, 896, 88, -392, 132, 80, -964, -288, 568, 56, -48,
-456, 888, 8, 552, -156, -292, 948, 288, 128, -716, -292,
1192, -152, 876, 352, -600, -260, -812, -468, -28, -120, -32,
-44, 1284, 496, 192, 464, 312, -76, -516, -380, -456, -1012,
-48, 308, -156, 36, 492, -156, -808, 188, 1652, 68, -120,
-116, 316, 160, -140, 352, 808, -416, 592, 316, -480, 56,
528, -204, -568, 372, -232, 752, -344, 744, -4, 324, -416,
-600, 768, 268, -248, -88, -132, -420, -432, 80, -288, 404,
-316, -1216, -588, 520, -108, 92, -320, 368, -480, -216, -92,
1688, -300, 180, 1020, -176, 820, -68, -228, -260, 436, -904,
20, 40, -508, 440, -736, 312, 332, 204, 760, -372, 728,
96, -20, -632, -520, -560, 336, 1076, -64, -532, 776, 584,
192, 396, -728, -520, 276, -188, 80, -52, -612, -252, -48,
648, 212, -688, 228, -52, -260, 428, -412, -272, -404, 180,
816, -796, 48, 152, 484, -88, -216, 988, 696, 188, -528,
648, -116, -180, 316, 476, 12, -564, 96, 476, -252, -364,
-376, -392, 556, -256, -576, 260, -352, 120, -16, -136, -260,
-492, 72, 556, 660, 580, 616, 772, 436, 424, -32, -324,
-1268, 416, -324, -80, 920, 160, 228, 724, 32, -516, 64,
384, 68, -128, 136, 240, 248, -204, -68, 252, -932, -120,
-480, -628, -84, 192, 852, -404, -288, -132, 204, 100, 168,
-68, -196, -868, 460, 1080, 380, -80, 244, 0, 484, -888,
64, 184, 352, 600, 460, 164, 604, -196, 320, -64, 588,
-184, 228, 12, 372, 48, -848, -344, 224, 208, -200, 484,
128, -20, 272, -468, -840, 384, 256, -720, -520, -464, -580,
112, -120, 644, -356, -208, -608, -528, 704, 560, -424, 392,
828, 40, 84, 200, -152, 0, -144, 584, 280, -120, 80,
-556, -972, -196, -472, 724, 80, 168, -32, 88, 160, -688,
0, 160, 356, 372, -776, 740, -128, 676, -248, -480, 4,
-364, 96, 544, 232, -1032, 956, 236, 356, 20, -40, 300,
24, -676, -596, 132, 1120, -104, 532, -1096, 568, 648, 444,
508, 380, 188, -376, -604, 1488, 424, 24, 756, -220, -192,
716, 120, 920, 688, 168, 44, -460, 568, 284, 1144, 1160,
600, 424, 888, 656, -356, -320, 220, 316, -176, -724, -188,
-816, -628, -348, -228, -380, 1012, -452, -660, 736, 928, 404,
-696, -72, -268, -892, 128, 184, -344, -780, 360, 336, 400,
344, 428, 548, -112, 136, -228, -216, -820, -516, 340, 92,
-136, 116, -300, 376, -244, 100, -316, -520, -284, -12, 824,
164, -548, -180, -128, 116, -924, -828, 268, -368, -580, 620,
192, 160, 0, -1676, 1068, 424, -56, -360, 468, -156, 720,
288, -528, 556, -364, 548, -148, 504, 316, 152, -648, -620,
-684, -24, -376, -384, -108, -920, -1032, 768, 180, -264, -508,
-1268, -260, -60, 300, -240, 988, 724, -376, -576, -212, -736,
556, 192, 1092, -620, -880, 376, -56, -4, -216, -32, 836,
268, 396, 1332, 864, -600, 100, 56, -412, -92, 356, 180,
884, -468, -436, 292, -388, -804, -704, -840, 368, -348, 140,
-724, 1536, 940, 372, 112, -372, 436, -480, 1136, 296, -32,
-228, 132, -48, -220, 868, -1016, -60, -1044, -464, 328, 916,
244, 12, -736, -296, 360, 468, -376, -108, -92, 788, 368,
-56, 544, 400, -672, -420, 728, 16, 320, 44, -284, -380,
-796, 488, 132, 204, -596, -372, 88, -152, -908, -636, -572,
-624, -116, -692, -200, -56, 276, -88, 484, -324, 948, 864,
1000, -456, -184, -276, 292, -296, 156, 676, 320, 160, 908,
-84, -1236, -288, -116, 260, -372, -644, 732, -756, -96, 84,
344, -520, 348, -688, 240, -84, 216, -1044, -136, -676, -396,
-1500, 960, -40, 176, 168, 1516, 420, -504, -344, -364, -360,
1216, -940, -380, -212, 252, -660, -708, 484, -444, -152, 928,
-120, 1112, 476, -260, 560, -148, -344, 108, -196, 228, -288,
504, 560, -328, -88, 288, -1008, 460, -228, 468, -836, -196,
76, 388, 232, 412, -1168, -716, -644, 756, -172, -356, -504,
116, 432, 528, 48, 476, -168, -608, 448, 160, -532, -272,
28, -676, -12, 828, 980, 456, 520, 104, -104, 256, -344,
-4, -28, -368, -52, -524, -572, -556, -200, 768, 1124, -208,
-512, 176, 232, 248, -148, -888, 604, -600, -304, 804, -156,
-212, 488, -192, -804, -256, 368, -360, -916, -328, 228, -240,
-448, -472, 856, -556, -364, 572, -12, -156, -368, -340, 432,
252, -752, -152, 288, 268, -580, -848, -592, 108, -76, 244,
312, -716, 592, -80, 436, 360, 4, -248, 160, 516, 584,
732, 44, -468, -280, -292, -156, -588, 28, 308, 912, 24,
124, 156, 180, -252, 944, -924, -772, -520, -428, -624, 300,
-212, -1144, 32, -724, 800, -1128, -212, -1288, -848, 180, -416,
440, 192, -576, -792, -76, -1080, 80, -532, -352, -132, 380,
-820, 148, 1112, 128, 164, 456, 700, -924, 144, -668, -384,
648, -832, 508, 552, -52, -100, -656, 208, -568, 748, -88,
680, 232, 300, 192, -408, -1012, -152, -252, -268, 272, -876,
-664, -648, -332, -136, 16, 12, 1152, -28, 332, -536, 320,
-672, -460, -316, 532, -260, 228, -40, 1052, -816, 180, 88,
-496, -556, -672, -368, 428, 92, 356, 404, -408, 252, 196,
-176, -556, 792, 268, 32, 372, 40, 96, -332, 328, 120,
372, -900, -40, 472, -264, -592, 952, 128, 656, 112, 664,
-232, 420, 4, -344, -464, 556, 244, -416, -32, 252, 0,
-412, 188, -696, 508, -476, 324, -1096, 656, -312, 560, 264,
-136, 304, 160, -64, -580, 248, 336, -720, 560, -348, -288,
-276, -196, -500, 852, -544, -236, -1128, -992, -776, 116, 56,
52, 860, 884, 212, -12, 168, 1020, 512, -552, 924, -148,
716, 188, 164, -340, -520, -184, 880, -152, -680, -208, -1156,
-300, -528, -472, 364, 100, -744, -1056, -32, 540, 280, 144,
-676, -32, -232, -280, -224, 96, 568, -76, 172, 148, 148,
104, 32, -296, -32, 788, -80, 32, -16, 280, 288, 944,
428, -484
};

View File

@ -63,8 +63,8 @@ extern const uint8_t dav1d_nz_map_ctx_offset[N_RECT_TX_SIZES][5][5];
extern const uint8_t /* enum TxClass */
dav1d_tx_type_class[N_TX_TYPES_PLUS_LL];
extern const uint8_t /* enum Filter2d */
dav1d_filter_2d[N_FILTERS /* h */][N_FILTERS /* v */];
extern const uint8_t /* enum FilterMode */ dav1d_filter_dir[N_2D_FILTERS][2];
dav1d_filter_2d[DAV1D_N_FILTERS /* h */][DAV1D_N_FILTERS /* v */];
extern const uint8_t /* enum Dav1dFilterMode */ dav1d_filter_dir[N_2D_FILTERS][2];
extern const uint8_t dav1d_intra_mode_context[N_INTRA_PRED_MODES];
extern const uint8_t dav1d_wedge_ctx_lut[N_BS_SIZES];
@ -104,13 +104,14 @@ static const unsigned interintra_allowed_mask =
(1 << BS_8x16) |
(1 << BS_8x8);
extern const WarpedMotionParams dav1d_default_wm_params;
extern const Dav1dWarpedMotionParams dav1d_default_wm_params;
extern const int16_t dav1d_sgr_params[16][4];
extern const int dav1d_sgr_x_by_xplus1[256];
extern const int8_t dav1d_mc_subpel_filters[5][15][8];
extern const int8_t dav1d_mc_warp_filter[193][8];
extern const int16_t dav1d_resize_filter[64][8];
extern const uint8_t dav1d_sm_weights[128];
extern const int16_t dav1d_dr_intra_derivative[90];
@ -118,4 +119,6 @@ extern const int8_t dav1d_filter_intra_taps[5][64];
extern const uint8_t dav1d_obmc_masks[64];
extern const int16_t dav1d_gaussian_sequence[2048]; // for fgs
#endif /* __DAV1D_SRC_TABLES_H__ */

View File

@ -41,7 +41,10 @@ void *dav1d_frame_task(void *const data) {
if (f->frame_thread.die) break;
pthread_mutex_unlock(&f->frame_thread.td.lock);
dav1d_decode_frame(f);
const int res = dav1d_decode_frame(f);
if (res)
memset(f->frame_thread.cf, 0,
sizeof(int32_t) * 3 * f->lf.mask_sz * 128 * 128);
pthread_mutex_lock(&f->frame_thread.td.lock);
f->n_tile_data = 0;
@ -79,7 +82,7 @@ void *dav1d_tile_task(void *const data) {
const int task_idx = fttd->num_tasks - fttd->tasks_left--;
pthread_mutex_unlock(&fttd->lock);
if (f->frame_thread.pass == 1 || f->n_tc >= f->frame_hdr.tiling.cols) {
if (f->frame_thread.pass == 1 || f->n_tc >= f->frame_hdr->tiling.cols) {
// we can (or in fact, if >, we need to) do full tile decoding.
// loopfilter happens in the main thread
Dav1dTileState *const ts = t->ts = &f->ts[task_idx];

View File

@ -78,7 +78,7 @@ static inline int resolve_divisor_32(const unsigned d, int *const shift) {
return div_lut[f];
}
int dav1d_get_shear_params(WarpedMotionParams *const wm) {
int dav1d_get_shear_params(Dav1dWarpedMotionParams *const wm) {
const int32_t *const mat = wm->matrix;
if (mat[2] <= 0) return 1;
@ -129,7 +129,7 @@ static int get_mult_shift_diag(const int64_t px,
int dav1d_find_affine_int(const int (*pts)[2][2], const int np,
const int bw4, const int bh4,
const mv mv, WarpedMotionParams *const wm,
const mv mv, Dav1dWarpedMotionParams *const wm,
const int bx4, const int by4)
{
int32_t *const mat = wm->matrix;

View File

@ -30,8 +30,8 @@
#include "src/levels.h"
int dav1d_get_shear_params(WarpedMotionParams *wm);
int dav1d_get_shear_params(Dav1dWarpedMotionParams *wm);
int dav1d_find_affine_int(const int (*pts)[2][2], int np, int bw4, int bh4,
mv mv, WarpedMotionParams *wm, int by, int bx);
mv mv, Dav1dWarpedMotionParams *wm, int by, int bx);
#endif /* __DAV1D_SRC_WARPMV_H__ */

View File

@ -51,8 +51,11 @@ decl_mct_fn(dav1d_prep_8tap_sharp_smooth_avx2);
decl_mct_fn(dav1d_prep_bilin_avx2);
decl_avg_fn(dav1d_avg_avx2);
decl_avg_fn(dav1d_avg_ssse3);
decl_w_avg_fn(dav1d_w_avg_avx2);
decl_w_avg_fn(dav1d_w_avg_ssse3);
decl_mask_fn(dav1d_mask_avx2);
decl_mask_fn(dav1d_mask_ssse3);
decl_w_mask_fn(dav1d_w_mask_420_avx2);
decl_blend_fn(dav1d_blend_avx2);
decl_blend_dir_fn(dav1d_blend_v_avx2);
@ -70,7 +73,18 @@ void bitfn(dav1d_mc_dsp_init_x86)(Dav1dMCDSPContext *const c) {
c->mct[type] = dav1d_prep_##name##_##suffix
const unsigned flags = dav1d_get_cpu_flags();
if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return;
if(!(flags & DAV1D_X86_CPU_FLAG_SSSE3))
return;
#if BITDEPTH == 8 && ARCH_X86_64
c->avg = dav1d_avg_ssse3;
c->w_avg = dav1d_w_avg_ssse3;
c->mask = dav1d_mask_ssse3;
#endif
if (!(flags & DAV1D_X86_CPU_FLAG_AVX2))
return;
#if BITDEPTH == 8 && ARCH_X86_64
init_mc_fn (FILTER_2D_8TAP_REGULAR, 8tap_regular, avx2);

251 third_party/dav1d/src/x86/mc_ssse3.asm vendored Normal file
View File

@ -0,0 +1,251 @@
; Copyright © 2018, VideoLAN and dav1d authors
; Copyright © 2018, Two Orioles, LLC
; Copyright © 2018, VideoLabs
; All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions are met:
;
; 1. Redistributions of source code must retain the above copyright notice, this
; list of conditions and the following disclaimer.
;
; 2. Redistributions in binary form must reproduce the above copyright notice,
; this list of conditions and the following disclaimer in the documentation
; and/or other materials provided with the distribution.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
; ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
; ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
; (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
; ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
%include "config.asm"
%include "ext/x86/x86inc.asm"
%if ARCH_X86_64
SECTION_RODATA 16
pw_1024: times 8 dw 1024
pw_2048: times 8 dw 2048
%macro BIDIR_JMP_TABLE 1-*
;evaluated at definition time (in loop below)
%xdefine %1_table (%%table - 2*%2)
%xdefine %%base %1_table
%xdefine %%prefix mangle(private_prefix %+ _%1)
; dynamically generated label
%%table:
%rep %0 - 1 ; repeat for num args
dd %%prefix %+ .w%2 - %%base
%rotate 1
%endrep
%endmacro
BIDIR_JMP_TABLE avg_ssse3, 4, 8, 16, 32, 64, 128
BIDIR_JMP_TABLE w_avg_ssse3, 4, 8, 16, 32, 64, 128
BIDIR_JMP_TABLE mask_ssse3, 4, 8, 16, 32, 64, 128
SECTION .text
INIT_XMM ssse3
%if WIN64
DECLARE_REG_TMP 6, 4
%else
DECLARE_REG_TMP 6, 7
%endif
%macro BIDIR_FN 1 ; op
%1 0
lea stride3q, [strideq*3]
jmp wq
.w4_loop:
%1_INC_PTR 2
%1 0
lea dstq, [dstq+strideq*4]
.w4: ; tile 4x
movd [dstq ], m0 ; copy dw[0]
pshuflw m1, m0, q1032 ; swap dw[1] and dw[0]
movd [dstq+strideq*1], m1 ; copy dw[1]
punpckhqdq m0, m0 ; swap dw[3,2] with dw[1,0]
movd [dstq+strideq*2], m0 ; dw[2]
psrlq m0, 32 ; shift dw[3] into the low dword
movd [dstq+stride3q ], m0 ; copy dw[3]
sub hd, 4
jg .w4_loop
RET
.w8_loop:
%1_INC_PTR 2
%1 0
lea dstq, [dstq+strideq*2]
.w8:
movq [dstq ], m0
movhps [dstq+strideq*1], m0
sub hd, 2
jg .w8_loop
RET
.w16_loop:
%1_INC_PTR 2
%1 0
lea dstq, [dstq+strideq]
.w16:
mova [dstq ], m0
dec hd
jg .w16_loop
RET
.w32_loop:
%1_INC_PTR 4
%1 0
lea dstq, [dstq+strideq]
.w32:
mova [dstq ], m0
%1 2
mova [dstq + 16 ], m0
dec hd
jg .w32_loop
RET
.w64_loop:
%1_INC_PTR 8
%1 0
add dstq, strideq
.w64:
%assign i 0
%rep 4
mova [dstq + i*16 ], m0
%assign i i+1
%if i < 4
%1 2*i
%endif
%endrep
dec hd
jg .w64_loop
RET
.w128_loop:
%1_INC_PTR 16
%1 0
add dstq, strideq
.w128:
%assign i 0
%rep 8
mova [dstq + i*16 ], m0
%assign i i+1
%if i < 8
%1 2*i
%endif
%endrep
dec hd
jg .w128_loop
RET
%endmacro
%macro AVG 1 ; src_offset
; writes the average of the tmp1/tmp2 16-bit coefficients as 8-bit pixels
mova m0, [tmp1q+(%1+0)*mmsize] ; load 8 16-bit coefficients from tmp1
paddw m0, [tmp2q+(%1+0)*mmsize] ; add the 8 matching coefficients from tmp2
mova m1, [tmp1q+(%1+1)*mmsize]
paddw m1, [tmp2q+(%1+1)*mmsize]
pmulhrsw m0, m2
pmulhrsw m1, m2
packuswb m0, m1 ; pack the 16-bit results in m0/m1 to 8-bit with unsigned saturation
%endmacro
%macro AVG_INC_PTR 1
add tmp1q, %1*mmsize
add tmp2q, %1*mmsize
%endmacro
cglobal avg, 4, 7, 3, dst, stride, tmp1, tmp2, w, h, stride3
lea r6, [avg_ssse3_table]
tzcnt wd, wm ; trailing zeros, i.e. log2 of the (power-of-two) width
movifnidn hd, hm ; load h from the stack into a register if it is not already in one
movsxd wq, dword [r6+wq*4] ; sign-extend the jump-table entry for this tile width
mova m2, [pw_1024+r6-avg_ssse3_table] ; rounding constant for pmulhrsw
add wq, r6
BIDIR_FN AVG
%macro W_AVG 1 ; src_offset
; (a * weight + b * (16 - weight) + 128) >> 8
; = ((a - b) * weight + (b << 4) + 128) >> 8
; = ((((b - a) * (-weight << 12)) >> 16) + b + 8) >> 4
mova m0, [tmp2q+(%1+0)*mmsize]
psubw m2, m0, [tmp1q+(%1+0)*mmsize]
mova m1, [tmp2q+(%1+1)*mmsize]
psubw m3, m1, [tmp1q+(%1+1)*mmsize]
paddw m2, m2 ; compensate for the weight only being half
paddw m3, m3 ; of what it should be
pmulhw m2, m4 ; (b-a) * (-weight << 12)
pmulhw m3, m4 ; (b-a) * (-weight << 12)
paddw m0, m2 ; ((b-a) * -weight) + b
paddw m1, m3
pmulhrsw m0, m5
pmulhrsw m1, m5
packuswb m0, m1
%endmacro
%define W_AVG_INC_PTR AVG_INC_PTR
cglobal w_avg, 4, 7, 6, dst, stride, tmp1, tmp2, w, h, stride3
lea r6, [w_avg_ssse3_table]
tzcnt wd, wm
movifnidn hd, hm
movd m0, r6m
pshuflw m0, m0, q0000
punpcklqdq m0, m0
movsxd wq, dword [r6+wq*4]
pxor m4, m4
psllw m0, 11 ; can't shift by 12, sign bit must be preserved
psubw m4, m0
mova m5, [pw_2048+r6-w_avg_ssse3_table]
add wq, r6
BIDIR_FN W_AVG
%macro MASK 1 ; src_offset
; (a * m + b * (64 - m) + 512) >> 10
; = ((a - b) * m + (b << 6) + 512) >> 10
; = ((((b - a) * (-m << 10)) >> 16) + b + 8) >> 4
mova m3, [maskq+(%1+0)*(mmsize/2)]
mova m0, [tmp2q+(%1+0)*mmsize] ; b
psubw m1, m0, [tmp1q+(%1+0)*mmsize] ; b - a
mova m6, m3 ; m
psubb m3, m4, m6 ; -m
paddw m1, m1 ; (b - a) << 1
paddb m3, m3 ; -m << 1
punpcklbw m2, m4, m3 ; -m << 9 (<< 8 when ext as uint16)
pmulhw m1, m2 ; (-m * (b - a)) << 10
paddw m0, m1 ; + b
mova m1, [tmp2q+(%1+1)*mmsize] ; b
psubw m2, m1, [tmp1q+(%1+1)*mmsize] ; b - a
paddw m2, m2 ; (b - a) << 1
mova m6, m3 ; (-m << 1)
punpckhbw m3, m4, m6 ; (-m << 9)
pmulhw m2, m3 ; (-m * (b - a)) << 10
paddw m1, m2 ; + b
pmulhrsw m0, m5 ; round
pmulhrsw m1, m5 ; round
packuswb m0, m1 ; interleave 16 -> 8
%endmacro
%macro MASK_INC_PTR 1
add maskq, %1*mmsize/2
add tmp1q, %1*mmsize
add tmp2q, %1*mmsize
%endmacro
cglobal mask, 4, 8, 7, dst, stride, tmp1, tmp2, w, h, mask, stride3
lea r7, [mask_ssse3_table]
tzcnt wd, wm
movifnidn hd, hm
mov maskq, maskmp
movsxd wq, dword [r7+wq*4]
pxor m4, m4
mova m5, [pw_2048+r7-mask_ssse3_table]
add wq, r7
BIDIR_FN MASK
%endif ; ARCH_X86_64

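For readers less fluent in SIMD assembly, the arithmetic implemented by the AVG, W_AVG and MASK macros above is the same as the scalar math spelled out in their comments, just performed on packed 16-bit intermediates produced by the prep functions. The following is a minimal scalar sketch of that arithmetic, assuming 8 bpc output, dav1d's 4 bits of intermediate precision, a weight in [0, 16] and a 6-bit blend mask in [0, 64]; clip_u8 and the *_ref names are illustrative only, not library API.

#include <stdint.h>

/* Clip an intermediate value into the 8-bit pixel range. */
static inline uint8_t clip_u8(const int v) {
    return v < 0 ? 0 : v > 255 ? 255 : (uint8_t) v;
}

/* AVG: (a + b + 16) >> 5, which is what pmulhrsw with pw_1024 computes. */
static void avg_ref(uint8_t *dst, const int16_t *t1, const int16_t *t2, int n) {
    for (int i = 0; i < n; i++)
        dst[i] = clip_u8((t1[i] + t2[i] + 16) >> 5);
}

/* W_AVG: (a * weight + b * (16 - weight) + 128) >> 8, per the macro comment. */
static void w_avg_ref(uint8_t *dst, const int16_t *t1, const int16_t *t2,
                      int n, int weight) {
    for (int i = 0; i < n; i++)
        dst[i] = clip_u8((t1[i] * weight + t2[i] * (16 - weight) + 128) >> 8);
}

/* MASK: (a * m + b * (64 - m) + 512) >> 10, per the macro comment. */
static void mask_ref(uint8_t *dst, const int16_t *t1, const int16_t *t2,
                     const uint8_t *m, int n) {
    for (int i = 0; i < n; i++)
        dst[i] = clip_u8((t1[i] * m[i] + t2[i] * (64 - m[i]) + 512) >> 10);
}
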
View File

@ -133,6 +133,7 @@ static struct {
unsigned cpu_flag;
const char *cpu_flag_name;
const char *test_name;
unsigned int seed;
} state;
/* float compare support code */
@ -413,6 +414,7 @@ static void check_cpu_flag(const char *const name, unsigned flag) {
for (int i = 0; tests[i].func; i++) {
if (state.test_name && strcmp(tests[i].name, state.test_name))
continue;
srand(state.seed);
state.current_test_name = tests[i].name;
tests[i].func();
}
@ -429,7 +431,7 @@ static void print_cpu_name(void) {
int main(int argc, char *argv[]) {
(void)func_new, (void)func_ref;
unsigned int seed = get_seed();
state.seed = get_seed();
int ret = 0;
while (argc > 1) {
@ -446,16 +448,21 @@ int main(int argc, char *argv[]) {
state.bench_pattern = "";
} else if (!strncmp(argv[1], "--test=", 7)) {
state.test_name = argv[1] + 7;
} else if (!strcmp(argv[1], "--list")) {
fprintf(stderr, "checkasm: available tests [");
for (int i = 0; tests[i].func; i++)
fprintf(stderr, "%s%s", i ? ", ": "", tests[i].name);
fprintf(stderr, "]\n");
return 0;
} else {
seed = strtoul(argv[1], NULL, 10);
state.seed = strtoul(argv[1], NULL, 10);
}
argc--;
argv++;
}
fprintf(stderr, "checkasm: using random seed %u\n", seed);
srand(seed);
fprintf(stderr, "checkasm: using random seed %u\n", state.seed);
check_cpu_flag(NULL, 0);
for (int i = 0; cpus[i].flag; i++)
@ -521,6 +528,7 @@ void *checkasm_check_func(void *const func, const char *const name, ...) {
v->ok = 1;
v->cpu = state.cpu_flag;
state.current_func_ver = v;
srand(state.seed);
if (state.cpu_flag)
state.num_checked++;

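The checkasm changes above move the random seed into the global state and re-seed before each test and before each checked function version. The point is reproducibility: with a single srand() call at startup, the inputs a given test received depended on how many other tests ran first, so a failure seen under one CPU flag was hard to reproduce in isolation. A minimal sketch of the pattern (run_test and test_fn are illustrative names):

#include <stdlib.h>

static struct { unsigned seed; } state;      /* mirrors the checkasm state */

static void run_test(void (*test_fn)(void)) {
    srand(state.seed);   /* same input sequence for every test and CPU flag */
    test_fn();
}
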
View File

@ -46,6 +46,8 @@ static const char *const intra_pred_mode_names[N_IMPL_INTRA_PRED_MODES] = {
[FILTER_PRED] = "filter"
};
static const char *const cfl_ac_names[3] = { "420", "422", "444" };
static const char *const cfl_pred_mode_names[DC_128_PRED + 1] = {
[DC_PRED] = "cfl",
[DC_128_PRED] = "cfl_128",
@ -101,6 +103,42 @@ static void check_intra_pred(Dav1dIntraPredDSPContext *const c) {
report("intra_pred");
}
static void check_cfl_ac(Dav1dIntraPredDSPContext *const c) {
ALIGN_STK_32(int16_t, c_dst, 32 * 32,);
ALIGN_STK_32(int16_t, a_dst, 32 * 32,);
ALIGN_STK_32(pixel, luma, 32 * 32,);
declare_func(void, int16_t *ac, const pixel *y, ptrdiff_t stride,
int w_pad, int h_pad, int cw, int ch);
for (int layout = 1; layout <= DAV1D_PIXEL_LAYOUT_I444; layout++) {
const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420;
const int ss_hor = layout != DAV1D_PIXEL_LAYOUT_I444;
for (int w = 4; w <= (32 >> ss_hor); w <<= 1)
if (check_func(c->cfl_ac[layout - 1], "cfl_ac_%s_w%d_%dbpc",
cfl_ac_names[layout - 1], w, BITDEPTH))
{
for (int h = imax(w / 4, 4); h <= imin(w * 4, (32 >> ss_ver)); h <<= 1) {
const ptrdiff_t stride = 32 * sizeof(pixel);
const int w_pad = rand() & ((w >> 2) - 1);
const int h_pad = rand() & ((h >> 2) - 1);
for (int y = 0; y < (h << ss_ver); y++)
for (int x = 0; x < (w << ss_hor); x++)
luma[y * 32 + x] = rand() & ((1 << BITDEPTH) - 1);
call_ref(c_dst, luma, stride, w_pad, h_pad, w, h);
call_new(a_dst, luma, stride, w_pad, h_pad, w, h);
if (memcmp(c_dst, a_dst, w * h * sizeof(*c_dst)))
fail();
bench_new(a_dst, luma, stride, 0, 0, w, h);
}
}
}
report("cfl_ac");
}
static void check_cfl_pred(Dav1dIntraPredDSPContext *const c) {
ALIGN_STK_32(pixel, c_dst, 32 * 32,);
ALIGN_STK_32(pixel, a_dst, 32 * 32,);
@ -179,6 +217,7 @@ void bitfn(checkasm_check_ipred)(void) {
bitfn(dav1d_intra_pred_dsp_init)(&c);
check_intra_pred(&c);
check_cfl_ac(&c);
check_cfl_pred(&c);
check_pal_pred(&c);
}

View File

@ -141,7 +141,7 @@ static void check_lpf_sb(loopfilter_sb_fn fn, const char *const name,
for (int j = 0; j < n_blks; j++) {
const int idx = rand() % (i + 2);
if (idx) vmask[idx - 1] |= 1 << j;
if (idx) vmask[idx - 1] |= 1U << j;
if (dir) {
l[j][lf_idx] = rand() & 63;
l[j + 32][lf_idx] = rand() & 63;

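The 1U change above is an undefined-behaviour fix rather than a functional one: the vmask entries are 32-bit, and once j reaches 31 the signed expression 1 << j shifts into the sign bit of an int, which is undefined in C, while the unsigned form is well defined for all j in [0, 31]. A small illustration (set_bit is a hypothetical helper):

#include <stdint.h>

static uint32_t set_bit(uint32_t mask, int j) {
    return mask | (1U << j);   /* well defined for j in [0, 31] */
    /* mask | (1 << j) would be undefined behaviour when j == 31 */
}
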
View File

@ -25,12 +25,15 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#include <errno.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <dav1d/dav1d.h>
#include "src/cpu.h"
#include "dav1d_fuzzer.h"
static unsigned r32le(const uint8_t *const p) {
@ -58,10 +61,18 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
Dav1dContext * ctx = NULL;
Dav1dPicture pic;
const uint8_t *ptr = data;
int have_seq_hdr = 0;
int err;
dav1d_version();
// memory sanitizer is inherently incompatible with asm
#if defined(__has_feature)
#if __has_feature(memory_sanitizer)
dav1d_set_cpu_flags_mask(0);
#endif
#endif
if (size < 32) goto end;
ptr += 32; // skip ivf header
@ -92,6 +103,17 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
if (!frame_size) continue;
if (!have_seq_hdr) {
Dav1dSequenceHeader seq = { 0 };
int err = dav1d_parse_sequence_header(&seq, ptr, frame_size);
// skip frames until we see a sequence header
if (err != 0) {
ptr += frame_size;
continue;
}
have_seq_hdr = 1;
}
// copy frame data to a new buffer to catch reads past the end of input
p = dav1d_data_create(&buf, frame_size);
if (!p) goto cleanup;

View File

@ -67,10 +67,15 @@ if is_asm_enabled
m_lib = cc.find_library('m', required: false)
libdav1d_nasm_objs_if_needed = []
if meson.version().version_compare('< 0.48.999')
libdav1d_nasm_objs_if_needed = libdav1d_nasm_objs
endif
checkasm = executable('checkasm',
checkasm_sources,
checkasm_nasm_objs,
libdav1d_nasm_objs,
libdav1d_nasm_objs_if_needed,
objects: [
checkasm_bitdepth_objs,

View File

@ -99,6 +99,23 @@ int main(const int argc, char *const *const argv) {
if (!cli_settings.quiet)
fprintf(stderr, "dav1d %s - by VideoLAN\n", DAV1D_VERSION);
// skip frames until a sequence header is found
if (cli_settings.skip) {
Dav1dSequenceHeader seq;
unsigned seq_skip = 0;
while (dav1d_parse_sequence_header(&seq, data.data, data.sz)) {
if ((res = input_read(in, &data)) < 0) {
input_close(in);
return res;
}
seq_skip++;
}
if (seq_skip && !cli_settings.quiet)
fprintf(stderr,
"skipped %u packets due to missing sequence header\n",
seq_skip);
}
//getc(stdin);
if (cli_settings.limit != 0 && cli_settings.limit < total)
total = cli_settings.limit;

View File

@ -48,6 +48,9 @@ enum {
ARG_FRAME_THREADS,
ARG_TILE_THREADS,
ARG_VERIFY,
ARG_FILM_GRAIN,
ARG_OPPOINT,
ARG_ALL_LAYERS,
};
static const struct option long_opts[] = {
@ -62,6 +65,9 @@ static const struct option long_opts[] = {
{ "framethreads", 1, NULL, ARG_FRAME_THREADS },
{ "tilethreads", 1, NULL, ARG_TILE_THREADS },
{ "verify", 1, NULL, ARG_VERIFY },
{ "filmgrain", 1, NULL, ARG_FILM_GRAIN },
{ "oppoint", 1, NULL, ARG_OPPOINT },
{ "alllayers", 1, NULL, ARG_ALL_LAYERS },
{ NULL, 0, NULL, 0 },
};
@ -86,6 +92,9 @@ static void usage(const char *const app, const char *const reason, ...) {
" --version/-v: print version and exit\n"
" --framethreads $num: number of frame threads (default: 1)\n"
" --tilethreads $num: number of tile threads (default: 1)\n"
" --filmgrain enable film grain application (default: 1, except if muxer is md5)\n"
" --oppoint $num: select an operating point of a scalable AV1 bitstream (0 - 32)\n"
" --alllayers $num: output all spatial layers of a scalable AV1 bitstream (default: 1)\n"
" --verify $md5: verify decoded md5. implies --muxer md5, no output\n");
exit(1);
}
@ -124,8 +133,9 @@ void parse(const int argc, char *const *const argv,
memset(cli_settings, 0, sizeof(*cli_settings));
dav1d_default_settings(lib_settings);
int grain_specified = 0;
while ((o = getopt_long(argc, argv, short_opts, long_opts, NULL)) >= 0) {
while ((o = getopt_long(argc, argv, short_opts, long_opts, NULL)) != -1) {
switch (o) {
case 'o':
cli_settings->outputfile = optarg;
@ -159,14 +169,29 @@ void parse(const int argc, char *const *const argv,
case ARG_VERIFY:
cli_settings->verify = optarg;
break;
case ARG_FILM_GRAIN:
lib_settings->apply_grain =
!!parse_unsigned(optarg, ARG_FILM_GRAIN, argv[0]);
grain_specified = 1;
break;
case ARG_OPPOINT:
lib_settings->operating_point =
parse_unsigned(optarg, ARG_OPPOINT, argv[0]);
break;
case ARG_ALL_LAYERS:
lib_settings->all_layers =
!!parse_unsigned(optarg, ARG_ALL_LAYERS, argv[0]);
break;
case 'v':
fprintf(stderr, "%s\n", dav1d_version());
exit(0);
default:
break;
usage(argv[0], NULL);
}
}
if (optind < argc)
usage(argv[0], "Extra/unused arguments found, e.g. '%s'\n", argv[optind]);
if (cli_settings->verify) {
if (cli_settings->outputfile)
usage(argv[0], "Verification (--verify) requires output file (-o/--output) to not be set");
@ -178,6 +203,12 @@ void parse(const int argc, char *const *const argv,
cli_settings->muxer = "md5";
}
if (!grain_specified && cli_settings->muxer &&
!strcmp(cli_settings->muxer, "md5"))
{
lib_settings->apply_grain = 0;
}
if (!cli_settings->inputfile)
usage(argv[0], "Input file (-i/--input) is required");
if ((!cli_settings->muxer || strcmp(cli_settings->muxer, "null")) &&

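The three new switches map directly onto Dav1dSettings fields, as the parsing code above shows. A minimal sketch of the equivalent programmatic configuration, using example values only:

#include <dav1d/dav1d.h>

static void example_settings(Dav1dSettings *s) {
    dav1d_default_settings(s);
    s->apply_grain     = 0;  /* --filmgrain 0: skip film grain synthesis */
    s->operating_point = 1;  /* --oppoint 1: operating point 1 of a scalable stream */
    s->all_layers      = 0;  /* --alllayers 0: output only the selected spatial layer */
}
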
View File

@ -36,6 +36,10 @@
#include "input/demuxer.h"
#ifdef _MSC_VER
#define ftello _ftelli64
#endif
typedef struct DemuxerPriv {
FILE *f;
} IvfInputContext;
@ -44,6 +48,10 @@ static unsigned rl32(const uint8_t *const p) {
return ((uint32_t)p[3] << 24U) | (p[2] << 16U) | (p[1] << 8U) | p[0];
}
static int64_t rl64(const uint8_t *const p) {
return (((uint64_t) rl32(&p[4])) << 32) | rl32(p);
}
static int ivf_open(IvfInputContext *const c, const char *const file,
unsigned fps[2], unsigned *const num_frames)
{
@ -87,16 +95,20 @@ static int ivf_open(IvfInputContext *const c, const char *const file,
}
static int ivf_read(IvfInputContext *const c, Dav1dData *const buf) {
uint8_t data[4];
uint8_t data[8];
uint8_t *ptr;
int res;
const int64_t off = ftello(c->f);
if ((res = fread(data, 4, 1, c->f)) != 1)
return -1; // EOF
fseek(c->f, 8, SEEK_CUR); // skip timestamp
const ptrdiff_t sz = rl32(data);
if ((res = fread(data, 8, 1, c->f)) != 1)
return -1; // EOF
ptr = dav1d_data_create(buf, sz);
if (!ptr) return -1;
buf->m.offset = off;
buf->m.timestamp = rl64(data);
if ((res = fread(ptr, sz, 1, c->f)) != 1) {
fprintf(stderr, "Failed to read frame data: %s\n", strerror(errno));
dav1d_data_unref(buf);

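The demuxer change above stops skipping the IVF timestamp and instead forwards it to the decoder via buf->m.timestamp. Each IVF frame is preceded by a 12-byte little-endian header: a 4-byte frame size followed by an 8-byte timestamp, which is what the two fread calls plus rl32/rl64 parse. A rough standalone sketch of that parse, assuming the same layout (read_ivf_frame_header is an illustrative name):

#include <stdint.h>
#include <stdio.h>

static int read_ivf_frame_header(FILE *f, uint32_t *size, uint64_t *ts) {
    uint8_t hdr[12];
    if (fread(hdr, 12, 1, f) != 1) return -1;            /* EOF or short read */
    *size = (uint32_t)hdr[0]       | (uint32_t)hdr[1] << 8 |
            (uint32_t)hdr[2] << 16 | (uint32_t)hdr[3] << 24;
    *ts = 0;
    for (int i = 7; i >= 0; i--)                          /* little-endian u64 */
        *ts = *ts << 8 | hdr[4 + i];
    return 0;
}
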
View File

@ -148,7 +148,8 @@ static void md5_update(MD5Context *const md5, const uint8_t *data, unsigned len)
}
while (len >= 64) {
md5_body(md5, data);
memcpy(md5->data, data, 64);
md5_body(md5, md5->data);
md5->len += 64;
data += 64;
len -= 64;

View File

@ -37,6 +37,8 @@
typedef struct MuxerPriv {
FILE *f;
int first;
unsigned fps[2];
} Y4m2OutputContext;
static int y4m2_open(Y4m2OutputContext *const c, const char *const file,
@ -49,6 +51,14 @@ static int y4m2_open(Y4m2OutputContext *const c, const char *const file,
return -1;
}
c->first = 1;
c->fps[0] = fps[0];
c->fps[1] = fps[1];
return 0;
}
static int write_header(Y4m2OutputContext *const c, const Dav1dPicture *const p) {
static const char *const ss_names[][2] = {
[DAV1D_PIXEL_LAYOUT_I400] = { "mono", "mono10" },
[DAV1D_PIXEL_LAYOUT_I420] = { NULL, "420p10" },
@ -59,20 +69,26 @@ static int y4m2_open(Y4m2OutputContext *const c, const char *const file,
static const char *const chr_names_8bpc_i420[] = {
[DAV1D_CHR_UNKNOWN] = "420jpeg",
[DAV1D_CHR_VERTICAL] = "420mpeg2",
[DAV1D_CHR_COLOCATED] = "420paldv"
[DAV1D_CHR_COLOCATED] = "420"
};
const char *const ss_name = p->layout == DAV1D_PIXEL_LAYOUT_I420 && p->bpc == 8 ?
chr_names_8bpc_i420[p->chr > 2 ? DAV1D_CHR_UNKNOWN : p->chr] :
ss_names[p->layout][p->bpc > 8];
const char *const ss_name =
p->p.layout == DAV1D_PIXEL_LAYOUT_I420 && p->p.bpc == 8 ?
chr_names_8bpc_i420[p->seq_hdr->chr > 2 ? DAV1D_CHR_UNKNOWN : p->seq_hdr->chr] :
ss_names[p->p.layout][p->p.bpc > 8];
fprintf(c->f, "YUV4MPEG2 W%d H%d C%s Ip F%d:%d\n",
p->w, p->h, ss_name, fps[0], fps[1]);
fprintf(c->f, "YUV4MPEG2 W%d H%d F%d:%d Ip C%s\n",
p->p.w, p->p.h, c->fps[0], c->fps[1], ss_name);
return 0;
}
static int y4m2_write(Y4m2OutputContext *const c, Dav1dPicture *const p) {
if (c->first) {
c->first = 0;
const int res = write_header(c, p);
if (res < 0) return res;
}
fprintf(c->f, "FRAME\n");
uint8_t *ptr;