mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-10-07 09:54:42 +00:00
Bug 1657200 - Update libdav1d to d0e50cac for Firefox 81. r=mjf
Differential Revision: https://phabricator.services.mozilla.com/D88182
This commit is contained in:
parent
14bfc1e8f4
commit
342f755dce
@ -66,6 +66,8 @@ if stack_alignment == 0:
|
||||
DEFINES['STACK_ALIGNMENT'] = stack_alignment
|
||||
|
||||
if CONFIG['CPU_ARCH'] in ('x86', 'x86_64'):
|
||||
ASFLAGS += ['-Dprivate_prefix=dav1d']
|
||||
|
||||
SOURCES += [
|
||||
'../../../third_party/dav1d/src/x86/cpu.c',
|
||||
'../../../third_party/dav1d/src/x86/msac_init.c',
|
||||
|
@ -20,11 +20,11 @@ origin:
|
||||
|
||||
# Human-readable identifier for this version/release
|
||||
# Generally "version NNN", "tag SSS", "bookmark SSS"
|
||||
release: commit 6cf58c8e7deb54e287afeee6710b2a3774eded9c (2020-07-20T15:40:01.000+02:00).
|
||||
release: commit d0e50cacead63e9904dde184580ce9a746374bd5 (2020-08-21T15:13:49.000+02:00).
|
||||
|
||||
# Revision to pull in
|
||||
# Must be a long or short commit SHA (long preferred)
|
||||
revision: 6cf58c8e7deb54e287afeee6710b2a3774eded9c
|
||||
revision: d0e50cacead63e9904dde184580ce9a746374bd5
|
||||
|
||||
# The package's license, where possible using the mnemonic from
|
||||
# https://spdx.org/licenses/
|
||||
|
@ -1,2 +1,2 @@
|
||||
/* auto-generated, do not edit */
|
||||
#define DAV1D_VERSION "0.7.1-36-g6cf58c8"
|
||||
#define DAV1D_VERSION "0.7.1-49-gd0e50ca"
|
||||
|
1
third_party/dav1d/meson.build
vendored
1
third_party/dav1d/meson.build
vendored
@ -351,6 +351,7 @@ cdata.set10('ARCH_X86_64', host_machine.cpu_family() == 'x86_64')
|
||||
cdata.set10('ARCH_X86_32', host_machine.cpu_family() == 'x86')
|
||||
|
||||
if host_machine.cpu_family().startswith('x86')
|
||||
cdata_asm.set('private_prefix', 'dav1d')
|
||||
cdata_asm.set10('ARCH_X86_64', host_machine.cpu_family() == 'x86_64')
|
||||
cdata_asm.set10('ARCH_X86_32', host_machine.cpu_family() == 'x86')
|
||||
cdata_asm.set10('PIC', true)
|
||||
|
89
third_party/dav1d/src/ext/x86/x86inc.asm
vendored
89
third_party/dav1d/src/ext/x86/x86inc.asm
vendored
@ -1,5 +1,5 @@
|
||||
;*****************************************************************************
|
||||
;* x86inc.asm: x264asm abstraction layer
|
||||
;* x86inc.asm: x86 abstraction layer
|
||||
;*****************************************************************************
|
||||
;* Copyright (C) 2005-2020 x264 project
|
||||
;*
|
||||
@ -21,23 +21,14 @@
|
||||
;* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
;*****************************************************************************
|
||||
|
||||
; This is a header file for the x264ASM assembly language, which uses
|
||||
; This is a header file for the x86inc.asm assembly language, which uses
|
||||
; NASM/YASM syntax combined with a large number of macros to provide easy
|
||||
; abstraction between different calling conventions (x86_32, win64, linux64).
|
||||
; It also has various other useful features to simplify writing the kind of
|
||||
; DSP functions that are most often used in x264.
|
||||
|
||||
; Unlike the rest of x264, this file is available under an ISC license, as it
|
||||
; has significant usefulness outside of x264 and we want it to be available
|
||||
; to the largest audience possible. Of course, if you modify it for your own
|
||||
; purposes to add a new feature, we strongly encourage contributing a patch
|
||||
; as this feature might be useful for others as well. Send patches or ideas
|
||||
; to x264-devel@videolan.org .
|
||||
|
||||
%include "config.asm"
|
||||
; DSP functions that are most often used.
|
||||
|
||||
%ifndef private_prefix
|
||||
%define private_prefix dav1d
|
||||
%error private_prefix not defined
|
||||
%endif
|
||||
|
||||
%ifndef public_prefix
|
||||
@ -118,7 +109,7 @@
|
||||
; Macros to eliminate most code duplication between x86_32 and x86_64:
|
||||
; Currently this works only for leaf functions which load all their arguments
|
||||
; into registers at the start, and make no other use of the stack. Luckily that
|
||||
; covers most of x264's asm.
|
||||
; covers most use cases.
|
||||
|
||||
; PROLOGUE:
|
||||
; %1 = number of arguments. loads them from stack if needed.
|
||||
@ -1522,18 +1513,18 @@ AVX_INSTR orps, sse, 1, 0, 1
|
||||
AVX_INSTR pabsb, ssse3
|
||||
AVX_INSTR pabsd, ssse3
|
||||
AVX_INSTR pabsw, ssse3
|
||||
AVX_INSTR packsswb, mmx, 0, 0, 0
|
||||
AVX_INSTR packssdw, mmx, 0, 0, 0
|
||||
AVX_INSTR packuswb, mmx, 0, 0, 0
|
||||
AVX_INSTR packsswb, mmx, 0, 0, 0
|
||||
AVX_INSTR packusdw, sse4, 0, 0, 0
|
||||
AVX_INSTR packuswb, mmx, 0, 0, 0
|
||||
AVX_INSTR paddb, mmx, 0, 0, 1
|
||||
AVX_INSTR paddw, mmx, 0, 0, 1
|
||||
AVX_INSTR paddd, mmx, 0, 0, 1
|
||||
AVX_INSTR paddq, sse2, 0, 0, 1
|
||||
AVX_INSTR paddsb, mmx, 0, 0, 1
|
||||
AVX_INSTR paddsw, mmx, 0, 0, 1
|
||||
AVX_INSTR paddusb, mmx, 0, 0, 1
|
||||
AVX_INSTR paddusw, mmx, 0, 0, 1
|
||||
AVX_INSTR paddw, mmx, 0, 0, 1
|
||||
AVX_INSTR palignr, ssse3, 0, 1, 0
|
||||
AVX_INSTR pand, mmx, 0, 0, 1
|
||||
AVX_INSTR pandn, mmx, 0, 0, 0
|
||||
@ -1541,71 +1532,71 @@ AVX_INSTR pavgb, mmx2, 0, 0, 1
|
||||
AVX_INSTR pavgw, mmx2, 0, 0, 1
|
||||
AVX_INSTR pblendvb, sse4 ; can't be emulated
|
||||
AVX_INSTR pblendw, sse4, 0, 1, 0
|
||||
AVX_INSTR pclmulqdq, fnord, 0, 1, 0
|
||||
AVX_INSTR pclmulhqhqdq, fnord, 0, 0, 0
|
||||
AVX_INSTR pclmulhqlqdq, fnord, 0, 0, 0
|
||||
AVX_INSTR pclmullqhqdq, fnord, 0, 0, 0
|
||||
AVX_INSTR pclmullqlqdq, fnord, 0, 0, 0
|
||||
AVX_INSTR pcmpestri, sse42
|
||||
AVX_INSTR pcmpestrm, sse42
|
||||
AVX_INSTR pcmpistri, sse42
|
||||
AVX_INSTR pcmpistrm, sse42
|
||||
AVX_INSTR pclmulqdq, fnord, 0, 1, 0
|
||||
AVX_INSTR pcmpeqb, mmx, 0, 0, 1
|
||||
AVX_INSTR pcmpeqw, mmx, 0, 0, 1
|
||||
AVX_INSTR pcmpeqd, mmx, 0, 0, 1
|
||||
AVX_INSTR pcmpeqq, sse4, 0, 0, 1
|
||||
AVX_INSTR pcmpeqw, mmx, 0, 0, 1
|
||||
AVX_INSTR pcmpestri, sse42
|
||||
AVX_INSTR pcmpestrm, sse42
|
||||
AVX_INSTR pcmpgtb, mmx, 0, 0, 0
|
||||
AVX_INSTR pcmpgtw, mmx, 0, 0, 0
|
||||
AVX_INSTR pcmpgtd, mmx, 0, 0, 0
|
||||
AVX_INSTR pcmpgtq, sse42, 0, 0, 0
|
||||
AVX_INSTR pcmpgtw, mmx, 0, 0, 0
|
||||
AVX_INSTR pcmpistri, sse42
|
||||
AVX_INSTR pcmpistrm, sse42
|
||||
AVX_INSTR pextrb, sse4
|
||||
AVX_INSTR pextrd, sse4
|
||||
AVX_INSTR pextrq, sse4
|
||||
AVX_INSTR pextrw, mmx2
|
||||
AVX_INSTR phaddw, ssse3, 0, 0, 0
|
||||
AVX_INSTR phaddd, ssse3, 0, 0, 0
|
||||
AVX_INSTR phaddsw, ssse3, 0, 0, 0
|
||||
AVX_INSTR phaddw, ssse3, 0, 0, 0
|
||||
AVX_INSTR phminposuw, sse4
|
||||
AVX_INSTR phsubw, ssse3, 0, 0, 0
|
||||
AVX_INSTR phsubd, ssse3, 0, 0, 0
|
||||
AVX_INSTR phsubsw, ssse3, 0, 0, 0
|
||||
AVX_INSTR phsubw, ssse3, 0, 0, 0
|
||||
AVX_INSTR pinsrb, sse4, 0, 1, 0
|
||||
AVX_INSTR pinsrd, sse4, 0, 1, 0
|
||||
AVX_INSTR pinsrq, sse4, 0, 1, 0
|
||||
AVX_INSTR pinsrw, mmx2, 0, 1, 0
|
||||
AVX_INSTR pmaddwd, mmx, 0, 0, 1
|
||||
AVX_INSTR pmaddubsw, ssse3, 0, 0, 0
|
||||
AVX_INSTR pmaddwd, mmx, 0, 0, 1
|
||||
AVX_INSTR pmaxsb, sse4, 0, 0, 1
|
||||
AVX_INSTR pmaxsw, mmx2, 0, 0, 1
|
||||
AVX_INSTR pmaxsd, sse4, 0, 0, 1
|
||||
AVX_INSTR pmaxsw, mmx2, 0, 0, 1
|
||||
AVX_INSTR pmaxub, mmx2, 0, 0, 1
|
||||
AVX_INSTR pmaxuw, sse4, 0, 0, 1
|
||||
AVX_INSTR pmaxud, sse4, 0, 0, 1
|
||||
AVX_INSTR pmaxuw, sse4, 0, 0, 1
|
||||
AVX_INSTR pminsb, sse4, 0, 0, 1
|
||||
AVX_INSTR pminsw, mmx2, 0, 0, 1
|
||||
AVX_INSTR pminsd, sse4, 0, 0, 1
|
||||
AVX_INSTR pminsw, mmx2, 0, 0, 1
|
||||
AVX_INSTR pminub, mmx2, 0, 0, 1
|
||||
AVX_INSTR pminuw, sse4, 0, 0, 1
|
||||
AVX_INSTR pminud, sse4, 0, 0, 1
|
||||
AVX_INSTR pminuw, sse4, 0, 0, 1
|
||||
AVX_INSTR pmovmskb, mmx2
|
||||
AVX_INSTR pmovsxbw, sse4
|
||||
AVX_INSTR pmovsxbd, sse4
|
||||
AVX_INSTR pmovsxbq, sse4
|
||||
AVX_INSTR pmovsxbw, sse4
|
||||
AVX_INSTR pmovsxdq, sse4
|
||||
AVX_INSTR pmovsxwd, sse4
|
||||
AVX_INSTR pmovsxwq, sse4
|
||||
AVX_INSTR pmovsxdq, sse4
|
||||
AVX_INSTR pmovzxbw, sse4
|
||||
AVX_INSTR pmovzxbd, sse4
|
||||
AVX_INSTR pmovzxbq, sse4
|
||||
AVX_INSTR pmovzxbw, sse4
|
||||
AVX_INSTR pmovzxdq, sse4
|
||||
AVX_INSTR pmovzxwd, sse4
|
||||
AVX_INSTR pmovzxwq, sse4
|
||||
AVX_INSTR pmovzxdq, sse4
|
||||
AVX_INSTR pmuldq, sse4, 0, 0, 1
|
||||
AVX_INSTR pmulhrsw, ssse3, 0, 0, 1
|
||||
AVX_INSTR pmulhuw, mmx2, 0, 0, 1
|
||||
AVX_INSTR pmulhw, mmx, 0, 0, 1
|
||||
AVX_INSTR pmullw, mmx, 0, 0, 1
|
||||
AVX_INSTR pmulld, sse4, 0, 0, 1
|
||||
AVX_INSTR pmullw, mmx, 0, 0, 1
|
||||
AVX_INSTR pmuludq, sse2, 0, 0, 1
|
||||
AVX_INSTR por, mmx, 0, 0, 1
|
||||
AVX_INSTR psadbw, mmx2, 0, 0, 1
|
||||
@ -1614,35 +1605,35 @@ AVX_INSTR pshufd, sse2
|
||||
AVX_INSTR pshufhw, sse2
|
||||
AVX_INSTR pshuflw, sse2
|
||||
AVX_INSTR psignb, ssse3, 0, 0, 0
|
||||
AVX_INSTR psignw, ssse3, 0, 0, 0
|
||||
AVX_INSTR psignd, ssse3, 0, 0, 0
|
||||
AVX_INSTR psllw, mmx, 0, 0, 0
|
||||
AVX_INSTR psignw, ssse3, 0, 0, 0
|
||||
AVX_INSTR pslld, mmx, 0, 0, 0
|
||||
AVX_INSTR psllq, mmx, 0, 0, 0
|
||||
AVX_INSTR pslldq, sse2, 0, 0, 0
|
||||
AVX_INSTR psraw, mmx, 0, 0, 0
|
||||
AVX_INSTR psllq, mmx, 0, 0, 0
|
||||
AVX_INSTR psllw, mmx, 0, 0, 0
|
||||
AVX_INSTR psrad, mmx, 0, 0, 0
|
||||
AVX_INSTR psrlw, mmx, 0, 0, 0
|
||||
AVX_INSTR psraw, mmx, 0, 0, 0
|
||||
AVX_INSTR psrld, mmx, 0, 0, 0
|
||||
AVX_INSTR psrlq, mmx, 0, 0, 0
|
||||
AVX_INSTR psrldq, sse2, 0, 0, 0
|
||||
AVX_INSTR psrlq, mmx, 0, 0, 0
|
||||
AVX_INSTR psrlw, mmx, 0, 0, 0
|
||||
AVX_INSTR psubb, mmx, 0, 0, 0
|
||||
AVX_INSTR psubw, mmx, 0, 0, 0
|
||||
AVX_INSTR psubd, mmx, 0, 0, 0
|
||||
AVX_INSTR psubq, sse2, 0, 0, 0
|
||||
AVX_INSTR psubsb, mmx, 0, 0, 0
|
||||
AVX_INSTR psubsw, mmx, 0, 0, 0
|
||||
AVX_INSTR psubusb, mmx, 0, 0, 0
|
||||
AVX_INSTR psubusw, mmx, 0, 0, 0
|
||||
AVX_INSTR psubw, mmx, 0, 0, 0
|
||||
AVX_INSTR ptest, sse4
|
||||
AVX_INSTR punpckhbw, mmx, 0, 0, 0
|
||||
AVX_INSTR punpckhwd, mmx, 0, 0, 0
|
||||
AVX_INSTR punpckhdq, mmx, 0, 0, 0
|
||||
AVX_INSTR punpckhqdq, sse2, 0, 0, 0
|
||||
AVX_INSTR punpckhwd, mmx, 0, 0, 0
|
||||
AVX_INSTR punpcklbw, mmx, 0, 0, 0
|
||||
AVX_INSTR punpcklwd, mmx, 0, 0, 0
|
||||
AVX_INSTR punpckldq, mmx, 0, 0, 0
|
||||
AVX_INSTR punpcklqdq, sse2, 0, 0, 0
|
||||
AVX_INSTR punpcklwd, mmx, 0, 0, 0
|
||||
AVX_INSTR pxor, mmx, 0, 0, 1
|
||||
AVX_INSTR rcpps, sse, 1
|
||||
AVX_INSTR rcpss, sse, 1, 0, 0
|
||||
@ -1674,8 +1665,8 @@ AVX_INSTR xorps, sse, 1, 0, 1
|
||||
|
||||
; 3DNow instructions, for sharing code between AVX, SSE and 3DN
|
||||
AVX_INSTR pfadd, 3dnow, 1, 0, 1
|
||||
AVX_INSTR pfsub, 3dnow, 1, 0, 0
|
||||
AVX_INSTR pfmul, 3dnow, 1, 0, 1
|
||||
AVX_INSTR pfsub, 3dnow, 1, 0, 0
|
||||
|
||||
;%1 == instruction
|
||||
;%2 == minimal instruction set
|
||||
@ -1740,9 +1731,9 @@ GPR_INSTR shrx, bmi2
|
||||
%endmacro
|
||||
%endmacro
|
||||
|
||||
FMA_INSTR pmacsww, pmullw, paddw
|
||||
FMA_INSTR pmacsdd, pmulld, paddd ; sse4 emulation
|
||||
FMA_INSTR pmacsdql, pmuldq, paddq ; sse4 emulation
|
||||
FMA_INSTR pmacsdd, pmulld, paddd ; sse4 emulation
|
||||
FMA_INSTR pmacsdql, pmuldq, paddq ; sse4 emulation
|
||||
FMA_INSTR pmacsww, pmullw, paddw
|
||||
FMA_INSTR pmadcswd, pmaddwd, paddd
|
||||
|
||||
; Macros for consolidating FMA3 and FMA4 using 4-operand (dst, src1, src2, src3) syntax.
|
||||
|
23
third_party/dav1d/src/obu.c
vendored
23
third_party/dav1d/src/obu.c
vendored
@ -1198,7 +1198,6 @@ int dav1d_parse_obus(Dav1dContext *const c, Dav1dData *const in, const int globa
|
||||
|
||||
const unsigned init_bit_pos = dav1d_get_bits_pos(&gb);
|
||||
const unsigned init_byte_pos = init_bit_pos >> 3;
|
||||
const unsigned pkt_bytelen = init_byte_pos + len;
|
||||
|
||||
// We must have read a whole number of bytes at this point (1 byte
|
||||
// for the header and whole bytes at a time when reading the
|
||||
@ -1342,6 +1341,7 @@ int dav1d_parse_obus(Dav1dContext *const c, Dav1dData *const in, const int globa
|
||||
// The current bit position is a multiple of 8 (because we
|
||||
// just aligned it) and less than 8*pkt_bytelen because
|
||||
// otherwise the overrun check would have fired.
|
||||
const unsigned pkt_bytelen = init_byte_pos + len;
|
||||
const unsigned bit_pos = dav1d_get_bits_pos(&gb);
|
||||
assert((bit_pos & 7) == 0);
|
||||
assert(pkt_bytelen >= (bit_pos >> 3));
|
||||
@ -1368,17 +1368,12 @@ int dav1d_parse_obus(Dav1dContext *const c, Dav1dData *const in, const int globa
|
||||
const enum ObuMetaType meta_type = dav1d_get_uleb128(&gb);
|
||||
const int meta_type_len = (dav1d_get_bits_pos(&gb) - init_bit_pos) >> 3;
|
||||
if (gb.error) goto error;
|
||||
Dav1dRef *ref;
|
||||
Dav1dContentLightLevel *content_light;
|
||||
Dav1dMasteringDisplay *mastering_display;
|
||||
Dav1dITUTT35 *itut_t35_metadata;
|
||||
|
||||
switch (meta_type) {
|
||||
case OBU_META_HDR_CLL:
|
||||
ref = dav1d_ref_create(sizeof(Dav1dContentLightLevel));
|
||||
case OBU_META_HDR_CLL: {
|
||||
Dav1dRef *ref = dav1d_ref_create(sizeof(Dav1dContentLightLevel));
|
||||
if (!ref) return DAV1D_ERR(ENOMEM);
|
||||
content_light = ref->data;
|
||||
memset(content_light, 0, sizeof(*content_light));
|
||||
Dav1dContentLightLevel *const content_light = ref->data;
|
||||
|
||||
content_light->max_content_light_level = dav1d_get_bits(&gb, 16);
|
||||
content_light->max_frame_average_light_level = dav1d_get_bits(&gb, 16);
|
||||
@ -1395,11 +1390,11 @@ int dav1d_parse_obus(Dav1dContext *const c, Dav1dData *const in, const int globa
|
||||
c->content_light = content_light;
|
||||
c->content_light_ref = ref;
|
||||
break;
|
||||
}
|
||||
case OBU_META_HDR_MDCV: {
|
||||
ref = dav1d_ref_create(sizeof(Dav1dMasteringDisplay));
|
||||
Dav1dRef *ref = dav1d_ref_create(sizeof(Dav1dMasteringDisplay));
|
||||
if (!ref) return DAV1D_ERR(ENOMEM);
|
||||
mastering_display = ref->data;
|
||||
memset(mastering_display, 0, sizeof(*mastering_display));
|
||||
Dav1dMasteringDisplay *const mastering_display = ref->data;
|
||||
|
||||
for (int i = 0; i < 3; i++) {
|
||||
mastering_display->primaries[i][0] = dav1d_get_bits(&gb, 16);
|
||||
@ -1447,9 +1442,9 @@ int dav1d_parse_obus(Dav1dContext *const c, Dav1dData *const in, const int globa
|
||||
goto error;
|
||||
}
|
||||
|
||||
ref = dav1d_ref_create(sizeof(Dav1dITUTT35) + payload_size * sizeof(uint8_t));
|
||||
Dav1dRef *ref = dav1d_ref_create(sizeof(Dav1dITUTT35) + payload_size * sizeof(uint8_t));
|
||||
if (!ref) return DAV1D_ERR(ENOMEM);
|
||||
itut_t35_metadata = ref->data;
|
||||
Dav1dITUTT35 *const itut_t35_metadata = ref->data;
|
||||
|
||||
// We need our public headers to be C++ compatible, so payload can't be
|
||||
// a flexible array member
|
||||
|
1
third_party/dav1d/src/x86/cdef_avx2.asm
vendored
1
third_party/dav1d/src/x86/cdef_avx2.asm
vendored
@ -23,6 +23,7 @@
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
%include "config.asm"
|
||||
%include "ext/x86/x86inc.asm"
|
||||
|
||||
%if ARCH_X86_64
|
||||
|
1
third_party/dav1d/src/x86/cdef_avx512.asm
vendored
1
third_party/dav1d/src/x86/cdef_avx512.asm
vendored
@ -23,6 +23,7 @@
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
%include "config.asm"
|
||||
%include "ext/x86/x86inc.asm"
|
||||
|
||||
%if HAVE_AVX512ICL && ARCH_X86_64
|
||||
|
1
third_party/dav1d/src/x86/cdef_sse.asm
vendored
1
third_party/dav1d/src/x86/cdef_sse.asm
vendored
@ -24,6 +24,7 @@
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
%include "config.asm"
|
||||
%include "ext/x86/x86inc.asm"
|
||||
|
||||
SECTION_RODATA 16
|
||||
|
1
third_party/dav1d/src/x86/cpuid.asm
vendored
1
third_party/dav1d/src/x86/cpuid.asm
vendored
@ -23,6 +23,7 @@
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
%include "config.asm"
|
||||
%include "ext/x86/x86inc.asm"
|
||||
|
||||
SECTION .text
|
||||
|
1
third_party/dav1d/src/x86/film_grain.asm
vendored
1
third_party/dav1d/src/x86/film_grain.asm
vendored
@ -23,6 +23,7 @@
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
%include "config.asm"
|
||||
%include "ext/x86/x86inc.asm"
|
||||
|
||||
%if ARCH_X86_64
|
||||
|
@ -23,6 +23,7 @@
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
%include "config.asm"
|
||||
%include "ext/x86/x86inc.asm"
|
||||
|
||||
SECTION_RODATA
|
||||
|
1
third_party/dav1d/src/x86/ipred.asm
vendored
1
third_party/dav1d/src/x86/ipred.asm
vendored
@ -23,6 +23,7 @@
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
%include "config.asm"
|
||||
%include "ext/x86/x86inc.asm"
|
||||
|
||||
%if ARCH_X86_64
|
||||
|
1
third_party/dav1d/src/x86/ipred_ssse3.asm
vendored
1
third_party/dav1d/src/x86/ipred_ssse3.asm
vendored
@ -23,6 +23,7 @@
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
%include "config.asm"
|
||||
%include "ext/x86/x86inc.asm"
|
||||
|
||||
SECTION_RODATA 16
|
||||
|
1
third_party/dav1d/src/x86/itx.asm
vendored
1
third_party/dav1d/src/x86/itx.asm
vendored
@ -23,6 +23,7 @@
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
%include "config.asm"
|
||||
%include "ext/x86/x86inc.asm"
|
||||
|
||||
%if ARCH_X86_64
|
||||
|
1
third_party/dav1d/src/x86/itx_ssse3.asm
vendored
1
third_party/dav1d/src/x86/itx_ssse3.asm
vendored
@ -23,6 +23,7 @@
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
%include "config.asm"
|
||||
%include "ext/x86/x86inc.asm"
|
||||
|
||||
|
||||
|
1
third_party/dav1d/src/x86/loopfilter.asm
vendored
1
third_party/dav1d/src/x86/loopfilter.asm
vendored
@ -23,6 +23,7 @@
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
%include "config.asm"
|
||||
%include "ext/x86/x86inc.asm"
|
||||
|
||||
%if ARCH_X86_64
|
||||
|
@ -23,6 +23,7 @@
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
%include "config.asm"
|
||||
%include "ext/x86/x86inc.asm"
|
||||
|
||||
%if ARCH_X86_64
|
||||
|
@ -24,6 +24,7 @@
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
%include "config.asm"
|
||||
%include "ext/x86/x86inc.asm"
|
||||
|
||||
SECTION_RODATA 16
|
||||
|
509
third_party/dav1d/src/x86/mc_avx2.asm
vendored
509
third_party/dav1d/src/x86/mc_avx2.asm
vendored
@ -23,6 +23,7 @@
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
%include "config.asm"
|
||||
%include "ext/x86/x86inc.asm"
|
||||
|
||||
%if ARCH_X86_64
|
||||
@ -2766,20 +2767,20 @@ cglobal prep_8tap, 3, 8, 0, tmp, src, stride, w, h, mx, my, stride3
|
||||
%ifidn %1, put
|
||||
%assign isprep 0
|
||||
%if required_stack_alignment <= STACK_ALIGNMENT
|
||||
cglobal put_8tap_scaled, 4, 15, 16, 96, dst, ds, src, ss, w, h, mx, my, dx, dy
|
||||
cglobal put_8tap_scaled, 4, 15, 16, 112, dst, ds, src, ss, w, h, mx, my, dx, dy
|
||||
%else
|
||||
cglobal put_8tap_scaled, 4, 14, 16, 112, dst, ds, src, ss, w, h, mx, my, dx, dy
|
||||
cglobal put_8tap_scaled, 4, 14, 16, 128, dst, ds, src, ss, w, h, mx, my, dx, dy
|
||||
%endif
|
||||
%xdefine base_reg r12
|
||||
%define rndshift 10
|
||||
%else
|
||||
%assign isprep 1
|
||||
%if required_stack_alignment <= STACK_ALIGNMENT
|
||||
cglobal prep_8tap_scaled, 4, 15, 16, 112, tmp, src, ss, w, h, mx, my, dx, dy
|
||||
cglobal prep_8tap_scaled, 4, 15, 16, 128, tmp, src, ss, w, h, mx, my, dx, dy
|
||||
%xdefine tmp_stridem r14q
|
||||
%else
|
||||
cglobal prep_8tap_scaled, 4, 14, 16, 112, tmp, src, ss, w, h, mx, my, dx, dy
|
||||
%define tmp_stridem qword [rsp+104]
|
||||
cglobal prep_8tap_scaled, 4, 14, 16, 128, tmp, src, ss, w, h, mx, my, dx, dy
|
||||
%define tmp_stridem qword [rsp+120]
|
||||
%endif
|
||||
%xdefine base_reg r11
|
||||
%define rndshift 6
|
||||
@ -2808,7 +2809,7 @@ cglobal prep_8tap_scaled, 4, 14, 16, 112, tmp, src, ss, w, h, mx, my, dx, dy
|
||||
%define hm r6m
|
||||
%endif
|
||||
%if required_stack_alignment > STACK_ALIGNMENT
|
||||
%define dsm [rsp+96]
|
||||
%define dsm [rsp+112]
|
||||
%define rX r1
|
||||
%define rXd r1d
|
||||
%else
|
||||
@ -2824,7 +2825,7 @@ cglobal prep_8tap_scaled, 4, 14, 16, 112, tmp, src, ss, w, h, mx, my, dx, dy
|
||||
%define dxm r7m
|
||||
%else
|
||||
DEFINE_ARGS tmp, src, ss, w, h, _, my, dx, dy, ss3
|
||||
%define hm [rsp+96]
|
||||
%define hm [rsp+112]
|
||||
%endif
|
||||
MCT_8TAP_SCALED_REMAP_REGS_TO_PREV
|
||||
%define rX r14
|
||||
@ -3104,181 +3105,9 @@ cglobal prep_8tap_scaled, 4, 14, 16, 112, tmp, src, ss, w, h, mx, my, dx, dy
|
||||
lea srcq, [srcq+ssq*2]
|
||||
jmp .w4_loop
|
||||
.w8:
|
||||
%ifidn %1, put
|
||||
movifnidn dsm, dsq
|
||||
%endif
|
||||
shr t0d, 16
|
||||
sub srcq, 3
|
||||
movd xm15, t0d
|
||||
pmaddwd m8, [base+rescale_mul]
|
||||
vpbroadcastq m11, [base+pq_0x40000000]
|
||||
vpbroadcastd m15, xm15
|
||||
paddd m14, m8 ; mx+dx*[0-7]
|
||||
pand m6, m14, m10
|
||||
psrld m6, 6
|
||||
paddd m15, m6
|
||||
pcmpeqd m6, m9
|
||||
vextracti128 xm7, m15, 1
|
||||
movd r4d, xm15
|
||||
pextrd r6d, xm15, 2
|
||||
pextrd r7d, xm15, 1
|
||||
pextrd r9d, xm15, 3
|
||||
movd r10d, xm7
|
||||
pextrd r11d, xm7, 2
|
||||
pextrd r13d, xm7, 1
|
||||
pextrd rXd, xm7, 3
|
||||
movq xm15, [base+subpel_filters+r4*8]
|
||||
movq xm10, [base+subpel_filters+r6*8]
|
||||
movhps xm15, [base+subpel_filters+r7*8]
|
||||
movhps xm10, [base+subpel_filters+r9*8]
|
||||
vinserti128 m15, [base+subpel_filters+r10*8], 1
|
||||
vinserti128 m10, [base+subpel_filters+r11*8], 1
|
||||
vpbroadcastq m9, [base+subpel_filters+r13*8]
|
||||
vpbroadcastq m8, [base+subpel_filters+rX*8]
|
||||
psrld m14, 10
|
||||
mova [rsp], xm14
|
||||
vextracti128 xm7, m14, 1
|
||||
movd r4d, xm14
|
||||
pextrd r6d, xm14, 2
|
||||
pextrd r7d, xm14, 1
|
||||
pextrd r9d, xm14, 3
|
||||
movd r10d, xm7
|
||||
pextrd r11d, xm7, 2
|
||||
pextrd r13d, xm7, 1
|
||||
pextrd rXd, xm7, 3
|
||||
pshufd m5, m6, q1100
|
||||
pshufd m6, m6, q3322
|
||||
vpblendd m15, m9, 0xc0
|
||||
vpblendd m10, m8, 0xc0
|
||||
pblendvb m15, m11, m5
|
||||
pblendvb m10, m11, m6
|
||||
vbroadcasti128 m14, [base+subpel_s_shuf8]
|
||||
MC_8TAP_SCALED_H 0, 1, 2, 3, 4, 5, 6, 7 ; 0a 1a 0b 1b
|
||||
MC_8TAP_SCALED_H 1, 2, 3, 4, 5, 6, 7, 8 ; 2a 3a 2b 3b
|
||||
MC_8TAP_SCALED_H 2, 3, 4, 5, 6, 7, 8, 9 ; 4a 5a 4b 5b
|
||||
MC_8TAP_SCALED_H 3, 4, 5, 6, 7, 8, 9, 11 ; 6a 7a 6b 7b
|
||||
mov myd, mym
|
||||
mov dyd, dym
|
||||
pshufb m0, m14 ; 01a 01b
|
||||
pshufb m1, m14 ; 23a 23b
|
||||
pshufb m2, m14 ; 45a 45b
|
||||
pshufb m3, m14 ; 67a 67b
|
||||
vbroadcasti128 m14, [base+wswap]
|
||||
.w8_loop:
|
||||
and myd, 0x3ff
|
||||
mov r6d, 64 << 24
|
||||
mov r4d, myd
|
||||
shr r4d, 6
|
||||
lea r4d, [t1+r4]
|
||||
cmovnz r6q, [base+subpel_filters+r4*8]
|
||||
movq xm11, r6q
|
||||
punpcklbw xm11, xm11
|
||||
psraw xm11, 8
|
||||
vinserti128 m11, xm11, 1
|
||||
pshufd m8, m11, q0000
|
||||
pshufd m9, m11, q1111
|
||||
pmaddwd m4, m0, m8
|
||||
pmaddwd m5, m1, m9
|
||||
pshufd m8, m11, q2222
|
||||
pshufd m11, m11, q3333
|
||||
pmaddwd m6, m2, m8
|
||||
pmaddwd m7, m3, m11
|
||||
paddd m4, m5
|
||||
paddd m6, m7
|
||||
paddd m4, m13
|
||||
paddd m4, m6
|
||||
psrad m4, rndshift
|
||||
vextracti128 xm5, m4, 1
|
||||
packssdw xm4, xm5
|
||||
%ifidn %1, put
|
||||
packuswb xm4, xm4
|
||||
movq [dstq], xm4
|
||||
add dstq, dsm
|
||||
%else
|
||||
mova [tmpq], xm4
|
||||
add tmpq, 16
|
||||
%endif
|
||||
dec hd
|
||||
jz .ret
|
||||
add myd, dyd
|
||||
test myd, ~0x3ff
|
||||
jz .w8_loop
|
||||
test myd, 0x400
|
||||
mov [rsp+16], myd
|
||||
mov r4d, [rsp+ 0]
|
||||
mov r6d, [rsp+ 8]
|
||||
mov r7d, [rsp+ 4]
|
||||
mov r9d, [rsp+12]
|
||||
jz .w8_skip_line
|
||||
vpbroadcastq m6, [srcq+r13]
|
||||
vpbroadcastq m7, [srcq+ rX]
|
||||
movq xm4, [srcq+ r4]
|
||||
movq xm5, [srcq+ r6]
|
||||
movhps xm4, [srcq+ r7]
|
||||
movhps xm5, [srcq+ r9]
|
||||
vinserti128 m4, [srcq+r10], 1
|
||||
vinserti128 m5, [srcq+r11], 1
|
||||
add srcq, ssq
|
||||
mov myd, [rsp+16]
|
||||
mov dyd, dym
|
||||
pshufb m0, m14
|
||||
pshufb m1, m14
|
||||
pshufb m2, m14
|
||||
pshufb m3, m14
|
||||
vpblendd m4, m6, 0xc0
|
||||
vpblendd m5, m7, 0xc0
|
||||
pmaddubsw m4, m15
|
||||
pmaddubsw m5, m10
|
||||
phaddw m4, m5
|
||||
pslld m5, m4, 16
|
||||
paddw m4, m5
|
||||
pmulhrsw m4, m12
|
||||
pblendw m0, m1, 0xaa
|
||||
pblendw m1, m2, 0xaa
|
||||
pblendw m2, m3, 0xaa
|
||||
pblendw m3, m4, 0xaa
|
||||
jmp .w8_loop
|
||||
.w8_skip_line:
|
||||
mova m0, m1
|
||||
mova m1, m2
|
||||
mova m2, m3
|
||||
vpbroadcastq m7, [srcq+r13]
|
||||
vpbroadcastq m8, [srcq+ rX]
|
||||
movq xm3, [srcq+ r4]
|
||||
movq xm4, [srcq+ r6]
|
||||
movhps xm3, [srcq+ r7]
|
||||
movhps xm4, [srcq+ r9]
|
||||
vinserti128 m3, [srcq+r10], 1
|
||||
vinserti128 m4, [srcq+r11], 1
|
||||
add srcq, ssq
|
||||
movq xm5, [srcq+ r4]
|
||||
movq xm6, [srcq+ r6]
|
||||
movhps xm5, [srcq+ r7]
|
||||
movhps xm6, [srcq+ r9]
|
||||
vinserti128 m5, [srcq+r10], 1
|
||||
vinserti128 m6, [srcq+r11], 1
|
||||
vpbroadcastq m9, [srcq+r13]
|
||||
vpbroadcastq m11, [srcq+ rX]
|
||||
add srcq, ssq
|
||||
mov myd, [rsp+16]
|
||||
mov dyd, dym
|
||||
vpblendd m3, m7, 0xc0
|
||||
vpblendd m4, m8, 0xc0
|
||||
vpblendd m5, m9, 0xc0
|
||||
vpblendd m6, m11, 0xc0
|
||||
pmaddubsw m3, m15
|
||||
pmaddubsw m4, m10
|
||||
pmaddubsw m5, m15
|
||||
pmaddubsw m6, m10
|
||||
phaddw m3, m4
|
||||
phaddw m5, m6
|
||||
psrld m4, m3, 16
|
||||
pslld m6, m5, 16
|
||||
paddw m3, m4
|
||||
paddw m5, m6
|
||||
pblendw m3, m5, 0xaa
|
||||
pmulhrsw m3, m12
|
||||
jmp .w8_loop
|
||||
mov dword [rsp+48], 1
|
||||
movifprep tmp_stridem, 16
|
||||
jmp .w_start
|
||||
.w16:
|
||||
mov dword [rsp+48], 2
|
||||
movifprep tmp_stridem, 32
|
||||
@ -3698,127 +3527,9 @@ cglobal prep_8tap_scaled, 4, 14, 16, 112, tmp, src, ss, w, h, mx, my, dx, dy
|
||||
jg .dy1_w4_loop
|
||||
MC_8TAP_SCALED_RET
|
||||
.dy1_w8:
|
||||
%ifidn %1, put
|
||||
movifnidn dsm, dsq
|
||||
%endif
|
||||
shr t0d, 16
|
||||
sub srcq, 3
|
||||
movd xm15, t0d
|
||||
pmaddwd m8, [base+rescale_mul]
|
||||
vpbroadcastq m11, [base+pq_0x40000000]
|
||||
vpbroadcastd m15, xm15
|
||||
paddd m14, m8 ; mx+dx*[0-7]
|
||||
pand m6, m14, m10
|
||||
psrld m6, 6
|
||||
paddd m15, m6
|
||||
pcmpeqd m6, m9
|
||||
vextracti128 xm7, m15, 1
|
||||
movd r4d, xm15
|
||||
pextrd r6d, xm15, 2
|
||||
pextrd r7d, xm15, 1
|
||||
pextrd r9d, xm15, 3
|
||||
movd r10d, xm7
|
||||
pextrd r11d, xm7, 2
|
||||
pextrd r13d, xm7, 1
|
||||
pextrd rXd, xm7, 3
|
||||
movq xm15, [base+subpel_filters+ r4*8]
|
||||
movq xm10, [base+subpel_filters+ r6*8]
|
||||
movhps xm15, [base+subpel_filters+ r7*8]
|
||||
movhps xm10, [base+subpel_filters+ r9*8]
|
||||
vinserti128 m15, [base+subpel_filters+r10*8], 1
|
||||
vinserti128 m10, [base+subpel_filters+r11*8], 1
|
||||
vpbroadcastq m9, [base+subpel_filters+r13*8]
|
||||
vpbroadcastq m8, [base+subpel_filters+ rX*8]
|
||||
psrld m14, 10
|
||||
vextracti128 xm7, m14, 1
|
||||
movd r4d, xm14
|
||||
pextrd r6d, xm14, 2
|
||||
pextrd r7d, xm14, 1
|
||||
pextrd r9d, xm14, 3
|
||||
movd r10d, xm7
|
||||
pextrd r11d, xm7, 2
|
||||
pextrd r13d, xm7, 1
|
||||
pextrd rXd, xm7, 3
|
||||
mov [rsp+32], r7d
|
||||
pshufd m5, m6, q1100
|
||||
pshufd m6, m6, q3322
|
||||
vpblendd m15, m9, 0xc0
|
||||
vpblendd m10, m8, 0xc0
|
||||
pblendvb m15, m11, m5
|
||||
pblendvb m10, m11, m6
|
||||
vbroadcasti128 m14, [base+subpel_s_shuf8]
|
||||
MC_8TAP_SCALED_H 0, 1, 2, 3, 4, 5, 6, 7 ; 0a 1a 0b 1b
|
||||
MC_8TAP_SCALED_H 1, 2, 3, 4, 5, 6, 7, 8 ; 2a 3a 2b 3b
|
||||
MC_8TAP_SCALED_H 2, 3, 4, 5, 6, 7, 8, 9 ; 4a 5a 4b 5b
|
||||
MC_8TAP_SCALED_H 3, 4, 5, 6, 7, 8, 9, 11 ; 6a 7a 6b 7b
|
||||
mov myd, mym
|
||||
movu [rsp], m10
|
||||
pshufb m0, m14 ; 01a 01b
|
||||
pshufb m1, m14 ; 23a 23b
|
||||
pshufb m2, m14 ; 45a 45b
|
||||
pshufb m3, m14 ; 67a 67b
|
||||
shr myd, 6
|
||||
lea myd, [t1+myq]
|
||||
mov t1d, 64 << 24
|
||||
cmovnz t1q, [base+subpel_filters+myq*8]
|
||||
vbroadcasti128 m14, [base+wswap]
|
||||
movq xm11, t1q
|
||||
punpcklbw xm11, xm11
|
||||
psraw xm11, 8
|
||||
vinserti128 m11, xm11, 1
|
||||
mov r7d, [rsp+32]
|
||||
pshufd m8, m11, q0000
|
||||
pshufd m9, m11, q1111
|
||||
pshufd m10, m11, q2222
|
||||
pshufd m11, m11, q3333
|
||||
.dy1_w8_loop:
|
||||
pmaddwd m4, m0, m8
|
||||
pmaddwd m5, m1, m9
|
||||
pmaddwd m6, m2, m10
|
||||
pmaddwd m7, m3, m11
|
||||
paddd m4, m5
|
||||
paddd m6, m7
|
||||
paddd m4, m13
|
||||
paddd m4, m6
|
||||
psrad m4, rndshift
|
||||
vextracti128 xm5, m4, 1
|
||||
packssdw xm4, xm5
|
||||
%ifidn %1, put
|
||||
packuswb xm4, xm4
|
||||
movq [dstq], xm4
|
||||
add dstq, dsm
|
||||
%else
|
||||
mova [tmpq], xm4
|
||||
add tmpq, 16
|
||||
%endif
|
||||
dec hd
|
||||
jz .ret
|
||||
movq xm4, [srcq+ r4]
|
||||
movq xm5, [srcq+ r6]
|
||||
movhps xm4, [srcq+ r7]
|
||||
movhps xm5, [srcq+ r9]
|
||||
vinserti128 m4, [srcq+r10], 1
|
||||
vinserti128 m5, [srcq+r11], 1
|
||||
vpbroadcastq m6, [srcq+r13]
|
||||
vpbroadcastq m7, [srcq+ rX]
|
||||
add srcq, ssq
|
||||
pshufb m0, m14
|
||||
pshufb m1, m14
|
||||
pshufb m2, m14
|
||||
pshufb m3, m14
|
||||
vpblendd m4, m6, 0xc0
|
||||
vpblendd m5, m7, 0xc0
|
||||
pmaddubsw m4, m15
|
||||
pmaddubsw m5, [rsp]
|
||||
phaddw m4, m5
|
||||
pslld m5, m4, 16
|
||||
paddw m4, m5
|
||||
pmulhrsw m4, m12
|
||||
pblendw m0, m1, 0xaa
|
||||
pblendw m1, m2, 0xaa
|
||||
pblendw m2, m3, 0xaa
|
||||
pblendw m3, m4, 0xaa
|
||||
jmp .dy1_w8_loop
|
||||
mov dword [rsp+72], 1
|
||||
movifprep tmp_stridem, 16
|
||||
jmp .dy1_w_start
|
||||
.dy1_w16:
|
||||
mov dword [rsp+72], 2
|
||||
movifprep tmp_stridem, 32
|
||||
@ -3835,11 +3546,16 @@ cglobal prep_8tap_scaled, 4, 14, 16, 112, tmp, src, ss, w, h, mx, my, dx, dy
|
||||
mov dword [rsp+72], 16
|
||||
movifprep tmp_stridem, 256
|
||||
.dy1_w_start:
|
||||
mov myd, mym
|
||||
%ifidn %1, put
|
||||
movifnidn dsm, dsq
|
||||
%endif
|
||||
shr t0d, 16
|
||||
sub srcq, 3
|
||||
shr myd, 6
|
||||
mov r4d, 64 << 24
|
||||
lea myd, [t1+myq]
|
||||
cmovnz r4q, [base+subpel_filters+myq*8]
|
||||
pmaddwd m8, [base+rescale_mul]
|
||||
movd xm15, t0d
|
||||
mov [rsp+76], t0d
|
||||
@ -3851,6 +3567,10 @@ cglobal prep_8tap_scaled, 4, 14, 16, 112, tmp, src, ss, w, h, mx, my, dx, dy
|
||||
shl dword dxm, 3 ; dx*8
|
||||
vpbroadcastd m15, xm15
|
||||
paddd m14, m8 ; mx+dx*[0-7]
|
||||
movq xm0, r4q
|
||||
punpcklbw xm0, xm0
|
||||
psraw xm0, 8
|
||||
mova [rsp+96], xm0
|
||||
jmp .dy1_hloop
|
||||
.dy1_hloop_prep:
|
||||
dec dword [rsp+72]
|
||||
@ -3910,27 +3630,16 @@ cglobal prep_8tap_scaled, 4, 14, 16, 112, tmp, src, ss, w, h, mx, my, dx, dy
|
||||
MC_8TAP_SCALED_H 1, 2, 3, 4, 5, 6, 7, 8 ; 2a 3a 2b 3b
|
||||
MC_8TAP_SCALED_H 2, 3, 4, 5, 6, 7, 8, 9 ; 4a 5a 4b 5b
|
||||
MC_8TAP_SCALED_H 3, 4, 5, 6, 7, 8, 9, 11 ; 6a 7a 6b 7b
|
||||
mov myd, mym
|
||||
movu [rsp], m10
|
||||
vpbroadcastd m8, [rsp+0x60]
|
||||
vpbroadcastd m9, [rsp+0x64]
|
||||
vpbroadcastd m10, [rsp+0x68]
|
||||
vpbroadcastd m11, [rsp+0x6c]
|
||||
pshufb m0, m14 ; 01a 01b
|
||||
pshufb m1, m14 ; 23a 23b
|
||||
pshufb m2, m14 ; 45a 45b
|
||||
pshufb m3, m14 ; 67a 67b
|
||||
shr myd, 6
|
||||
mov r4d, 64 << 24
|
||||
lea myd, [t1+myq]
|
||||
cmovnz r4q, [base+subpel_filters+myq*8]
|
||||
vbroadcasti128 m14, [base+wswap]
|
||||
movq xm11, r4q
|
||||
punpcklbw xm11, xm11
|
||||
psraw xm11, 8
|
||||
vinserti128 m11, xm11, 1
|
||||
mov r4d, [rsp+64]
|
||||
mov r7d, [rsp+68]
|
||||
pshufd m8, m11, q0000
|
||||
pshufd m9, m11, q1111
|
||||
pshufd m10, m11, q2222
|
||||
pshufd m11, m11, q3333
|
||||
.dy1_vloop:
|
||||
pmaddwd m4, m0, m8
|
||||
pmaddwd m5, m1, m9
|
||||
@ -4182,137 +3891,9 @@ cglobal prep_8tap_scaled, 4, 14, 16, 112, tmp, src, ss, w, h, mx, my, dx, dy
|
||||
jg .dy2_w4_loop
|
||||
MC_8TAP_SCALED_RET
|
||||
.dy2_w8:
|
||||
%ifidn %1, put
|
||||
movifnidn dsm, dsq
|
||||
%endif
|
||||
shr t0d, 16
|
||||
sub srcq, 3
|
||||
movd xm15, t0d
|
||||
pmaddwd m8, [base+rescale_mul]
|
||||
vpbroadcastq m11, [base+pq_0x40000000]
|
||||
vpbroadcastd m15, xm15
|
||||
paddd m14, m8 ; mx+dx*[0-7]
|
||||
pand m6, m14, m10
|
||||
psrld m6, 6
|
||||
paddd m15, m6
|
||||
pcmpeqd m6, m9
|
||||
vextracti128 xm7, m15, 1
|
||||
movd r4d, xm15
|
||||
pextrd r6d, xm15, 2
|
||||
pextrd r7d, xm15, 1
|
||||
pextrd r9d, xm15, 3
|
||||
movd r10d, xm7
|
||||
pextrd r11d, xm7, 2
|
||||
pextrd r13d, xm7, 1
|
||||
pextrd rXd, xm7, 3
|
||||
movq xm15, [base+subpel_filters+ r4*8]
|
||||
movq xm10, [base+subpel_filters+ r6*8]
|
||||
movhps xm15, [base+subpel_filters+ r7*8]
|
||||
movhps xm10, [base+subpel_filters+ r9*8]
|
||||
vinserti128 m15, [base+subpel_filters+r10*8], 1
|
||||
vinserti128 m10, [base+subpel_filters+r11*8], 1
|
||||
vpbroadcastq m9, [base+subpel_filters+r13*8]
|
||||
vpbroadcastq m8, [base+subpel_filters+ rX*8]
|
||||
psrld m14, 10
|
||||
vextracti128 xm7, m14, 1
|
||||
movd r4d, xm14
|
||||
pextrd r6d, xm14, 2
|
||||
pextrd r7d, xm14, 1
|
||||
pextrd r9d, xm14, 3
|
||||
movd r10d, xm7
|
||||
pextrd r11d, xm7, 2
|
||||
pextrd r13d, xm7, 1
|
||||
pextrd rXd, xm7, 3
|
||||
mov [rsp], r7d
|
||||
pshufd m5, m6, q1100
|
||||
pshufd m6, m6, q3322
|
||||
vpblendd m15, m9, 0xc0
|
||||
vpblendd m10, m8, 0xc0
|
||||
pblendvb m15, m11, m5
|
||||
pblendvb m10, m11, m6
|
||||
vbroadcasti128 m14, [base+subpel_s_shuf8]
|
||||
MC_8TAP_SCALED_H 0, 1, 2, 3, 4, 5, 6, 7 ; 0a 1a 0b 1b
|
||||
MC_8TAP_SCALED_H 1, 2, 3, 4, 5, 6, 7, 8 ; 2a 3a 2b 3b
|
||||
MC_8TAP_SCALED_H 2, 3, 4, 5, 6, 7, 8, 9 ; 4a 5a 4b 5b
|
||||
MC_8TAP_SCALED_H 3, 4, 5, 6, 7, 8, 9, 11 ; 6a 7a 6b 7b
|
||||
mov myd, mym
|
||||
pshufb m0, m14 ; 01a 01b
|
||||
pshufb m1, m14 ; 23a 23b
|
||||
pshufb m2, m14 ; 45a 45b
|
||||
pshufb m3, m14 ; 67a 67b
|
||||
shr myd, 6
|
||||
lea myd, [t1+myq]
|
||||
mov t1d, 64 << 24
|
||||
cmovnz t1q, [base+subpel_filters+myq*8]
|
||||
movq xm11, t1q
|
||||
punpcklbw xm11, xm11
|
||||
psraw xm11, 8
|
||||
vinserti128 m11, xm11, 1
|
||||
mov r7d, [rsp]
|
||||
pshufd m8, m11, q0000
|
||||
pshufd m9, m11, q1111
|
||||
pshufd m14, m11, q2222
|
||||
pshufd m11, m11, q3333
|
||||
.dy2_w8_loop:
|
||||
pmaddwd m4, m0, m8
|
||||
pmaddwd m5, m1, m9
|
||||
pmaddwd m6, m2, m14
|
||||
pmaddwd m7, m3, m11
|
||||
paddd m4, m5
|
||||
paddd m6, m7
|
||||
paddd m4, m13
|
||||
paddd m4, m6
|
||||
psrad m4, rndshift
|
||||
vextracti128 xm5, m4, 1
|
||||
packssdw xm4, xm5
|
||||
%ifidn %1, put
|
||||
packuswb xm4, xm4
|
||||
movq [dstq], xm4
|
||||
add dstq, dsm
|
||||
%else
|
||||
mova [tmpq], xm4
|
||||
add tmpq, 16
|
||||
%endif
|
||||
dec hd
|
||||
jz .ret
|
||||
mova m0, m1
|
||||
mova m1, m2
|
||||
mova m2, m3
|
||||
movq xm3, [srcq+ r4]
|
||||
movq xm4, [srcq+ r6]
|
||||
movhps xm3, [srcq+ r7]
|
||||
movhps xm4, [srcq+ r9]
|
||||
vinserti128 m3, [srcq+r10], 1
|
||||
vinserti128 m4, [srcq+r11], 1
|
||||
vpbroadcastq m5, [srcq+r13]
|
||||
vpbroadcastq m6, [srcq+ rX]
|
||||
add srcq, ssq
|
||||
vpblendd m3, m5, 0xc0
|
||||
vpblendd m4, m6, 0xc0
|
||||
pmaddubsw m3, m15
|
||||
pmaddubsw m4, m10
|
||||
phaddw m3, m4
|
||||
movq xm4, [srcq+ r4]
|
||||
movq xm5, [srcq+ r6]
|
||||
movhps xm4, [srcq+ r7]
|
||||
movhps xm5, [srcq+ r9]
|
||||
vinserti128 m4, [srcq+r10], 1
|
||||
vinserti128 m5, [srcq+r11], 1
|
||||
vpbroadcastq m6, [srcq+r13]
|
||||
vpbroadcastq m7, [srcq+ rX]
|
||||
add srcq, ssq
|
||||
vpblendd m4, m6, 0xc0
|
||||
vpblendd m5, m7, 0xc0
|
||||
pmaddubsw m4, m15
|
||||
pmaddubsw m5, m10
|
||||
phaddw m4, m5
|
||||
psrld m5, m3, 16
|
||||
pslld m6, m4, 16
|
||||
paddw m3, m5
|
||||
paddw m4, m6
|
||||
pblendw m3, m4, 0xaa
|
||||
pmulhrsw m3, m12
|
||||
jmp .dy2_w8_loop
|
||||
mov dword [rsp+40], 1
|
||||
movifprep tmp_stridem, 16
|
||||
jmp .dy2_w_start
|
||||
.dy2_w16:
|
||||
mov dword [rsp+40], 2
|
||||
movifprep tmp_stridem, 32
|
||||
@ -4329,11 +3910,16 @@ cglobal prep_8tap_scaled, 4, 14, 16, 112, tmp, src, ss, w, h, mx, my, dx, dy
|
||||
mov dword [rsp+40], 16
|
||||
movifprep tmp_stridem, 256
|
||||
.dy2_w_start:
|
||||
mov myd, mym
|
||||
%ifidn %1, put
|
||||
movifnidn dsm, dsq
|
||||
%endif
|
||||
shr t0d, 16
|
||||
sub srcq, 3
|
||||
shr myd, 6
|
||||
mov r4d, 64 << 24
|
||||
lea myd, [t1+myq]
|
||||
cmovnz r4q, [base+subpel_filters+myq*8]
|
||||
pmaddwd m8, [base+rescale_mul]
|
||||
movd xm15, t0d
|
||||
mov [rsp+64], t0d
|
||||
@ -4345,6 +3931,10 @@ cglobal prep_8tap_scaled, 4, 14, 16, 112, tmp, src, ss, w, h, mx, my, dx, dy
|
||||
shl dword dxm, 3 ; dx*8
|
||||
vpbroadcastd m15, xm15
|
||||
paddd m14, m8 ; mx+dx*[0-7]
|
||||
movq xm0, r4q
|
||||
punpcklbw xm0, xm0
|
||||
psraw xm0, 8
|
||||
mova [rsp+0x50], xm0
|
||||
jmp .dy2_hloop
|
||||
.dy2_hloop_prep:
|
||||
dec dword [rsp+40]
|
||||
@ -4384,7 +3974,6 @@ cglobal prep_8tap_scaled, 4, 14, 16, 112, tmp, src, ss, w, h, mx, my, dx, dy
|
||||
vpbroadcastq m8, [base+subpel_filters+ rX*8]
|
||||
psrld m14, 10
|
||||
vextracti128 xm7, m14, 1
|
||||
movq [rsp+32], xm14
|
||||
movd r4d, xm14
|
||||
pextrd r6d, xm14, 2
|
||||
pextrd r7d, xm14, 1
|
||||
@ -4404,25 +3993,15 @@ cglobal prep_8tap_scaled, 4, 14, 16, 112, tmp, src, ss, w, h, mx, my, dx, dy
|
||||
MC_8TAP_SCALED_H 1, 2, 3, 4, 5, 6, 7, 8 ; 2a 3a 2b 3b
|
||||
MC_8TAP_SCALED_H 2, 3, 4, 5, 6, 7, 8, 9 ; 4a 5a 4b 5b
|
||||
MC_8TAP_SCALED_H 3, 4, 5, 6, 7, 8, 9, 11 ; 6a 7a 6b 7b
|
||||
mov myd, mym
|
||||
vpbroadcastd m8, [rsp+0x50]
|
||||
vpbroadcastd m9, [rsp+0x54]
|
||||
vpbroadcastd m11, [rsp+0x58]
|
||||
vpbroadcastd m4, [rsp+0x5c]
|
||||
pshufb m0, m14 ; 01a 01b
|
||||
pshufb m1, m14 ; 23a 23b
|
||||
pshufb m2, m14 ; 45a 45b
|
||||
pshufb m3, m14 ; 67a 67b
|
||||
shr myd, 6
|
||||
mov r4d, 64 << 24
|
||||
lea myd, [t1+myq]
|
||||
cmovnz r4q, [base+subpel_filters+myq*8]
|
||||
movq xm14, r4q
|
||||
punpcklbw xm14, xm14
|
||||
psraw xm14, 8
|
||||
vinserti128 m14, xm14, 1
|
||||
mov r4d, [rsp+32]
|
||||
mov r7d, [rsp+36]
|
||||
pshufd m8, m14, q0000
|
||||
pshufd m9, m14, q1111
|
||||
pshufd m11, m14, q2222
|
||||
pshufd m14, m14, q3333
|
||||
SWAP m14, m4
|
||||
.dy2_vloop:
|
||||
pmaddwd m4, m0, m8
|
||||
pmaddwd m5, m1, m9
|
||||
|
1
third_party/dav1d/src/x86/mc_avx512.asm
vendored
1
third_party/dav1d/src/x86/mc_avx512.asm
vendored
@ -23,6 +23,7 @@
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
%include "config.asm"
|
||||
%include "ext/x86/x86inc.asm"
|
||||
|
||||
%if HAVE_AVX512ICL && ARCH_X86_64
|
||||
|
44
third_party/dav1d/src/x86/mc_init_tmpl.c
vendored
44
third_party/dav1d/src/x86/mc_init_tmpl.c
vendored
@ -91,26 +91,46 @@ decl_mct_fn(dav1d_prep_bilin_ssse3);
|
||||
decl_mct_fn(dav1d_prep_bilin_sse2);
|
||||
|
||||
decl_mc_scaled_fn(dav1d_put_8tap_scaled_regular_avx2);
|
||||
decl_mc_scaled_fn(dav1d_put_8tap_scaled_regular_ssse3);
|
||||
decl_mc_scaled_fn(dav1d_put_8tap_scaled_regular_smooth_avx2);
|
||||
decl_mc_scaled_fn(dav1d_put_8tap_scaled_regular_smooth_ssse3);
|
||||
decl_mc_scaled_fn(dav1d_put_8tap_scaled_regular_sharp_avx2);
|
||||
decl_mc_scaled_fn(dav1d_put_8tap_scaled_regular_sharp_ssse3);
|
||||
decl_mc_scaled_fn(dav1d_put_8tap_scaled_smooth_avx2);
|
||||
decl_mc_scaled_fn(dav1d_put_8tap_scaled_smooth_ssse3);
|
||||
decl_mc_scaled_fn(dav1d_put_8tap_scaled_smooth_regular_avx2);
|
||||
decl_mc_scaled_fn(dav1d_put_8tap_scaled_smooth_regular_ssse3);
|
||||
decl_mc_scaled_fn(dav1d_put_8tap_scaled_smooth_sharp_avx2);
|
||||
decl_mc_scaled_fn(dav1d_put_8tap_scaled_smooth_sharp_ssse3);
|
||||
decl_mc_scaled_fn(dav1d_put_8tap_scaled_sharp_avx2);
|
||||
decl_mc_scaled_fn(dav1d_put_8tap_scaled_sharp_ssse3);
|
||||
decl_mc_scaled_fn(dav1d_put_8tap_scaled_sharp_regular_avx2);
|
||||
decl_mc_scaled_fn(dav1d_put_8tap_scaled_sharp_regular_ssse3);
|
||||
decl_mc_scaled_fn(dav1d_put_8tap_scaled_sharp_smooth_avx2);
|
||||
decl_mc_scaled_fn(dav1d_put_8tap_scaled_sharp_smooth_ssse3);
|
||||
decl_mc_scaled_fn(dav1d_put_bilin_scaled_avx2);
|
||||
decl_mc_scaled_fn(dav1d_put_bilin_scaled_ssse3);
|
||||
|
||||
decl_mct_scaled_fn(dav1d_prep_8tap_scaled_regular_avx2);
|
||||
decl_mct_scaled_fn(dav1d_prep_8tap_scaled_regular_ssse3);
|
||||
decl_mct_scaled_fn(dav1d_prep_8tap_scaled_regular_smooth_avx2);
|
||||
decl_mct_scaled_fn(dav1d_prep_8tap_scaled_regular_smooth_ssse3);
|
||||
decl_mct_scaled_fn(dav1d_prep_8tap_scaled_regular_sharp_avx2);
|
||||
decl_mct_scaled_fn(dav1d_prep_8tap_scaled_regular_sharp_ssse3);
|
||||
decl_mct_scaled_fn(dav1d_prep_8tap_scaled_smooth_avx2);
|
||||
decl_mct_scaled_fn(dav1d_prep_8tap_scaled_smooth_ssse3);
|
||||
decl_mct_scaled_fn(dav1d_prep_8tap_scaled_smooth_regular_avx2);
|
||||
decl_mct_scaled_fn(dav1d_prep_8tap_scaled_smooth_regular_ssse3);
|
||||
decl_mct_scaled_fn(dav1d_prep_8tap_scaled_smooth_sharp_avx2);
|
||||
decl_mct_scaled_fn(dav1d_prep_8tap_scaled_smooth_sharp_ssse3);
|
||||
decl_mct_scaled_fn(dav1d_prep_8tap_scaled_sharp_avx2);
|
||||
decl_mct_scaled_fn(dav1d_prep_8tap_scaled_sharp_ssse3);
|
||||
decl_mct_scaled_fn(dav1d_prep_8tap_scaled_sharp_regular_avx2);
|
||||
decl_mct_scaled_fn(dav1d_prep_8tap_scaled_sharp_regular_ssse3);
|
||||
decl_mct_scaled_fn(dav1d_prep_8tap_scaled_sharp_smooth_avx2);
|
||||
decl_mct_scaled_fn(dav1d_prep_8tap_scaled_sharp_smooth_ssse3);
|
||||
decl_mct_scaled_fn(dav1d_prep_bilin_scaled_avx2);
|
||||
decl_mct_scaled_fn(dav1d_prep_bilin_scaled_ssse3);
|
||||
|
||||
decl_avg_fn(dav1d_avg_avx512icl);
|
||||
decl_avg_fn(dav1d_avg_avx2);
|
||||
@ -207,6 +227,30 @@ COLD void bitfn(dav1d_mc_dsp_init_x86)(Dav1dMCDSPContext *const c) {
|
||||
init_mct_fn(FILTER_2D_8TAP_SHARP_SMOOTH, 8tap_sharp_smooth, ssse3);
|
||||
init_mct_fn(FILTER_2D_8TAP_SHARP, 8tap_sharp, ssse3);
|
||||
|
||||
#if ARCH_X86_64
|
||||
init_mc_scaled_fn(FILTER_2D_8TAP_REGULAR, 8tap_scaled_regular, ssse3);
|
||||
init_mc_scaled_fn(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_scaled_regular_smooth, ssse3);
|
||||
init_mc_scaled_fn(FILTER_2D_8TAP_REGULAR_SHARP, 8tap_scaled_regular_sharp, ssse3);
|
||||
init_mc_scaled_fn(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_scaled_smooth_regular, ssse3);
|
||||
init_mc_scaled_fn(FILTER_2D_8TAP_SMOOTH, 8tap_scaled_smooth, ssse3);
|
||||
init_mc_scaled_fn(FILTER_2D_8TAP_SMOOTH_SHARP, 8tap_scaled_smooth_sharp, ssse3);
|
||||
init_mc_scaled_fn(FILTER_2D_8TAP_SHARP_REGULAR, 8tap_scaled_sharp_regular, ssse3);
|
||||
init_mc_scaled_fn(FILTER_2D_8TAP_SHARP_SMOOTH, 8tap_scaled_sharp_smooth, ssse3);
|
||||
init_mc_scaled_fn(FILTER_2D_8TAP_SHARP, 8tap_scaled_sharp, ssse3);
|
||||
init_mc_scaled_fn(FILTER_2D_BILINEAR, bilin_scaled, ssse3);
|
||||
|
||||
init_mct_scaled_fn(FILTER_2D_8TAP_REGULAR, 8tap_scaled_regular, ssse3);
|
||||
init_mct_scaled_fn(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_scaled_regular_smooth, ssse3);
|
||||
init_mct_scaled_fn(FILTER_2D_8TAP_REGULAR_SHARP, 8tap_scaled_regular_sharp, ssse3);
|
||||
init_mct_scaled_fn(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_scaled_smooth_regular, ssse3);
|
||||
init_mct_scaled_fn(FILTER_2D_8TAP_SMOOTH, 8tap_scaled_smooth, ssse3);
|
||||
init_mct_scaled_fn(FILTER_2D_8TAP_SMOOTH_SHARP, 8tap_scaled_smooth_sharp, ssse3);
|
||||
init_mct_scaled_fn(FILTER_2D_8TAP_SHARP_REGULAR, 8tap_scaled_sharp_regular, ssse3);
|
||||
init_mct_scaled_fn(FILTER_2D_8TAP_SHARP_SMOOTH, 8tap_scaled_sharp_smooth, ssse3);
|
||||
init_mct_scaled_fn(FILTER_2D_8TAP_SHARP, 8tap_scaled_sharp, ssse3);
|
||||
init_mct_scaled_fn(FILTER_2D_BILINEAR, bilin_scaled, ssse3);
|
||||
#endif
|
||||
|
||||
c->avg = dav1d_avg_ssse3;
|
||||
c->w_avg = dav1d_w_avg_ssse3;
|
||||
c->mask = dav1d_mask_ssse3;
|
||||
|
1818
third_party/dav1d/src/x86/mc_sse.asm
vendored
1818
third_party/dav1d/src/x86/mc_sse.asm
vendored
File diff suppressed because it is too large
Load Diff
1
third_party/dav1d/src/x86/msac.asm
vendored
1
third_party/dav1d/src/x86/msac.asm
vendored
@ -23,6 +23,7 @@
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
%include "config.asm"
|
||||
%include "ext/x86/x86inc.asm"
|
||||
|
||||
SECTION_RODATA 64 ; avoids cacheline splits
|
||||
|
24
third_party/dav1d/tests/checkasm/checkasm.c
vendored
24
third_party/dav1d/tests/checkasm/checkasm.c
vendored
@ -518,9 +518,7 @@ static void print_cpu_name(void) {
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
(void)func_new, (void)func_ref;
|
||||
state.seed = get_seed();
|
||||
int ret = 0;
|
||||
|
||||
while (argc > 1) {
|
||||
if (!strncmp(argv[1], "--help", 6)) {
|
||||
@ -568,6 +566,24 @@ int main(int argc, char *argv[]) {
|
||||
|
||||
dav1d_init_cpu();
|
||||
|
||||
#ifdef readtime
|
||||
if (state.bench_pattern) {
|
||||
static int testing = 0;
|
||||
checkasm_save_context();
|
||||
if (!testing) {
|
||||
checkasm_set_signal_handler_state(1);
|
||||
testing = 1;
|
||||
readtime();
|
||||
checkasm_set_signal_handler_state(0);
|
||||
} else {
|
||||
fprintf(stderr, "checkasm: unable to access cycle counter\n");
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
int ret = 0;
|
||||
|
||||
if (!state.function_listing) {
|
||||
fprintf(stderr, "checkasm: using random seed %u\n", state.seed);
|
||||
#if ARCH_X86_64
|
||||
@ -672,7 +688,9 @@ int checkasm_bench_func(void) {
|
||||
/* Indicate that the current test has failed, return whether verbose printing
|
||||
* is requested. */
|
||||
int checkasm_fail_func(const char *const msg, ...) {
|
||||
if (state.current_func_ver->cpu && state.current_func_ver->ok) {
|
||||
if (state.current_func_ver && state.current_func_ver->cpu &&
|
||||
state.current_func_ver->ok)
|
||||
{
|
||||
va_list arg;
|
||||
|
||||
print_cpu_name();
|
||||
|
6
third_party/dav1d/tests/checkasm/checkasm.h
vendored
6
third_party/dav1d/tests/checkasm/checkasm.h
vendored
@ -86,8 +86,6 @@ int float_near_abs_eps_array(const float *a, const float *b, float eps,
|
||||
int float_near_abs_eps_array_ulp(const float *a, const float *b, float eps,
|
||||
unsigned max_ulp, int len);
|
||||
|
||||
static void *func_ref, *func_new;
|
||||
|
||||
#define BENCH_RUNS (1 << 12) /* Trade-off between accuracy and speed */
|
||||
|
||||
/* Decide whether or not the specified function needs to be tested */
|
||||
@ -99,6 +97,7 @@ static void *func_ref, *func_new;
|
||||
* is optional. */
|
||||
#define declare_func(ret, ...)\
|
||||
declare_new(ret, __VA_ARGS__)\
|
||||
void *func_ref, *func_new;\
|
||||
typedef ret func_type(__VA_ARGS__);\
|
||||
checkasm_save_context()
|
||||
|
||||
@ -127,6 +126,9 @@ static inline uint64_t readtime(void) {
|
||||
}
|
||||
#define readtime readtime
|
||||
#endif
|
||||
#elif (ARCH_AARCH64 || ARCH_ARM) && defined(__APPLE__)
|
||||
#include <mach/mach_time.h>
|
||||
#define readtime() mach_absolute_time()
|
||||
#elif ARCH_AARCH64
|
||||
#ifdef _MSC_VER
|
||||
#include <windows.h>
|
||||
|
26
third_party/dav1d/tests/checkasm/msac.c
vendored
26
third_party/dav1d/tests/checkasm/msac.c
vendored
@ -140,11 +140,11 @@ static void check_decode_symbol(MsacDSPContext *const c, uint8_t *const buf) {
|
||||
report("decode_symbol");
|
||||
}
|
||||
|
||||
static void check_decode_bool(MsacDSPContext *const c, uint8_t *const buf) {
|
||||
static void check_decode_bool_adapt(MsacDSPContext *const c, uint8_t *const buf) {
|
||||
MsacContext s_c, s_a;
|
||||
|
||||
declare_func(unsigned, MsacContext *s, uint16_t *cdf);
|
||||
if (check_func(c->bool_adapt, "msac_decode_bool_adapt")) {
|
||||
declare_func(unsigned, MsacContext *s, uint16_t *cdf);
|
||||
uint16_t cdf[2][2];
|
||||
for (int cdf_update = 0; cdf_update <= 1; cdf_update++) {
|
||||
dav1d_msac_init(&s_c, buf, BUF_SIZE, !cdf_update);
|
||||
@ -165,9 +165,13 @@ static void check_decode_bool(MsacDSPContext *const c, uint8_t *const buf) {
|
||||
bench_new(&s_a, cdf[1]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void check_decode_bool_equi(MsacDSPContext *const c, uint8_t *const buf) {
|
||||
MsacContext s_c, s_a;
|
||||
|
||||
declare_func(unsigned, MsacContext *s);
|
||||
if (check_func(c->bool_equi, "msac_decode_bool_equi")) {
|
||||
declare_func(unsigned, MsacContext *s);
|
||||
dav1d_msac_init(&s_c, buf, BUF_SIZE, 1);
|
||||
s_a = s_c;
|
||||
for (int i = 0; i < 64; i++) {
|
||||
@ -180,9 +184,13 @@ static void check_decode_bool(MsacDSPContext *const c, uint8_t *const buf) {
|
||||
}
|
||||
bench_new(&s_a);
|
||||
}
|
||||
}
|
||||
|
||||
static void check_decode_bool(MsacDSPContext *const c, uint8_t *const buf) {
|
||||
MsacContext s_c, s_a;
|
||||
|
||||
declare_func(unsigned, MsacContext *s, unsigned f);
|
||||
if (check_func(c->bool, "msac_decode_bool")) {
|
||||
declare_func(unsigned, MsacContext *s, unsigned f);
|
||||
dav1d_msac_init(&s_c, buf, BUF_SIZE, 1);
|
||||
s_a = s_c;
|
||||
for (int i = 0; i < 64; i++) {
|
||||
@ -197,6 +205,12 @@ static void check_decode_bool(MsacDSPContext *const c, uint8_t *const buf) {
|
||||
bench_new(&s_a, 16384);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static void check_decode_bool_funcs(MsacDSPContext *const c, uint8_t *const buf) {
|
||||
check_decode_bool_adapt(c, buf);
|
||||
check_decode_bool_equi(c, buf);
|
||||
check_decode_bool(c, buf);
|
||||
report("decode_bool");
|
||||
}
|
||||
|
||||
@ -204,8 +218,8 @@ static void check_decode_hi_tok(MsacDSPContext *const c, uint8_t *const buf) {
|
||||
ALIGN_STK_16(uint16_t, cdf, 2, [16]);
|
||||
MsacContext s_c, s_a;
|
||||
|
||||
declare_func(unsigned, MsacContext *s, uint16_t *cdf);
|
||||
if (check_func(c->hi_tok, "msac_decode_hi_tok")) {
|
||||
declare_func(unsigned, MsacContext *s, uint16_t *cdf);
|
||||
for (int cdf_update = 0; cdf_update <= 1; cdf_update++) {
|
||||
dav1d_msac_init(&s_c, buf, BUF_SIZE, !cdf_update);
|
||||
s_a = s_c;
|
||||
@ -272,6 +286,6 @@ void checkasm_check_msac(void) {
|
||||
buf[i] = rnd();
|
||||
|
||||
check_decode_symbol(&c, buf);
|
||||
check_decode_bool(&c, buf);
|
||||
check_decode_bool_funcs(&c, buf);
|
||||
check_decode_hi_tok(&c, buf);
|
||||
}
|
||||
|
@ -23,8 +23,9 @@
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
%define private_prefix checkasm
|
||||
%include "config.asm"
|
||||
%undef private_prefix
|
||||
%define private_prefix checkasm
|
||||
%include "ext/x86/x86inc.asm"
|
||||
|
||||
SECTION_RODATA 16
|
||||
|
Loading…
Reference in New Issue
Block a user