mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-10-07 18:04:46 +00:00
Bug 1657200 - Update libdav1d to d0e50cac for Firefox 81. r=mjf
Differential Revision: https://phabricator.services.mozilla.com/D88182
This commit is contained in:
parent
14bfc1e8f4
commit
342f755dce
@ -66,6 +66,8 @@ if stack_alignment == 0:
|
|||||||
DEFINES['STACK_ALIGNMENT'] = stack_alignment
|
DEFINES['STACK_ALIGNMENT'] = stack_alignment
|
||||||
|
|
||||||
if CONFIG['CPU_ARCH'] in ('x86', 'x86_64'):
|
if CONFIG['CPU_ARCH'] in ('x86', 'x86_64'):
|
||||||
|
ASFLAGS += ['-Dprivate_prefix=dav1d']
|
||||||
|
|
||||||
SOURCES += [
|
SOURCES += [
|
||||||
'../../../third_party/dav1d/src/x86/cpu.c',
|
'../../../third_party/dav1d/src/x86/cpu.c',
|
||||||
'../../../third_party/dav1d/src/x86/msac_init.c',
|
'../../../third_party/dav1d/src/x86/msac_init.c',
|
||||||
|
@ -20,11 +20,11 @@ origin:
|
|||||||
|
|
||||||
# Human-readable identifier for this version/release
|
# Human-readable identifier for this version/release
|
||||||
# Generally "version NNN", "tag SSS", "bookmark SSS"
|
# Generally "version NNN", "tag SSS", "bookmark SSS"
|
||||||
release: commit 6cf58c8e7deb54e287afeee6710b2a3774eded9c (2020-07-20T15:40:01.000+02:00).
|
release: commit d0e50cacead63e9904dde184580ce9a746374bd5 (2020-08-21T15:13:49.000+02:00).
|
||||||
|
|
||||||
# Revision to pull in
|
# Revision to pull in
|
||||||
# Must be a long or short commit SHA (long preferred)
|
# Must be a long or short commit SHA (long preferred)
|
||||||
revision: 6cf58c8e7deb54e287afeee6710b2a3774eded9c
|
revision: d0e50cacead63e9904dde184580ce9a746374bd5
|
||||||
|
|
||||||
# The package's license, where possible using the mnemonic from
|
# The package's license, where possible using the mnemonic from
|
||||||
# https://spdx.org/licenses/
|
# https://spdx.org/licenses/
|
||||||
|
@ -1,2 +1,2 @@
|
|||||||
/* auto-generated, do not edit */
|
/* auto-generated, do not edit */
|
||||||
#define DAV1D_VERSION "0.7.1-36-g6cf58c8"
|
#define DAV1D_VERSION "0.7.1-49-gd0e50ca"
|
||||||
|
1
third_party/dav1d/meson.build
vendored
1
third_party/dav1d/meson.build
vendored
@ -351,6 +351,7 @@ cdata.set10('ARCH_X86_64', host_machine.cpu_family() == 'x86_64')
|
|||||||
cdata.set10('ARCH_X86_32', host_machine.cpu_family() == 'x86')
|
cdata.set10('ARCH_X86_32', host_machine.cpu_family() == 'x86')
|
||||||
|
|
||||||
if host_machine.cpu_family().startswith('x86')
|
if host_machine.cpu_family().startswith('x86')
|
||||||
|
cdata_asm.set('private_prefix', 'dav1d')
|
||||||
cdata_asm.set10('ARCH_X86_64', host_machine.cpu_family() == 'x86_64')
|
cdata_asm.set10('ARCH_X86_64', host_machine.cpu_family() == 'x86_64')
|
||||||
cdata_asm.set10('ARCH_X86_32', host_machine.cpu_family() == 'x86')
|
cdata_asm.set10('ARCH_X86_32', host_machine.cpu_family() == 'x86')
|
||||||
cdata_asm.set10('PIC', true)
|
cdata_asm.set10('PIC', true)
|
||||||
|
89
third_party/dav1d/src/ext/x86/x86inc.asm
vendored
89
third_party/dav1d/src/ext/x86/x86inc.asm
vendored
@ -1,5 +1,5 @@
|
|||||||
;*****************************************************************************
|
;*****************************************************************************
|
||||||
;* x86inc.asm: x264asm abstraction layer
|
;* x86inc.asm: x86 abstraction layer
|
||||||
;*****************************************************************************
|
;*****************************************************************************
|
||||||
;* Copyright (C) 2005-2020 x264 project
|
;* Copyright (C) 2005-2020 x264 project
|
||||||
;*
|
;*
|
||||||
@ -21,23 +21,14 @@
|
|||||||
;* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
;* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||||
;*****************************************************************************
|
;*****************************************************************************
|
||||||
|
|
||||||
; This is a header file for the x264ASM assembly language, which uses
|
; This is a header file for the x86inc.asm assembly language, which uses
|
||||||
; NASM/YASM syntax combined with a large number of macros to provide easy
|
; NASM/YASM syntax combined with a large number of macros to provide easy
|
||||||
; abstraction between different calling conventions (x86_32, win64, linux64).
|
; abstraction between different calling conventions (x86_32, win64, linux64).
|
||||||
; It also has various other useful features to simplify writing the kind of
|
; It also has various other useful features to simplify writing the kind of
|
||||||
; DSP functions that are most often used in x264.
|
; DSP functions that are most often used.
|
||||||
|
|
||||||
; Unlike the rest of x264, this file is available under an ISC license, as it
|
|
||||||
; has significant usefulness outside of x264 and we want it to be available
|
|
||||||
; to the largest audience possible. Of course, if you modify it for your own
|
|
||||||
; purposes to add a new feature, we strongly encourage contributing a patch
|
|
||||||
; as this feature might be useful for others as well. Send patches or ideas
|
|
||||||
; to x264-devel@videolan.org .
|
|
||||||
|
|
||||||
%include "config.asm"
|
|
||||||
|
|
||||||
%ifndef private_prefix
|
%ifndef private_prefix
|
||||||
%define private_prefix dav1d
|
%error private_prefix not defined
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
%ifndef public_prefix
|
%ifndef public_prefix
|
||||||
@ -118,7 +109,7 @@
|
|||||||
; Macros to eliminate most code duplication between x86_32 and x86_64:
|
; Macros to eliminate most code duplication between x86_32 and x86_64:
|
||||||
; Currently this works only for leaf functions which load all their arguments
|
; Currently this works only for leaf functions which load all their arguments
|
||||||
; into registers at the start, and make no other use of the stack. Luckily that
|
; into registers at the start, and make no other use of the stack. Luckily that
|
||||||
; covers most of x264's asm.
|
; covers most use cases.
|
||||||
|
|
||||||
; PROLOGUE:
|
; PROLOGUE:
|
||||||
; %1 = number of arguments. loads them from stack if needed.
|
; %1 = number of arguments. loads them from stack if needed.
|
||||||
@ -1522,18 +1513,18 @@ AVX_INSTR orps, sse, 1, 0, 1
|
|||||||
AVX_INSTR pabsb, ssse3
|
AVX_INSTR pabsb, ssse3
|
||||||
AVX_INSTR pabsd, ssse3
|
AVX_INSTR pabsd, ssse3
|
||||||
AVX_INSTR pabsw, ssse3
|
AVX_INSTR pabsw, ssse3
|
||||||
AVX_INSTR packsswb, mmx, 0, 0, 0
|
|
||||||
AVX_INSTR packssdw, mmx, 0, 0, 0
|
AVX_INSTR packssdw, mmx, 0, 0, 0
|
||||||
AVX_INSTR packuswb, mmx, 0, 0, 0
|
AVX_INSTR packsswb, mmx, 0, 0, 0
|
||||||
AVX_INSTR packusdw, sse4, 0, 0, 0
|
AVX_INSTR packusdw, sse4, 0, 0, 0
|
||||||
|
AVX_INSTR packuswb, mmx, 0, 0, 0
|
||||||
AVX_INSTR paddb, mmx, 0, 0, 1
|
AVX_INSTR paddb, mmx, 0, 0, 1
|
||||||
AVX_INSTR paddw, mmx, 0, 0, 1
|
|
||||||
AVX_INSTR paddd, mmx, 0, 0, 1
|
AVX_INSTR paddd, mmx, 0, 0, 1
|
||||||
AVX_INSTR paddq, sse2, 0, 0, 1
|
AVX_INSTR paddq, sse2, 0, 0, 1
|
||||||
AVX_INSTR paddsb, mmx, 0, 0, 1
|
AVX_INSTR paddsb, mmx, 0, 0, 1
|
||||||
AVX_INSTR paddsw, mmx, 0, 0, 1
|
AVX_INSTR paddsw, mmx, 0, 0, 1
|
||||||
AVX_INSTR paddusb, mmx, 0, 0, 1
|
AVX_INSTR paddusb, mmx, 0, 0, 1
|
||||||
AVX_INSTR paddusw, mmx, 0, 0, 1
|
AVX_INSTR paddusw, mmx, 0, 0, 1
|
||||||
|
AVX_INSTR paddw, mmx, 0, 0, 1
|
||||||
AVX_INSTR palignr, ssse3, 0, 1, 0
|
AVX_INSTR palignr, ssse3, 0, 1, 0
|
||||||
AVX_INSTR pand, mmx, 0, 0, 1
|
AVX_INSTR pand, mmx, 0, 0, 1
|
||||||
AVX_INSTR pandn, mmx, 0, 0, 0
|
AVX_INSTR pandn, mmx, 0, 0, 0
|
||||||
@ -1541,71 +1532,71 @@ AVX_INSTR pavgb, mmx2, 0, 0, 1
|
|||||||
AVX_INSTR pavgw, mmx2, 0, 0, 1
|
AVX_INSTR pavgw, mmx2, 0, 0, 1
|
||||||
AVX_INSTR pblendvb, sse4 ; can't be emulated
|
AVX_INSTR pblendvb, sse4 ; can't be emulated
|
||||||
AVX_INSTR pblendw, sse4, 0, 1, 0
|
AVX_INSTR pblendw, sse4, 0, 1, 0
|
||||||
AVX_INSTR pclmulqdq, fnord, 0, 1, 0
|
|
||||||
AVX_INSTR pclmulhqhqdq, fnord, 0, 0, 0
|
AVX_INSTR pclmulhqhqdq, fnord, 0, 0, 0
|
||||||
AVX_INSTR pclmulhqlqdq, fnord, 0, 0, 0
|
AVX_INSTR pclmulhqlqdq, fnord, 0, 0, 0
|
||||||
AVX_INSTR pclmullqhqdq, fnord, 0, 0, 0
|
AVX_INSTR pclmullqhqdq, fnord, 0, 0, 0
|
||||||
AVX_INSTR pclmullqlqdq, fnord, 0, 0, 0
|
AVX_INSTR pclmullqlqdq, fnord, 0, 0, 0
|
||||||
AVX_INSTR pcmpestri, sse42
|
AVX_INSTR pclmulqdq, fnord, 0, 1, 0
|
||||||
AVX_INSTR pcmpestrm, sse42
|
|
||||||
AVX_INSTR pcmpistri, sse42
|
|
||||||
AVX_INSTR pcmpistrm, sse42
|
|
||||||
AVX_INSTR pcmpeqb, mmx, 0, 0, 1
|
AVX_INSTR pcmpeqb, mmx, 0, 0, 1
|
||||||
AVX_INSTR pcmpeqw, mmx, 0, 0, 1
|
|
||||||
AVX_INSTR pcmpeqd, mmx, 0, 0, 1
|
AVX_INSTR pcmpeqd, mmx, 0, 0, 1
|
||||||
AVX_INSTR pcmpeqq, sse4, 0, 0, 1
|
AVX_INSTR pcmpeqq, sse4, 0, 0, 1
|
||||||
|
AVX_INSTR pcmpeqw, mmx, 0, 0, 1
|
||||||
|
AVX_INSTR pcmpestri, sse42
|
||||||
|
AVX_INSTR pcmpestrm, sse42
|
||||||
AVX_INSTR pcmpgtb, mmx, 0, 0, 0
|
AVX_INSTR pcmpgtb, mmx, 0, 0, 0
|
||||||
AVX_INSTR pcmpgtw, mmx, 0, 0, 0
|
|
||||||
AVX_INSTR pcmpgtd, mmx, 0, 0, 0
|
AVX_INSTR pcmpgtd, mmx, 0, 0, 0
|
||||||
AVX_INSTR pcmpgtq, sse42, 0, 0, 0
|
AVX_INSTR pcmpgtq, sse42, 0, 0, 0
|
||||||
|
AVX_INSTR pcmpgtw, mmx, 0, 0, 0
|
||||||
|
AVX_INSTR pcmpistri, sse42
|
||||||
|
AVX_INSTR pcmpistrm, sse42
|
||||||
AVX_INSTR pextrb, sse4
|
AVX_INSTR pextrb, sse4
|
||||||
AVX_INSTR pextrd, sse4
|
AVX_INSTR pextrd, sse4
|
||||||
AVX_INSTR pextrq, sse4
|
AVX_INSTR pextrq, sse4
|
||||||
AVX_INSTR pextrw, mmx2
|
AVX_INSTR pextrw, mmx2
|
||||||
AVX_INSTR phaddw, ssse3, 0, 0, 0
|
|
||||||
AVX_INSTR phaddd, ssse3, 0, 0, 0
|
AVX_INSTR phaddd, ssse3, 0, 0, 0
|
||||||
AVX_INSTR phaddsw, ssse3, 0, 0, 0
|
AVX_INSTR phaddsw, ssse3, 0, 0, 0
|
||||||
|
AVX_INSTR phaddw, ssse3, 0, 0, 0
|
||||||
AVX_INSTR phminposuw, sse4
|
AVX_INSTR phminposuw, sse4
|
||||||
AVX_INSTR phsubw, ssse3, 0, 0, 0
|
|
||||||
AVX_INSTR phsubd, ssse3, 0, 0, 0
|
AVX_INSTR phsubd, ssse3, 0, 0, 0
|
||||||
AVX_INSTR phsubsw, ssse3, 0, 0, 0
|
AVX_INSTR phsubsw, ssse3, 0, 0, 0
|
||||||
|
AVX_INSTR phsubw, ssse3, 0, 0, 0
|
||||||
AVX_INSTR pinsrb, sse4, 0, 1, 0
|
AVX_INSTR pinsrb, sse4, 0, 1, 0
|
||||||
AVX_INSTR pinsrd, sse4, 0, 1, 0
|
AVX_INSTR pinsrd, sse4, 0, 1, 0
|
||||||
AVX_INSTR pinsrq, sse4, 0, 1, 0
|
AVX_INSTR pinsrq, sse4, 0, 1, 0
|
||||||
AVX_INSTR pinsrw, mmx2, 0, 1, 0
|
AVX_INSTR pinsrw, mmx2, 0, 1, 0
|
||||||
AVX_INSTR pmaddwd, mmx, 0, 0, 1
|
|
||||||
AVX_INSTR pmaddubsw, ssse3, 0, 0, 0
|
AVX_INSTR pmaddubsw, ssse3, 0, 0, 0
|
||||||
|
AVX_INSTR pmaddwd, mmx, 0, 0, 1
|
||||||
AVX_INSTR pmaxsb, sse4, 0, 0, 1
|
AVX_INSTR pmaxsb, sse4, 0, 0, 1
|
||||||
AVX_INSTR pmaxsw, mmx2, 0, 0, 1
|
|
||||||
AVX_INSTR pmaxsd, sse4, 0, 0, 1
|
AVX_INSTR pmaxsd, sse4, 0, 0, 1
|
||||||
|
AVX_INSTR pmaxsw, mmx2, 0, 0, 1
|
||||||
AVX_INSTR pmaxub, mmx2, 0, 0, 1
|
AVX_INSTR pmaxub, mmx2, 0, 0, 1
|
||||||
AVX_INSTR pmaxuw, sse4, 0, 0, 1
|
|
||||||
AVX_INSTR pmaxud, sse4, 0, 0, 1
|
AVX_INSTR pmaxud, sse4, 0, 0, 1
|
||||||
|
AVX_INSTR pmaxuw, sse4, 0, 0, 1
|
||||||
AVX_INSTR pminsb, sse4, 0, 0, 1
|
AVX_INSTR pminsb, sse4, 0, 0, 1
|
||||||
AVX_INSTR pminsw, mmx2, 0, 0, 1
|
|
||||||
AVX_INSTR pminsd, sse4, 0, 0, 1
|
AVX_INSTR pminsd, sse4, 0, 0, 1
|
||||||
|
AVX_INSTR pminsw, mmx2, 0, 0, 1
|
||||||
AVX_INSTR pminub, mmx2, 0, 0, 1
|
AVX_INSTR pminub, mmx2, 0, 0, 1
|
||||||
AVX_INSTR pminuw, sse4, 0, 0, 1
|
|
||||||
AVX_INSTR pminud, sse4, 0, 0, 1
|
AVX_INSTR pminud, sse4, 0, 0, 1
|
||||||
|
AVX_INSTR pminuw, sse4, 0, 0, 1
|
||||||
AVX_INSTR pmovmskb, mmx2
|
AVX_INSTR pmovmskb, mmx2
|
||||||
AVX_INSTR pmovsxbw, sse4
|
|
||||||
AVX_INSTR pmovsxbd, sse4
|
AVX_INSTR pmovsxbd, sse4
|
||||||
AVX_INSTR pmovsxbq, sse4
|
AVX_INSTR pmovsxbq, sse4
|
||||||
|
AVX_INSTR pmovsxbw, sse4
|
||||||
|
AVX_INSTR pmovsxdq, sse4
|
||||||
AVX_INSTR pmovsxwd, sse4
|
AVX_INSTR pmovsxwd, sse4
|
||||||
AVX_INSTR pmovsxwq, sse4
|
AVX_INSTR pmovsxwq, sse4
|
||||||
AVX_INSTR pmovsxdq, sse4
|
|
||||||
AVX_INSTR pmovzxbw, sse4
|
|
||||||
AVX_INSTR pmovzxbd, sse4
|
AVX_INSTR pmovzxbd, sse4
|
||||||
AVX_INSTR pmovzxbq, sse4
|
AVX_INSTR pmovzxbq, sse4
|
||||||
|
AVX_INSTR pmovzxbw, sse4
|
||||||
|
AVX_INSTR pmovzxdq, sse4
|
||||||
AVX_INSTR pmovzxwd, sse4
|
AVX_INSTR pmovzxwd, sse4
|
||||||
AVX_INSTR pmovzxwq, sse4
|
AVX_INSTR pmovzxwq, sse4
|
||||||
AVX_INSTR pmovzxdq, sse4
|
|
||||||
AVX_INSTR pmuldq, sse4, 0, 0, 1
|
AVX_INSTR pmuldq, sse4, 0, 0, 1
|
||||||
AVX_INSTR pmulhrsw, ssse3, 0, 0, 1
|
AVX_INSTR pmulhrsw, ssse3, 0, 0, 1
|
||||||
AVX_INSTR pmulhuw, mmx2, 0, 0, 1
|
AVX_INSTR pmulhuw, mmx2, 0, 0, 1
|
||||||
AVX_INSTR pmulhw, mmx, 0, 0, 1
|
AVX_INSTR pmulhw, mmx, 0, 0, 1
|
||||||
AVX_INSTR pmullw, mmx, 0, 0, 1
|
|
||||||
AVX_INSTR pmulld, sse4, 0, 0, 1
|
AVX_INSTR pmulld, sse4, 0, 0, 1
|
||||||
|
AVX_INSTR pmullw, mmx, 0, 0, 1
|
||||||
AVX_INSTR pmuludq, sse2, 0, 0, 1
|
AVX_INSTR pmuludq, sse2, 0, 0, 1
|
||||||
AVX_INSTR por, mmx, 0, 0, 1
|
AVX_INSTR por, mmx, 0, 0, 1
|
||||||
AVX_INSTR psadbw, mmx2, 0, 0, 1
|
AVX_INSTR psadbw, mmx2, 0, 0, 1
|
||||||
@ -1614,35 +1605,35 @@ AVX_INSTR pshufd, sse2
|
|||||||
AVX_INSTR pshufhw, sse2
|
AVX_INSTR pshufhw, sse2
|
||||||
AVX_INSTR pshuflw, sse2
|
AVX_INSTR pshuflw, sse2
|
||||||
AVX_INSTR psignb, ssse3, 0, 0, 0
|
AVX_INSTR psignb, ssse3, 0, 0, 0
|
||||||
AVX_INSTR psignw, ssse3, 0, 0, 0
|
|
||||||
AVX_INSTR psignd, ssse3, 0, 0, 0
|
AVX_INSTR psignd, ssse3, 0, 0, 0
|
||||||
AVX_INSTR psllw, mmx, 0, 0, 0
|
AVX_INSTR psignw, ssse3, 0, 0, 0
|
||||||
AVX_INSTR pslld, mmx, 0, 0, 0
|
AVX_INSTR pslld, mmx, 0, 0, 0
|
||||||
AVX_INSTR psllq, mmx, 0, 0, 0
|
|
||||||
AVX_INSTR pslldq, sse2, 0, 0, 0
|
AVX_INSTR pslldq, sse2, 0, 0, 0
|
||||||
AVX_INSTR psraw, mmx, 0, 0, 0
|
AVX_INSTR psllq, mmx, 0, 0, 0
|
||||||
|
AVX_INSTR psllw, mmx, 0, 0, 0
|
||||||
AVX_INSTR psrad, mmx, 0, 0, 0
|
AVX_INSTR psrad, mmx, 0, 0, 0
|
||||||
AVX_INSTR psrlw, mmx, 0, 0, 0
|
AVX_INSTR psraw, mmx, 0, 0, 0
|
||||||
AVX_INSTR psrld, mmx, 0, 0, 0
|
AVX_INSTR psrld, mmx, 0, 0, 0
|
||||||
AVX_INSTR psrlq, mmx, 0, 0, 0
|
|
||||||
AVX_INSTR psrldq, sse2, 0, 0, 0
|
AVX_INSTR psrldq, sse2, 0, 0, 0
|
||||||
|
AVX_INSTR psrlq, mmx, 0, 0, 0
|
||||||
|
AVX_INSTR psrlw, mmx, 0, 0, 0
|
||||||
AVX_INSTR psubb, mmx, 0, 0, 0
|
AVX_INSTR psubb, mmx, 0, 0, 0
|
||||||
AVX_INSTR psubw, mmx, 0, 0, 0
|
|
||||||
AVX_INSTR psubd, mmx, 0, 0, 0
|
AVX_INSTR psubd, mmx, 0, 0, 0
|
||||||
AVX_INSTR psubq, sse2, 0, 0, 0
|
AVX_INSTR psubq, sse2, 0, 0, 0
|
||||||
AVX_INSTR psubsb, mmx, 0, 0, 0
|
AVX_INSTR psubsb, mmx, 0, 0, 0
|
||||||
AVX_INSTR psubsw, mmx, 0, 0, 0
|
AVX_INSTR psubsw, mmx, 0, 0, 0
|
||||||
AVX_INSTR psubusb, mmx, 0, 0, 0
|
AVX_INSTR psubusb, mmx, 0, 0, 0
|
||||||
AVX_INSTR psubusw, mmx, 0, 0, 0
|
AVX_INSTR psubusw, mmx, 0, 0, 0
|
||||||
|
AVX_INSTR psubw, mmx, 0, 0, 0
|
||||||
AVX_INSTR ptest, sse4
|
AVX_INSTR ptest, sse4
|
||||||
AVX_INSTR punpckhbw, mmx, 0, 0, 0
|
AVX_INSTR punpckhbw, mmx, 0, 0, 0
|
||||||
AVX_INSTR punpckhwd, mmx, 0, 0, 0
|
|
||||||
AVX_INSTR punpckhdq, mmx, 0, 0, 0
|
AVX_INSTR punpckhdq, mmx, 0, 0, 0
|
||||||
AVX_INSTR punpckhqdq, sse2, 0, 0, 0
|
AVX_INSTR punpckhqdq, sse2, 0, 0, 0
|
||||||
|
AVX_INSTR punpckhwd, mmx, 0, 0, 0
|
||||||
AVX_INSTR punpcklbw, mmx, 0, 0, 0
|
AVX_INSTR punpcklbw, mmx, 0, 0, 0
|
||||||
AVX_INSTR punpcklwd, mmx, 0, 0, 0
|
|
||||||
AVX_INSTR punpckldq, mmx, 0, 0, 0
|
AVX_INSTR punpckldq, mmx, 0, 0, 0
|
||||||
AVX_INSTR punpcklqdq, sse2, 0, 0, 0
|
AVX_INSTR punpcklqdq, sse2, 0, 0, 0
|
||||||
|
AVX_INSTR punpcklwd, mmx, 0, 0, 0
|
||||||
AVX_INSTR pxor, mmx, 0, 0, 1
|
AVX_INSTR pxor, mmx, 0, 0, 1
|
||||||
AVX_INSTR rcpps, sse, 1
|
AVX_INSTR rcpps, sse, 1
|
||||||
AVX_INSTR rcpss, sse, 1, 0, 0
|
AVX_INSTR rcpss, sse, 1, 0, 0
|
||||||
@ -1674,8 +1665,8 @@ AVX_INSTR xorps, sse, 1, 0, 1
|
|||||||
|
|
||||||
; 3DNow instructions, for sharing code between AVX, SSE and 3DN
|
; 3DNow instructions, for sharing code between AVX, SSE and 3DN
|
||||||
AVX_INSTR pfadd, 3dnow, 1, 0, 1
|
AVX_INSTR pfadd, 3dnow, 1, 0, 1
|
||||||
AVX_INSTR pfsub, 3dnow, 1, 0, 0
|
|
||||||
AVX_INSTR pfmul, 3dnow, 1, 0, 1
|
AVX_INSTR pfmul, 3dnow, 1, 0, 1
|
||||||
|
AVX_INSTR pfsub, 3dnow, 1, 0, 0
|
||||||
|
|
||||||
;%1 == instruction
|
;%1 == instruction
|
||||||
;%2 == minimal instruction set
|
;%2 == minimal instruction set
|
||||||
@ -1740,9 +1731,9 @@ GPR_INSTR shrx, bmi2
|
|||||||
%endmacro
|
%endmacro
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
FMA_INSTR pmacsww, pmullw, paddw
|
FMA_INSTR pmacsdd, pmulld, paddd ; sse4 emulation
|
||||||
FMA_INSTR pmacsdd, pmulld, paddd ; sse4 emulation
|
FMA_INSTR pmacsdql, pmuldq, paddq ; sse4 emulation
|
||||||
FMA_INSTR pmacsdql, pmuldq, paddq ; sse4 emulation
|
FMA_INSTR pmacsww, pmullw, paddw
|
||||||
FMA_INSTR pmadcswd, pmaddwd, paddd
|
FMA_INSTR pmadcswd, pmaddwd, paddd
|
||||||
|
|
||||||
; Macros for consolidating FMA3 and FMA4 using 4-operand (dst, src1, src2, src3) syntax.
|
; Macros for consolidating FMA3 and FMA4 using 4-operand (dst, src1, src2, src3) syntax.
|
||||||
|
23
third_party/dav1d/src/obu.c
vendored
23
third_party/dav1d/src/obu.c
vendored
@ -1198,7 +1198,6 @@ int dav1d_parse_obus(Dav1dContext *const c, Dav1dData *const in, const int globa
|
|||||||
|
|
||||||
const unsigned init_bit_pos = dav1d_get_bits_pos(&gb);
|
const unsigned init_bit_pos = dav1d_get_bits_pos(&gb);
|
||||||
const unsigned init_byte_pos = init_bit_pos >> 3;
|
const unsigned init_byte_pos = init_bit_pos >> 3;
|
||||||
const unsigned pkt_bytelen = init_byte_pos + len;
|
|
||||||
|
|
||||||
// We must have read a whole number of bytes at this point (1 byte
|
// We must have read a whole number of bytes at this point (1 byte
|
||||||
// for the header and whole bytes at a time when reading the
|
// for the header and whole bytes at a time when reading the
|
||||||
@ -1342,6 +1341,7 @@ int dav1d_parse_obus(Dav1dContext *const c, Dav1dData *const in, const int globa
|
|||||||
// The current bit position is a multiple of 8 (because we
|
// The current bit position is a multiple of 8 (because we
|
||||||
// just aligned it) and less than 8*pkt_bytelen because
|
// just aligned it) and less than 8*pkt_bytelen because
|
||||||
// otherwise the overrun check would have fired.
|
// otherwise the overrun check would have fired.
|
||||||
|
const unsigned pkt_bytelen = init_byte_pos + len;
|
||||||
const unsigned bit_pos = dav1d_get_bits_pos(&gb);
|
const unsigned bit_pos = dav1d_get_bits_pos(&gb);
|
||||||
assert((bit_pos & 7) == 0);
|
assert((bit_pos & 7) == 0);
|
||||||
assert(pkt_bytelen >= (bit_pos >> 3));
|
assert(pkt_bytelen >= (bit_pos >> 3));
|
||||||
@ -1368,17 +1368,12 @@ int dav1d_parse_obus(Dav1dContext *const c, Dav1dData *const in, const int globa
|
|||||||
const enum ObuMetaType meta_type = dav1d_get_uleb128(&gb);
|
const enum ObuMetaType meta_type = dav1d_get_uleb128(&gb);
|
||||||
const int meta_type_len = (dav1d_get_bits_pos(&gb) - init_bit_pos) >> 3;
|
const int meta_type_len = (dav1d_get_bits_pos(&gb) - init_bit_pos) >> 3;
|
||||||
if (gb.error) goto error;
|
if (gb.error) goto error;
|
||||||
Dav1dRef *ref;
|
|
||||||
Dav1dContentLightLevel *content_light;
|
|
||||||
Dav1dMasteringDisplay *mastering_display;
|
|
||||||
Dav1dITUTT35 *itut_t35_metadata;
|
|
||||||
|
|
||||||
switch (meta_type) {
|
switch (meta_type) {
|
||||||
case OBU_META_HDR_CLL:
|
case OBU_META_HDR_CLL: {
|
||||||
ref = dav1d_ref_create(sizeof(Dav1dContentLightLevel));
|
Dav1dRef *ref = dav1d_ref_create(sizeof(Dav1dContentLightLevel));
|
||||||
if (!ref) return DAV1D_ERR(ENOMEM);
|
if (!ref) return DAV1D_ERR(ENOMEM);
|
||||||
content_light = ref->data;
|
Dav1dContentLightLevel *const content_light = ref->data;
|
||||||
memset(content_light, 0, sizeof(*content_light));
|
|
||||||
|
|
||||||
content_light->max_content_light_level = dav1d_get_bits(&gb, 16);
|
content_light->max_content_light_level = dav1d_get_bits(&gb, 16);
|
||||||
content_light->max_frame_average_light_level = dav1d_get_bits(&gb, 16);
|
content_light->max_frame_average_light_level = dav1d_get_bits(&gb, 16);
|
||||||
@ -1395,11 +1390,11 @@ int dav1d_parse_obus(Dav1dContext *const c, Dav1dData *const in, const int globa
|
|||||||
c->content_light = content_light;
|
c->content_light = content_light;
|
||||||
c->content_light_ref = ref;
|
c->content_light_ref = ref;
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
case OBU_META_HDR_MDCV: {
|
case OBU_META_HDR_MDCV: {
|
||||||
ref = dav1d_ref_create(sizeof(Dav1dMasteringDisplay));
|
Dav1dRef *ref = dav1d_ref_create(sizeof(Dav1dMasteringDisplay));
|
||||||
if (!ref) return DAV1D_ERR(ENOMEM);
|
if (!ref) return DAV1D_ERR(ENOMEM);
|
||||||
mastering_display = ref->data;
|
Dav1dMasteringDisplay *const mastering_display = ref->data;
|
||||||
memset(mastering_display, 0, sizeof(*mastering_display));
|
|
||||||
|
|
||||||
for (int i = 0; i < 3; i++) {
|
for (int i = 0; i < 3; i++) {
|
||||||
mastering_display->primaries[i][0] = dav1d_get_bits(&gb, 16);
|
mastering_display->primaries[i][0] = dav1d_get_bits(&gb, 16);
|
||||||
@ -1447,9 +1442,9 @@ int dav1d_parse_obus(Dav1dContext *const c, Dav1dData *const in, const int globa
|
|||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
ref = dav1d_ref_create(sizeof(Dav1dITUTT35) + payload_size * sizeof(uint8_t));
|
Dav1dRef *ref = dav1d_ref_create(sizeof(Dav1dITUTT35) + payload_size * sizeof(uint8_t));
|
||||||
if (!ref) return DAV1D_ERR(ENOMEM);
|
if (!ref) return DAV1D_ERR(ENOMEM);
|
||||||
itut_t35_metadata = ref->data;
|
Dav1dITUTT35 *const itut_t35_metadata = ref->data;
|
||||||
|
|
||||||
// We need our public headers to be C++ compatible, so payload can't be
|
// We need our public headers to be C++ compatible, so payload can't be
|
||||||
// a flexible array member
|
// a flexible array member
|
||||||
|
1
third_party/dav1d/src/x86/cdef_avx2.asm
vendored
1
third_party/dav1d/src/x86/cdef_avx2.asm
vendored
@ -23,6 +23,7 @@
|
|||||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
%include "config.asm"
|
||||||
%include "ext/x86/x86inc.asm"
|
%include "ext/x86/x86inc.asm"
|
||||||
|
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
|
1
third_party/dav1d/src/x86/cdef_avx512.asm
vendored
1
third_party/dav1d/src/x86/cdef_avx512.asm
vendored
@ -23,6 +23,7 @@
|
|||||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
%include "config.asm"
|
||||||
%include "ext/x86/x86inc.asm"
|
%include "ext/x86/x86inc.asm"
|
||||||
|
|
||||||
%if HAVE_AVX512ICL && ARCH_X86_64
|
%if HAVE_AVX512ICL && ARCH_X86_64
|
||||||
|
1
third_party/dav1d/src/x86/cdef_sse.asm
vendored
1
third_party/dav1d/src/x86/cdef_sse.asm
vendored
@ -24,6 +24,7 @@
|
|||||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
%include "config.asm"
|
||||||
%include "ext/x86/x86inc.asm"
|
%include "ext/x86/x86inc.asm"
|
||||||
|
|
||||||
SECTION_RODATA 16
|
SECTION_RODATA 16
|
||||||
|
1
third_party/dav1d/src/x86/cpuid.asm
vendored
1
third_party/dav1d/src/x86/cpuid.asm
vendored
@ -23,6 +23,7 @@
|
|||||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
%include "config.asm"
|
||||||
%include "ext/x86/x86inc.asm"
|
%include "ext/x86/x86inc.asm"
|
||||||
|
|
||||||
SECTION .text
|
SECTION .text
|
||||||
|
1
third_party/dav1d/src/x86/film_grain.asm
vendored
1
third_party/dav1d/src/x86/film_grain.asm
vendored
@ -23,6 +23,7 @@
|
|||||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
%include "config.asm"
|
||||||
%include "ext/x86/x86inc.asm"
|
%include "ext/x86/x86inc.asm"
|
||||||
|
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
|
@ -23,6 +23,7 @@
|
|||||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
%include "config.asm"
|
||||||
%include "ext/x86/x86inc.asm"
|
%include "ext/x86/x86inc.asm"
|
||||||
|
|
||||||
SECTION_RODATA
|
SECTION_RODATA
|
||||||
|
1
third_party/dav1d/src/x86/ipred.asm
vendored
1
third_party/dav1d/src/x86/ipred.asm
vendored
@ -23,6 +23,7 @@
|
|||||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
%include "config.asm"
|
||||||
%include "ext/x86/x86inc.asm"
|
%include "ext/x86/x86inc.asm"
|
||||||
|
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
|
1
third_party/dav1d/src/x86/ipred_ssse3.asm
vendored
1
third_party/dav1d/src/x86/ipred_ssse3.asm
vendored
@ -23,6 +23,7 @@
|
|||||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
%include "config.asm"
|
||||||
%include "ext/x86/x86inc.asm"
|
%include "ext/x86/x86inc.asm"
|
||||||
|
|
||||||
SECTION_RODATA 16
|
SECTION_RODATA 16
|
||||||
|
1
third_party/dav1d/src/x86/itx.asm
vendored
1
third_party/dav1d/src/x86/itx.asm
vendored
@ -23,6 +23,7 @@
|
|||||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
%include "config.asm"
|
||||||
%include "ext/x86/x86inc.asm"
|
%include "ext/x86/x86inc.asm"
|
||||||
|
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
|
1
third_party/dav1d/src/x86/itx_ssse3.asm
vendored
1
third_party/dav1d/src/x86/itx_ssse3.asm
vendored
@ -23,6 +23,7 @@
|
|||||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
%include "config.asm"
|
||||||
%include "ext/x86/x86inc.asm"
|
%include "ext/x86/x86inc.asm"
|
||||||
|
|
||||||
|
|
||||||
|
1
third_party/dav1d/src/x86/loopfilter.asm
vendored
1
third_party/dav1d/src/x86/loopfilter.asm
vendored
@ -23,6 +23,7 @@
|
|||||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
%include "config.asm"
|
||||||
%include "ext/x86/x86inc.asm"
|
%include "ext/x86/x86inc.asm"
|
||||||
|
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
|
@ -23,6 +23,7 @@
|
|||||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
%include "config.asm"
|
||||||
%include "ext/x86/x86inc.asm"
|
%include "ext/x86/x86inc.asm"
|
||||||
|
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
|
@ -24,6 +24,7 @@
|
|||||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
%include "config.asm"
|
||||||
%include "ext/x86/x86inc.asm"
|
%include "ext/x86/x86inc.asm"
|
||||||
|
|
||||||
SECTION_RODATA 16
|
SECTION_RODATA 16
|
||||||
|
509
third_party/dav1d/src/x86/mc_avx2.asm
vendored
509
third_party/dav1d/src/x86/mc_avx2.asm
vendored
@ -23,6 +23,7 @@
|
|||||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
%include "config.asm"
|
||||||
%include "ext/x86/x86inc.asm"
|
%include "ext/x86/x86inc.asm"
|
||||||
|
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
@ -2766,20 +2767,20 @@ cglobal prep_8tap, 3, 8, 0, tmp, src, stride, w, h, mx, my, stride3
|
|||||||
%ifidn %1, put
|
%ifidn %1, put
|
||||||
%assign isprep 0
|
%assign isprep 0
|
||||||
%if required_stack_alignment <= STACK_ALIGNMENT
|
%if required_stack_alignment <= STACK_ALIGNMENT
|
||||||
cglobal put_8tap_scaled, 4, 15, 16, 96, dst, ds, src, ss, w, h, mx, my, dx, dy
|
cglobal put_8tap_scaled, 4, 15, 16, 112, dst, ds, src, ss, w, h, mx, my, dx, dy
|
||||||
%else
|
%else
|
||||||
cglobal put_8tap_scaled, 4, 14, 16, 112, dst, ds, src, ss, w, h, mx, my, dx, dy
|
cglobal put_8tap_scaled, 4, 14, 16, 128, dst, ds, src, ss, w, h, mx, my, dx, dy
|
||||||
%endif
|
%endif
|
||||||
%xdefine base_reg r12
|
%xdefine base_reg r12
|
||||||
%define rndshift 10
|
%define rndshift 10
|
||||||
%else
|
%else
|
||||||
%assign isprep 1
|
%assign isprep 1
|
||||||
%if required_stack_alignment <= STACK_ALIGNMENT
|
%if required_stack_alignment <= STACK_ALIGNMENT
|
||||||
cglobal prep_8tap_scaled, 4, 15, 16, 112, tmp, src, ss, w, h, mx, my, dx, dy
|
cglobal prep_8tap_scaled, 4, 15, 16, 128, tmp, src, ss, w, h, mx, my, dx, dy
|
||||||
%xdefine tmp_stridem r14q
|
%xdefine tmp_stridem r14q
|
||||||
%else
|
%else
|
||||||
cglobal prep_8tap_scaled, 4, 14, 16, 112, tmp, src, ss, w, h, mx, my, dx, dy
|
cglobal prep_8tap_scaled, 4, 14, 16, 128, tmp, src, ss, w, h, mx, my, dx, dy
|
||||||
%define tmp_stridem qword [rsp+104]
|
%define tmp_stridem qword [rsp+120]
|
||||||
%endif
|
%endif
|
||||||
%xdefine base_reg r11
|
%xdefine base_reg r11
|
||||||
%define rndshift 6
|
%define rndshift 6
|
||||||
@ -2808,7 +2809,7 @@ cglobal prep_8tap_scaled, 4, 14, 16, 112, tmp, src, ss, w, h, mx, my, dx, dy
|
|||||||
%define hm r6m
|
%define hm r6m
|
||||||
%endif
|
%endif
|
||||||
%if required_stack_alignment > STACK_ALIGNMENT
|
%if required_stack_alignment > STACK_ALIGNMENT
|
||||||
%define dsm [rsp+96]
|
%define dsm [rsp+112]
|
||||||
%define rX r1
|
%define rX r1
|
||||||
%define rXd r1d
|
%define rXd r1d
|
||||||
%else
|
%else
|
||||||
@ -2824,7 +2825,7 @@ cglobal prep_8tap_scaled, 4, 14, 16, 112, tmp, src, ss, w, h, mx, my, dx, dy
|
|||||||
%define dxm r7m
|
%define dxm r7m
|
||||||
%else
|
%else
|
||||||
DEFINE_ARGS tmp, src, ss, w, h, _, my, dx, dy, ss3
|
DEFINE_ARGS tmp, src, ss, w, h, _, my, dx, dy, ss3
|
||||||
%define hm [rsp+96]
|
%define hm [rsp+112]
|
||||||
%endif
|
%endif
|
||||||
MCT_8TAP_SCALED_REMAP_REGS_TO_PREV
|
MCT_8TAP_SCALED_REMAP_REGS_TO_PREV
|
||||||
%define rX r14
|
%define rX r14
|
||||||
@ -3104,181 +3105,9 @@ cglobal prep_8tap_scaled, 4, 14, 16, 112, tmp, src, ss, w, h, mx, my, dx, dy
|
|||||||
lea srcq, [srcq+ssq*2]
|
lea srcq, [srcq+ssq*2]
|
||||||
jmp .w4_loop
|
jmp .w4_loop
|
||||||
.w8:
|
.w8:
|
||||||
%ifidn %1, put
|
mov dword [rsp+48], 1
|
||||||
movifnidn dsm, dsq
|
movifprep tmp_stridem, 16
|
||||||
%endif
|
jmp .w_start
|
||||||
shr t0d, 16
|
|
||||||
sub srcq, 3
|
|
||||||
movd xm15, t0d
|
|
||||||
pmaddwd m8, [base+rescale_mul]
|
|
||||||
vpbroadcastq m11, [base+pq_0x40000000]
|
|
||||||
vpbroadcastd m15, xm15
|
|
||||||
paddd m14, m8 ; mx+dx*[0-7]
|
|
||||||
pand m6, m14, m10
|
|
||||||
psrld m6, 6
|
|
||||||
paddd m15, m6
|
|
||||||
pcmpeqd m6, m9
|
|
||||||
vextracti128 xm7, m15, 1
|
|
||||||
movd r4d, xm15
|
|
||||||
pextrd r6d, xm15, 2
|
|
||||||
pextrd r7d, xm15, 1
|
|
||||||
pextrd r9d, xm15, 3
|
|
||||||
movd r10d, xm7
|
|
||||||
pextrd r11d, xm7, 2
|
|
||||||
pextrd r13d, xm7, 1
|
|
||||||
pextrd rXd, xm7, 3
|
|
||||||
movq xm15, [base+subpel_filters+r4*8]
|
|
||||||
movq xm10, [base+subpel_filters+r6*8]
|
|
||||||
movhps xm15, [base+subpel_filters+r7*8]
|
|
||||||
movhps xm10, [base+subpel_filters+r9*8]
|
|
||||||
vinserti128 m15, [base+subpel_filters+r10*8], 1
|
|
||||||
vinserti128 m10, [base+subpel_filters+r11*8], 1
|
|
||||||
vpbroadcastq m9, [base+subpel_filters+r13*8]
|
|
||||||
vpbroadcastq m8, [base+subpel_filters+rX*8]
|
|
||||||
psrld m14, 10
|
|
||||||
mova [rsp], xm14
|
|
||||||
vextracti128 xm7, m14, 1
|
|
||||||
movd r4d, xm14
|
|
||||||
pextrd r6d, xm14, 2
|
|
||||||
pextrd r7d, xm14, 1
|
|
||||||
pextrd r9d, xm14, 3
|
|
||||||
movd r10d, xm7
|
|
||||||
pextrd r11d, xm7, 2
|
|
||||||
pextrd r13d, xm7, 1
|
|
||||||
pextrd rXd, xm7, 3
|
|
||||||
pshufd m5, m6, q1100
|
|
||||||
pshufd m6, m6, q3322
|
|
||||||
vpblendd m15, m9, 0xc0
|
|
||||||
vpblendd m10, m8, 0xc0
|
|
||||||
pblendvb m15, m11, m5
|
|
||||||
pblendvb m10, m11, m6
|
|
||||||
vbroadcasti128 m14, [base+subpel_s_shuf8]
|
|
||||||
MC_8TAP_SCALED_H 0, 1, 2, 3, 4, 5, 6, 7 ; 0a 1a 0b 1b
|
|
||||||
MC_8TAP_SCALED_H 1, 2, 3, 4, 5, 6, 7, 8 ; 2a 3a 2b 3b
|
|
||||||
MC_8TAP_SCALED_H 2, 3, 4, 5, 6, 7, 8, 9 ; 4a 5a 4b 5b
|
|
||||||
MC_8TAP_SCALED_H 3, 4, 5, 6, 7, 8, 9, 11 ; 6a 7a 6b 7b
|
|
||||||
mov myd, mym
|
|
||||||
mov dyd, dym
|
|
||||||
pshufb m0, m14 ; 01a 01b
|
|
||||||
pshufb m1, m14 ; 23a 23b
|
|
||||||
pshufb m2, m14 ; 45a 45b
|
|
||||||
pshufb m3, m14 ; 67a 67b
|
|
||||||
vbroadcasti128 m14, [base+wswap]
|
|
||||||
.w8_loop:
|
|
||||||
and myd, 0x3ff
|
|
||||||
mov r6d, 64 << 24
|
|
||||||
mov r4d, myd
|
|
||||||
shr r4d, 6
|
|
||||||
lea r4d, [t1+r4]
|
|
||||||
cmovnz r6q, [base+subpel_filters+r4*8]
|
|
||||||
movq xm11, r6q
|
|
||||||
punpcklbw xm11, xm11
|
|
||||||
psraw xm11, 8
|
|
||||||
vinserti128 m11, xm11, 1
|
|
||||||
pshufd m8, m11, q0000
|
|
||||||
pshufd m9, m11, q1111
|
|
||||||
pmaddwd m4, m0, m8
|
|
||||||
pmaddwd m5, m1, m9
|
|
||||||
pshufd m8, m11, q2222
|
|
||||||
pshufd m11, m11, q3333
|
|
||||||
pmaddwd m6, m2, m8
|
|
||||||
pmaddwd m7, m3, m11
|
|
||||||
paddd m4, m5
|
|
||||||
paddd m6, m7
|
|
||||||
paddd m4, m13
|
|
||||||
paddd m4, m6
|
|
||||||
psrad m4, rndshift
|
|
||||||
vextracti128 xm5, m4, 1
|
|
||||||
packssdw xm4, xm5
|
|
||||||
%ifidn %1, put
|
|
||||||
packuswb xm4, xm4
|
|
||||||
movq [dstq], xm4
|
|
||||||
add dstq, dsm
|
|
||||||
%else
|
|
||||||
mova [tmpq], xm4
|
|
||||||
add tmpq, 16
|
|
||||||
%endif
|
|
||||||
dec hd
|
|
||||||
jz .ret
|
|
||||||
add myd, dyd
|
|
||||||
test myd, ~0x3ff
|
|
||||||
jz .w8_loop
|
|
||||||
test myd, 0x400
|
|
||||||
mov [rsp+16], myd
|
|
||||||
mov r4d, [rsp+ 0]
|
|
||||||
mov r6d, [rsp+ 8]
|
|
||||||
mov r7d, [rsp+ 4]
|
|
||||||
mov r9d, [rsp+12]
|
|
||||||
jz .w8_skip_line
|
|
||||||
vpbroadcastq m6, [srcq+r13]
|
|
||||||
vpbroadcastq m7, [srcq+ rX]
|
|
||||||
movq xm4, [srcq+ r4]
|
|
||||||
movq xm5, [srcq+ r6]
|
|
||||||
movhps xm4, [srcq+ r7]
|
|
||||||
movhps xm5, [srcq+ r9]
|
|
||||||
vinserti128 m4, [srcq+r10], 1
|
|
||||||
vinserti128 m5, [srcq+r11], 1
|
|
||||||
add srcq, ssq
|
|
||||||
mov myd, [rsp+16]
|
|
||||||
mov dyd, dym
|
|
||||||
pshufb m0, m14
|
|
||||||
pshufb m1, m14
|
|
||||||
pshufb m2, m14
|
|
||||||
pshufb m3, m14
|
|
||||||
vpblendd m4, m6, 0xc0
|
|
||||||
vpblendd m5, m7, 0xc0
|
|
||||||
pmaddubsw m4, m15
|
|
||||||
pmaddubsw m5, m10
|
|
||||||
phaddw m4, m5
|
|
||||||
pslld m5, m4, 16
|
|
||||||
paddw m4, m5
|
|
||||||
pmulhrsw m4, m12
|
|
||||||
pblendw m0, m1, 0xaa
|
|
||||||
pblendw m1, m2, 0xaa
|
|
||||||
pblendw m2, m3, 0xaa
|
|
||||||
pblendw m3, m4, 0xaa
|
|
||||||
jmp .w8_loop
|
|
||||||
.w8_skip_line:
|
|
||||||
mova m0, m1
|
|
||||||
mova m1, m2
|
|
||||||
mova m2, m3
|
|
||||||
vpbroadcastq m7, [srcq+r13]
|
|
||||||
vpbroadcastq m8, [srcq+ rX]
|
|
||||||
movq xm3, [srcq+ r4]
|
|
||||||
movq xm4, [srcq+ r6]
|
|
||||||
movhps xm3, [srcq+ r7]
|
|
||||||
movhps xm4, [srcq+ r9]
|
|
||||||
vinserti128 m3, [srcq+r10], 1
|
|
||||||
vinserti128 m4, [srcq+r11], 1
|
|
||||||
add srcq, ssq
|
|
||||||
movq xm5, [srcq+ r4]
|
|
||||||
movq xm6, [srcq+ r6]
|
|
||||||
movhps xm5, [srcq+ r7]
|
|
||||||
movhps xm6, [srcq+ r9]
|
|
||||||
vinserti128 m5, [srcq+r10], 1
|
|
||||||
vinserti128 m6, [srcq+r11], 1
|
|
||||||
vpbroadcastq m9, [srcq+r13]
|
|
||||||
vpbroadcastq m11, [srcq+ rX]
|
|
||||||
add srcq, ssq
|
|
||||||
mov myd, [rsp+16]
|
|
||||||
mov dyd, dym
|
|
||||||
vpblendd m3, m7, 0xc0
|
|
||||||
vpblendd m4, m8, 0xc0
|
|
||||||
vpblendd m5, m9, 0xc0
|
|
||||||
vpblendd m6, m11, 0xc0
|
|
||||||
pmaddubsw m3, m15
|
|
||||||
pmaddubsw m4, m10
|
|
||||||
pmaddubsw m5, m15
|
|
||||||
pmaddubsw m6, m10
|
|
||||||
phaddw m3, m4
|
|
||||||
phaddw m5, m6
|
|
||||||
psrld m4, m3, 16
|
|
||||||
pslld m6, m5, 16
|
|
||||||
paddw m3, m4
|
|
||||||
paddw m5, m6
|
|
||||||
pblendw m3, m5, 0xaa
|
|
||||||
pmulhrsw m3, m12
|
|
||||||
jmp .w8_loop
|
|
||||||
.w16:
|
.w16:
|
||||||
mov dword [rsp+48], 2
|
mov dword [rsp+48], 2
|
||||||
movifprep tmp_stridem, 32
|
movifprep tmp_stridem, 32
|
||||||
@ -3698,127 +3527,9 @@ cglobal prep_8tap_scaled, 4, 14, 16, 112, tmp, src, ss, w, h, mx, my, dx, dy
|
|||||||
jg .dy1_w4_loop
|
jg .dy1_w4_loop
|
||||||
MC_8TAP_SCALED_RET
|
MC_8TAP_SCALED_RET
|
||||||
.dy1_w8:
|
.dy1_w8:
|
||||||
%ifidn %1, put
|
mov dword [rsp+72], 1
|
||||||
movifnidn dsm, dsq
|
movifprep tmp_stridem, 16
|
||||||
%endif
|
jmp .dy1_w_start
|
||||||
shr t0d, 16
|
|
||||||
sub srcq, 3
|
|
||||||
movd xm15, t0d
|
|
||||||
pmaddwd m8, [base+rescale_mul]
|
|
||||||
vpbroadcastq m11, [base+pq_0x40000000]
|
|
||||||
vpbroadcastd m15, xm15
|
|
||||||
paddd m14, m8 ; mx+dx*[0-7]
|
|
||||||
pand m6, m14, m10
|
|
||||||
psrld m6, 6
|
|
||||||
paddd m15, m6
|
|
||||||
pcmpeqd m6, m9
|
|
||||||
vextracti128 xm7, m15, 1
|
|
||||||
movd r4d, xm15
|
|
||||||
pextrd r6d, xm15, 2
|
|
||||||
pextrd r7d, xm15, 1
|
|
||||||
pextrd r9d, xm15, 3
|
|
||||||
movd r10d, xm7
|
|
||||||
pextrd r11d, xm7, 2
|
|
||||||
pextrd r13d, xm7, 1
|
|
||||||
pextrd rXd, xm7, 3
|
|
||||||
movq xm15, [base+subpel_filters+ r4*8]
|
|
||||||
movq xm10, [base+subpel_filters+ r6*8]
|
|
||||||
movhps xm15, [base+subpel_filters+ r7*8]
|
|
||||||
movhps xm10, [base+subpel_filters+ r9*8]
|
|
||||||
vinserti128 m15, [base+subpel_filters+r10*8], 1
|
|
||||||
vinserti128 m10, [base+subpel_filters+r11*8], 1
|
|
||||||
vpbroadcastq m9, [base+subpel_filters+r13*8]
|
|
||||||
vpbroadcastq m8, [base+subpel_filters+ rX*8]
|
|
||||||
psrld m14, 10
|
|
||||||
vextracti128 xm7, m14, 1
|
|
||||||
movd r4d, xm14
|
|
||||||
pextrd r6d, xm14, 2
|
|
||||||
pextrd r7d, xm14, 1
|
|
||||||
pextrd r9d, xm14, 3
|
|
||||||
movd r10d, xm7
|
|
||||||
pextrd r11d, xm7, 2
|
|
||||||
pextrd r13d, xm7, 1
|
|
||||||
pextrd rXd, xm7, 3
|
|
||||||
mov [rsp+32], r7d
|
|
||||||
pshufd m5, m6, q1100
|
|
||||||
pshufd m6, m6, q3322
|
|
||||||
vpblendd m15, m9, 0xc0
|
|
||||||
vpblendd m10, m8, 0xc0
|
|
||||||
pblendvb m15, m11, m5
|
|
||||||
pblendvb m10, m11, m6
|
|
||||||
vbroadcasti128 m14, [base+subpel_s_shuf8]
|
|
||||||
MC_8TAP_SCALED_H 0, 1, 2, 3, 4, 5, 6, 7 ; 0a 1a 0b 1b
|
|
||||||
MC_8TAP_SCALED_H 1, 2, 3, 4, 5, 6, 7, 8 ; 2a 3a 2b 3b
|
|
||||||
MC_8TAP_SCALED_H 2, 3, 4, 5, 6, 7, 8, 9 ; 4a 5a 4b 5b
|
|
||||||
MC_8TAP_SCALED_H 3, 4, 5, 6, 7, 8, 9, 11 ; 6a 7a 6b 7b
|
|
||||||
mov myd, mym
|
|
||||||
movu [rsp], m10
|
|
||||||
pshufb m0, m14 ; 01a 01b
|
|
||||||
pshufb m1, m14 ; 23a 23b
|
|
||||||
pshufb m2, m14 ; 45a 45b
|
|
||||||
pshufb m3, m14 ; 67a 67b
|
|
||||||
shr myd, 6
|
|
||||||
lea myd, [t1+myq]
|
|
||||||
mov t1d, 64 << 24
|
|
||||||
cmovnz t1q, [base+subpel_filters+myq*8]
|
|
||||||
vbroadcasti128 m14, [base+wswap]
|
|
||||||
movq xm11, t1q
|
|
||||||
punpcklbw xm11, xm11
|
|
||||||
psraw xm11, 8
|
|
||||||
vinserti128 m11, xm11, 1
|
|
||||||
mov r7d, [rsp+32]
|
|
||||||
pshufd m8, m11, q0000
|
|
||||||
pshufd m9, m11, q1111
|
|
||||||
pshufd m10, m11, q2222
|
|
||||||
pshufd m11, m11, q3333
|
|
||||||
.dy1_w8_loop:
|
|
||||||
pmaddwd m4, m0, m8
|
|
||||||
pmaddwd m5, m1, m9
|
|
||||||
pmaddwd m6, m2, m10
|
|
||||||
pmaddwd m7, m3, m11
|
|
||||||
paddd m4, m5
|
|
||||||
paddd m6, m7
|
|
||||||
paddd m4, m13
|
|
||||||
paddd m4, m6
|
|
||||||
psrad m4, rndshift
|
|
||||||
vextracti128 xm5, m4, 1
|
|
||||||
packssdw xm4, xm5
|
|
||||||
%ifidn %1, put
|
|
||||||
packuswb xm4, xm4
|
|
||||||
movq [dstq], xm4
|
|
||||||
add dstq, dsm
|
|
||||||
%else
|
|
||||||
mova [tmpq], xm4
|
|
||||||
add tmpq, 16
|
|
||||||
%endif
|
|
||||||
dec hd
|
|
||||||
jz .ret
|
|
||||||
movq xm4, [srcq+ r4]
|
|
||||||
movq xm5, [srcq+ r6]
|
|
||||||
movhps xm4, [srcq+ r7]
|
|
||||||
movhps xm5, [srcq+ r9]
|
|
||||||
vinserti128 m4, [srcq+r10], 1
|
|
||||||
vinserti128 m5, [srcq+r11], 1
|
|
||||||
vpbroadcastq m6, [srcq+r13]
|
|
||||||
vpbroadcastq m7, [srcq+ rX]
|
|
||||||
add srcq, ssq
|
|
||||||
pshufb m0, m14
|
|
||||||
pshufb m1, m14
|
|
||||||
pshufb m2, m14
|
|
||||||
pshufb m3, m14
|
|
||||||
vpblendd m4, m6, 0xc0
|
|
||||||
vpblendd m5, m7, 0xc0
|
|
||||||
pmaddubsw m4, m15
|
|
||||||
pmaddubsw m5, [rsp]
|
|
||||||
phaddw m4, m5
|
|
||||||
pslld m5, m4, 16
|
|
||||||
paddw m4, m5
|
|
||||||
pmulhrsw m4, m12
|
|
||||||
pblendw m0, m1, 0xaa
|
|
||||||
pblendw m1, m2, 0xaa
|
|
||||||
pblendw m2, m3, 0xaa
|
|
||||||
pblendw m3, m4, 0xaa
|
|
||||||
jmp .dy1_w8_loop
|
|
||||||
.dy1_w16:
|
.dy1_w16:
|
||||||
mov dword [rsp+72], 2
|
mov dword [rsp+72], 2
|
||||||
movifprep tmp_stridem, 32
|
movifprep tmp_stridem, 32
|
||||||
@ -3835,11 +3546,16 @@ cglobal prep_8tap_scaled, 4, 14, 16, 112, tmp, src, ss, w, h, mx, my, dx, dy
|
|||||||
mov dword [rsp+72], 16
|
mov dword [rsp+72], 16
|
||||||
movifprep tmp_stridem, 256
|
movifprep tmp_stridem, 256
|
||||||
.dy1_w_start:
|
.dy1_w_start:
|
||||||
|
mov myd, mym
|
||||||
%ifidn %1, put
|
%ifidn %1, put
|
||||||
movifnidn dsm, dsq
|
movifnidn dsm, dsq
|
||||||
%endif
|
%endif
|
||||||
shr t0d, 16
|
shr t0d, 16
|
||||||
sub srcq, 3
|
sub srcq, 3
|
||||||
|
shr myd, 6
|
||||||
|
mov r4d, 64 << 24
|
||||||
|
lea myd, [t1+myq]
|
||||||
|
cmovnz r4q, [base+subpel_filters+myq*8]
|
||||||
pmaddwd m8, [base+rescale_mul]
|
pmaddwd m8, [base+rescale_mul]
|
||||||
movd xm15, t0d
|
movd xm15, t0d
|
||||||
mov [rsp+76], t0d
|
mov [rsp+76], t0d
|
||||||
@ -3851,6 +3567,10 @@ cglobal prep_8tap_scaled, 4, 14, 16, 112, tmp, src, ss, w, h, mx, my, dx, dy
|
|||||||
shl dword dxm, 3 ; dx*8
|
shl dword dxm, 3 ; dx*8
|
||||||
vpbroadcastd m15, xm15
|
vpbroadcastd m15, xm15
|
||||||
paddd m14, m8 ; mx+dx*[0-7]
|
paddd m14, m8 ; mx+dx*[0-7]
|
||||||
|
movq xm0, r4q
|
||||||
|
punpcklbw xm0, xm0
|
||||||
|
psraw xm0, 8
|
||||||
|
mova [rsp+96], xm0
|
||||||
jmp .dy1_hloop
|
jmp .dy1_hloop
|
||||||
.dy1_hloop_prep:
|
.dy1_hloop_prep:
|
||||||
dec dword [rsp+72]
|
dec dword [rsp+72]
|
||||||
@ -3910,27 +3630,16 @@ cglobal prep_8tap_scaled, 4, 14, 16, 112, tmp, src, ss, w, h, mx, my, dx, dy
|
|||||||
MC_8TAP_SCALED_H 1, 2, 3, 4, 5, 6, 7, 8 ; 2a 3a 2b 3b
|
MC_8TAP_SCALED_H 1, 2, 3, 4, 5, 6, 7, 8 ; 2a 3a 2b 3b
|
||||||
MC_8TAP_SCALED_H 2, 3, 4, 5, 6, 7, 8, 9 ; 4a 5a 4b 5b
|
MC_8TAP_SCALED_H 2, 3, 4, 5, 6, 7, 8, 9 ; 4a 5a 4b 5b
|
||||||
MC_8TAP_SCALED_H 3, 4, 5, 6, 7, 8, 9, 11 ; 6a 7a 6b 7b
|
MC_8TAP_SCALED_H 3, 4, 5, 6, 7, 8, 9, 11 ; 6a 7a 6b 7b
|
||||||
mov myd, mym
|
|
||||||
movu [rsp], m10
|
movu [rsp], m10
|
||||||
|
vpbroadcastd m8, [rsp+0x60]
|
||||||
|
vpbroadcastd m9, [rsp+0x64]
|
||||||
|
vpbroadcastd m10, [rsp+0x68]
|
||||||
|
vpbroadcastd m11, [rsp+0x6c]
|
||||||
pshufb m0, m14 ; 01a 01b
|
pshufb m0, m14 ; 01a 01b
|
||||||
pshufb m1, m14 ; 23a 23b
|
pshufb m1, m14 ; 23a 23b
|
||||||
pshufb m2, m14 ; 45a 45b
|
pshufb m2, m14 ; 45a 45b
|
||||||
pshufb m3, m14 ; 67a 67b
|
pshufb m3, m14 ; 67a 67b
|
||||||
shr myd, 6
|
|
||||||
mov r4d, 64 << 24
|
|
||||||
lea myd, [t1+myq]
|
|
||||||
cmovnz r4q, [base+subpel_filters+myq*8]
|
|
||||||
vbroadcasti128 m14, [base+wswap]
|
vbroadcasti128 m14, [base+wswap]
|
||||||
movq xm11, r4q
|
|
||||||
punpcklbw xm11, xm11
|
|
||||||
psraw xm11, 8
|
|
||||||
vinserti128 m11, xm11, 1
|
|
||||||
mov r4d, [rsp+64]
|
|
||||||
mov r7d, [rsp+68]
|
|
||||||
pshufd m8, m11, q0000
|
|
||||||
pshufd m9, m11, q1111
|
|
||||||
pshufd m10, m11, q2222
|
|
||||||
pshufd m11, m11, q3333
|
|
||||||
.dy1_vloop:
|
.dy1_vloop:
|
||||||
pmaddwd m4, m0, m8
|
pmaddwd m4, m0, m8
|
||||||
pmaddwd m5, m1, m9
|
pmaddwd m5, m1, m9
|
||||||
@ -4182,137 +3891,9 @@ cglobal prep_8tap_scaled, 4, 14, 16, 112, tmp, src, ss, w, h, mx, my, dx, dy
|
|||||||
jg .dy2_w4_loop
|
jg .dy2_w4_loop
|
||||||
MC_8TAP_SCALED_RET
|
MC_8TAP_SCALED_RET
|
||||||
.dy2_w8:
|
.dy2_w8:
|
||||||
%ifidn %1, put
|
mov dword [rsp+40], 1
|
||||||
movifnidn dsm, dsq
|
movifprep tmp_stridem, 16
|
||||||
%endif
|
jmp .dy2_w_start
|
||||||
shr t0d, 16
|
|
||||||
sub srcq, 3
|
|
||||||
movd xm15, t0d
|
|
||||||
pmaddwd m8, [base+rescale_mul]
|
|
||||||
vpbroadcastq m11, [base+pq_0x40000000]
|
|
||||||
vpbroadcastd m15, xm15
|
|
||||||
paddd m14, m8 ; mx+dx*[0-7]
|
|
||||||
pand m6, m14, m10
|
|
||||||
psrld m6, 6
|
|
||||||
paddd m15, m6
|
|
||||||
pcmpeqd m6, m9
|
|
||||||
vextracti128 xm7, m15, 1
|
|
||||||
movd r4d, xm15
|
|
||||||
pextrd r6d, xm15, 2
|
|
||||||
pextrd r7d, xm15, 1
|
|
||||||
pextrd r9d, xm15, 3
|
|
||||||
movd r10d, xm7
|
|
||||||
pextrd r11d, xm7, 2
|
|
||||||
pextrd r13d, xm7, 1
|
|
||||||
pextrd rXd, xm7, 3
|
|
||||||
movq xm15, [base+subpel_filters+ r4*8]
|
|
||||||
movq xm10, [base+subpel_filters+ r6*8]
|
|
||||||
movhps xm15, [base+subpel_filters+ r7*8]
|
|
||||||
movhps xm10, [base+subpel_filters+ r9*8]
|
|
||||||
vinserti128 m15, [base+subpel_filters+r10*8], 1
|
|
||||||
vinserti128 m10, [base+subpel_filters+r11*8], 1
|
|
||||||
vpbroadcastq m9, [base+subpel_filters+r13*8]
|
|
||||||
vpbroadcastq m8, [base+subpel_filters+ rX*8]
|
|
||||||
psrld m14, 10
|
|
||||||
vextracti128 xm7, m14, 1
|
|
||||||
movd r4d, xm14
|
|
||||||
pextrd r6d, xm14, 2
|
|
||||||
pextrd r7d, xm14, 1
|
|
||||||
pextrd r9d, xm14, 3
|
|
||||||
movd r10d, xm7
|
|
||||||
pextrd r11d, xm7, 2
|
|
||||||
pextrd r13d, xm7, 1
|
|
||||||
pextrd rXd, xm7, 3
|
|
||||||
mov [rsp], r7d
|
|
||||||
pshufd m5, m6, q1100
|
|
||||||
pshufd m6, m6, q3322
|
|
||||||
vpblendd m15, m9, 0xc0
|
|
||||||
vpblendd m10, m8, 0xc0
|
|
||||||
pblendvb m15, m11, m5
|
|
||||||
pblendvb m10, m11, m6
|
|
||||||
vbroadcasti128 m14, [base+subpel_s_shuf8]
|
|
||||||
MC_8TAP_SCALED_H 0, 1, 2, 3, 4, 5, 6, 7 ; 0a 1a 0b 1b
|
|
||||||
MC_8TAP_SCALED_H 1, 2, 3, 4, 5, 6, 7, 8 ; 2a 3a 2b 3b
|
|
||||||
MC_8TAP_SCALED_H 2, 3, 4, 5, 6, 7, 8, 9 ; 4a 5a 4b 5b
|
|
||||||
MC_8TAP_SCALED_H 3, 4, 5, 6, 7, 8, 9, 11 ; 6a 7a 6b 7b
|
|
||||||
mov myd, mym
|
|
||||||
pshufb m0, m14 ; 01a 01b
|
|
||||||
pshufb m1, m14 ; 23a 23b
|
|
||||||
pshufb m2, m14 ; 45a 45b
|
|
||||||
pshufb m3, m14 ; 67a 67b
|
|
||||||
shr myd, 6
|
|
||||||
lea myd, [t1+myq]
|
|
||||||
mov t1d, 64 << 24
|
|
||||||
cmovnz t1q, [base+subpel_filters+myq*8]
|
|
||||||
movq xm11, t1q
|
|
||||||
punpcklbw xm11, xm11
|
|
||||||
psraw xm11, 8
|
|
||||||
vinserti128 m11, xm11, 1
|
|
||||||
mov r7d, [rsp]
|
|
||||||
pshufd m8, m11, q0000
|
|
||||||
pshufd m9, m11, q1111
|
|
||||||
pshufd m14, m11, q2222
|
|
||||||
pshufd m11, m11, q3333
|
|
||||||
.dy2_w8_loop:
|
|
||||||
pmaddwd m4, m0, m8
|
|
||||||
pmaddwd m5, m1, m9
|
|
||||||
pmaddwd m6, m2, m14
|
|
||||||
pmaddwd m7, m3, m11
|
|
||||||
paddd m4, m5
|
|
||||||
paddd m6, m7
|
|
||||||
paddd m4, m13
|
|
||||||
paddd m4, m6
|
|
||||||
psrad m4, rndshift
|
|
||||||
vextracti128 xm5, m4, 1
|
|
||||||
packssdw xm4, xm5
|
|
||||||
%ifidn %1, put
|
|
||||||
packuswb xm4, xm4
|
|
||||||
movq [dstq], xm4
|
|
||||||
add dstq, dsm
|
|
||||||
%else
|
|
||||||
mova [tmpq], xm4
|
|
||||||
add tmpq, 16
|
|
||||||
%endif
|
|
||||||
dec hd
|
|
||||||
jz .ret
|
|
||||||
mova m0, m1
|
|
||||||
mova m1, m2
|
|
||||||
mova m2, m3
|
|
||||||
movq xm3, [srcq+ r4]
|
|
||||||
movq xm4, [srcq+ r6]
|
|
||||||
movhps xm3, [srcq+ r7]
|
|
||||||
movhps xm4, [srcq+ r9]
|
|
||||||
vinserti128 m3, [srcq+r10], 1
|
|
||||||
vinserti128 m4, [srcq+r11], 1
|
|
||||||
vpbroadcastq m5, [srcq+r13]
|
|
||||||
vpbroadcastq m6, [srcq+ rX]
|
|
||||||
add srcq, ssq
|
|
||||||
vpblendd m3, m5, 0xc0
|
|
||||||
vpblendd m4, m6, 0xc0
|
|
||||||
pmaddubsw m3, m15
|
|
||||||
pmaddubsw m4, m10
|
|
||||||
phaddw m3, m4
|
|
||||||
movq xm4, [srcq+ r4]
|
|
||||||
movq xm5, [srcq+ r6]
|
|
||||||
movhps xm4, [srcq+ r7]
|
|
||||||
movhps xm5, [srcq+ r9]
|
|
||||||
vinserti128 m4, [srcq+r10], 1
|
|
||||||
vinserti128 m5, [srcq+r11], 1
|
|
||||||
vpbroadcastq m6, [srcq+r13]
|
|
||||||
vpbroadcastq m7, [srcq+ rX]
|
|
||||||
add srcq, ssq
|
|
||||||
vpblendd m4, m6, 0xc0
|
|
||||||
vpblendd m5, m7, 0xc0
|
|
||||||
pmaddubsw m4, m15
|
|
||||||
pmaddubsw m5, m10
|
|
||||||
phaddw m4, m5
|
|
||||||
psrld m5, m3, 16
|
|
||||||
pslld m6, m4, 16
|
|
||||||
paddw m3, m5
|
|
||||||
paddw m4, m6
|
|
||||||
pblendw m3, m4, 0xaa
|
|
||||||
pmulhrsw m3, m12
|
|
||||||
jmp .dy2_w8_loop
|
|
||||||
.dy2_w16:
|
.dy2_w16:
|
||||||
mov dword [rsp+40], 2
|
mov dword [rsp+40], 2
|
||||||
movifprep tmp_stridem, 32
|
movifprep tmp_stridem, 32
|
||||||
@ -4329,11 +3910,16 @@ cglobal prep_8tap_scaled, 4, 14, 16, 112, tmp, src, ss, w, h, mx, my, dx, dy
|
|||||||
mov dword [rsp+40], 16
|
mov dword [rsp+40], 16
|
||||||
movifprep tmp_stridem, 256
|
movifprep tmp_stridem, 256
|
||||||
.dy2_w_start:
|
.dy2_w_start:
|
||||||
|
mov myd, mym
|
||||||
%ifidn %1, put
|
%ifidn %1, put
|
||||||
movifnidn dsm, dsq
|
movifnidn dsm, dsq
|
||||||
%endif
|
%endif
|
||||||
shr t0d, 16
|
shr t0d, 16
|
||||||
sub srcq, 3
|
sub srcq, 3
|
||||||
|
shr myd, 6
|
||||||
|
mov r4d, 64 << 24
|
||||||
|
lea myd, [t1+myq]
|
||||||
|
cmovnz r4q, [base+subpel_filters+myq*8]
|
||||||
pmaddwd m8, [base+rescale_mul]
|
pmaddwd m8, [base+rescale_mul]
|
||||||
movd xm15, t0d
|
movd xm15, t0d
|
||||||
mov [rsp+64], t0d
|
mov [rsp+64], t0d
|
||||||
@ -4345,6 +3931,10 @@ cglobal prep_8tap_scaled, 4, 14, 16, 112, tmp, src, ss, w, h, mx, my, dx, dy
|
|||||||
shl dword dxm, 3 ; dx*8
|
shl dword dxm, 3 ; dx*8
|
||||||
vpbroadcastd m15, xm15
|
vpbroadcastd m15, xm15
|
||||||
paddd m14, m8 ; mx+dx*[0-7]
|
paddd m14, m8 ; mx+dx*[0-7]
|
||||||
|
movq xm0, r4q
|
||||||
|
punpcklbw xm0, xm0
|
||||||
|
psraw xm0, 8
|
||||||
|
mova [rsp+0x50], xm0
|
||||||
jmp .dy2_hloop
|
jmp .dy2_hloop
|
||||||
.dy2_hloop_prep:
|
.dy2_hloop_prep:
|
||||||
dec dword [rsp+40]
|
dec dword [rsp+40]
|
||||||
@ -4384,7 +3974,6 @@ cglobal prep_8tap_scaled, 4, 14, 16, 112, tmp, src, ss, w, h, mx, my, dx, dy
|
|||||||
vpbroadcastq m8, [base+subpel_filters+ rX*8]
|
vpbroadcastq m8, [base+subpel_filters+ rX*8]
|
||||||
psrld m14, 10
|
psrld m14, 10
|
||||||
vextracti128 xm7, m14, 1
|
vextracti128 xm7, m14, 1
|
||||||
movq [rsp+32], xm14
|
|
||||||
movd r4d, xm14
|
movd r4d, xm14
|
||||||
pextrd r6d, xm14, 2
|
pextrd r6d, xm14, 2
|
||||||
pextrd r7d, xm14, 1
|
pextrd r7d, xm14, 1
|
||||||
@ -4404,25 +3993,15 @@ cglobal prep_8tap_scaled, 4, 14, 16, 112, tmp, src, ss, w, h, mx, my, dx, dy
|
|||||||
MC_8TAP_SCALED_H 1, 2, 3, 4, 5, 6, 7, 8 ; 2a 3a 2b 3b
|
MC_8TAP_SCALED_H 1, 2, 3, 4, 5, 6, 7, 8 ; 2a 3a 2b 3b
|
||||||
MC_8TAP_SCALED_H 2, 3, 4, 5, 6, 7, 8, 9 ; 4a 5a 4b 5b
|
MC_8TAP_SCALED_H 2, 3, 4, 5, 6, 7, 8, 9 ; 4a 5a 4b 5b
|
||||||
MC_8TAP_SCALED_H 3, 4, 5, 6, 7, 8, 9, 11 ; 6a 7a 6b 7b
|
MC_8TAP_SCALED_H 3, 4, 5, 6, 7, 8, 9, 11 ; 6a 7a 6b 7b
|
||||||
mov myd, mym
|
vpbroadcastd m8, [rsp+0x50]
|
||||||
|
vpbroadcastd m9, [rsp+0x54]
|
||||||
|
vpbroadcastd m11, [rsp+0x58]
|
||||||
|
vpbroadcastd m4, [rsp+0x5c]
|
||||||
pshufb m0, m14 ; 01a 01b
|
pshufb m0, m14 ; 01a 01b
|
||||||
pshufb m1, m14 ; 23a 23b
|
pshufb m1, m14 ; 23a 23b
|
||||||
pshufb m2, m14 ; 45a 45b
|
pshufb m2, m14 ; 45a 45b
|
||||||
pshufb m3, m14 ; 67a 67b
|
pshufb m3, m14 ; 67a 67b
|
||||||
shr myd, 6
|
SWAP m14, m4
|
||||||
mov r4d, 64 << 24
|
|
||||||
lea myd, [t1+myq]
|
|
||||||
cmovnz r4q, [base+subpel_filters+myq*8]
|
|
||||||
movq xm14, r4q
|
|
||||||
punpcklbw xm14, xm14
|
|
||||||
psraw xm14, 8
|
|
||||||
vinserti128 m14, xm14, 1
|
|
||||||
mov r4d, [rsp+32]
|
|
||||||
mov r7d, [rsp+36]
|
|
||||||
pshufd m8, m14, q0000
|
|
||||||
pshufd m9, m14, q1111
|
|
||||||
pshufd m11, m14, q2222
|
|
||||||
pshufd m14, m14, q3333
|
|
||||||
.dy2_vloop:
|
.dy2_vloop:
|
||||||
pmaddwd m4, m0, m8
|
pmaddwd m4, m0, m8
|
||||||
pmaddwd m5, m1, m9
|
pmaddwd m5, m1, m9
|
||||||
|
1
third_party/dav1d/src/x86/mc_avx512.asm
vendored
1
third_party/dav1d/src/x86/mc_avx512.asm
vendored
@ -23,6 +23,7 @@
|
|||||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
%include "config.asm"
|
||||||
%include "ext/x86/x86inc.asm"
|
%include "ext/x86/x86inc.asm"
|
||||||
|
|
||||||
%if HAVE_AVX512ICL && ARCH_X86_64
|
%if HAVE_AVX512ICL && ARCH_X86_64
|
||||||
|
44
third_party/dav1d/src/x86/mc_init_tmpl.c
vendored
44
third_party/dav1d/src/x86/mc_init_tmpl.c
vendored
@ -91,26 +91,46 @@ decl_mct_fn(dav1d_prep_bilin_ssse3);
|
|||||||
decl_mct_fn(dav1d_prep_bilin_sse2);
|
decl_mct_fn(dav1d_prep_bilin_sse2);
|
||||||
|
|
||||||
decl_mc_scaled_fn(dav1d_put_8tap_scaled_regular_avx2);
|
decl_mc_scaled_fn(dav1d_put_8tap_scaled_regular_avx2);
|
||||||
|
decl_mc_scaled_fn(dav1d_put_8tap_scaled_regular_ssse3);
|
||||||
decl_mc_scaled_fn(dav1d_put_8tap_scaled_regular_smooth_avx2);
|
decl_mc_scaled_fn(dav1d_put_8tap_scaled_regular_smooth_avx2);
|
||||||
|
decl_mc_scaled_fn(dav1d_put_8tap_scaled_regular_smooth_ssse3);
|
||||||
decl_mc_scaled_fn(dav1d_put_8tap_scaled_regular_sharp_avx2);
|
decl_mc_scaled_fn(dav1d_put_8tap_scaled_regular_sharp_avx2);
|
||||||
|
decl_mc_scaled_fn(dav1d_put_8tap_scaled_regular_sharp_ssse3);
|
||||||
decl_mc_scaled_fn(dav1d_put_8tap_scaled_smooth_avx2);
|
decl_mc_scaled_fn(dav1d_put_8tap_scaled_smooth_avx2);
|
||||||
|
decl_mc_scaled_fn(dav1d_put_8tap_scaled_smooth_ssse3);
|
||||||
decl_mc_scaled_fn(dav1d_put_8tap_scaled_smooth_regular_avx2);
|
decl_mc_scaled_fn(dav1d_put_8tap_scaled_smooth_regular_avx2);
|
||||||
|
decl_mc_scaled_fn(dav1d_put_8tap_scaled_smooth_regular_ssse3);
|
||||||
decl_mc_scaled_fn(dav1d_put_8tap_scaled_smooth_sharp_avx2);
|
decl_mc_scaled_fn(dav1d_put_8tap_scaled_smooth_sharp_avx2);
|
||||||
|
decl_mc_scaled_fn(dav1d_put_8tap_scaled_smooth_sharp_ssse3);
|
||||||
decl_mc_scaled_fn(dav1d_put_8tap_scaled_sharp_avx2);
|
decl_mc_scaled_fn(dav1d_put_8tap_scaled_sharp_avx2);
|
||||||
|
decl_mc_scaled_fn(dav1d_put_8tap_scaled_sharp_ssse3);
|
||||||
decl_mc_scaled_fn(dav1d_put_8tap_scaled_sharp_regular_avx2);
|
decl_mc_scaled_fn(dav1d_put_8tap_scaled_sharp_regular_avx2);
|
||||||
|
decl_mc_scaled_fn(dav1d_put_8tap_scaled_sharp_regular_ssse3);
|
||||||
decl_mc_scaled_fn(dav1d_put_8tap_scaled_sharp_smooth_avx2);
|
decl_mc_scaled_fn(dav1d_put_8tap_scaled_sharp_smooth_avx2);
|
||||||
|
decl_mc_scaled_fn(dav1d_put_8tap_scaled_sharp_smooth_ssse3);
|
||||||
decl_mc_scaled_fn(dav1d_put_bilin_scaled_avx2);
|
decl_mc_scaled_fn(dav1d_put_bilin_scaled_avx2);
|
||||||
|
decl_mc_scaled_fn(dav1d_put_bilin_scaled_ssse3);
|
||||||
|
|
||||||
decl_mct_scaled_fn(dav1d_prep_8tap_scaled_regular_avx2);
|
decl_mct_scaled_fn(dav1d_prep_8tap_scaled_regular_avx2);
|
||||||
|
decl_mct_scaled_fn(dav1d_prep_8tap_scaled_regular_ssse3);
|
||||||
decl_mct_scaled_fn(dav1d_prep_8tap_scaled_regular_smooth_avx2);
|
decl_mct_scaled_fn(dav1d_prep_8tap_scaled_regular_smooth_avx2);
|
||||||
|
decl_mct_scaled_fn(dav1d_prep_8tap_scaled_regular_smooth_ssse3);
|
||||||
decl_mct_scaled_fn(dav1d_prep_8tap_scaled_regular_sharp_avx2);
|
decl_mct_scaled_fn(dav1d_prep_8tap_scaled_regular_sharp_avx2);
|
||||||
|
decl_mct_scaled_fn(dav1d_prep_8tap_scaled_regular_sharp_ssse3);
|
||||||
decl_mct_scaled_fn(dav1d_prep_8tap_scaled_smooth_avx2);
|
decl_mct_scaled_fn(dav1d_prep_8tap_scaled_smooth_avx2);
|
||||||
|
decl_mct_scaled_fn(dav1d_prep_8tap_scaled_smooth_ssse3);
|
||||||
decl_mct_scaled_fn(dav1d_prep_8tap_scaled_smooth_regular_avx2);
|
decl_mct_scaled_fn(dav1d_prep_8tap_scaled_smooth_regular_avx2);
|
||||||
|
decl_mct_scaled_fn(dav1d_prep_8tap_scaled_smooth_regular_ssse3);
|
||||||
decl_mct_scaled_fn(dav1d_prep_8tap_scaled_smooth_sharp_avx2);
|
decl_mct_scaled_fn(dav1d_prep_8tap_scaled_smooth_sharp_avx2);
|
||||||
|
decl_mct_scaled_fn(dav1d_prep_8tap_scaled_smooth_sharp_ssse3);
|
||||||
decl_mct_scaled_fn(dav1d_prep_8tap_scaled_sharp_avx2);
|
decl_mct_scaled_fn(dav1d_prep_8tap_scaled_sharp_avx2);
|
||||||
|
decl_mct_scaled_fn(dav1d_prep_8tap_scaled_sharp_ssse3);
|
||||||
decl_mct_scaled_fn(dav1d_prep_8tap_scaled_sharp_regular_avx2);
|
decl_mct_scaled_fn(dav1d_prep_8tap_scaled_sharp_regular_avx2);
|
||||||
|
decl_mct_scaled_fn(dav1d_prep_8tap_scaled_sharp_regular_ssse3);
|
||||||
decl_mct_scaled_fn(dav1d_prep_8tap_scaled_sharp_smooth_avx2);
|
decl_mct_scaled_fn(dav1d_prep_8tap_scaled_sharp_smooth_avx2);
|
||||||
|
decl_mct_scaled_fn(dav1d_prep_8tap_scaled_sharp_smooth_ssse3);
|
||||||
decl_mct_scaled_fn(dav1d_prep_bilin_scaled_avx2);
|
decl_mct_scaled_fn(dav1d_prep_bilin_scaled_avx2);
|
||||||
|
decl_mct_scaled_fn(dav1d_prep_bilin_scaled_ssse3);
|
||||||
|
|
||||||
decl_avg_fn(dav1d_avg_avx512icl);
|
decl_avg_fn(dav1d_avg_avx512icl);
|
||||||
decl_avg_fn(dav1d_avg_avx2);
|
decl_avg_fn(dav1d_avg_avx2);
|
||||||
@ -207,6 +227,30 @@ COLD void bitfn(dav1d_mc_dsp_init_x86)(Dav1dMCDSPContext *const c) {
|
|||||||
init_mct_fn(FILTER_2D_8TAP_SHARP_SMOOTH, 8tap_sharp_smooth, ssse3);
|
init_mct_fn(FILTER_2D_8TAP_SHARP_SMOOTH, 8tap_sharp_smooth, ssse3);
|
||||||
init_mct_fn(FILTER_2D_8TAP_SHARP, 8tap_sharp, ssse3);
|
init_mct_fn(FILTER_2D_8TAP_SHARP, 8tap_sharp, ssse3);
|
||||||
|
|
||||||
|
#if ARCH_X86_64
|
||||||
|
init_mc_scaled_fn(FILTER_2D_8TAP_REGULAR, 8tap_scaled_regular, ssse3);
|
||||||
|
init_mc_scaled_fn(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_scaled_regular_smooth, ssse3);
|
||||||
|
init_mc_scaled_fn(FILTER_2D_8TAP_REGULAR_SHARP, 8tap_scaled_regular_sharp, ssse3);
|
||||||
|
init_mc_scaled_fn(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_scaled_smooth_regular, ssse3);
|
||||||
|
init_mc_scaled_fn(FILTER_2D_8TAP_SMOOTH, 8tap_scaled_smooth, ssse3);
|
||||||
|
init_mc_scaled_fn(FILTER_2D_8TAP_SMOOTH_SHARP, 8tap_scaled_smooth_sharp, ssse3);
|
||||||
|
init_mc_scaled_fn(FILTER_2D_8TAP_SHARP_REGULAR, 8tap_scaled_sharp_regular, ssse3);
|
||||||
|
init_mc_scaled_fn(FILTER_2D_8TAP_SHARP_SMOOTH, 8tap_scaled_sharp_smooth, ssse3);
|
||||||
|
init_mc_scaled_fn(FILTER_2D_8TAP_SHARP, 8tap_scaled_sharp, ssse3);
|
||||||
|
init_mc_scaled_fn(FILTER_2D_BILINEAR, bilin_scaled, ssse3);
|
||||||
|
|
||||||
|
init_mct_scaled_fn(FILTER_2D_8TAP_REGULAR, 8tap_scaled_regular, ssse3);
|
||||||
|
init_mct_scaled_fn(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_scaled_regular_smooth, ssse3);
|
||||||
|
init_mct_scaled_fn(FILTER_2D_8TAP_REGULAR_SHARP, 8tap_scaled_regular_sharp, ssse3);
|
||||||
|
init_mct_scaled_fn(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_scaled_smooth_regular, ssse3);
|
||||||
|
init_mct_scaled_fn(FILTER_2D_8TAP_SMOOTH, 8tap_scaled_smooth, ssse3);
|
||||||
|
init_mct_scaled_fn(FILTER_2D_8TAP_SMOOTH_SHARP, 8tap_scaled_smooth_sharp, ssse3);
|
||||||
|
init_mct_scaled_fn(FILTER_2D_8TAP_SHARP_REGULAR, 8tap_scaled_sharp_regular, ssse3);
|
||||||
|
init_mct_scaled_fn(FILTER_2D_8TAP_SHARP_SMOOTH, 8tap_scaled_sharp_smooth, ssse3);
|
||||||
|
init_mct_scaled_fn(FILTER_2D_8TAP_SHARP, 8tap_scaled_sharp, ssse3);
|
||||||
|
init_mct_scaled_fn(FILTER_2D_BILINEAR, bilin_scaled, ssse3);
|
||||||
|
#endif
|
||||||
|
|
||||||
c->avg = dav1d_avg_ssse3;
|
c->avg = dav1d_avg_ssse3;
|
||||||
c->w_avg = dav1d_w_avg_ssse3;
|
c->w_avg = dav1d_w_avg_ssse3;
|
||||||
c->mask = dav1d_mask_ssse3;
|
c->mask = dav1d_mask_ssse3;
|
||||||
|
1818
third_party/dav1d/src/x86/mc_sse.asm
vendored
1818
third_party/dav1d/src/x86/mc_sse.asm
vendored
File diff suppressed because it is too large
Load Diff
1
third_party/dav1d/src/x86/msac.asm
vendored
1
third_party/dav1d/src/x86/msac.asm
vendored
@ -23,6 +23,7 @@
|
|||||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
%include "config.asm"
|
||||||
%include "ext/x86/x86inc.asm"
|
%include "ext/x86/x86inc.asm"
|
||||||
|
|
||||||
SECTION_RODATA 64 ; avoids cacheline splits
|
SECTION_RODATA 64 ; avoids cacheline splits
|
||||||
|
24
third_party/dav1d/tests/checkasm/checkasm.c
vendored
24
third_party/dav1d/tests/checkasm/checkasm.c
vendored
@ -518,9 +518,7 @@ static void print_cpu_name(void) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
int main(int argc, char *argv[]) {
|
||||||
(void)func_new, (void)func_ref;
|
|
||||||
state.seed = get_seed();
|
state.seed = get_seed();
|
||||||
int ret = 0;
|
|
||||||
|
|
||||||
while (argc > 1) {
|
while (argc > 1) {
|
||||||
if (!strncmp(argv[1], "--help", 6)) {
|
if (!strncmp(argv[1], "--help", 6)) {
|
||||||
@ -568,6 +566,24 @@ int main(int argc, char *argv[]) {
|
|||||||
|
|
||||||
dav1d_init_cpu();
|
dav1d_init_cpu();
|
||||||
|
|
||||||
|
#ifdef readtime
|
||||||
|
if (state.bench_pattern) {
|
||||||
|
static int testing = 0;
|
||||||
|
checkasm_save_context();
|
||||||
|
if (!testing) {
|
||||||
|
checkasm_set_signal_handler_state(1);
|
||||||
|
testing = 1;
|
||||||
|
readtime();
|
||||||
|
checkasm_set_signal_handler_state(0);
|
||||||
|
} else {
|
||||||
|
fprintf(stderr, "checkasm: unable to access cycle counter\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
int ret = 0;
|
||||||
|
|
||||||
if (!state.function_listing) {
|
if (!state.function_listing) {
|
||||||
fprintf(stderr, "checkasm: using random seed %u\n", state.seed);
|
fprintf(stderr, "checkasm: using random seed %u\n", state.seed);
|
||||||
#if ARCH_X86_64
|
#if ARCH_X86_64
|
||||||
@ -672,7 +688,9 @@ int checkasm_bench_func(void) {
|
|||||||
/* Indicate that the current test has failed, return whether verbose printing
|
/* Indicate that the current test has failed, return whether verbose printing
|
||||||
* is requested. */
|
* is requested. */
|
||||||
int checkasm_fail_func(const char *const msg, ...) {
|
int checkasm_fail_func(const char *const msg, ...) {
|
||||||
if (state.current_func_ver->cpu && state.current_func_ver->ok) {
|
if (state.current_func_ver && state.current_func_ver->cpu &&
|
||||||
|
state.current_func_ver->ok)
|
||||||
|
{
|
||||||
va_list arg;
|
va_list arg;
|
||||||
|
|
||||||
print_cpu_name();
|
print_cpu_name();
|
||||||
|
6
third_party/dav1d/tests/checkasm/checkasm.h
vendored
6
third_party/dav1d/tests/checkasm/checkasm.h
vendored
@ -86,8 +86,6 @@ int float_near_abs_eps_array(const float *a, const float *b, float eps,
|
|||||||
int float_near_abs_eps_array_ulp(const float *a, const float *b, float eps,
|
int float_near_abs_eps_array_ulp(const float *a, const float *b, float eps,
|
||||||
unsigned max_ulp, int len);
|
unsigned max_ulp, int len);
|
||||||
|
|
||||||
static void *func_ref, *func_new;
|
|
||||||
|
|
||||||
#define BENCH_RUNS (1 << 12) /* Trade-off between accuracy and speed */
|
#define BENCH_RUNS (1 << 12) /* Trade-off between accuracy and speed */
|
||||||
|
|
||||||
/* Decide whether or not the specified function needs to be tested */
|
/* Decide whether or not the specified function needs to be tested */
|
||||||
@ -99,6 +97,7 @@ static void *func_ref, *func_new;
|
|||||||
* is optional. */
|
* is optional. */
|
||||||
#define declare_func(ret, ...)\
|
#define declare_func(ret, ...)\
|
||||||
declare_new(ret, __VA_ARGS__)\
|
declare_new(ret, __VA_ARGS__)\
|
||||||
|
void *func_ref, *func_new;\
|
||||||
typedef ret func_type(__VA_ARGS__);\
|
typedef ret func_type(__VA_ARGS__);\
|
||||||
checkasm_save_context()
|
checkasm_save_context()
|
||||||
|
|
||||||
@ -127,6 +126,9 @@ static inline uint64_t readtime(void) {
|
|||||||
}
|
}
|
||||||
#define readtime readtime
|
#define readtime readtime
|
||||||
#endif
|
#endif
|
||||||
|
#elif (ARCH_AARCH64 || ARCH_ARM) && defined(__APPLE__)
|
||||||
|
#include <mach/mach_time.h>
|
||||||
|
#define readtime() mach_absolute_time()
|
||||||
#elif ARCH_AARCH64
|
#elif ARCH_AARCH64
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
#include <windows.h>
|
#include <windows.h>
|
||||||
|
26
third_party/dav1d/tests/checkasm/msac.c
vendored
26
third_party/dav1d/tests/checkasm/msac.c
vendored
@ -140,11 +140,11 @@ static void check_decode_symbol(MsacDSPContext *const c, uint8_t *const buf) {
|
|||||||
report("decode_symbol");
|
report("decode_symbol");
|
||||||
}
|
}
|
||||||
|
|
||||||
static void check_decode_bool(MsacDSPContext *const c, uint8_t *const buf) {
|
static void check_decode_bool_adapt(MsacDSPContext *const c, uint8_t *const buf) {
|
||||||
MsacContext s_c, s_a;
|
MsacContext s_c, s_a;
|
||||||
|
|
||||||
|
declare_func(unsigned, MsacContext *s, uint16_t *cdf);
|
||||||
if (check_func(c->bool_adapt, "msac_decode_bool_adapt")) {
|
if (check_func(c->bool_adapt, "msac_decode_bool_adapt")) {
|
||||||
declare_func(unsigned, MsacContext *s, uint16_t *cdf);
|
|
||||||
uint16_t cdf[2][2];
|
uint16_t cdf[2][2];
|
||||||
for (int cdf_update = 0; cdf_update <= 1; cdf_update++) {
|
for (int cdf_update = 0; cdf_update <= 1; cdf_update++) {
|
||||||
dav1d_msac_init(&s_c, buf, BUF_SIZE, !cdf_update);
|
dav1d_msac_init(&s_c, buf, BUF_SIZE, !cdf_update);
|
||||||
@ -165,9 +165,13 @@ static void check_decode_bool(MsacDSPContext *const c, uint8_t *const buf) {
|
|||||||
bench_new(&s_a, cdf[1]);
|
bench_new(&s_a, cdf[1]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void check_decode_bool_equi(MsacDSPContext *const c, uint8_t *const buf) {
|
||||||
|
MsacContext s_c, s_a;
|
||||||
|
|
||||||
|
declare_func(unsigned, MsacContext *s);
|
||||||
if (check_func(c->bool_equi, "msac_decode_bool_equi")) {
|
if (check_func(c->bool_equi, "msac_decode_bool_equi")) {
|
||||||
declare_func(unsigned, MsacContext *s);
|
|
||||||
dav1d_msac_init(&s_c, buf, BUF_SIZE, 1);
|
dav1d_msac_init(&s_c, buf, BUF_SIZE, 1);
|
||||||
s_a = s_c;
|
s_a = s_c;
|
||||||
for (int i = 0; i < 64; i++) {
|
for (int i = 0; i < 64; i++) {
|
||||||
@ -180,9 +184,13 @@ static void check_decode_bool(MsacDSPContext *const c, uint8_t *const buf) {
|
|||||||
}
|
}
|
||||||
bench_new(&s_a);
|
bench_new(&s_a);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void check_decode_bool(MsacDSPContext *const c, uint8_t *const buf) {
|
||||||
|
MsacContext s_c, s_a;
|
||||||
|
|
||||||
|
declare_func(unsigned, MsacContext *s, unsigned f);
|
||||||
if (check_func(c->bool, "msac_decode_bool")) {
|
if (check_func(c->bool, "msac_decode_bool")) {
|
||||||
declare_func(unsigned, MsacContext *s, unsigned f);
|
|
||||||
dav1d_msac_init(&s_c, buf, BUF_SIZE, 1);
|
dav1d_msac_init(&s_c, buf, BUF_SIZE, 1);
|
||||||
s_a = s_c;
|
s_a = s_c;
|
||||||
for (int i = 0; i < 64; i++) {
|
for (int i = 0; i < 64; i++) {
|
||||||
@ -197,6 +205,12 @@ static void check_decode_bool(MsacDSPContext *const c, uint8_t *const buf) {
|
|||||||
bench_new(&s_a, 16384);
|
bench_new(&s_a, 16384);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
static void check_decode_bool_funcs(MsacDSPContext *const c, uint8_t *const buf) {
|
||||||
|
check_decode_bool_adapt(c, buf);
|
||||||
|
check_decode_bool_equi(c, buf);
|
||||||
|
check_decode_bool(c, buf);
|
||||||
report("decode_bool");
|
report("decode_bool");
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -204,8 +218,8 @@ static void check_decode_hi_tok(MsacDSPContext *const c, uint8_t *const buf) {
|
|||||||
ALIGN_STK_16(uint16_t, cdf, 2, [16]);
|
ALIGN_STK_16(uint16_t, cdf, 2, [16]);
|
||||||
MsacContext s_c, s_a;
|
MsacContext s_c, s_a;
|
||||||
|
|
||||||
|
declare_func(unsigned, MsacContext *s, uint16_t *cdf);
|
||||||
if (check_func(c->hi_tok, "msac_decode_hi_tok")) {
|
if (check_func(c->hi_tok, "msac_decode_hi_tok")) {
|
||||||
declare_func(unsigned, MsacContext *s, uint16_t *cdf);
|
|
||||||
for (int cdf_update = 0; cdf_update <= 1; cdf_update++) {
|
for (int cdf_update = 0; cdf_update <= 1; cdf_update++) {
|
||||||
dav1d_msac_init(&s_c, buf, BUF_SIZE, !cdf_update);
|
dav1d_msac_init(&s_c, buf, BUF_SIZE, !cdf_update);
|
||||||
s_a = s_c;
|
s_a = s_c;
|
||||||
@ -272,6 +286,6 @@ void checkasm_check_msac(void) {
|
|||||||
buf[i] = rnd();
|
buf[i] = rnd();
|
||||||
|
|
||||||
check_decode_symbol(&c, buf);
|
check_decode_symbol(&c, buf);
|
||||||
check_decode_bool(&c, buf);
|
check_decode_bool_funcs(&c, buf);
|
||||||
check_decode_hi_tok(&c, buf);
|
check_decode_hi_tok(&c, buf);
|
||||||
}
|
}
|
||||||
|
@ -23,8 +23,9 @@
|
|||||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
%define private_prefix checkasm
|
|
||||||
%include "config.asm"
|
%include "config.asm"
|
||||||
|
%undef private_prefix
|
||||||
|
%define private_prefix checkasm
|
||||||
%include "ext/x86/x86inc.asm"
|
%include "ext/x86/x86inc.asm"
|
||||||
|
|
||||||
SECTION_RODATA 16
|
SECTION_RODATA 16
|
||||||
|
Loading…
Reference in New Issue
Block a user