mirror of
https://gitee.com/openharmony/third_party_ffmpeg
synced 2024-11-24 03:39:45 +00:00
float_dsp: add x86-optimized functions for vector_fmac_scalar()
This commit is contained in:
parent
cb5042d02c
commit
82b2df9790
@ -42,12 +42,12 @@ typedef struct AVFloatDSPContext {
|
|||||||
* overlap exactly or not at all.
|
* overlap exactly or not at all.
|
||||||
*
|
*
|
||||||
* @param dst result vector
|
* @param dst result vector
|
||||||
* constraints: 16-byte aligned
|
* constraints: 32-byte aligned
|
||||||
* @param src input vector
|
* @param src input vector
|
||||||
* constraints: 16-byte aligned
|
* constraints: 32-byte aligned
|
||||||
* @param mul scalar value
|
* @param mul scalar value
|
||||||
* @param len length of vector
|
* @param len length of vector
|
||||||
* constraints: multiple of 4
|
* constraints: multiple of 16
|
||||||
*/
|
*/
|
||||||
void (*vector_fmac_scalar)(float *dst, const float *src, float mul,
|
void (*vector_fmac_scalar)(float *dst, const float *src, float mul,
|
||||||
int len);
|
int len);
|
||||||
|
@ -19,6 +19,7 @@
|
|||||||
;******************************************************************************
|
;******************************************************************************
|
||||||
|
|
||||||
%include "x86inc.asm"
|
%include "x86inc.asm"
|
||||||
|
%include "x86util.asm"
|
||||||
|
|
||||||
SECTION .text
|
SECTION .text
|
||||||
|
|
||||||
@ -53,3 +54,49 @@ VECTOR_FMUL
|
|||||||
INIT_YMM avx
|
INIT_YMM avx
|
||||||
VECTOR_FMUL
|
VECTOR_FMUL
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
|
;------------------------------------------------------------------------------
|
||||||
|
; void ff_vector_fmac_scalar(float *dst, const float *src, float mul, int len)
|
||||||
|
;------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
%macro VECTOR_FMAC_SCALAR 0
; Emits vector_fmac_scalar for the instruction set selected by the preceding
; INIT_XMM / INIT_YMM:
;
;     dst[i] += src[i] * mul      for every i in [0, len)
;
; Requirements that follow from the code below:
;   * dst and src are accessed at offsets that are multiples of mmsize with
;     aligned stores (mova), so both must be mmsize-aligned.
;   * Each iteration consumes 2*mmsize bytes (8 floats for XMM, 16 for YMM),
;     so len must be a multiple of that.
;   * The loop body runs before the first bounds test, so len must be > 0.
%if UNIX64
; The float argument does not occupy a GPR slot here, so only three
; integer/pointer arguments are named and len is the third one.
cglobal vector_fmac_scalar, 3,3,3, dst, src, len
%else
cglobal vector_fmac_scalar, 4,4,3, dst, src, mul, len
%endif
%if ARCH_X86_32
    ; mul lives in memory (mulm): load it and replicate it to every lane.
    VBROADCASTSS m0, mulm
%else
%if WIN64
    ; mul is the third parameter and is delivered in xmm2 on Win64. Copy it
    ; explicitly: the broadcast below names the literal register xmm0, which
    ; a SWAP of the m# aliases would not redirect.
    mova       xmm0, xmm2
%endif
    ; Replicate the low float of xmm0 into all four 32-bit lanes.
    shufps     xmm0, xmm0, 0
%if cpuflag(avx)
    ; Duplicate the low 128 bits into the high half of the 256-bit register.
    vinsertf128  m0, m0, xmm0, 1
%endif
%endif
    ; lenq = byte offset of the last 2*mmsize-sized chunk; the vectors are
    ; walked from the end towards offset 0.
    lea    lenq, [lend*4-2*mmsize]
.loop:
    mulps    m1, m0, [srcq+lenq       ]     ; m1 = mul * src[chunk]
    mulps    m2, m0, [srcq+lenq+mmsize]     ; m2 = mul * src[chunk + 1]
    addps    m1, m1, [dstq+lenq       ]     ; accumulate onto dst
    addps    m2, m2, [dstq+lenq+mmsize]
    mova  [dstq+lenq       ], m1
    mova  [dstq+lenq+mmsize], m2
    sub    lenq, 2*mmsize                   ; step back one double-chunk
    jge .loop                               ; continue while offset >= 0
%if mmsize == 32
    ; Clear the upper YMM halves before returning to the caller.
    vzeroupper
    RET
%else
    REP_RET
%endif
%endmacro

INIT_XMM sse
VECTOR_FMAC_SCALAR
%if HAVE_AVX
INIT_YMM avx
VECTOR_FMAC_SCALAR
%endif
|
||||||
|
@ -26,6 +26,11 @@ extern void ff_vector_fmul_sse(float *dst, const float *src0, const float *src1,
|
|||||||
extern void ff_vector_fmul_avx(float *dst, const float *src0, const float *src1,
|
extern void ff_vector_fmul_avx(float *dst, const float *src0, const float *src1,
|
||||||
int len);
|
int len);
|
||||||
|
|
||||||
|
extern void ff_vector_fmac_scalar_sse(float *dst, const float *src, float mul,
|
||||||
|
int len);
|
||||||
|
extern void ff_vector_fmac_scalar_avx(float *dst, const float *src, float mul,
|
||||||
|
int len);
|
||||||
|
|
||||||
void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
|
void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
|
||||||
{
|
{
|
||||||
#if HAVE_YASM
|
#if HAVE_YASM
|
||||||
@ -33,9 +38,11 @@ void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
|
|||||||
|
|
||||||
if (mm_flags & AV_CPU_FLAG_SSE && HAVE_SSE) {
|
if (mm_flags & AV_CPU_FLAG_SSE && HAVE_SSE) {
|
||||||
fdsp->vector_fmul = ff_vector_fmul_sse;
|
fdsp->vector_fmul = ff_vector_fmul_sse;
|
||||||
|
fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_sse;
|
||||||
}
|
}
|
||||||
if (mm_flags & AV_CPU_FLAG_AVX && HAVE_AVX) {
|
if (mm_flags & AV_CPU_FLAG_AVX && HAVE_AVX) {
|
||||||
fdsp->vector_fmul = ff_vector_fmul_avx;
|
fdsp->vector_fmul = ff_vector_fmul_avx;
|
||||||
|
fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_avx;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user