mirror of
https://github.com/xenia-project/FFmpeg.git
synced 2024-12-13 22:58:40 +00:00
x86inc: Support arbitrary stack alignments
Change ALLOC_STACK to always align the stack before allocating stack space, for consistency. Previously, alignment would occur either before or after allocating stack space, depending on whether manual alignment was required.
This commit is contained in:
parent
99f8fc725d
commit
826790f596
@ -446,13 +446,13 @@ cglobal deblock_%1_luma_8, 5,5,8,2*%2
|
|||||||
; int8_t *tc0)
|
; int8_t *tc0)
|
||||||
;-----------------------------------------------------------------------------
|
;-----------------------------------------------------------------------------
|
||||||
INIT_MMX cpuname
|
INIT_MMX cpuname
|
||||||
cglobal deblock_h_luma_8, 0,5,8,0x60+HAVE_ALIGNED_STACK*12
|
cglobal deblock_h_luma_8, 0,5,8,0x60+12
|
||||||
mov r0, r0mp
|
mov r0, r0mp
|
||||||
mov r3, r1m
|
mov r3, r1m
|
||||||
lea r4, [r3*3]
|
lea r4, [r3*3]
|
||||||
sub r0, 4
|
sub r0, 4
|
||||||
lea r1, [r0+r4]
|
lea r1, [r0+r4]
|
||||||
%define pix_tmp esp+12*HAVE_ALIGNED_STACK
|
%define pix_tmp esp+12
|
||||||
|
|
||||||
; transpose 6x16 -> tmp space
|
; transpose 6x16 -> tmp space
|
||||||
TRANSPOSE6x8_MEM PASS8ROWS(r0, r1, r3, r4), pix_tmp
|
TRANSPOSE6x8_MEM PASS8ROWS(r0, r1, r3, r4), pix_tmp
|
||||||
|
@ -42,6 +42,17 @@
|
|||||||
%define public_prefix private_prefix
|
%define public_prefix private_prefix
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
|
%if HAVE_ALIGNED_STACK
|
||||||
|
%define STACK_ALIGNMENT 16
|
||||||
|
%endif
|
||||||
|
%ifndef STACK_ALIGNMENT
|
||||||
|
%if ARCH_X86_64
|
||||||
|
%define STACK_ALIGNMENT 16
|
||||||
|
%else
|
||||||
|
%define STACK_ALIGNMENT 4
|
||||||
|
%endif
|
||||||
|
%endif
|
||||||
|
|
||||||
%define WIN64 0
|
%define WIN64 0
|
||||||
%define UNIX64 0
|
%define UNIX64 0
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
@ -117,8 +128,9 @@
|
|||||||
; %1 = number of arguments. loads them from stack if needed.
|
; %1 = number of arguments. loads them from stack if needed.
|
||||||
; %2 = number of registers used. pushes callee-saved regs if needed.
|
; %2 = number of registers used. pushes callee-saved regs if needed.
|
||||||
; %3 = number of xmm registers used. pushes callee-saved xmm regs if needed.
|
; %3 = number of xmm registers used. pushes callee-saved xmm regs if needed.
|
||||||
; %4 = (optional) stack size to be allocated. If not aligned (x86-32 ICC 10.x,
|
; %4 = (optional) stack size to be allocated. The stack will be aligned before
|
||||||
; MSVC or YMM), the stack will be manually aligned (to 16 or 32 bytes),
|
; allocating the specified stack size. If the required stack alignment is
|
||||||
|
; larger than the known stack alignment the stack will be manually aligned
|
||||||
; and an extra register will be allocated to hold the original stack
|
; and an extra register will be allocated to hold the original stack
|
||||||
; pointer (to not invalidate r0m etc.). To prevent the use of an extra
|
; pointer (to not invalidate r0m etc.). To prevent the use of an extra
|
||||||
; register as stack pointer, request a negative stack size.
|
; register as stack pointer, request a negative stack size.
|
||||||
@ -126,8 +138,10 @@
|
|||||||
; PROLOGUE can also be invoked by adding the same options to cglobal
|
; PROLOGUE can also be invoked by adding the same options to cglobal
|
||||||
|
|
||||||
; e.g.
|
; e.g.
|
||||||
; cglobal foo, 2,3,0, dst, src, tmp
|
; cglobal foo, 2,3,7,0x40, dst, src, tmp
|
||||||
; declares a function (foo), taking two args (dst and src) and one local variable (tmp)
|
; declares a function (foo) that automatically loads two arguments (dst and
|
||||||
|
; src) into registers, uses one additional register (tmp) plus 7 vector
|
||||||
|
; registers (m0-m6) and allocates 0x40 bytes of stack space.
|
||||||
|
|
||||||
; TODO Some functions can use some args directly from the stack. If they're the
|
; TODO Some functions can use some args directly from the stack. If they're the
|
||||||
; last args then you can just not declare them, but if they're in the middle
|
; last args then you can just not declare them, but if they're in the middle
|
||||||
@ -328,26 +342,28 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
|
|||||||
%assign n_arg_names %0
|
%assign n_arg_names %0
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
|
%define required_stack_alignment ((mmsize + 15) & ~15)
|
||||||
|
|
||||||
%macro ALLOC_STACK 1-2 0 ; stack_size, n_xmm_regs (for win64 only)
|
%macro ALLOC_STACK 1-2 0 ; stack_size, n_xmm_regs (for win64 only)
|
||||||
%ifnum %1
|
%ifnum %1
|
||||||
%if %1 != 0
|
%if %1 != 0
|
||||||
%assign %%stack_alignment ((mmsize + 15) & ~15)
|
%assign %%pad 0
|
||||||
%assign stack_size %1
|
%assign stack_size %1
|
||||||
%if stack_size < 0
|
%if stack_size < 0
|
||||||
%assign stack_size -stack_size
|
%assign stack_size -stack_size
|
||||||
%endif
|
%endif
|
||||||
%assign stack_size_padded stack_size
|
|
||||||
%if WIN64
|
%if WIN64
|
||||||
%assign stack_size_padded stack_size_padded + 32 ; reserve 32 bytes for shadow space
|
%assign %%pad %%pad + 32 ; shadow space
|
||||||
%if mmsize != 8
|
%if mmsize != 8
|
||||||
%assign xmm_regs_used %2
|
%assign xmm_regs_used %2
|
||||||
%if xmm_regs_used > 8
|
%if xmm_regs_used > 8
|
||||||
%assign stack_size_padded stack_size_padded + (xmm_regs_used-8)*16
|
%assign %%pad %%pad + (xmm_regs_used-8)*16 ; callee-saved xmm registers
|
||||||
%endif
|
%endif
|
||||||
%endif
|
%endif
|
||||||
%endif
|
%endif
|
||||||
%if mmsize <= 16 && HAVE_ALIGNED_STACK
|
%if required_stack_alignment <= STACK_ALIGNMENT
|
||||||
%assign stack_size_padded stack_size_padded + %%stack_alignment - gprsize - (stack_offset & (%%stack_alignment - 1))
|
; maintain the current stack alignment
|
||||||
|
%assign stack_size_padded stack_size + %%pad + ((-%%pad-stack_offset-gprsize) & (STACK_ALIGNMENT-1))
|
||||||
SUB rsp, stack_size_padded
|
SUB rsp, stack_size_padded
|
||||||
%else
|
%else
|
||||||
%assign %%reg_num (regs_used - 1)
|
%assign %%reg_num (regs_used - 1)
|
||||||
@ -356,17 +372,17 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
|
|||||||
; it, i.e. in [rsp+stack_size_padded], so we can restore the
|
; it, i.e. in [rsp+stack_size_padded], so we can restore the
|
||||||
; stack in a single instruction (i.e. mov rsp, rstk or mov
|
; stack in a single instruction (i.e. mov rsp, rstk or mov
|
||||||
; rsp, [rsp+stack_size_padded])
|
; rsp, [rsp+stack_size_padded])
|
||||||
mov rstk, rsp
|
|
||||||
%if %1 < 0 ; need to store rsp on stack
|
%if %1 < 0 ; need to store rsp on stack
|
||||||
sub rsp, gprsize+stack_size_padded
|
%xdefine rstkm [rsp + stack_size + %%pad]
|
||||||
and rsp, ~(%%stack_alignment-1)
|
%assign %%pad %%pad + gprsize
|
||||||
%xdefine rstkm [rsp+stack_size_padded]
|
|
||||||
mov rstkm, rstk
|
|
||||||
%else ; can keep rsp in rstk during whole function
|
%else ; can keep rsp in rstk during whole function
|
||||||
sub rsp, stack_size_padded
|
|
||||||
and rsp, ~(%%stack_alignment-1)
|
|
||||||
%xdefine rstkm rstk
|
%xdefine rstkm rstk
|
||||||
%endif
|
%endif
|
||||||
|
%assign stack_size_padded stack_size + ((%%pad + required_stack_alignment-1) & ~(required_stack_alignment-1))
|
||||||
|
mov rstk, rsp
|
||||||
|
and rsp, ~(required_stack_alignment-1)
|
||||||
|
sub rsp, stack_size_padded
|
||||||
|
movifnidn rstkm, rstk
|
||||||
%endif
|
%endif
|
||||||
WIN64_PUSH_XMM
|
WIN64_PUSH_XMM
|
||||||
%endif
|
%endif
|
||||||
@ -375,7 +391,7 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
|
|||||||
|
|
||||||
%macro SETUP_STACK_POINTER 1
|
%macro SETUP_STACK_POINTER 1
|
||||||
%ifnum %1
|
%ifnum %1
|
||||||
%if %1 != 0 && (HAVE_ALIGNED_STACK == 0 || mmsize == 32)
|
%if %1 != 0 && required_stack_alignment > STACK_ALIGNMENT
|
||||||
%if %1 > 0
|
%if %1 > 0
|
||||||
%assign regs_used (regs_used + 1)
|
%assign regs_used (regs_used + 1)
|
||||||
%elif ARCH_X86_64 && regs_used == num_args && num_args <= 4 + UNIX64 * 2
|
%elif ARCH_X86_64 && regs_used == num_args && num_args <= 4 + UNIX64 * 2
|
||||||
@ -449,7 +465,9 @@ DECLARE_REG 14, R15, 120
|
|||||||
%assign xmm_regs_used %1
|
%assign xmm_regs_used %1
|
||||||
ASSERT xmm_regs_used <= 16
|
ASSERT xmm_regs_used <= 16
|
||||||
%if xmm_regs_used > 8
|
%if xmm_regs_used > 8
|
||||||
%assign stack_size_padded (xmm_regs_used-8)*16 + (~stack_offset&8) + 32
|
; Allocate stack space for callee-saved xmm registers plus shadow space and align the stack.
|
||||||
|
%assign %%pad (xmm_regs_used-8)*16 + 32
|
||||||
|
%assign stack_size_padded %%pad + ((-%%pad-stack_offset-gprsize) & (STACK_ALIGNMENT-1))
|
||||||
SUB rsp, stack_size_padded
|
SUB rsp, stack_size_padded
|
||||||
%endif
|
%endif
|
||||||
WIN64_PUSH_XMM
|
WIN64_PUSH_XMM
|
||||||
@ -465,7 +483,7 @@ DECLARE_REG 14, R15, 120
|
|||||||
%endrep
|
%endrep
|
||||||
%endif
|
%endif
|
||||||
%if stack_size_padded > 0
|
%if stack_size_padded > 0
|
||||||
%if stack_size > 0 && (mmsize == 32 || HAVE_ALIGNED_STACK == 0)
|
%if stack_size > 0 && required_stack_alignment > STACK_ALIGNMENT
|
||||||
mov rsp, rstkm
|
mov rsp, rstkm
|
||||||
%else
|
%else
|
||||||
add %1, stack_size_padded
|
add %1, stack_size_padded
|
||||||
@ -531,7 +549,7 @@ DECLARE_REG 14, R15, 72
|
|||||||
|
|
||||||
%macro RET 0
|
%macro RET 0
|
||||||
%if stack_size_padded > 0
|
%if stack_size_padded > 0
|
||||||
%if mmsize == 32 || HAVE_ALIGNED_STACK == 0
|
%if required_stack_alignment > STACK_ALIGNMENT
|
||||||
mov rsp, rstkm
|
mov rsp, rstkm
|
||||||
%else
|
%else
|
||||||
add rsp, stack_size_padded
|
add rsp, stack_size_padded
|
||||||
@ -587,7 +605,7 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
|
|||||||
|
|
||||||
%macro RET 0
|
%macro RET 0
|
||||||
%if stack_size_padded > 0
|
%if stack_size_padded > 0
|
||||||
%if mmsize == 32 || HAVE_ALIGNED_STACK == 0
|
%if required_stack_alignment > STACK_ALIGNMENT
|
||||||
mov rsp, rstkm
|
mov rsp, rstkm
|
||||||
%else
|
%else
|
||||||
add rsp, stack_size_padded
|
add rsp, stack_size_padded
|
||||||
|
Loading…
Reference in New Issue
Block a user