mirror of
https://github.com/RPCS3/llvm.git
synced 2024-11-23 19:59:48 +00:00
AMDGPU: Fix not respecting byval alignment in call frame setup
Previously, call frame setup hackily added in the 4 bytes reserved for the callee's emergency stack slot by hand. Treat that slot like a normal stack allocation so we get the correct alignment padding behavior. This fixes an inconsistency between the caller and callee.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@340396 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
4d6d5a12e8
commit
ad45fb5af4
@ -4003,13 +4003,12 @@ SDValue AMDGPUTargetLowering::loadStackInputValue(SelectionDAG &DAG,
|
||||
SDValue AMDGPUTargetLowering::storeStackInputValue(SelectionDAG &DAG,
|
||||
const SDLoc &SL,
|
||||
SDValue Chain,
|
||||
SDValue StackPtr,
|
||||
SDValue ArgVal,
|
||||
int64_t Offset) const {
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
MachinePointerInfo DstInfo = MachinePointerInfo::getStack(MF, Offset);
|
||||
|
||||
SDValue Ptr = DAG.getObjectPtrOffset(SL, StackPtr, Offset);
|
||||
SDValue Ptr = DAG.getConstant(Offset, SL, MVT::i32);
|
||||
SDValue Store = DAG.getStore(Chain, SL, ArgVal, Ptr, DstInfo, 4,
|
||||
MachineMemOperand::MODereferenceable);
|
||||
return Store;
|
||||
|
@ -287,7 +287,6 @@ public:
|
||||
SDValue storeStackInputValue(SelectionDAG &DAG,
|
||||
const SDLoc &SL,
|
||||
SDValue Chain,
|
||||
SDValue StackPtr,
|
||||
SDValue ArgVal,
|
||||
int64_t Offset) const;
|
||||
|
||||
|
@ -2181,11 +2181,11 @@ SDValue SITargetLowering::LowerCallResult(
|
||||
// from the explicit user arguments present in the IR.
|
||||
void SITargetLowering::passSpecialInputs(
|
||||
CallLoweringInfo &CLI,
|
||||
CCState &CCInfo,
|
||||
const SIMachineFunctionInfo &Info,
|
||||
SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass,
|
||||
SmallVectorImpl<SDValue> &MemOpChains,
|
||||
SDValue Chain,
|
||||
SDValue StackPtr) const {
|
||||
SDValue Chain) const {
|
||||
// If we don't have a call site, this was a call inserted by
|
||||
// legalization. These can never use special inputs.
|
||||
if (!CLI.CS)
|
||||
@ -2253,9 +2253,9 @@ void SITargetLowering::passSpecialInputs(
|
||||
if (OutgoingArg->isRegister()) {
|
||||
RegsToPass.emplace_back(OutgoingArg->getRegister(), InputReg);
|
||||
} else {
|
||||
SDValue ArgStore = storeStackInputValue(DAG, DL, Chain, StackPtr,
|
||||
InputReg,
|
||||
OutgoingArg->getStackOffset());
|
||||
unsigned SpecialArgOffset = CCInfo.AllocateStack(ArgVT.getStoreSize(), 4);
|
||||
SDValue ArgStore = storeStackInputValue(DAG, DL, Chain, InputReg,
|
||||
SpecialArgOffset);
|
||||
MemOpChains.push_back(ArgStore);
|
||||
}
|
||||
}
|
||||
@ -2401,8 +2401,6 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
|
||||
}
|
||||
|
||||
// The first 4 bytes are reserved for the callee's emergency stack slot.
|
||||
const unsigned CalleeUsableStackOffset = 4;
|
||||
|
||||
if (IsTailCall) {
|
||||
IsTailCall = isEligibleForTailCallOptimization(
|
||||
Callee, CallConv, IsVarArg, Outs, OutVals, Ins, DAG);
|
||||
@ -2441,6 +2439,10 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
|
||||
SmallVector<CCValAssign, 16> ArgLocs;
|
||||
CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
|
||||
CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, IsVarArg);
|
||||
|
||||
// The first 4 bytes are reserved for the callee's emergency stack slot.
|
||||
CCInfo.AllocateStack(4, 4);
|
||||
|
||||
CCInfo.AnalyzeCallOperands(Outs, AssignFn);
|
||||
|
||||
// Get a count of how many bytes are to be pushed on the stack.
|
||||
@ -2488,10 +2490,6 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
|
||||
}
|
||||
}
|
||||
|
||||
// Stack pointer relative accesses are done by changing the offset SGPR. This
|
||||
// is just the VGPR offset component.
|
||||
SDValue StackPtr = DAG.getConstant(CalleeUsableStackOffset, DL, MVT::i32);
|
||||
|
||||
SmallVector<SDValue, 8> MemOpChains;
|
||||
MVT PtrVT = MVT::i32;
|
||||
|
||||
@ -2535,7 +2533,7 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
|
||||
unsigned LocMemOffset = VA.getLocMemOffset();
|
||||
int32_t Offset = LocMemOffset;
|
||||
|
||||
SDValue PtrOff = DAG.getObjectPtrOffset(DL, StackPtr, Offset);
|
||||
SDValue PtrOff = DAG.getConstant(Offset, DL, PtrVT);
|
||||
|
||||
if (IsTailCall) {
|
||||
ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
|
||||
@ -2545,8 +2543,7 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
|
||||
Offset = Offset + FPDiff;
|
||||
int FI = MFI.CreateFixedObject(OpSize, Offset, true);
|
||||
|
||||
DstAddr = DAG.getObjectPtrOffset(DL, DAG.getFrameIndex(FI, PtrVT),
|
||||
StackPtr);
|
||||
DstAddr = DAG.getFrameIndex(FI, PtrVT);
|
||||
DstInfo = MachinePointerInfo::getFixedStack(MF, FI);
|
||||
|
||||
// Make sure any stack arguments overlapping with where we're storing
|
||||
@ -2581,7 +2578,7 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
|
||||
}
|
||||
|
||||
// Copy special input registers after user input arguments.
|
||||
passSpecialInputs(CLI, *Info, RegsToPass, MemOpChains, Chain, StackPtr);
|
||||
passSpecialInputs(CLI, CCInfo, *Info, RegsToPass, MemOpChains, Chain);
|
||||
|
||||
if (!MemOpChains.empty())
|
||||
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
|
||||
|
@ -265,11 +265,11 @@ public:
|
||||
|
||||
void passSpecialInputs(
|
||||
CallLoweringInfo &CLI,
|
||||
CCState &CCInfo,
|
||||
const SIMachineFunctionInfo &Info,
|
||||
SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass,
|
||||
SmallVectorImpl<SDValue> &MemOpChains,
|
||||
SDValue Chain,
|
||||
SDValue StackPtr) const;
|
||||
SDValue Chain) const;
|
||||
|
||||
SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
|
||||
CallingConv::ID CallConv, bool isVarArg,
|
||||
|
@ -110,7 +110,7 @@ entry:
|
||||
; GCN: s_sub_u32 s32, s32, 0xc00{{$}}
|
||||
; GCN-NEXT: s_waitcnt
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define void @call_void_func_byval_struct_func() #0 {
|
||||
define void @call_void_func_byval_struct_func() #1 {
|
||||
entry:
|
||||
%arg0 = alloca %struct.ByValStruct, align 4, addrspace(5)
|
||||
%arg1 = alloca %struct.ByValStruct, align 4, addrspace(5)
|
||||
@ -163,7 +163,7 @@ entry:
|
||||
; GCN: s_swappc_b64
|
||||
; GCN-NOT: s_sub_u32 s32
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @call_void_func_byval_struct_kernel() #0 {
|
||||
define amdgpu_kernel void @call_void_func_byval_struct_kernel() #1 {
|
||||
entry:
|
||||
%arg0 = alloca %struct.ByValStruct, align 4, addrspace(5)
|
||||
%arg1 = alloca %struct.ByValStruct, align 4, addrspace(5)
|
||||
@ -181,6 +181,146 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}void_func_byval_struct_align8:
|
||||
; GCN: s_mov_b32 s5, s32
|
||||
; GCN: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5 offset:8{{$}}
|
||||
; GCN-NOT: s32
|
||||
; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s5 offset:8{{$}}
|
||||
; GCN-NOT: s32
|
||||
|
||||
; GCN: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s5 offset:24{{$}}
|
||||
; GCN-NOT: s32
|
||||
; GCN: buffer_store_dword [[LOAD1]], off, s[0:3], s5 offset:24{{$}}
|
||||
; GCN-NOT: s32
|
||||
define void @void_func_byval_struct_align8(%struct.ByValStruct addrspace(5)* byval noalias nocapture align 8 %arg0, %struct.ByValStruct addrspace(5)* byval noalias nocapture align 8 %arg1) #1 {
|
||||
entry:
|
||||
%arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0
|
||||
%tmp = load volatile i32, i32 addrspace(5)* %arrayidx, align 8
|
||||
%add = add nsw i32 %tmp, 1
|
||||
store volatile i32 %add, i32 addrspace(5)* %arrayidx, align 8
|
||||
%arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0
|
||||
%tmp1 = load volatile i32, i32 addrspace(5)* %arrayidx2, align 8
|
||||
%add3 = add nsw i32 %tmp1, 2
|
||||
store volatile i32 %add3, i32 addrspace(5)* %arrayidx2, align 8
|
||||
store volatile i32 9, i32 addrspace(1)* null, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Make sure the byval alignment is respected in the call frame setup
|
||||
; GCN-LABEL: {{^}}call_void_func_byval_struct_align8_kernel:
|
||||
; GCN: s_mov_b32 s33, s7
|
||||
; GCN: s_add_u32 s32, s33, 0xc00{{$}}
|
||||
|
||||
; GCN-DAG: v_mov_b32_e32 [[NINE:v[0-9]+]], 9
|
||||
; GCN-DAG: v_mov_b32_e32 [[THIRTEEN:v[0-9]+]], 13
|
||||
; GCN-DAG: buffer_store_dword [[NINE]], off, s[0:3], s33 offset:8
|
||||
; GCN: buffer_store_dword [[THIRTEEN]], off, s[0:3], s33 offset:24
|
||||
|
||||
; GCN-NOT: s_add_u32 s32, s32, 0x800
|
||||
|
||||
; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s33 offset:8
|
||||
; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s33 offset:12
|
||||
; GCN-DAG: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s33 offset:16
|
||||
; GCN-DAG: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s33 offset:20
|
||||
|
||||
; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32 offset:8{{$}}
|
||||
; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:12
|
||||
; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:16
|
||||
; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:20
|
||||
|
||||
; GCN-DAG: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s33 offset:24
|
||||
; GCN-DAG: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s33 offset:28
|
||||
; GCN-DAG: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s33 offset:32
|
||||
; GCN-DAG: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s33 offset:36
|
||||
|
||||
; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:24
|
||||
; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:28
|
||||
; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:32
|
||||
; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:36
|
||||
|
||||
|
||||
; GCN: s_swappc_b64
|
||||
; GCN-NOT: s_sub_u32 s32
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @call_void_func_byval_struct_align8_kernel() #1 {
|
||||
entry:
|
||||
%arg0 = alloca %struct.ByValStruct, align 8, addrspace(5)
|
||||
%arg1 = alloca %struct.ByValStruct, align 8, addrspace(5)
|
||||
%tmp = bitcast %struct.ByValStruct addrspace(5)* %arg0 to i8 addrspace(5)*
|
||||
call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp)
|
||||
%tmp1 = bitcast %struct.ByValStruct addrspace(5)* %arg1 to i8 addrspace(5)*
|
||||
call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp1)
|
||||
%arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0
|
||||
store volatile i32 9, i32 addrspace(5)* %arrayidx, align 8
|
||||
%arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0
|
||||
store volatile i32 13, i32 addrspace(5)* %arrayidx2, align 8
|
||||
call void @void_func_byval_struct_align8(%struct.ByValStruct addrspace(5)* byval nonnull align 8 %arg0, %struct.ByValStruct addrspace(5)* byval nonnull align 8 %arg1)
|
||||
call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp1)
|
||||
call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp)
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}call_void_func_byval_struct_align8_func:
|
||||
; GCN: s_mov_b32 s5, s32
|
||||
; GCN-DAG: s_add_u32 s32, s32, 0xc00{{$}}
|
||||
; GCN-DAG: v_writelane_b32
|
||||
|
||||
; GCN-DAG: v_mov_b32_e32 [[NINE:v[0-9]+]], 9
|
||||
; GCN-DAG: v_mov_b32_e32 [[THIRTEEN:v[0-9]+]], 13
|
||||
|
||||
; GCN-DAG: buffer_store_dword [[NINE]], off, s[0:3], s5 offset:8
|
||||
; GCN-DAG: buffer_store_dword [[THIRTEEN]], off, s[0:3], s5 offset:24
|
||||
|
||||
; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5 offset:8
|
||||
; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s5 offset:12
|
||||
; GCN-DAG: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s5 offset:16
|
||||
; GCN-DAG: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s5 offset:20
|
||||
|
||||
; GCN-NOT: s_add_u32 s32, s32, 0x800
|
||||
|
||||
; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32 offset:8{{$}}
|
||||
; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:12
|
||||
; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:16
|
||||
; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:20
|
||||
|
||||
; GCN: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s5 offset:24
|
||||
; GCN: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s5 offset:28
|
||||
; GCN: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s5 offset:32
|
||||
; GCN: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s5 offset:36
|
||||
|
||||
; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:24
|
||||
; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:28
|
||||
; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:32
|
||||
; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:36
|
||||
|
||||
; GCN: s_swappc_b64
|
||||
; GCN-NOT: v_readlane_b32 s32
|
||||
; GCN: v_readlane_b32
|
||||
; GCN-NOT: v_readlane_b32 s32
|
||||
|
||||
; GCN-NOT: s_sub_u32 s32, s32, 0x800
|
||||
|
||||
; GCN: s_sub_u32 s32, s32, 0xc00{{$}}
|
||||
; GCN-NEXT: s_waitcnt
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define void @call_void_func_byval_struct_align8_func() #0 {
|
||||
entry:
|
||||
%arg0 = alloca %struct.ByValStruct, align 8, addrspace(5)
|
||||
%arg1 = alloca %struct.ByValStruct, align 8, addrspace(5)
|
||||
%tmp = bitcast %struct.ByValStruct addrspace(5)* %arg0 to i8 addrspace(5)*
|
||||
call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp)
|
||||
%tmp1 = bitcast %struct.ByValStruct addrspace(5)* %arg1 to i8 addrspace(5)*
|
||||
call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp1)
|
||||
%arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0
|
||||
store volatile i32 9, i32 addrspace(5)* %arrayidx, align 8
|
||||
%arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0
|
||||
store volatile i32 13, i32 addrspace(5)* %arrayidx2, align 8
|
||||
call void @void_func_byval_struct_align8(%struct.ByValStruct addrspace(5)* byval nonnull align 8 %arg0, %struct.ByValStruct addrspace(5)* byval nonnull align 8 %arg1)
|
||||
call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp1)
|
||||
call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp)
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}call_void_func_byval_struct_kernel_no_frame_pointer_elim:
|
||||
define amdgpu_kernel void @call_void_func_byval_struct_kernel_no_frame_pointer_elim() #2 {
|
||||
entry:
|
||||
|
@ -290,7 +290,7 @@ define void @too_many_args_use_workitem_id_x(
|
||||
|
||||
; GCN: s_mov_b32 s33, s7
|
||||
; GCN: s_mov_b32 s32, s33
|
||||
; GCN: buffer_store_dword v0, off, s[0:3], s32 offset:8
|
||||
; GCN: buffer_store_dword v0, off, s[0:3], s32 offset:4
|
||||
; GCN: s_mov_b32 s4, s33
|
||||
; GCN: s_swappc_b64
|
||||
define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x() #1 {
|
||||
@ -308,7 +308,7 @@ define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x() #1 {
|
||||
|
||||
; GCN-LABEL: {{^}}func_call_too_many_args_use_workitem_id_x:
|
||||
; GCN: s_mov_b32 s5, s32
|
||||
; GCN: buffer_store_dword v1, off, s[0:3], s32 offset:8
|
||||
; GCN: buffer_store_dword v1, off, s[0:3], s32 offset:
|
||||
; GCN: s_swappc_b64
|
||||
define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 {
|
||||
store volatile i32 %arg0, i32 addrspace(1)* undef
|
||||
@ -330,7 +330,7 @@ define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 {
|
||||
; GCN: s_add_u32 s32, s32, 0x400{{$}}
|
||||
; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:4
|
||||
|
||||
; GCN: buffer_store_dword v32, off, s[0:3], s32 offset:8{{$}}
|
||||
; GCN: buffer_store_dword v32, off, s[0:3], s32 offset:4{{$}}
|
||||
|
||||
; GCN: s_swappc_b64
|
||||
|
||||
@ -428,7 +428,7 @@ define void @too_many_args_use_workitem_id_x_byval(
|
||||
; GCN-NOT: s32
|
||||
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
|
||||
; GCN: buffer_store_dword [[K]], off, s[0:3], s33 offset:4
|
||||
; GCN: buffer_store_dword v0, off, s[0:3], s32 offset:12
|
||||
; GCN: buffer_store_dword v0, off, s[0:3], s32 offset:8
|
||||
|
||||
; GCN: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], s33 offset:4
|
||||
; GCN: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32 offset:4{{$}}
|
||||
@ -453,7 +453,7 @@ define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_byval() #1
|
||||
; GCN-LABEL: {{^}}func_call_too_many_args_use_workitem_id_x_byval:
|
||||
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
|
||||
; GCN: buffer_store_dword [[K]], off, s[0:3], s5 offset:4
|
||||
; GCN: buffer_store_dword v0, off, s[0:3], s32 offset:12
|
||||
; GCN: buffer_store_dword v0, off, s[0:3], s32 offset:8
|
||||
|
||||
; GCN: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], s5 offset:4
|
||||
; GCN: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32 offset:4{{$}}
|
||||
@ -539,11 +539,10 @@ define void @too_many_args_use_workitem_id_xyz(
|
||||
ret void
|
||||
}
|
||||
|
||||
; frame[0] = kernel emergency stack slot
|
||||
; frame[1] = callee emergency stack slot
|
||||
; frame[2] = ID X
|
||||
; frame[3] = ID Y
|
||||
; frame[4] = ID Z
|
||||
; frame[0] = callee emergency stack slot
|
||||
; frame[1] = ID X
|
||||
; frame[2] = ID Y
|
||||
; frame[3] = ID Z
|
||||
|
||||
; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_xyz:
|
||||
; GCN: enable_vgpr_workitem_id = 2
|
||||
@ -551,9 +550,9 @@ define void @too_many_args_use_workitem_id_xyz(
|
||||
; GCN: s_mov_b32 s33, s7
|
||||
; GCN: s_mov_b32 s32, s33
|
||||
|
||||
; GCN-DAG: buffer_store_dword v0, off, s[0:3], s32 offset:8
|
||||
; GCN-DAG: buffer_store_dword v1, off, s[0:3], s32 offset:12
|
||||
; GCN-DAG: buffer_store_dword v2, off, s[0:3], s32 offset:16
|
||||
; GCN-DAG: buffer_store_dword v0, off, s[0:3], s32 offset:4
|
||||
; GCN-DAG: buffer_store_dword v1, off, s[0:3], s32 offset:8
|
||||
; GCN-DAG: buffer_store_dword v2, off, s[0:3], s32 offset:12
|
||||
; GCN: s_swappc_b64
|
||||
define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_xyz() #1 {
|
||||
call void @too_many_args_use_workitem_id_xyz(
|
||||
@ -635,10 +634,9 @@ define void @too_many_args_use_workitem_id_x_stack_yz(
|
||||
ret void
|
||||
}
|
||||
|
||||
; frame[0] = kernel emergency stack slot
|
||||
; frame[1] = callee emergency stack slot
|
||||
; frame[2] = ID Y
|
||||
; frame[3] = ID Z
|
||||
; frame[0] = callee emergency stack slot
|
||||
; frame[1] = ID Y
|
||||
; frame[2] = ID Z
|
||||
|
||||
; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x_stack_yz:
|
||||
; GCN: enable_vgpr_workitem_id = 2
|
||||
@ -647,8 +645,8 @@ define void @too_many_args_use_workitem_id_x_stack_yz(
|
||||
; GCN: s_mov_b32 s32, s33
|
||||
|
||||
; GCN-DAG: v_mov_b32_e32 v31, v0
|
||||
; GCN-DAG: buffer_store_dword v1, off, s[0:3], s32 offset:8
|
||||
; GCN-DAG: buffer_store_dword v2, off, s[0:3], s32 offset:12
|
||||
; GCN-DAG: buffer_store_dword v1, off, s[0:3], s32 offset:4
|
||||
; GCN-DAG: buffer_store_dword v2, off, s[0:3], s32 offset:8
|
||||
; GCN: s_swappc_b64
|
||||
define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_stack_yz() #1 {
|
||||
call void @too_many_args_use_workitem_id_x_stack_yz(
|
||||
|
Loading…
Reference in New Issue
Block a user