mirror of
https://github.com/RPCS3/llvm.git
synced 2024-12-15 07:59:57 +00:00
R600: Use the same compute kernel calling convention for all GPUs
A side effect of this change is that the compiler now expects kernel arguments to be 4-byte aligned.
Reviewed-by: Vincent Lejeune <vljn at ovi.com>
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186916 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
5864284d71
commit
f502c292f6
@ -36,9 +36,9 @@ def CC_SI : CallingConv<[
|
||||
|
||||
]>;
|
||||
|
||||
// Calling convention for SI compute kernels
|
||||
def CC_SI_Kernel : CallingConv<[
|
||||
CCIfType<[v4i32, v4f32], CCAssignToStack <16, 4>>,
|
||||
// Calling convention for compute kernels
|
||||
def CC_AMDGPU_Kernel : CallingConv<[
|
||||
CCIfType<[v4i32, v4f32], CCAssignToStack <16, 16>>,
|
||||
CCIfType<[i64, f64], CCAssignToStack < 8, 4>>,
|
||||
CCIfType<[i32, f32], CCAssignToStack < 4, 4>>,
|
||||
CCIfType<[i16], CCAssignToStack < 2, 4>>,
|
||||
@ -46,8 +46,14 @@ def CC_SI_Kernel : CallingConv<[
|
||||
]>;
|
||||
|
||||
def CC_AMDGPU : CallingConv<[
|
||||
CCIf<"State.getMachineFunction().getInfo<SIMachineFunctionInfo>()->"#
|
||||
"ShaderType == ShaderType::COMPUTE", CCDelegateTo<CC_SI_Kernel>>,
|
||||
CCIf<"State.getTarget().getSubtarget<AMDGPUSubtarget>().getGeneration() == "
|
||||
"AMDGPUSubtarget::SOUTHERN_ISLANDS && "
|
||||
"State.getMachineFunction().getInfo<SIMachineFunctionInfo>()->"#
|
||||
"ShaderType == ShaderType::COMPUTE", CCDelegateTo<CC_AMDGPU_Kernel>>,
|
||||
CCIf<"State.getTarget().getSubtarget<AMDGPUSubtarget>().getGeneration() < "
|
||||
"AMDGPUSubtarget::SOUTHERN_ISLANDS && "
|
||||
"State.getMachineFunction().getInfo<R600MachineFunctionInfo>()->"
|
||||
"ShaderType == ShaderType::COMPUTE", CCDelegateTo<CC_AMDGPU_Kernel>>,
|
||||
CCIf<"State.getTarget().getSubtarget<AMDGPUSubtarget>()"#
|
||||
".getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS", CCDelegateTo<CC_SI>>
|
||||
]>;
|
||||
|
@ -18,6 +18,7 @@
|
||||
#include "AMDGPURegisterInfo.h"
|
||||
#include "AMDGPUSubtarget.h"
|
||||
#include "AMDILIntrinsicInfo.h"
|
||||
#include "R600MachineFunctionInfo.h"
|
||||
#include "SIMachineFunctionInfo.h"
|
||||
#include "llvm/CodeGen/CallingConvLower.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include "R600Defines.h"
|
||||
#include "R600InstrInfo.h"
|
||||
#include "R600MachineFunctionInfo.h"
|
||||
#include "llvm/CodeGen/CallingConvLower.h"
|
||||
#include "llvm/CodeGen/MachineFrameInfo.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
@ -1212,11 +1213,17 @@ SDValue R600TargetLowering::LowerFormalArguments(
|
||||
const SmallVectorImpl<ISD::InputArg> &Ins,
|
||||
SDLoc DL, SelectionDAG &DAG,
|
||||
SmallVectorImpl<SDValue> &InVals) const {
|
||||
unsigned ParamOffsetBytes = 36;
|
||||
SmallVector<CCValAssign, 16> ArgLocs;
|
||||
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
|
||||
getTargetMachine(), ArgLocs, *DAG.getContext());
|
||||
|
||||
AnalyzeFormalArguments(CCInfo, Ins);
|
||||
|
||||
Function::const_arg_iterator FuncArg =
|
||||
DAG.getMachineFunction().getFunction()->arg_begin();
|
||||
for (unsigned i = 0, e = Ins.size(); i < e; ++i, ++FuncArg) {
|
||||
EVT VT = Ins[i].VT;
|
||||
CCValAssign &VA = ArgLocs[i];
|
||||
EVT VT = VA.getLocVT();
|
||||
Type *ArgType = FuncArg->getType();
|
||||
unsigned ArgSizeInBits = ArgType->isPointerTy() ?
|
||||
32 : ArgType->getPrimitiveSizeInBits();
|
||||
@ -1239,12 +1246,14 @@ SDValue R600TargetLowering::LowerFormalArguments(
|
||||
|
||||
PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
|
||||
AMDGPUAS::PARAM_I_ADDRESS);
|
||||
|
||||
// The first 36 bytes of the input buffer contain information about
|
||||
// thread group and global sizes.
|
||||
SDValue Arg = DAG.getExtLoad(LoadType, DL, VT, DAG.getRoot(),
|
||||
DAG.getConstant(ParamOffsetBytes, MVT::i32),
|
||||
MachinePointerInfo(UndefValue::get(PtrTy)),
|
||||
ArgVT, false, false, ArgBytes);
|
||||
DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
|
||||
MachinePointerInfo(UndefValue::get(PtrTy)),
|
||||
ArgVT, false, false, ArgBytes);
|
||||
InVals.push_back(Arg);
|
||||
ParamOffsetBytes += ArgBytes;
|
||||
}
|
||||
return Chain;
|
||||
}
|
||||
|
@ -2,7 +2,7 @@
|
||||
; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK
|
||||
|
||||
; R600-CHECK: @v4i32_kernel_arg
|
||||
; R600-CHECK: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 40
|
||||
; R600-CHECK: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 52
|
||||
; SI-CHECK: @v4i32_kernel_arg
|
||||
; SI-CHECK: BUFFER_STORE_DWORDX4
|
||||
define void @v4i32_kernel_arg(<4 x i32> addrspace(1)* %out, <4 x i32> %in) {
|
||||
@ -12,7 +12,7 @@ entry:
|
||||
}
|
||||
|
||||
; R600-CHECK: @v4f32_kernel_arg
|
||||
; R600-CHECK: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 40
|
||||
; R600-CHECK: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 52
|
||||
; SI-CHECK: @v4f32_kernel_arg
|
||||
; SI-CHECK: BUFFER_STORE_DWORDX4
|
||||
define void @v4f32_kernel_args(<4 x float> addrspace(1)* %out, <4 x float> %in) {
|
||||
|
Loading…
Reference in New Issue
Block a user