R600/SI: Convert v16i8 resource descriptors to i128
Now that compute support is better on SI, we can't continue using v16i8 for resource descriptors, since this is also a legal type in OpenCL. This patch fixes numerous hangs with the piglit OpenCL tests, and since we now use a target-specific DAG node for LOAD_CONSTANT with the correct MemOperandFlags, it should also fix: https://bugs.freedesktop.org/show_bug.cgi?id=66805

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188429 91177308-0d34-0410-b5e6-96231b3b80d8
parent 34f505e227
commit 68db37b952
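To make the descriptor change easier to follow in the diff below, here is a minimal, hypothetical LLVM IR sketch. The function names and the ".i128" callee suffix are illustrative only, modeled on what the new SITypeRewriter pass constructs at runtime; they are not official intrinsic overloads.

    ; A graphics shader as a frontend emits it today: the constant-buffer
    ; resource descriptor is still passed as <16 x i8>.
    define float @before(<16 x i8> %desc, i32 %offset) {
      %v = call float @llvm.SI.load.const(<16 x i8> %desc, i32 %offset)
      ret float %v
    }

    ; Roughly what SITypeRewriter produces for a non-compute shader: the
    ; descriptor is bitcast to i128 and the call is redirected to an
    ; ".i128"-suffixed declaration, so only i128 (not v16i8) reaches
    ; instruction selection.
    define float @after(<16 x i8> %desc, i32 %offset) {
      %d = bitcast <16 x i8> %desc to i128
      %v = call float @llvm.SI.load.const.i128(i128 %d, i32 %offset)
      ret float %v
    }

    declare float @llvm.SI.load.const(<16 x i8>, i32) readnone
    declare float @llvm.SI.load.const.i128(i128, i32) readnone

Per the pass's file comment, this substitution is intended only for non-compute shaders, so OpenCL kernels keep using v16i8 in the IR while the backend no longer has to treat it as a legal type.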
@@ -34,6 +34,7 @@ FunctionPass *createR600ControlFlowFinalizer(TargetMachine &tm);
FunctionPass *createAMDGPUCFGStructurizerPass(TargetMachine &tm);

// SI Passes
FunctionPass *createSITypeRewriter();
FunctionPass *createSIAnnotateControlFlowPass();
FunctionPass *createSILowerControlFlowPass(TargetMachine &tm);
FunctionPass *createSIFixSGPRCopiesPass(TargetMachine &tm);
@@ -507,5 +507,11 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(CONST_ADDRESS)
NODE_NAME_CASE(REGISTER_LOAD)
NODE_NAME_CASE(REGISTER_STORE)
NODE_NAME_CASE(LOAD_CONSTANT)
NODE_NAME_CASE(LOAD_INPUT)
NODE_NAME_CASE(SAMPLE)
NODE_NAME_CASE(SAMPLEB)
NODE_NAME_CASE(SAMPLED)
NODE_NAME_CASE(SAMPLEL)
}
}
@@ -139,6 +139,13 @@ enum {
CONST_ADDRESS,
REGISTER_LOAD,
REGISTER_STORE,
LOAD_INPUT,
SAMPLE,
SAMPLEB,
SAMPLED,
SAMPLEL,
FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
LOAD_CONSTANT,
LAST_AMDGPU_ISD_NUMBER
};
@@ -121,6 +121,7 @@ AMDGPUPassConfig::addPreISel() {
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
addPass(createFlattenCFGPass());
if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
addPass(createSITypeRewriter());
addPass(createStructurizeCFGPass());
addPass(createSIAnnotateControlFlowPass());
} else {
@@ -47,6 +47,7 @@ add_llvm_target(R600CodeGen
SILowerControlFlow.cpp
SIMachineFunctionInfo.cpp
SIRegisterInfo.cpp
SITypeRewriter.cpp
)

add_dependencies(LLVMR600CodeGen AMDGPUCommonTableGen intrinsics_gen)
@@ -37,7 +37,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
addRegisterClass(MVT::v2i1, &AMDGPU::VReg_64RegClass);
addRegisterClass(MVT::v4i1, &AMDGPU::VReg_128RegClass);

addRegisterClass(MVT::v16i8, &AMDGPU::SReg_128RegClass);
addRegisterClass(MVT::v32i8, &AMDGPU::SReg_256RegClass);
addRegisterClass(MVT::v64i8, &AMDGPU::SReg_512RegClass);
@@ -70,6 +69,8 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setOperationAction(ISD::ADD, MVT::i64, Legal);
setOperationAction(ISD::ADD, MVT::i32, Legal);

setOperationAction(ISD::BITCAST, MVT::i128, Legal);

setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
@@ -82,6 +83,9 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);

setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f32, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v16i8, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom);

setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Expand);
@@ -415,7 +419,31 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case Intrinsic::r600_read_tidig_z:
return CreateLiveInRegister(DAG, &AMDGPU::VReg_32RegClass,
AMDGPU::VGPR2, VT);
case AMDGPUIntrinsic::SI_load_const: {
SDValue Ops [] = {
ResourceDescriptorToi128(Op.getOperand(1), DAG),
Op.getOperand(2)
};

MachineMemOperand *MMO = new MachineMemOperand(MachinePointerInfo(),
MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant,
VT.getSizeInBits() / 8, 4);
return DAG.getMemIntrinsicNode(AMDGPUISD::LOAD_CONSTANT, DL,
Op->getVTList(), Ops, 2, VT, MMO);
}
case AMDGPUIntrinsic::SI_sample:
return LowerSampleIntrinsic(AMDGPUISD::SAMPLE, Op, DAG);
case AMDGPUIntrinsic::SI_sampleb:
return LowerSampleIntrinsic(AMDGPUISD::SAMPLEB, Op, DAG);
case AMDGPUIntrinsic::SI_sampled:
return LowerSampleIntrinsic(AMDGPUISD::SAMPLED, Op, DAG);
case AMDGPUIntrinsic::SI_samplel:
return LowerSampleIntrinsic(AMDGPUISD::SAMPLEL, Op, DAG);
case AMDGPUIntrinsic::SI_vs_load_input:
return DAG.getNode(AMDGPUISD::LOAD_INPUT, DL, VT,
ResourceDescriptorToi128(Op.getOperand(1), DAG),
Op.getOperand(2),
Op.getOperand(3));
}
}
}
@@ -516,6 +544,29 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
return Chain;
}

SDValue SITargetLowering::ResourceDescriptorToi128(SDValue Op,
SelectionDAG &DAG) const {

if (Op.getValueType() == MVT::i128) {
return Op;
}

assert(Op.getOpcode() == ISD::UNDEF);

return DAG.getNode(ISD::BUILD_PAIR, SDLoc(Op), MVT::i128,
DAG.getConstant(0, MVT::i64),
DAG.getConstant(0, MVT::i64));
}

SDValue SITargetLowering::LowerSampleIntrinsic(unsigned Opcode,
const SDValue &Op,
SelectionDAG &DAG) const {
return DAG.getNode(Opcode, SDLoc(Op), Op.getValueType(), Op.getOperand(1),
Op.getOperand(2),
ResourceDescriptorToi128(Op.getOperand(3), DAG),
Op.getOperand(4));
}

SDValue SITargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
@@ -23,11 +23,14 @@ namespace llvm {
class SITargetLowering : public AMDGPUTargetLowering {
SDValue LowerParameter(SelectionDAG &DAG, EVT VT, SDLoc DL,
SDValue Chain, unsigned Offset) const;
SDValue LowerSampleIntrinsic(unsigned Opcode, const SDValue &Op,
SelectionDAG &DAG) const;
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;

SDValue ResourceDescriptorToi128(SDValue Op, SelectionDAG &DAG) const;
bool foldImm(SDValue &Operand, int32_t &Immediate,
bool &ScalarSlotUsed) const;
const TargetRegisterClass *getRegClassForNode(SelectionDAG &DAG,
@@ -16,6 +16,26 @@ def SIadd64bit32bit : SDNode<"ISD::ADD",
SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisVT<0, i64>, SDTCisVT<2, i32>]>
>;

def SIload_constant : SDNode<"AMDGPUISD::LOAD_CONSTANT",
SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisVT<1, i128>, SDTCisVT<2, i32>]>,
[SDNPMayLoad, SDNPMemOperand]
>;

def SIload_input : SDNode<"AMDGPUISD::LOAD_INPUT",
SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisVT<1, i128>, SDTCisVT<2, i16>,
SDTCisVT<3, i32>]>
>;

class SDSample<string opcode> : SDNode <opcode,
SDTypeProfile<1, 4, [SDTCisVT<0, v4f32>, SDTCisVec<1>, SDTCisVT<2, v32i8>,
SDTCisVT<3, i128>, SDTCisVT<4, i32>]>
>;

def SIsample : SDSample<"AMDGPUISD::SAMPLE">;
def SIsampleb : SDSample<"AMDGPUISD::SAMPLEB">;
def SIsampled : SDSample<"AMDGPUISD::SAMPLED">;
def SIsamplel : SDSample<"AMDGPUISD::SAMPLEL">;

// Transformation function, extract the lower 32bit of a 64bit immediate
def LO32 : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(N->getZExtValue() & 0xffffffff, MVT::i32);
@@ -1303,7 +1303,7 @@ def : Pat <

/* int_SI_vs_load_input */
def : Pat<
(int_SI_vs_load_input v16i8:$tlst, IMM12bit:$attr_offset, i32:$buf_idx_vgpr),
(SIload_input i128:$tlst, IMM12bit:$attr_offset, i32:$buf_idx_vgpr),
(BUFFER_LOAD_FORMAT_XYZW_IDXEN $tlst, $buf_idx_vgpr, imm:$attr_offset)
>;
@@ -1324,63 +1324,63 @@ def : Pat <
/********** Image sampling patterns **********/
/********** ======================= **********/

/* int_SI_sample for simple 1D texture lookup */
/* SIsample for simple 1D texture lookup */
def : Pat <
(int_SI_sample v1i32:$addr, v32i8:$rsrc, v16i8:$sampler, imm),
(SIsample v1i32:$addr, v32i8:$rsrc, i128:$sampler, imm),
(IMAGE_SAMPLE_V1 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;

class SamplePattern<Intrinsic name, MIMG opcode, ValueType vt> : Pat <
(name vt:$addr, v32i8:$rsrc, v16i8:$sampler, imm),
class SamplePattern<SDNode name, MIMG opcode, ValueType vt> : Pat <
(name vt:$addr, v32i8:$rsrc, i128:$sampler, imm),
(opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;

class SampleRectPattern<Intrinsic name, MIMG opcode, ValueType vt> : Pat <
(name vt:$addr, v32i8:$rsrc, v16i8:$sampler, TEX_RECT),
class SampleRectPattern<SDNode name, MIMG opcode, ValueType vt> : Pat <
(name vt:$addr, v32i8:$rsrc, i128:$sampler, TEX_RECT),
(opcode 0xf, 1, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;

class SampleArrayPattern<Intrinsic name, MIMG opcode, ValueType vt> : Pat <
(name vt:$addr, v32i8:$rsrc, v16i8:$sampler, TEX_ARRAY),
class SampleArrayPattern<SDNode name, MIMG opcode, ValueType vt> : Pat <
(name vt:$addr, v32i8:$rsrc, i128:$sampler, TEX_ARRAY),
(opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;

class SampleShadowPattern<Intrinsic name, MIMG opcode,
class SampleShadowPattern<SDNode name, MIMG opcode,
ValueType vt> : Pat <
(name vt:$addr, v32i8:$rsrc, v16i8:$sampler, TEX_SHADOW),
(name vt:$addr, v32i8:$rsrc, i128:$sampler, TEX_SHADOW),
(opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;

class SampleShadowArrayPattern<Intrinsic name, MIMG opcode,
class SampleShadowArrayPattern<SDNode name, MIMG opcode,
ValueType vt> : Pat <
(name vt:$addr, v32i8:$rsrc, v16i8:$sampler, TEX_SHADOW_ARRAY),
(name vt:$addr, v32i8:$rsrc, i128:$sampler, TEX_SHADOW_ARRAY),
(opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;

/* int_SI_sample* for texture lookups consuming more address parameters */
/* SIsample* for texture lookups consuming more address parameters */
multiclass SamplePatterns<MIMG sample, MIMG sample_c, MIMG sample_l,
MIMG sample_c_l, MIMG sample_b, MIMG sample_c_b,
MIMG sample_d, MIMG sample_c_d, ValueType addr_type> {
def : SamplePattern <int_SI_sample, sample, addr_type>;
def : SampleRectPattern <int_SI_sample, sample, addr_type>;
def : SampleArrayPattern <int_SI_sample, sample, addr_type>;
def : SampleShadowPattern <int_SI_sample, sample_c, addr_type>;
def : SampleShadowArrayPattern <int_SI_sample, sample_c, addr_type>;
def : SamplePattern <SIsample, sample, addr_type>;
def : SampleRectPattern <SIsample, sample, addr_type>;
def : SampleArrayPattern <SIsample, sample, addr_type>;
def : SampleShadowPattern <SIsample, sample_c, addr_type>;
def : SampleShadowArrayPattern <SIsample, sample_c, addr_type>;

def : SamplePattern <int_SI_samplel, sample_l, addr_type>;
def : SampleArrayPattern <int_SI_samplel, sample_l, addr_type>;
def : SampleShadowPattern <int_SI_samplel, sample_c_l, addr_type>;
def : SampleShadowArrayPattern <int_SI_samplel, sample_c_l, addr_type>;
def : SamplePattern <SIsamplel, sample_l, addr_type>;
def : SampleArrayPattern <SIsamplel, sample_l, addr_type>;
def : SampleShadowPattern <SIsamplel, sample_c_l, addr_type>;
def : SampleShadowArrayPattern <SIsamplel, sample_c_l, addr_type>;

def : SamplePattern <int_SI_sampleb, sample_b, addr_type>;
def : SampleArrayPattern <int_SI_sampleb, sample_b, addr_type>;
def : SampleShadowPattern <int_SI_sampleb, sample_c_b, addr_type>;
def : SampleShadowArrayPattern <int_SI_sampleb, sample_c_b, addr_type>;
def : SamplePattern <SIsampleb, sample_b, addr_type>;
def : SampleArrayPattern <SIsampleb, sample_b, addr_type>;
def : SampleShadowPattern <SIsampleb, sample_c_b, addr_type>;
def : SampleShadowArrayPattern <SIsampleb, sample_c_b, addr_type>;

def : SamplePattern <int_SI_sampled, sample_d, addr_type>;
def : SampleArrayPattern <int_SI_sampled, sample_d, addr_type>;
def : SampleShadowPattern <int_SI_sampled, sample_c_d, addr_type>;
def : SampleShadowArrayPattern <int_SI_sampled, sample_c_d, addr_type>;
def : SamplePattern <SIsampled, sample_d, addr_type>;
def : SampleArrayPattern <SIsampled, sample_d, addr_type>;
def : SampleShadowPattern <SIsampled, sample_c_d, addr_type>;
def : SampleShadowArrayPattern <SIsampled, sample_c_d, addr_type>;
}

defm : SamplePatterns<IMAGE_SAMPLE_V2, IMAGE_SAMPLE_C_V2,
@@ -1694,19 +1694,19 @@ def : Pat <

// 1. Offset as 8bit DWORD immediate
def : Pat <
(int_SI_load_const v16i8:$sbase, IMM8bitDWORD:$offset),
(SIload_constant i128:$sbase, IMM8bitDWORD:$offset),
(S_BUFFER_LOAD_DWORD_IMM $sbase, IMM8bitDWORD:$offset)
>;

// 2. Offset loaded in an 32bit SGPR
def : Pat <
(int_SI_load_const v16i8:$sbase, imm:$offset),
(SIload_constant i128:$sbase, imm:$offset),
(S_BUFFER_LOAD_DWORD_SGPR $sbase, (S_MOV_B32 imm:$offset))
>;

// 3. Offset in an 32Bit VGPR
def : Pat <
(int_SI_load_const v16i8:$sbase, i32:$voff),
(SIload_constant i128:$sbase, i32:$voff),
(BUFFER_LOAD_DWORD_OFFEN $sbase, $voff)
>;
@@ -1777,7 +1777,7 @@ defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, f32>;
defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, i32>;
defm : SMRD_Pattern <S_LOAD_DWORDX2_IMM, S_LOAD_DWORDX2_SGPR, i64>;
defm : SMRD_Pattern <S_LOAD_DWORDX2_IMM, S_LOAD_DWORDX2_SGPR, v2i32>;
defm : SMRD_Pattern <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, v16i8>;
defm : SMRD_Pattern <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, i128>;
defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v32i8>;

//===----------------------------------------------------------------------===//
@@ -17,10 +17,10 @@ let TargetPrefix = "SI", isTarget = 1 in {
def int_SI_tid : Intrinsic <[llvm_i32_ty], [], [IntrNoMem]>;
def int_SI_packf16 : Intrinsic <[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_SI_export : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>;
def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v16i8_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]> ;
def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_anyint_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]> ;

class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;

def int_SI_sample : Sample;
def int_SI_sampleb : Sample;
@@ -157,7 +157,7 @@ def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, i1], 64,
(add SGPR_64, VCCReg, EXECReg)
>;

def SReg_128 : RegisterClass<"AMDGPU", [v16i8, i128], 128, (add SGPR_128)>;
def SReg_128 : RegisterClass<"AMDGPU", [i128], 128, (add SGPR_128)>;

def SReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 256, (add SGPR_256)>;
lib/Target/R600/SITypeRewriter.cpp (new file, 146 lines)
@@ -0,0 +1,146 @@
//===-- SITypeRewriter.cpp - Remove unwanted types ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass removes performs the following type substitution on all
/// non-compute shaders:
///
/// v16i8 => i128
/// - v16i8 is used for constant memory resource descriptors. This type is
/// legal for some compute APIs, and we don't want to declare it as legal
/// in the backend, because we want the legalizer to expand all v16i8
/// operations.
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"

#include "llvm/IR/IRBuilder.h"
#include "llvm/InstVisitor.h"

using namespace llvm;

namespace {

class SITypeRewriter : public FunctionPass,
public InstVisitor<SITypeRewriter> {

static char ID;
Module *Mod;
Type *v16i8;
Type *i128;

public:
SITypeRewriter() : FunctionPass(ID) { }
virtual bool doInitialization(Module &M);
virtual bool runOnFunction(Function &F);
virtual const char *getPassName() const {
return "SI Type Rewriter";
}
void visitLoadInst(LoadInst &I);
void visitCallInst(CallInst &I);
void visitBitCast(BitCastInst &I);
};

} // End anonymous namespace

char SITypeRewriter::ID = 0;

bool SITypeRewriter::doInitialization(Module &M) {
Mod = &M;
v16i8 = VectorType::get(Type::getInt8Ty(M.getContext()), 16);
i128 = Type::getIntNTy(M.getContext(), 128);
return false;
}

bool SITypeRewriter::runOnFunction(Function &F) {
AttributeSet Set = F.getAttributes();
Attribute A = Set.getAttribute(AttributeSet::FunctionIndex, "ShaderType");

unsigned ShaderType = ShaderType::COMPUTE;
if (A.isStringAttribute()) {
StringRef Str = A.getValueAsString();
Str.getAsInteger(0, ShaderType);
}
if (ShaderType != ShaderType::COMPUTE) {
visit(F);
}

visit(F);

return false;
}

void SITypeRewriter::visitLoadInst(LoadInst &I) {
Value *Ptr = I.getPointerOperand();
Type *PtrTy = Ptr->getType();
Type *ElemTy = PtrTy->getPointerElementType();
IRBuilder<> Builder(&I);
if (ElemTy == v16i8) {
Value *BitCast = Builder.CreateBitCast(Ptr, Type::getIntNPtrTy(I.getContext(), 128, 2));
LoadInst *Load = Builder.CreateLoad(BitCast);
SmallVector <std::pair<unsigned, MDNode*>, 8> MD;
I.getAllMetadataOtherThanDebugLoc(MD);
for (unsigned i = 0, e = MD.size(); i != e; ++i) {
Load->setMetadata(MD[i].first, MD[i].second);
}
Value *BitCastLoad = Builder.CreateBitCast(Load, I.getType());
I.replaceAllUsesWith(BitCastLoad);
I.eraseFromParent();
}
}

void SITypeRewriter::visitCallInst(CallInst &I) {
IRBuilder<> Builder(&I);
SmallVector <Value*, 8> Args;
SmallVector <Type*, 8> Types;
bool NeedToReplace = false;
Function *F = I.getCalledFunction();
std::string Name = F->getName().str();
for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
Value *Arg = I.getArgOperand(i);
if (Arg->getType() == v16i8) {
Args.push_back(Builder.CreateBitCast(Arg, i128));
Types.push_back(i128);
NeedToReplace = true;
Name = Name + ".i128";
} else {
Args.push_back(Arg);
Types.push_back(Arg->getType());
}
}

if (!NeedToReplace) {
return;
}
Function *NewF = Mod->getFunction(Name);
if (!NewF) {
NewF = Function::Create(FunctionType::get(F->getReturnType(), Types, false), GlobalValue::ExternalLinkage, Name, Mod);
NewF->setAttributes(F->getAttributes());
}
I.replaceAllUsesWith(Builder.CreateCall(NewF, Args));
I.eraseFromParent();
}

void SITypeRewriter::visitBitCast(BitCastInst &I) {
IRBuilder<> Builder(&I);
if (I.getDestTy() != i128) {
return;
}

if (BitCastInst *Op = dyn_cast<BitCastInst>(I.getOperand(0))) {
if (Op->getSrcTy() == i128) {
I.replaceAllUsesWith(Op->getOperand(0));
I.eraseFromParent();
}
}
}

FunctionPass *llvm::createSITypeRewriter() {
return new SITypeRewriter();
}
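As a rough illustration of the load rewrite performed by visitLoadInst above, here is a sketch only, written in the typed-pointer IR syntax of this LLVM version; the function and value names are made up, and address space 2 is the constant address space the pass assumes:

    ; Before: a graphics shader loads the resource descriptor as <16 x i8>
    ; from constant address space 2.
    define <16 x i8> @load_desc_v16i8(<16 x i8> addrspace(2)* %ptr) {
      %desc = load <16 x i8> addrspace(2)* %ptr
      ret <16 x i8> %desc
    }

    ; After the pass: the pointer is bitcast to i128 addrspace(2)*, the value is
    ; loaded as i128 (the original load's metadata is copied over), and the
    ; result is bitcast back to the type the rest of the shader expects.
    define <16 x i8> @load_desc_i128(<16 x i8> addrspace(2)* %ptr) {
      %cast = bitcast <16 x i8> addrspace(2)* %ptr to i128 addrspace(2)*
      %val = load i128 addrspace(2)* %cast
      %desc = bitcast i128 %val to <16 x i8>
      ret <16 x i8> %desc
    }

If the value is later bitcast back to i128, for example as an argument to a rewritten intrinsic call, visitBitCast collapses the round trip so the i128 load feeds the call directly.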
@@ -35,37 +35,37 @@ define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
%v15 = insertelement <4 x i32> undef, i32 %a4, i32 2
%v16 = insertelement <4 x i32> undef, i32 %a4, i32 3
%res1 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v1,
<32 x i8> undef, <4 x i32> undef, i32 1)
<32 x i8> undef, <16 x i8> undef, i32 1)
%res2 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v2,
<32 x i8> undef, <4 x i32> undef, i32 2)
<32 x i8> undef, <16 x i8> undef, i32 2)
%res3 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v3,
<32 x i8> undef, <4 x i32> undef, i32 3)
<32 x i8> undef, <16 x i8> undef, i32 3)
%res4 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v4,
<32 x i8> undef, <4 x i32> undef, i32 4)
<32 x i8> undef, <16 x i8> undef, i32 4)
%res5 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v5,
<32 x i8> undef, <4 x i32> undef, i32 5)
<32 x i8> undef, <16 x i8> undef, i32 5)
%res6 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v6,
<32 x i8> undef, <4 x i32> undef, i32 6)
<32 x i8> undef, <16 x i8> undef, i32 6)
%res7 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v7,
<32 x i8> undef, <4 x i32> undef, i32 7)
<32 x i8> undef, <16 x i8> undef, i32 7)
%res8 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v8,
<32 x i8> undef, <4 x i32> undef, i32 8)
<32 x i8> undef, <16 x i8> undef, i32 8)
%res9 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v9,
<32 x i8> undef, <4 x i32> undef, i32 9)
<32 x i8> undef, <16 x i8> undef, i32 9)
%res10 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v10,
<32 x i8> undef, <4 x i32> undef, i32 10)
<32 x i8> undef, <16 x i8> undef, i32 10)
%res11 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v11,
<32 x i8> undef, <4 x i32> undef, i32 11)
<32 x i8> undef, <16 x i8> undef, i32 11)
%res12 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v12,
<32 x i8> undef, <4 x i32> undef, i32 12)
<32 x i8> undef, <16 x i8> undef, i32 12)
%res13 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v13,
<32 x i8> undef, <4 x i32> undef, i32 13)
<32 x i8> undef, <16 x i8> undef, i32 13)
%res14 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v14,
<32 x i8> undef, <4 x i32> undef, i32 14)
<32 x i8> undef, <16 x i8> undef, i32 14)
%res15 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v15,
<32 x i8> undef, <4 x i32> undef, i32 15)
<32 x i8> undef, <16 x i8> undef, i32 15)
%res16 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v16,
<32 x i8> undef, <4 x i32> undef, i32 16)
<32 x i8> undef, <16 x i8> undef, i32 16)
%e1 = extractelement <4 x float> %res1, i32 0
%e2 = extractelement <4 x float> %res2, i32 1
%e3 = extractelement <4 x float> %res3, i32 2
@@ -135,6 +135,6 @@ define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
ret void
}

declare <4 x float> @llvm.SI.sample.(<4 x i32>, <32 x i8>, <4 x i32>, i32) readnone
declare <4 x float> @llvm.SI.sample.(<4 x i32>, <32 x i8>, <16 x i8>, i32) readnone

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
@@ -35,37 +35,37 @@ define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
%v15 = insertelement <4 x i32> undef, i32 %a4, i32 2
%v16 = insertelement <4 x i32> undef, i32 %a4, i32 3
%res1 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v1,
<32 x i8> undef, <4 x i32> undef, i32 1)
<32 x i8> undef, <16 x i8> undef, i32 1)
%res2 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v2,
<32 x i8> undef, <4 x i32> undef, i32 2)
<32 x i8> undef, <16 x i8> undef, i32 2)
%res3 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v3,
<32 x i8> undef, <4 x i32> undef, i32 3)
<32 x i8> undef, <16 x i8> undef, i32 3)
%res4 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v4,
<32 x i8> undef, <4 x i32> undef, i32 4)
<32 x i8> undef, <16 x i8> undef, i32 4)
%res5 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v5,
<32 x i8> undef, <4 x i32> undef, i32 5)
<32 x i8> undef, <16 x i8> undef, i32 5)
%res6 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v6,
<32 x i8> undef, <4 x i32> undef, i32 6)
<32 x i8> undef, <16 x i8> undef, i32 6)
%res7 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v7,
<32 x i8> undef, <4 x i32> undef, i32 7)
<32 x i8> undef, <16 x i8> undef, i32 7)
%res8 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v8,
<32 x i8> undef, <4 x i32> undef, i32 8)
<32 x i8> undef, <16 x i8> undef, i32 8)
%res9 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v9,
<32 x i8> undef, <4 x i32> undef, i32 9)
<32 x i8> undef, <16 x i8> undef, i32 9)
%res10 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v10,
<32 x i8> undef, <4 x i32> undef, i32 10)
<32 x i8> undef, <16 x i8> undef, i32 10)
%res11 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v11,
<32 x i8> undef, <4 x i32> undef, i32 11)
<32 x i8> undef, <16 x i8> undef, i32 11)
%res12 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v12,
<32 x i8> undef, <4 x i32> undef, i32 12)
<32 x i8> undef, <16 x i8> undef, i32 12)
%res13 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v13,
<32 x i8> undef, <4 x i32> undef, i32 13)
<32 x i8> undef, <16 x i8> undef, i32 13)
%res14 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v14,
<32 x i8> undef, <4 x i32> undef, i32 14)
<32 x i8> undef, <16 x i8> undef, i32 14)
%res15 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v15,
<32 x i8> undef, <4 x i32> undef, i32 15)
<32 x i8> undef, <16 x i8> undef, i32 15)
%res16 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v16,
<32 x i8> undef, <4 x i32> undef, i32 16)
<32 x i8> undef, <16 x i8> undef, i32 16)
%e1 = extractelement <4 x float> %res1, i32 0
%e2 = extractelement <4 x float> %res2, i32 1
%e3 = extractelement <4 x float> %res3, i32 2
@@ -135,6 +135,6 @@ define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
ret void
}

declare <4 x float> @llvm.SI.sampled.(<4 x i32>, <32 x i8>, <4 x i32>, i32) readnone
declare <4 x float> @llvm.SI.sampled.(<4 x i32>, <32 x i8>, <16 x i8>, i32) readnone

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)