R600: Check if a sextload should be used for parameter loads.

Due to an oddity where truncate (sextload x) isn't folded into
an anyextload for vectors, the sextload remains if the
vector isn't immediately scalarized. This keeps the expected
zextload instructions in the kernel-args test when small-type
vectors aren't scalarized.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@206070 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Matt Arsenault 2014-04-11 20:59:54 +00:00
parent a02c32f31d
commit 4aee5942c3
3 changed files with 20 additions and 14 deletions

View File

@ -1394,7 +1394,12 @@ SDValue R600TargetLowering::LowerFormalArguments(
// The first 36 bytes of the input buffer contains information about // The first 36 bytes of the input buffer contains information about
// thread group and global sizes. // thread group and global sizes.
SDValue Arg = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, Chain,
// FIXME: This should really check the extload type, but the handling of
// extload vector parameters seems to be broken.
//ISD::LoadExtType Ext = Ins[i].Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
ISD::LoadExtType Ext = ISD::SEXTLOAD;
SDValue Arg = DAG.getExtLoad(Ext, DL, VT, Chain,
DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32), DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
MachinePointerInfo(UndefValue::get(PtrTy)), MachinePointerInfo(UndefValue::get(PtrTy)),
MemVT, false, false, 4); MemVT, false, false, 4);

View File

@ -224,7 +224,7 @@ bool SITargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
SDLoc DL, SDValue Chain, SDLoc DL, SDValue Chain,
unsigned Offset) const { unsigned Offset, bool Signed) const {
MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()), PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
AMDGPUAS::CONSTANT_ADDRESS); AMDGPUAS::CONSTANT_ADDRESS);
@ -232,7 +232,7 @@ SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
MRI.getLiveInVirtReg(AMDGPU::SGPR0_SGPR1), MVT::i64); MRI.getLiveInVirtReg(AMDGPU::SGPR0_SGPR1), MVT::i64);
SDValue Ptr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr, SDValue Ptr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
DAG.getConstant(Offset, MVT::i64)); DAG.getConstant(Offset, MVT::i64));
return DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, Chain, Ptr, return DAG.getExtLoad(Signed ? ISD::SEXTLOAD : ISD::ZEXTLOAD, DL, VT, Chain, Ptr,
MachinePointerInfo(UndefValue::get(PtrTy)), MemVT, MachinePointerInfo(UndefValue::get(PtrTy)), MemVT,
false, false, MemVT.getSizeInBits() >> 3); false, false, MemVT.getSizeInBits() >> 3);
@ -340,7 +340,8 @@ SDValue SITargetLowering::LowerFormalArguments(
// The first 36 bytes of the input buffer contains information about // The first 36 bytes of the input buffer contains information about
// thread group and global sizes. // thread group and global sizes.
SDValue Arg = LowerParameter(DAG, VT, MemVT, DL, DAG.getRoot(), SDValue Arg = LowerParameter(DAG, VT, MemVT, DL, DAG.getRoot(),
36 + VA.getLocMemOffset()); 36 + VA.getLocMemOffset(),
Ins[i].Flags.isSExt());
InVals.push_back(Arg); InVals.push_back(Arg);
continue; continue;
} }
@ -533,23 +534,23 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (IntrinsicID) { switch (IntrinsicID) {
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
case Intrinsic::r600_read_ngroups_x: case Intrinsic::r600_read_ngroups_x:
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 0); return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 0, false);
case Intrinsic::r600_read_ngroups_y: case Intrinsic::r600_read_ngroups_y:
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 4); return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 4, false);
case Intrinsic::r600_read_ngroups_z: case Intrinsic::r600_read_ngroups_z:
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 8); return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 8, false);
case Intrinsic::r600_read_global_size_x: case Intrinsic::r600_read_global_size_x:
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 12); return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 12, false);
case Intrinsic::r600_read_global_size_y: case Intrinsic::r600_read_global_size_y:
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 16); return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 16, false);
case Intrinsic::r600_read_global_size_z: case Intrinsic::r600_read_global_size_z:
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 20); return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 20, false);
case Intrinsic::r600_read_local_size_x: case Intrinsic::r600_read_local_size_x:
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 24); return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 24, false);
case Intrinsic::r600_read_local_size_y: case Intrinsic::r600_read_local_size_y:
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 28); return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 28, false);
case Intrinsic::r600_read_local_size_z: case Intrinsic::r600_read_local_size_z:
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 32); return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 32, false);
case Intrinsic::r600_read_tgid_x: case Intrinsic::r600_read_tgid_x:
return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass, return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
AMDGPU::SReg_32RegClass.getRegister(NumUserSGPRs + 0), VT); AMDGPU::SReg_32RegClass.getRegister(NumUserSGPRs + 0), VT);

View File

@ -22,7 +22,7 @@ namespace llvm {
class SITargetLowering : public AMDGPUTargetLowering { class SITargetLowering : public AMDGPUTargetLowering {
SDValue LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, SDLoc DL, SDValue LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, SDLoc DL,
SDValue Chain, unsigned Offset) const; SDValue Chain, unsigned Offset, bool Signed) const;
SDValue LowerSampleIntrinsic(unsigned Opcode, const SDValue &Op, SDValue LowerSampleIntrinsic(unsigned Opcode, const SDValue &Op,
SelectionDAG &DAG) const; SelectionDAG &DAG) const;
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;