Mirror of https://github.com/RPCS3/llvm.git
R600/SI: Teach moveToVALU how to handle more S_LOAD_* instructions
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@216220 91177308-0d34-0410-b5e6-96231b3b80d8
parent 5f52739370
commit fdbf61d00d
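In short: moveSMRDToVALU already handled SMRD loads up to 128 bits, the widest a single BUFFER_LOAD_DWORDX4 can replace. This commit adds the DWORDX8 and DWORDX16 forms by introducing a splitSMRD helper that halves a wide load and recurses until every piece fits, then stitches the halves back together with REG_SEQUENCE. A minimal standalone model of that control flow (a hypothetical sketch in plain C++, not the LLVM API):

#include <cstdio>

// Widths are in dwords; anything wider than 4 dwords (128 bits) gets
// split in half until each piece can become one BUFFER_LOAD_DWORDX4.
static void moveSMRDToVALUModel(unsigned Dwords) {
  if (Dwords <= 4) {
    std::printf("emit BUFFER_LOAD_DWORDX%u\n", Dwords);
    return;
  }
  // Mirrors splitSMRD: two half-width loads, recombined by REG_SEQUENCE.
  moveSMRDToVALUModel(Dwords / 2);
  moveSMRDToVALUModel(Dwords / 2);
  std::printf("emit REG_SEQUENCE joining two %u-dword halves\n", Dwords / 2);
}

int main() {
  moveSMRDToVALUModel(16); // S_LOAD_DWORDX16 -> four BUFFER_LOAD_DWORDX4s
}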
@@ -1367,6 +1367,88 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
  }
}

void SIInstrInfo::splitSMRD(MachineInstr *MI,
                            const TargetRegisterClass *HalfRC,
                            unsigned HalfImmOp, unsigned HalfSGPROp,
                            MachineInstr *&Lo, MachineInstr *&Hi) const {

  DebugLoc DL = MI->getDebugLoc();
  MachineBasicBlock *MBB = MI->getParent();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
  unsigned RegLo = MRI.createVirtualRegister(HalfRC);
  unsigned RegHi = MRI.createVirtualRegister(HalfRC);
  unsigned HalfSize = HalfRC->getSize();
  const MachineOperand *OffOp =
      getNamedOperand(*MI, AMDGPU::OpName::offset);
  const MachineOperand *SBase = getNamedOperand(*MI, AMDGPU::OpName::sbase);

  if (OffOp) {
    // Handle the _IMM variant
    unsigned LoOffset = OffOp->getImm();
    unsigned HiOffset = LoOffset + (HalfSize / 4);
    Lo = BuildMI(*MBB, MI, DL, get(HalfImmOp), RegLo)
             .addOperand(*SBase)
             .addImm(LoOffset);

    if (!isUInt<8>(HiOffset)) {
      unsigned OffsetSGPR =
          MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
      BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32), OffsetSGPR)
          .addImm(HiOffset << 2); // The immediate offset is in dwords,
                                  // but the offset in a register is in bytes.
      Hi = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegHi)
               .addOperand(*SBase)
               .addReg(OffsetSGPR);
    } else {
      Hi = BuildMI(*MBB, MI, DL, get(HalfImmOp), RegHi)
               .addOperand(*SBase)
               .addImm(HiOffset);
    }
  } else {
    // Handle the _SGPR variant
    MachineOperand *SOff = getNamedOperand(*MI, AMDGPU::OpName::soff);
    Lo = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegLo)
             .addOperand(*SBase)
             .addOperand(*SOff);
    unsigned OffsetSGPR = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(*MBB, MI, DL, get(AMDGPU::S_ADD_I32), OffsetSGPR)
        .addOperand(*SOff)
        .addImm(HalfSize);
    Hi = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegHi)
             .addOperand(*SBase)
             .addReg(OffsetSGPR);
  }

  unsigned SubLo, SubHi;
  switch (HalfSize) {
  case 4:
    SubLo = AMDGPU::sub0;
    SubHi = AMDGPU::sub1;
    break;
  case 8:
    SubLo = AMDGPU::sub0_sub1;
    SubHi = AMDGPU::sub2_sub3;
    break;
  case 16:
    SubLo = AMDGPU::sub0_sub1_sub2_sub3;
    SubHi = AMDGPU::sub4_sub5_sub6_sub7;
    break;
  case 32:
    SubLo = AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7;
    SubHi = AMDGPU::sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15;
    break;
  default:
    llvm_unreachable("Unhandled HalfSize");
  }

  BuildMI(*MBB, MI, DL, get(AMDGPU::REG_SEQUENCE))
      .addOperand(MI->getOperand(0))
      .addReg(RegLo)
      .addImm(SubLo)
      .addReg(RegHi)
      .addImm(SubHi);
}

void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) const {
  MachineBasicBlock *MBB = MI->getParent();
  switch (MI->getOpcode()) {
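A worked instance of the offset arithmetic in the _IMM path above (the numbers are made up for illustration): SMRD immediate offsets count dwords, so the high half starts HalfSize / 4 dwords past the low half, and once that sum no longer fits the 8-bit immediate field it has to move into an SGPR, where the hardware expects bytes, hence the << 2:

#include <cstdint>
#include <cstdio>

// Stand-in for llvm::isUInt<8>: does X fit in an unsigned 8-bit field?
static bool fitsUInt8(uint64_t X) { return X <= 0xFF; }

int main() {
  const unsigned HalfSize = 16;                      // bytes per 128-bit half
  const unsigned LoOffset = 253;                     // low half, in dwords
  const unsigned HiOffset = LoOffset + HalfSize / 4; // 253 + 4 = 257 dwords

  if (!fitsUInt8(HiOffset)) {
    // Too big for the 8-bit immediate: fall back to the _SGPR form, which
    // takes a byte offset in a register (dwords << 2 == dwords * 4).
    std::printf("S_MOV_B32 into offset SGPR: %u bytes\n", HiOffset << 2);
  } else {
    std::printf("immediate offset: %u dwords\n", HiOffset);
  }
}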
@@ -1375,7 +1457,7 @@ void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) const {
  case AMDGPU::S_LOAD_DWORDX2_IMM:
  case AMDGPU::S_LOAD_DWORDX2_SGPR:
  case AMDGPU::S_LOAD_DWORDX4_IMM:
  case AMDGPU::S_LOAD_DWORDX4_SGPR: {
    unsigned NewOpcode = getVALUOp(*MI);
    unsigned RegOffset;
    unsigned ImmOffset;
@@ -1430,6 +1512,36 @@ void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) const {
    }
    MI->getOperand(1).setReg(SRsrc);
    MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(ImmOffset));

    const TargetRegisterClass *NewDstRC =
        RI.getRegClass(get(NewOpcode).OpInfo[0].RegClass);

    unsigned DstReg = MI->getOperand(0).getReg();
    unsigned NewDstReg = MRI.createVirtualRegister(NewDstRC);
    MRI.replaceRegWith(DstReg, NewDstReg);
    break;
  }
  case AMDGPU::S_LOAD_DWORDX8_IMM:
  case AMDGPU::S_LOAD_DWORDX8_SGPR: {
    MachineInstr *Lo, *Hi;
    splitSMRD(MI, &AMDGPU::SReg_128RegClass, AMDGPU::S_LOAD_DWORDX4_IMM,
              AMDGPU::S_LOAD_DWORDX4_SGPR, Lo, Hi);
    MI->eraseFromParent();
    moveSMRDToVALU(Lo, MRI);
    moveSMRDToVALU(Hi, MRI);
    break;
  }

  case AMDGPU::S_LOAD_DWORDX16_IMM:
  case AMDGPU::S_LOAD_DWORDX16_SGPR: {
    MachineInstr *Lo, *Hi;
    splitSMRD(MI, &AMDGPU::SReg_256RegClass, AMDGPU::S_LOAD_DWORDX8_IMM,
              AMDGPU::S_LOAD_DWORDX8_SGPR, Lo, Hi);
    MI->eraseFromParent();
    moveSMRDToVALU(Lo, MRI);
    moveSMRDToVALU(Hi, MRI);
    break;
  }
  }
}
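The two wide cases above lean on the subregister pairing splitSMRD selects: a half of N bytes covers N/4 consecutive 32-bit subregisters, and the high half begins right where the low half ends. A quick illustrative model of that pattern (again plain C++, not compiler code):

#include <cstdio>

// Reproduces the HalfSize -> (SubLo, SubHi) switch in splitSMRD as a rule.
int main() {
  for (unsigned HalfSize : {4u, 8u, 16u, 32u}) {
    unsigned Dwords = HalfSize / 4; // 32-bit subregisters per half
    std::printf("HalfSize %2u: lo = sub0..sub%u, hi = sub%u..sub%u\n",
                HalfSize, Dwords - 1, Dwords, 2 * Dwords - 1);
  }
}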
@@ -170,6 +170,12 @@ public:
  /// create new instructions and insert them before \p MI.
  void legalizeOperands(MachineInstr *MI) const;

  /// \brief Split an SMRD instruction into two smaller loads of half the
  /// size, storing the results in \p Lo and \p Hi.
  void splitSMRD(MachineInstr *MI, const TargetRegisterClass *HalfRC,
                 unsigned HalfImmOp, unsigned HalfSGPROp,
                 MachineInstr *&Lo, MachineInstr *&Hi) const;

  void moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) const;

  /// \brief Replace this instruction's opcode with the equivalent VALU
@@ -88,3 +88,31 @@ entry:
  store i32 %3, i32 addrspace(1)* %out
  ret void
}

; CHECK-LABEL: @s_load_imm_v8i32
; CHECK: BUFFER_LOAD_DWORDX4
; CHECK: BUFFER_LOAD_DWORDX4
define void @s_load_imm_v8i32(<8 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) {
entry:
  %tmp0 = tail call i32 @llvm.r600.read.tidig.x() #1
  %tmp1 = getelementptr inbounds i32 addrspace(2)* %in, i32 %tmp0
  %tmp2 = bitcast i32 addrspace(2)* %tmp1 to <8 x i32> addrspace(2)*
  %tmp3 = load <8 x i32> addrspace(2)* %tmp2, align 4
  store <8 x i32> %tmp3, <8 x i32> addrspace(1)* %out, align 32
  ret void
}

; CHECK-LABEL: @s_load_imm_v16i32
; CHECK: BUFFER_LOAD_DWORDX4
; CHECK: BUFFER_LOAD_DWORDX4
; CHECK: BUFFER_LOAD_DWORDX4
; CHECK: BUFFER_LOAD_DWORDX4
define void @s_load_imm_v16i32(<16 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) {
entry:
  %tmp0 = tail call i32 @llvm.r600.read.tidig.x() #1
  %tmp1 = getelementptr inbounds i32 addrspace(2)* %in, i32 %tmp0
  %tmp2 = bitcast i32 addrspace(2)* %tmp1 to <16 x i32> addrspace(2)*
  %tmp3 = load <16 x i32> addrspace(2)* %tmp2, align 4
  store <16 x i32> %tmp3, <16 x i32> addrspace(1)* %out, align 32
  ret void
}
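The CHECK counts fall straight out of the recursion: v8i32 splits once into two 128-bit halves (two BUFFER_LOAD_DWORDX4), while v16i32 splits into two 256-bit halves that each split again (four BUFFER_LOAD_DWORDX4).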