mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-14 13:57:51 +00:00
R600/SI: Teach moveToVALU how to handle more S_LOAD_* instructions
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@216220 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
5f52739370
commit
fdbf61d00d
@ -1367,6 +1367,88 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Split the SMRD load \p MI into two loads of half the width.
///
/// Each half is loaded into a fresh virtual register of class \p HalfRC and
/// the two halves are recombined with a REG_SEQUENCE that defines the
/// original destination operand of \p MI.  All new instructions are built
/// with \p MI as the insertion point.  \p MI itself is not erased here; that
/// is left to the caller.
///
/// \param MI          the SMRD instruction to split.
/// \param HalfRC      register class that holds one half of the result.
/// \param HalfImmOp   opcode of the half-width load taking an immediate offset.
/// \param HalfSGPROp  opcode of the half-width load taking an SGPR offset.
/// \param Lo          [out] the instruction loading the low half.
/// \param Hi          [out] the instruction loading the high half.
void SIInstrInfo::splitSMRD(MachineInstr *MI,
                            const TargetRegisterClass *HalfRC,
                            unsigned HalfImmOp, unsigned HalfSGPROp,
                            MachineInstr *&Lo, MachineInstr *&Hi) const {

  DebugLoc DL = MI->getDebugLoc();
  MachineBasicBlock *MBB = MI->getParent();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
  unsigned RegLo = MRI.createVirtualRegister(HalfRC);
  unsigned RegHi = MRI.createVirtualRegister(HalfRC);
  unsigned HalfSize = HalfRC->getSize();
  const MachineOperand *OffOp =
      getNamedOperand(*MI, AMDGPU::OpName::offset);
  const MachineOperand *SBase = getNamedOperand(*MI, AMDGPU::OpName::sbase);

  if (OffOp) {
    // Handle the _IMM variant
    unsigned LoOffset = OffOp->getImm();
    // HalfSize is in bytes, the immediate offset is in dwords.
    unsigned HiOffset = LoOffset + (HalfSize / 4);
    Lo = BuildMI(*MBB, MI, DL, get(HalfImmOp), RegLo)
                  .addOperand(*SBase)
                  .addImm(LoOffset);

    if (!isUInt<8>(HiOffset)) {
      // The high offset no longer fits in the 8-bit immediate field, so
      // materialize it in an SGPR and use the _SGPR form of the load.
      unsigned OffsetSGPR =
          MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
      BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32), OffsetSGPR)
              .addImm(HiOffset << 2);  // The immediate offset is in dwords,
                                       // but offset in register is in bytes.
      Hi = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegHi)
                    .addOperand(*SBase)
                    .addReg(OffsetSGPR);
    } else {
      Hi = BuildMI(*MBB, MI, DL, get(HalfImmOp), RegHi)
                    .addOperand(*SBase)
                    .addImm(HiOffset);
    }
  } else {
    // Handle the _SGPR variant
    MachineOperand *SOff = getNamedOperand(*MI, AMDGPU::OpName::soff);
    Lo = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegLo)
                  .addOperand(*SBase)
                  .addOperand(*SOff);
    // The high half starts HalfSize bytes after the low half.
    unsigned OffsetSGPR = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(*MBB, MI, DL, get(AMDGPU::S_ADD_I32), OffsetSGPR)
            .addOperand(*SOff)
            .addImm(HalfSize);
    // The high-half load must define RegHi: it is consumed by the
    // REG_SEQUENCE below, exactly like RegLo.
    Hi = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegHi)
                  .addOperand(*SBase)
                  .addReg(OffsetSGPR);
  }

  // Pick the sub-register indices that place each half inside the full-width
  // destination register.
  unsigned SubLo, SubHi;
  switch (HalfSize) {
    case 4:
      SubLo = AMDGPU::sub0;
      SubHi = AMDGPU::sub1;
      break;
    case 8:
      SubLo = AMDGPU::sub0_sub1;
      SubHi = AMDGPU::sub2_sub3;
      break;
    case 16:
      SubLo = AMDGPU::sub0_sub1_sub2_sub3;
      SubHi = AMDGPU::sub4_sub5_sub6_sub7;
      break;
    case 32:
      SubLo = AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7;
      SubHi = AMDGPU::sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15;
      break;
    default:
      llvm_unreachable("Unhandled HalfSize");
  }

  // Recombine the halves into the original destination of MI.
  BuildMI(*MBB, MI, DL, get(AMDGPU::REG_SEQUENCE))
          .addOperand(MI->getOperand(0))
          .addReg(RegLo)
          .addImm(SubLo)
          .addReg(RegHi)
          .addImm(SubHi);
}
|
||||||
|
|
||||||
void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) const {
|
void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) const {
|
||||||
MachineBasicBlock *MBB = MI->getParent();
|
MachineBasicBlock *MBB = MI->getParent();
|
||||||
switch (MI->getOpcode()) {
|
switch (MI->getOpcode()) {
|
||||||
@ -1375,7 +1457,7 @@ void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) con
|
|||||||
case AMDGPU::S_LOAD_DWORDX2_IMM:
|
case AMDGPU::S_LOAD_DWORDX2_IMM:
|
||||||
case AMDGPU::S_LOAD_DWORDX2_SGPR:
|
case AMDGPU::S_LOAD_DWORDX2_SGPR:
|
||||||
case AMDGPU::S_LOAD_DWORDX4_IMM:
|
case AMDGPU::S_LOAD_DWORDX4_IMM:
|
||||||
case AMDGPU::S_LOAD_DWORDX4_SGPR:
|
case AMDGPU::S_LOAD_DWORDX4_SGPR: {
|
||||||
unsigned NewOpcode = getVALUOp(*MI);
|
unsigned NewOpcode = getVALUOp(*MI);
|
||||||
unsigned RegOffset;
|
unsigned RegOffset;
|
||||||
unsigned ImmOffset;
|
unsigned ImmOffset;
|
||||||
@ -1430,6 +1512,36 @@ void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) con
|
|||||||
}
|
}
|
||||||
MI->getOperand(1).setReg(SRsrc);
|
MI->getOperand(1).setReg(SRsrc);
|
||||||
MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(ImmOffset));
|
MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(ImmOffset));
|
||||||
|
|
||||||
|
const TargetRegisterClass *NewDstRC =
|
||||||
|
RI.getRegClass(get(NewOpcode).OpInfo[0].RegClass);
|
||||||
|
|
||||||
|
unsigned DstReg = MI->getOperand(0).getReg();
|
||||||
|
unsigned NewDstReg = MRI.createVirtualRegister(NewDstRC);
|
||||||
|
MRI.replaceRegWith(DstReg, NewDstReg);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case AMDGPU::S_LOAD_DWORDX8_IMM:
|
||||||
|
case AMDGPU::S_LOAD_DWORDX8_SGPR: {
|
||||||
|
MachineInstr *Lo, *Hi;
|
||||||
|
splitSMRD(MI, &AMDGPU::SReg_128RegClass, AMDGPU::S_LOAD_DWORDX4_IMM,
|
||||||
|
AMDGPU::S_LOAD_DWORDX4_SGPR, Lo, Hi);
|
||||||
|
MI->eraseFromParent();
|
||||||
|
moveSMRDToVALU(Lo, MRI);
|
||||||
|
moveSMRDToVALU(Hi, MRI);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
case AMDGPU::S_LOAD_DWORDX16_IMM:
|
||||||
|
case AMDGPU::S_LOAD_DWORDX16_SGPR: {
|
||||||
|
MachineInstr *Lo, *Hi;
|
||||||
|
splitSMRD(MI, &AMDGPU::SReg_256RegClass, AMDGPU::S_LOAD_DWORDX8_IMM,
|
||||||
|
AMDGPU::S_LOAD_DWORDX8_SGPR, Lo, Hi);
|
||||||
|
MI->eraseFromParent();
|
||||||
|
moveSMRDToVALU(Lo, MRI);
|
||||||
|
moveSMRDToVALU(Hi, MRI);
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -170,6 +170,12 @@ public:
|
|||||||
/// create new instruction and insert them before \p MI.
|
/// create new instruction and insert them before \p MI.
|
||||||
void legalizeOperands(MachineInstr *MI) const;
|
void legalizeOperands(MachineInstr *MI) const;
|
||||||
|
|
||||||
|
/// \brief Split an SMRD instruction into two smaller loads of half the
|
||||||
|
/// size, storing the results in \p Lo and \p Hi.
|
||||||
|
void splitSMRD(MachineInstr *MI, const TargetRegisterClass *HalfRC,
|
||||||
|
unsigned HalfImmOp, unsigned HalfSGPROp,
|
||||||
|
MachineInstr *&Lo, MachineInstr *&Hi) const;
|
||||||
|
|
||||||
void moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) const;
|
void moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) const;
|
||||||
|
|
||||||
/// \brief Replace this instruction's opcode with the equivalent VALU
|
/// \brief Replace this instruction's opcode with the equivalent VALU
|
||||||
|
@ -88,3 +88,31 @@ entry:
|
|||||||
store i32 %3, i32 addrspace(1)* %out
|
store i32 %3, i32 addrspace(1)* %out
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; Loads a <8 x i32> through an addrspace(2) pointer that is indexed by the
; result of llvm.r600.read.tidig.x and stores it to %out. The checks below
; expect two BUFFER_LOAD_DWORDX4 instructions in the output.
; CHECK-LABEL: @s_load_imm_v8i32
; CHECK: BUFFER_LOAD_DWORDX4
; CHECK: BUFFER_LOAD_DWORDX4
define void @s_load_imm_v8i32(<8 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) {
entry:
  %tid = tail call i32 @llvm.r600.read.tidig.x() #1
  %elt.ptr = getelementptr inbounds i32 addrspace(2)* %in, i32 %tid
  %vec.ptr = bitcast i32 addrspace(2)* %elt.ptr to <8 x i32> addrspace(2)*
  %vec = load <8 x i32> addrspace(2)* %vec.ptr, align 4
  store <8 x i32> %vec, <8 x i32> addrspace(1)* %out, align 32
  ret void
}
|
||||||
|
|
||||||
|
; Loads a <16 x i32> through an addrspace(2) pointer that is indexed by the
; result of llvm.r600.read.tidig.x and stores it to %out. The checks below
; expect four BUFFER_LOAD_DWORDX4 instructions in the output.
; CHECK-LABEL: @s_load_imm_v16i32
; CHECK: BUFFER_LOAD_DWORDX4
; CHECK: BUFFER_LOAD_DWORDX4
; CHECK: BUFFER_LOAD_DWORDX4
; CHECK: BUFFER_LOAD_DWORDX4
define void @s_load_imm_v16i32(<16 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) {
entry:
  %tid = tail call i32 @llvm.r600.read.tidig.x() #1
  %elt.ptr = getelementptr inbounds i32 addrspace(2)* %in, i32 %tid
  %vec.ptr = bitcast i32 addrspace(2)* %elt.ptr to <16 x i32> addrspace(2)*
  %vec = load <16 x i32> addrspace(2)* %vec.ptr, align 4
  store <16 x i32> %vec, <16 x i32> addrspace(1)* %out, align 32
  ret void
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user