mirror of
https://github.com/RPCSX/llvm.git
synced 2024-11-25 12:50:00 +00:00
R600/SI: Handle S_MOV_B64 in SIInstrInfo::moveToVALU()
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@204475 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
af4c7da306
commit
1f1c0495d0
@ -496,6 +496,9 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
|
||||
case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE;
|
||||
case AMDGPU::COPY: return AMDGPU::COPY;
|
||||
case AMDGPU::PHI: return AMDGPU::PHI;
|
||||
case AMDGPU::S_MOV_B32:
|
||||
return MI.getOperand(1).isReg() ?
|
||||
TargetOpcode::COPY : AMDGPU::V_MOV_B32_e32;
|
||||
case AMDGPU::S_ADD_I32: return AMDGPU::V_ADD_I32_e32;
|
||||
case AMDGPU::S_ADDC_U32: return AMDGPU::V_ADDC_U32_e32;
|
||||
case AMDGPU::S_SUB_I32: return AMDGPU::V_SUB_I32_e32;
|
||||
@ -680,12 +683,57 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
|
||||
|
||||
while (!Worklist.empty()) {
|
||||
MachineInstr *Inst = Worklist.pop_back_val();
|
||||
MachineBasicBlock *MBB = Inst->getParent();
|
||||
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
|
||||
|
||||
// Handle some special cases
|
||||
switch(Inst->getOpcode()) {
|
||||
case AMDGPU::S_MOV_B64: {
|
||||
DebugLoc DL = Inst->getDebugLoc();
|
||||
|
||||
// If the source operand is a register we can replace this with a
|
||||
// copy
|
||||
if (Inst->getOperand(1).isReg()) {
|
||||
MachineInstr *Copy = BuildMI(*MBB, Inst, DL,
|
||||
get(TargetOpcode::COPY))
|
||||
.addOperand(Inst->getOperand(0))
|
||||
.addOperand(Inst->getOperand(1));
|
||||
Worklist.push_back(Copy);
|
||||
} else {
|
||||
// Otherwise, we need to split this into two movs, because there is
|
||||
// no 64-bit VALU move instruction.
|
||||
unsigned LoDst, HiDst, Dst;
|
||||
LoDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
|
||||
HiDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
|
||||
Dst = MRI.createVirtualRegister(
|
||||
MRI.getRegClass(Inst->getOperand(0).getReg()));
|
||||
|
||||
MachineInstr *Lo = BuildMI(*MBB, Inst, DL, get(AMDGPU::S_MOV_B32),
|
||||
LoDst)
|
||||
.addImm(Inst->getOperand(1).getImm() & 0xFFFFFFFF);
|
||||
MachineInstr *Hi = BuildMI(*MBB, Inst, DL, get(AMDGPU::S_MOV_B32),
|
||||
HiDst)
|
||||
.addImm(Inst->getOperand(1).getImm() >> 32);
|
||||
|
||||
BuildMI(*MBB, Inst, DL, get(TargetOpcode::REG_SEQUENCE), Dst)
|
||||
.addReg(LoDst)
|
||||
.addImm(AMDGPU::sub0)
|
||||
.addReg(HiDst)
|
||||
.addImm(AMDGPU::sub1);
|
||||
|
||||
MRI.replaceRegWith(Inst->getOperand(0).getReg(), Dst);
|
||||
Worklist.push_back(Lo);
|
||||
Worklist.push_back(Hi);
|
||||
}
|
||||
Inst->eraseFromParent();
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
unsigned NewOpcode = getVALUOp(*Inst);
|
||||
if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END)
|
||||
continue;
|
||||
|
||||
MachineRegisterInfo &MRI = Inst->getParent()->getParent()->getRegInfo();
|
||||
|
||||
// Use the new VALU Opcode.
|
||||
const MCInstrDesc &NewDesc = get(NewOpcode);
|
||||
Inst->setDesc(NewDesc);
|
||||
|
42
test/CodeGen/R600/salu-to-valu.ll
Normal file
42
test/CodeGen/R600/salu-to-valu.ll
Normal file
@ -0,0 +1,42 @@
|
||||
; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
|
||||
|
||||
; In this test both the pointer and the offset operands to the
|
||||
; BUFFER_LOAD instructions end up being stored in vgprs. This
|
||||
; requires us to add the pointer and offset together, store the
|
||||
; result in the offset operand (vaddr), and then store 0 in an
|
||||
; sgpr register pair and use that for the pointer operand
|
||||
; (low 64 bits of srsrc).
|
||||
|
||||
; CHECK-LABEL: @mubuf
|
||||
; Make sure we aren't using VGPRs for the source operand of S_MOV_B64
|
||||
; CHECK-NOT: S_MOV_B64 s[{{[0-9]+:[0-9]+}}], v
|
||||
define void @mubuf(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
|
||||
entry:
|
||||
%0 = call i32 @llvm.r600.read.tidig.x() #1
|
||||
%1 = call i32 @llvm.r600.read.tidig.y() #1
|
||||
%2 = sext i32 %0 to i64
|
||||
%3 = sext i32 %1 to i64
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%4 = phi i64 [0, %entry], [%5, %loop]
|
||||
%5 = add i64 %2, %4
|
||||
%6 = getelementptr i8 addrspace(1)* %in, i64 %5
|
||||
%7 = load i8 addrspace(1)* %6, align 1
|
||||
%8 = or i64 %5, 1
|
||||
%9 = getelementptr i8 addrspace(1)* %in, i64 %8
|
||||
%10 = load i8 addrspace(1)* %9, align 1
|
||||
%11 = add i8 %7, %10
|
||||
%12 = sext i8 %11 to i32
|
||||
store i32 %12, i32 addrspace(1)* %out
|
||||
%13 = icmp slt i64 %5, 10
|
||||
br i1 %13, label %loop, label %done
|
||||
|
||||
done:
|
||||
ret void
|
||||
}
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() #1
|
||||
declare i32 @llvm.r600.read.tidig.y() #1
|
||||
|
||||
attributes #1 = { nounwind readnone }
|
Loading…
Reference in New Issue
Block a user