R600/SI: Handle S_MOV_B64 in SIInstrInfo::moveToVALU()

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@204475 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Tom Stellard 2014-03-21 15:51:54 +00:00
parent af4c7da306
commit 1f1c0495d0
2 changed files with 92 additions and 2 deletions

View File

@ -496,6 +496,9 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE;
case AMDGPU::COPY: return AMDGPU::COPY;
case AMDGPU::PHI: return AMDGPU::PHI;
case AMDGPU::S_MOV_B32:
return MI.getOperand(1).isReg() ?
TargetOpcode::COPY : AMDGPU::V_MOV_B32_e32;
case AMDGPU::S_ADD_I32: return AMDGPU::V_ADD_I32_e32;
case AMDGPU::S_ADDC_U32: return AMDGPU::V_ADDC_U32_e32;
case AMDGPU::S_SUB_I32: return AMDGPU::V_SUB_I32_e32;
@ -680,12 +683,57 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
while (!Worklist.empty()) {
MachineInstr *Inst = Worklist.pop_back_val();
MachineBasicBlock *MBB = Inst->getParent();
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
// Handle some special cases
switch(Inst->getOpcode()) {
// Special case: S_MOV_B64 is rewritten here by hand and then erased, so it
// never reaches the generic getVALUOp() path below the switch.
case AMDGPU::S_MOV_B64: {
DebugLoc DL = Inst->getDebugLoc();
// If the source operand is a register, we can replace this with a
// COPY.
if (Inst->getOperand(1).isReg()) {
// The COPY reuses the original destination (operand 0) and source
// (operand 1); Inst is passed to BuildMI as the insertion point.
MachineInstr *Copy = BuildMI(*MBB, Inst, DL,
get(TargetOpcode::COPY))
.addOperand(Inst->getOperand(0))
.addOperand(Inst->getOperand(1));
// Queue the new COPY so a later iteration of the worklist loop visits it.
Worklist.push_back(Copy);
} else {
// Otherwise, we need to split this into two movs, because there is
// no 64-bit VALU move instruction.
unsigned LoDst, HiDst, Dst;
// Two fresh 32-bit registers hold the halves of the immediate.
LoDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
HiDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
// Dst is a new virtual register with the same register class as the
// original destination.
Dst = MRI.createVirtualRegister(
MRI.getRegClass(Inst->getOperand(0).getReg()));
// Low half: the immediate masked to its low 32 bits.
MachineInstr *Lo = BuildMI(*MBB, Inst, DL, get(AMDGPU::S_MOV_B32),
LoDst)
.addImm(Inst->getOperand(1).getImm() & 0xFFFFFFFF);
// High half: the immediate shifted right by 32.
MachineInstr *Hi = BuildMI(*MBB, Inst, DL, get(AMDGPU::S_MOV_B32),
HiDst)
.addImm(Inst->getOperand(1).getImm() >> 32);
// Recombine the halves: LoDst becomes sub0 and HiDst becomes sub1 of Dst.
BuildMI(*MBB, Inst, DL, get(TargetOpcode::REG_SEQUENCE), Dst)
.addReg(LoDst)
.addImm(AMDGPU::sub0)
.addReg(HiDst)
.addImm(AMDGPU::sub1);
// Switch the original destination register over to Dst before Inst is
// erased below.
MRI.replaceRegWith(Inst->getOperand(0).getReg(), Dst);
// Queue both 32-bit moves so the worklist loop visits them in turn.
Worklist.push_back(Lo);
Worklist.push_back(Hi);
}
// The original S_MOV_B64 has been fully replaced on either path.
Inst->eraseFromParent();
// Inst no longer exists; skip the generic opcode rewrite for this iteration.
continue;
}
}
unsigned NewOpcode = getVALUOp(*Inst);
if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END)
continue;
MachineRegisterInfo &MRI = Inst->getParent()->getParent()->getRegInfo();
// Use the new VALU Opcode.
const MCInstrDesc &NewDesc = get(NewOpcode);
Inst->setDesc(NewDesc);

View File

@ -0,0 +1,42 @@
; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
; In this test both the pointer and the offset operands to the
; BUFFER_LOAD instructions end up being stored in VGPRs. This
; requires us to add the pointer and offset together, store the
; result in the offset operand (vaddr), and then store 0 in an
; SGPR pair and use that for the pointer operand
; (low 64 bits of srsrc).
; CHECK-LABEL: @mubuf
; Make sure we aren't using VGPRs for the source operand of S_MOV_B64
; CHECK-NOT: S_MOV_B64 s[{{[0-9]+:[0-9]+}}], v
; Test kernel: a loop that loads two bytes from %in at 64-bit offsets built
; from a thread-id intrinsic and the loop-carried value, adds them, and
; stores the sign-extended sum to %out.
define void @mubuf(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
; Read the tidig.x / tidig.y intrinsics and widen both results to i64.
%0 = call i32 @llvm.r600.read.tidig.x() #1
%1 = call i32 @llvm.r600.read.tidig.y() #1
%2 = sext i32 %0 to i64
; Note: %3 is not referenced anywhere else in this function.
%3 = sext i32 %1 to i64
br label %loop
loop:
; Loop-carried offset: 0 on entry from %entry, otherwise the previous %5.
%4 = phi i64 [0, %entry], [%5, %loop]
%5 = add i64 %2, %4
; First byte: load from %in at offset %5.
%6 = getelementptr i8 addrspace(1)* %in, i64 %5
%7 = load i8 addrspace(1)* %6, align 1
; Second byte: load from %in at offset (%5 | 1).
%8 = or i64 %5, 1
%9 = getelementptr i8 addrspace(1)* %in, i64 %8
%10 = load i8 addrspace(1)* %9, align 1
; Sum the two bytes, sign-extend to i32, and store the result to %out.
%11 = add i8 %7, %10
%12 = sext i8 %11 to i32
store i32 %12, i32 addrspace(1)* %out
; Keep looping while the offset %5 is less than 10 (signed compare).
%13 = icmp slt i64 %5, 10
br i1 %13, label %loop, label %done
done:
ret void
}
declare i32 @llvm.r600.read.tidig.x() #1
declare i32 @llvm.r600.read.tidig.y() #1
attributes #1 = { nounwind readnone }