diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp index 3ed8dfa325f..c8a52971efa 100644 --- a/lib/Target/R600/SIInstrInfo.cpp +++ b/lib/Target/R600/SIInstrInfo.cpp @@ -496,6 +496,9 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) { case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE; case AMDGPU::COPY: return AMDGPU::COPY; case AMDGPU::PHI: return AMDGPU::PHI; + case AMDGPU::S_MOV_B32: + return MI.getOperand(1).isReg() ? + TargetOpcode::COPY : AMDGPU::V_MOV_B32_e32; case AMDGPU::S_ADD_I32: return AMDGPU::V_ADD_I32_e32; case AMDGPU::S_ADDC_U32: return AMDGPU::V_ADDC_U32_e32; case AMDGPU::S_SUB_I32: return AMDGPU::V_SUB_I32_e32; @@ -680,12 +683,57 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { while (!Worklist.empty()) { MachineInstr *Inst = Worklist.pop_back_val(); + MachineBasicBlock *MBB = Inst->getParent(); + MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + + // Handle some special cases + switch(Inst->getOpcode()) { + case AMDGPU::S_MOV_B64: { + DebugLoc DL = Inst->getDebugLoc(); + + // If the source operand is a register we can replace this with a + // copy + if (Inst->getOperand(1).isReg()) { + MachineInstr *Copy = BuildMI(*MBB, Inst, DL, + get(TargetOpcode::COPY)) + .addOperand(Inst->getOperand(0)) + .addOperand(Inst->getOperand(1)); + Worklist.push_back(Copy); + } else { + // Otherwise, we need to split this into two movs, because there is + // no 64-bit VALU move instruction. 
+ unsigned LoDst, HiDst, Dst; + LoDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); + HiDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); + Dst = MRI.createVirtualRegister( + MRI.getRegClass(Inst->getOperand(0).getReg())); + + MachineInstr *Lo = BuildMI(*MBB, Inst, DL, get(AMDGPU::S_MOV_B32), + LoDst) + .addImm(Inst->getOperand(1).getImm() & 0xFFFFFFFF); + MachineInstr *Hi = BuildMI(*MBB, Inst, DL, get(AMDGPU::S_MOV_B32), + HiDst) + .addImm(Inst->getOperand(1).getImm() >> 32); + + BuildMI(*MBB, Inst, DL, get(TargetOpcode::REG_SEQUENCE), Dst) + .addReg(LoDst) + .addImm(AMDGPU::sub0) + .addReg(HiDst) + .addImm(AMDGPU::sub1); + + MRI.replaceRegWith(Inst->getOperand(0).getReg(), Dst); + Worklist.push_back(Lo); + Worklist.push_back(Hi); + } + Inst->eraseFromParent(); + continue; + } + } + unsigned NewOpcode = getVALUOp(*Inst); if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) continue; - MachineRegisterInfo &MRI = Inst->getParent()->getParent()->getRegInfo(); - // Use the new VALU Opcode. const MCInstrDesc &NewDesc = get(NewOpcode); Inst->setDesc(NewDesc); diff --git a/test/CodeGen/R600/salu-to-valu.ll b/test/CodeGen/R600/salu-to-valu.ll new file mode 100644 index 00000000000..c989c9d6722 --- /dev/null +++ b/test/CodeGen/R600/salu-to-valu.ll @@ -0,0 +1,42 @@ +; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s + +; In this test both the pointer and the offset operands to the +; BUFFER_LOAD instructions end up being stored in vgprs. This +; requires us to add the pointer and offset together, store the +; result in the offset operand (vaddr), and then store 0 in an +; sgpr register pair and use that for the pointer operand +; (low 64-bits of srsrc). 
+
+; CHECK-LABEL: @mubuf
+; Make sure we aren't using VGPRs for the source operand of S_MOV_B64
+; CHECK-NOT: S_MOV_B64 s[{{[0-9]+:[0-9]+}}], v
+define void @mubuf(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { ; loop that loads two bytes from %in per iteration and stores their sum to %out
+entry:
+  %0 = call i32 @llvm.r600.read.tidig.x() #1 ; value of the tidig.x intrinsic (presumably a per-thread id; confirm)
+  %1 = call i32 @llvm.r600.read.tidig.y() #1 ; NOTE(review): only consumed by %3, which itself has no uses
+  %2 = sext i32 %0 to i64 ; widened to i64 so it can be added to the i64 loop value
+  %3 = sext i32 %1 to i64 ; NOTE(review): never referenced below; confirm it is intentional
+  br label %loop
+
+loop: ; single-block loop; branches back to itself
+  %4 = phi i64 [0, %entry], [%5, %loop] ; 0 on first entry, previous iteration's %5 afterwards
+  %5 = add i64 %2, %4 ; accumulates %2 once per iteration
+  %6 = getelementptr i8 addrspace(1)* %in, i64 %5 ; address of byte %in[%5]
+  %7 = load i8 addrspace(1)* %6, align 1 ; first byte load
+  %8 = or i64 %5, 1 ; same offset with bit 0 forced to 1
+  %9 = getelementptr i8 addrspace(1)* %in, i64 %8 ; address of byte %in[%8]
+  %10 = load i8 addrspace(1)* %9, align 1 ; second byte load
+  %11 = add i8 %7, %10 ; 8-bit sum of the two loaded bytes
+  %12 = sext i8 %11 to i32 ; sign-extend the sum to the i32 element type of %out
+  store i32 %12, i32 addrspace(1)* %out ; every iteration writes the same location, %out itself
+  %13 = icmp slt i64 %5, 10 ; signed compare: keep looping while %5 < 10
+  br i1 %13, label %loop, label %done
+
+done:
+  ret void
+}
+
+declare i32 @llvm.r600.read.tidig.x() #1
+declare i32 @llvm.r600.read.tidig.y() #1
+
+attributes #1 = { nounwind readnone } ; attribute group referenced by both intrinsic declarations and both call sites