mirror of
https://github.com/RPCSX/llvm.git
synced 2024-11-25 04:39:44 +00:00
R600/SI: Custom lower SI_IF and SI_ELSE to avoid machine verifier errors
SI_IF and SI_ELSE are terminators which also produce a value. For these instructions ISel always inserts a COPY to move their value to another basic block. This COPY ends up between SI_(IF|ELSE) and the S_BRANCH* instruction at the end of the block. This breaks MachineBasicBlock::getFirstTerminator() and also the machine verifier which assumes that terminators are grouped together at the end of blocks. To solve this we coalesce the copy away right after ISel to make sure there are no instructions in between terminators at the end of blocks. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@207591 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
2a90e446c0
commit
40e455d992
@ -444,19 +444,48 @@ SDValue SITargetLowering::LowerFormalArguments(
|
||||
return Chain;
|
||||
}
|
||||
|
||||
/// Usually ISel will insert a copy between a terminator instruction that outputs
|
||||
/// a value and the S_BRANCH* at the end of the block. This causes
|
||||
/// MachineBasicBlock::getFirstTerminator() to return the incorrect value,
|
||||
/// so we want to make sure there are no copies between terminators at the
|
||||
/// end of blocks.
|
||||
static void LowerTerminatorWithOutput(unsigned Opcode, MachineBasicBlock *BB,
|
||||
MachineInstr *MI,
|
||||
const TargetInstrInfo *TII,
|
||||
MachineRegisterInfo &MRI) {
|
||||
unsigned DstReg = MI->getOperand(0).getReg();
|
||||
// Usually ISel will insert a copy between the SI_IF_NON_TERM instruction
|
||||
// and the S_BRANCH* terminator. We want to replace SI_IF_NON_TERM with
|
||||
// SI_IF and we can't have any instructions between S_BRANCH* and SI_IF,
|
||||
// since they are both terminators
|
||||
assert(MRI.hasOneUse(DstReg));
|
||||
MachineOperand &Use = *MRI.use_begin(DstReg);
|
||||
MachineInstr *UseMI = Use.getParent();
|
||||
assert(UseMI->getOpcode() == AMDGPU::COPY);
|
||||
|
||||
MRI.replaceRegWith(UseMI->getOperand(0).getReg(), DstReg);
|
||||
UseMI->eraseFromParent();
|
||||
BuildMI(*BB, BB->getFirstTerminator(), MI->getDebugLoc(),
|
||||
TII->get(Opcode))
|
||||
.addOperand(MI->getOperand(0))
|
||||
.addOperand(MI->getOperand(1))
|
||||
.addOperand(MI->getOperand(2));
|
||||
MI->eraseFromParent();
|
||||
}
|
||||
|
||||
MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
|
||||
MachineInstr * MI, MachineBasicBlock * BB) const {
|
||||
|
||||
MachineBasicBlock::iterator I = *MI;
|
||||
const SIInstrInfo *TII =
|
||||
static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
|
||||
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
|
||||
|
||||
switch (MI->getOpcode()) {
|
||||
default:
|
||||
return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
|
||||
case AMDGPU::BRANCH: return BB;
|
||||
case AMDGPU::SI_ADDR64_RSRC: {
|
||||
const SIInstrInfo *TII =
|
||||
static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
|
||||
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
|
||||
unsigned SuperReg = MI->getOperand(0).getReg();
|
||||
unsigned SubRegLo = MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
|
||||
unsigned SubRegHi = MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
|
||||
@ -481,9 +510,13 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
|
||||
MI->eraseFromParent();
|
||||
break;
|
||||
}
|
||||
case AMDGPU::V_SUB_F64: {
|
||||
const SIInstrInfo *TII =
|
||||
static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
|
||||
case AMDGPU::SI_IF_NON_TERM:
|
||||
LowerTerminatorWithOutput(AMDGPU::SI_IF, BB, MI, TII, MRI);
|
||||
break;
|
||||
case AMDGPU::SI_ELSE_NON_TERM:
|
||||
LowerTerminatorWithOutput(AMDGPU::SI_ELSE, BB, MI, TII, MRI);
|
||||
break;
|
||||
case AMDGPU::V_SUB_F64:
|
||||
BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_ADD_F64),
|
||||
MI->getOperand(0).getReg())
|
||||
.addReg(MI->getOperand(1).getReg())
|
||||
@ -495,11 +528,9 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
|
||||
.addImm(2); /* NEG */
|
||||
MI->eraseFromParent();
|
||||
break;
|
||||
}
|
||||
|
||||
case AMDGPU::SI_RegisterStorePseudo: {
|
||||
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
|
||||
const SIInstrInfo *TII =
|
||||
static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
|
||||
unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
|
||||
MachineInstrBuilder MIB =
|
||||
BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::SI_RegisterStore),
|
||||
|
@ -1411,21 +1411,38 @@ def LOAD_CONST : AMDGPUShaderInst <
|
||||
let mayLoad = 1, mayStore = 1, hasSideEffects = 1,
|
||||
Uses = [EXEC], Defs = [EXEC] in {
|
||||
|
||||
let usesCustomInserter = 1 in {
|
||||
|
||||
def SI_IF_NON_TERM : InstSI <
|
||||
(outs SReg_64:$dst),
|
||||
(ins SReg_64:$vcc, brtarget:$target), "",
|
||||
[(set i64:$dst, (int_SI_if i1:$vcc, bb:$target))]
|
||||
>;
|
||||
|
||||
def SI_ELSE_NON_TERM : InstSI <
|
||||
(outs SReg_64:$dst),
|
||||
(ins SReg_64:$src, brtarget:$target),
|
||||
"",
|
||||
[(set i64:$dst, (int_SI_else i64:$src, bb:$target))]
|
||||
> {
|
||||
let Constraints = "$src = $dst";
|
||||
}
|
||||
|
||||
} // usesCustomInserter = 1
|
||||
|
||||
let isBranch = 1, isTerminator = 1 in {
|
||||
|
||||
def SI_IF : InstSI <
|
||||
def SI_IF: InstSI <
|
||||
(outs SReg_64:$dst),
|
||||
(ins SReg_64:$vcc, brtarget:$target),
|
||||
"SI_IF $dst, $vcc, $target",
|
||||
[(set i64:$dst, (int_SI_if i1:$vcc, bb:$target))]
|
||||
"", []
|
||||
>;
|
||||
|
||||
def SI_ELSE : InstSI <
|
||||
(outs SReg_64:$dst),
|
||||
(ins SReg_64:$src, brtarget:$target),
|
||||
"SI_ELSE $dst, $src, $target",
|
||||
[(set i64:$dst, (int_SI_else i64:$src, bb:$target))]> {
|
||||
|
||||
"", []
|
||||
> {
|
||||
let Constraints = "$src = $dst";
|
||||
}
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
|
||||
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() readnone
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck %s
|
||||
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck %s
|
||||
|
||||
; Test that codegenprepare understands address space sizes
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
; XFAIL: *
|
||||
; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI %s
|
||||
; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs| FileCheck --check-prefix=SI %s
|
||||
|
||||
declare i32 @llvm.SI.tid() readnone
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: not llc -march=r600 -mcpu=SI < %s 2>&1 | FileCheck %s
|
||||
; RUN: not llc -march=r600 -mcpu=SI -verify-machineinstrs< %s 2>&1 | FileCheck %s
|
||||
; RUN: not llc -march=r600 -mcpu=cypress < %s 2>&1 | FileCheck %s
|
||||
|
||||
; CHECK: error: unsupported call to function defined_function in test_call
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
|
||||
; FUNC-LABEL: @anyext_load_i8:
|
||||
; EG: AND_INT
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
|
||||
; FUNC-LABEL: @extract_vector_elt_v2i16
|
||||
; SI: BUFFER_LOAD_USHORT
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck %s
|
||||
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck %s
|
||||
|
||||
define void @use_gep_address_space([1024 x i32] addrspace(3)* %array) nounwind {
|
||||
; CHECK-LABEL: @use_gep_address_space:
|
||||
|
@ -1,6 +1,6 @@
|
||||
; REQUIRES: asserts
|
||||
; XFAIL: *
|
||||
; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
|
||||
|
||||
|
||||
; SI-LABEL: @dynamic_insertelement_v2f64:
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc < %s -march=r600 --mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
|
||||
; RUN: llc < %s -march=r600 --mcpu=SI | FileCheck %s --check-prefix=SI-CHECK
|
||||
; RUN: llc < %s -march=r600 --mcpu=SI -verify-machineinstrs| FileCheck %s --check-prefix=SI-CHECK
|
||||
|
||||
; R600-CHECK-LABEL: @sqrt_f32
|
||||
; R600-CHECK: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[2].Z
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
|
||||
|
||||
|
||||
; SI-LABEL: @global_copy_i1_to_i1
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
|
||||
|
||||
; SI-LABEL: @local_i32_load
|
||||
; SI: DS_READ_B32 [[REG:v[0-9]+]], v{{[0-9]+}}, 0x1c, [M0]
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: opt -basicaa -loop-idiom -S < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600 --check-prefix=FUNC %s
|
||||
; RUN: opt -basicaa -loop-idiom -S < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI --check-prefix=FUNC %s
|
||||
; RUN: opt -basicaa -loop-idiom -S < %s -march=r600 -mcpu=SI -verify-machineinstrs| FileCheck --check-prefix=SI --check-prefix=FUNC %s
|
||||
|
||||
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
|
||||
target triple = "r600--"
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
|
||||
|
||||
declare i32 @llvm.SI.tid() nounwind readnone
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
; XFAIL: *
|
||||
; REQUIRES: asserts
|
||||
; RUN: llc -O0 -march=r600 -mcpu=SI < %s | FileCheck %s -check-prefix=SI
|
||||
; RUN: llc -O0 -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck %s -check-prefix=SI
|
||||
|
||||
declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600 --check-prefix=FUNC %s
|
||||
;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI --check-prefix=FUNC %s
|
||||
;RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs| FileCheck --check-prefix=SI --check-prefix=FUNC %s
|
||||
|
||||
; FUNC-LABEL: @setcc_v2i32
|
||||
; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW]}}, KC0[3].X, KC0[3].Z
|
||||
|
@ -1,4 +1,4 @@
|
||||
;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI --check-prefix=FUNC %s
|
||||
;RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs| FileCheck --check-prefix=SI --check-prefix=FUNC %s
|
||||
|
||||
; XXX: Merge this into setcc, once R600 supports 64-bit operations
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
|
||||
|
||||
; Copy VGPR -> SGPR used twice as an instruction operand, which is then
|
||||
; used in an REG_SEQUENCE that also needs to be handled.
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
|
||||
; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s
|
||||
|
||||
; This test checks that no VGPR to SGPR copies are created by the register
|
||||
; allocator.
|
||||
|
@ -1,6 +1,6 @@
|
||||
; REQUIRES: asserts
|
||||
; XFAIL: *
|
||||
; RUN: llc -march=r600 -mcpu=SI -asm-verbose=false < %s | FileCheck %s
|
||||
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs -asm-verbose=false < %s | FileCheck %s
|
||||
|
||||
|
||||
define void @test(i32 addrspace(1)* %g, i8 addrspace(3)* %l, i32 %x) nounwind {
|
||||
|
@ -1,5 +1,5 @@
|
||||
; XFAIL: *
|
||||
; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI
|
||||
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI
|
||||
|
||||
; SI-LABEL: @global_store_v3i64:
|
||||
; SI: BUFFER_STORE_DWORDX4
|
||||
|
@ -1,6 +1,6 @@
|
||||
; REQUIRES: asserts
|
||||
; XFAIL: *
|
||||
; RUN: llc -march=r600 -mcpu=SI < %s
|
||||
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s
|
||||
|
||||
define void @store_vector_ptrs(<4 x i32*>* %out, <4 x [1024 x i32]*> %array) nounwind {
|
||||
%p = getelementptr <4 x [1024 x i32]*> %array, <4 x i16> zeroinitializer, <4 x i16> <i16 16, i16 16, i16 16, i16 16>
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
|
||||
|
||||
|
||||
; SI-LABEL: @global_truncstore_i32_to_i1
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
|
||||
|
||||
declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
;XUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI --check-prefix=FUNC %s
|
||||
;XUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs| FileCheck --check-prefix=SI --check-prefix=FUNC %s
|
||||
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG --check-prefix=FUNC %s
|
||||
|
||||
;FUNC-LABEL: @test_udiv
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
|
||||
|
||||
; SI-LABEL: @unaligned_load_store_i32:
|
||||
; DS_READ_U32 {{v[0-9]+}}, 0, [[REG]]
|
||||
|
Loading…
Reference in New Issue
Block a user