R600/SI: Custom lower SI_IF and SI_ELSE to avoid machine verifier errors

SI_IF and SI_ELSE are terminators which also produce a value.  For
these instructions ISel always inserts a COPY to move their value
to another basic block.  This COPY ends up between SI_(IF|ELSE)
and the S_BRANCH* instruction at the end of the block.

This breaks MachineBasicBlock::getFirstTerminator() and also the
machine verifier which assumes that terminators are grouped together at
the end of blocks.

To solve this we coalesce the copy away right after ISel to make sure
there are no instructions in between terminators at the end of blocks.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@207591 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Tom Stellard 2014-04-29 23:12:53 +00:00
parent 2a90e446c0
commit 40e455d992
27 changed files with 88 additions and 40 deletions

View File

@ -444,19 +444,48 @@ SDValue SITargetLowering::LowerFormalArguments(
return Chain; return Chain;
} }
/// Usually ISel will insert a copy between terminator insturction that output
/// a value and the S_BRANCH* at the end of the block. This causes
/// MachineBasicBlock::getFirstTerminator() to return the incorrect value,
/// so we want to make sure there are no copies between terminators at the
/// end of blocks.
static void LowerTerminatorWithOutput(unsigned Opcode, MachineBasicBlock *BB,
MachineInstr *MI,
const TargetInstrInfo *TII,
MachineRegisterInfo &MRI) {
unsigned DstReg = MI->getOperand(0).getReg();
// Usually ISel will insert a copy between the SI_IF_NON_TERM instruction
// and the S_BRANCH* terminator. We want to replace SI_IF_NO_TERM with
// SI_IF and we can't have any instructions between S_BRANCH* and SI_IF,
// since they are both terminators
assert(MRI.hasOneUse(DstReg));
MachineOperand &Use = *MRI.use_begin(DstReg);
MachineInstr *UseMI = Use.getParent();
assert(UseMI->getOpcode() == AMDGPU::COPY);
MRI.replaceRegWith(UseMI->getOperand(0).getReg(), DstReg);
UseMI->eraseFromParent();
BuildMI(*BB, BB->getFirstTerminator(), MI->getDebugLoc(),
TII->get(Opcode))
.addOperand(MI->getOperand(0))
.addOperand(MI->getOperand(1))
.addOperand(MI->getOperand(2));
MI->eraseFromParent();
}
MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
MachineInstr * MI, MachineBasicBlock * BB) const { MachineInstr * MI, MachineBasicBlock * BB) const {
MachineBasicBlock::iterator I = *MI; MachineBasicBlock::iterator I = *MI;
const SIInstrInfo *TII =
static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
switch (MI->getOpcode()) { switch (MI->getOpcode()) {
default: default:
return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
case AMDGPU::BRANCH: return BB; case AMDGPU::BRANCH: return BB;
case AMDGPU::SI_ADDR64_RSRC: { case AMDGPU::SI_ADDR64_RSRC: {
const SIInstrInfo *TII =
static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
unsigned SuperReg = MI->getOperand(0).getReg(); unsigned SuperReg = MI->getOperand(0).getReg();
unsigned SubRegLo = MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass); unsigned SubRegLo = MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
unsigned SubRegHi = MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass); unsigned SubRegHi = MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
@ -481,9 +510,13 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
MI->eraseFromParent(); MI->eraseFromParent();
break; break;
} }
case AMDGPU::V_SUB_F64: { case AMDGPU::SI_IF_NON_TERM:
const SIInstrInfo *TII = LowerTerminatorWithOutput(AMDGPU::SI_IF, BB, MI, TII, MRI);
static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo()); break;
case AMDGPU::SI_ELSE_NON_TERM:
LowerTerminatorWithOutput(AMDGPU::SI_ELSE, BB, MI, TII, MRI);
break;
case AMDGPU::V_SUB_F64:
BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_ADD_F64), BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_ADD_F64),
MI->getOperand(0).getReg()) MI->getOperand(0).getReg())
.addReg(MI->getOperand(1).getReg()) .addReg(MI->getOperand(1).getReg())
@ -495,11 +528,9 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
.addImm(2); /* NEG */ .addImm(2); /* NEG */
MI->eraseFromParent(); MI->eraseFromParent();
break; break;
}
case AMDGPU::SI_RegisterStorePseudo: { case AMDGPU::SI_RegisterStorePseudo: {
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
const SIInstrInfo *TII =
static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
MachineInstrBuilder MIB = MachineInstrBuilder MIB =
BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::SI_RegisterStore), BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::SI_RegisterStore),

View File

@ -1411,21 +1411,38 @@ def LOAD_CONST : AMDGPUShaderInst <
let mayLoad = 1, mayStore = 1, hasSideEffects = 1, let mayLoad = 1, mayStore = 1, hasSideEffects = 1,
Uses = [EXEC], Defs = [EXEC] in { Uses = [EXEC], Defs = [EXEC] in {
// Stand-ins for SI_IF and SI_ELSE that are defined outside the
// isBranch/isTerminator block below.  They carry the selection patterns for
// int_SI_if / int_SI_else and set usesCustomInserter, so that
// SITargetLowering::EmitInstrWithCustomInserter can replace them with the
// real SI_IF / SI_ELSE instructions (see LowerTerminatorWithOutput).
let usesCustomInserter = 1 in {
// Selected for int_SI_if; the asm string is left empty.
def SI_IF_NON_TERM : InstSI <
(outs SReg_64:$dst),
(ins SReg_64:$vcc, brtarget:$target), "",
[(set i64:$dst, (int_SI_if i1:$vcc, bb:$target))]
>;
// Selected for int_SI_else; the asm string is left empty.
def SI_ELSE_NON_TERM : InstSI <
(outs SReg_64:$dst),
(ins SReg_64:$src, brtarget:$target),
"",
[(set i64:$dst, (int_SI_else i64:$src, bb:$target))]
> {
// Tie the input operand to the output operand.
let Constraints = "$src = $dst";
}
} // usesCustomInserter = 1
let isBranch = 1, isTerminator = 1 in { let isBranch = 1, isTerminator = 1 in {
def SI_IF : InstSI < def SI_IF: InstSI <
(outs SReg_64:$dst), (outs SReg_64:$dst),
(ins SReg_64:$vcc, brtarget:$target), (ins SReg_64:$vcc, brtarget:$target),
"SI_IF $dst, $vcc, $target", "", []
[(set i64:$dst, (int_SI_if i1:$vcc, bb:$target))]
>; >;
def SI_ELSE : InstSI < def SI_ELSE : InstSI <
(outs SReg_64:$dst), (outs SReg_64:$dst),
(ins SReg_64:$src, brtarget:$target), (ins SReg_64:$src, brtarget:$target),
"SI_ELSE $dst, $src, $target", "", []
[(set i64:$dst, (int_SI_else i64:$src, bb:$target))]> { > {
let Constraints = "$src = $dst"; let Constraints = "$src = $dst";
} }

View File

@ -1,4 +1,4 @@
; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
declare i32 @llvm.r600.read.tidig.x() readnone declare i32 @llvm.r600.read.tidig.x() readnone

View File

@ -1,4 +1,4 @@
; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck %s ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck %s
; Test that codegenprepare understands address space sizes ; Test that codegenprepare understands address space sizes

View File

@ -1,5 +1,5 @@
; XFAIL: * ; XFAIL: *
; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI %s ; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs| FileCheck --check-prefix=SI %s
declare i32 @llvm.SI.tid() readnone declare i32 @llvm.SI.tid() readnone

View File

@ -1,4 +1,4 @@
; RUN: not llc -march=r600 -mcpu=SI < %s 2>&1 | FileCheck %s ; RUN: not llc -march=r600 -mcpu=SI -verify-machineinstrs< %s 2>&1 | FileCheck %s
; RUN: not llc -march=r600 -mcpu=cypress < %s 2>&1 | FileCheck %s ; RUN: not llc -march=r600 -mcpu=cypress < %s 2>&1 | FileCheck %s
; CHECK: error: unsupported call to function defined_function in test_call ; CHECK: error: unsupported call to function defined_function in test_call

View File

@ -1,5 +1,5 @@
; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; FUNC-LABEL: @anyext_load_i8: ; FUNC-LABEL: @anyext_load_i8:
; EG: AND_INT ; EG: AND_INT

View File

@ -1,4 +1,4 @@
; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; FUNC-LABEL: @extract_vector_elt_v2i16 ; FUNC-LABEL: @extract_vector_elt_v2i16
; SI: BUFFER_LOAD_USHORT ; SI: BUFFER_LOAD_USHORT

View File

@ -1,4 +1,4 @@
; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck %s ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck %s
define void @use_gep_address_space([1024 x i32] addrspace(3)* %array) nounwind { define void @use_gep_address_space([1024 x i32] addrspace(3)* %array) nounwind {
; CHECK-LABEL: @use_gep_address_space: ; CHECK-LABEL: @use_gep_address_space:

View File

@ -1,6 +1,6 @@
; REQUIRES: asserts ; REQUIRES: asserts
; XFAIL: * ; XFAIL: *
; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
; SI-LABEL: @dynamic_insertelement_v2f64: ; SI-LABEL: @dynamic_insertelement_v2f64:

View File

@ -1,5 +1,5 @@
; RUN: llc < %s -march=r600 --mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK ; RUN: llc < %s -march=r600 --mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
; RUN: llc < %s -march=r600 --mcpu=SI | FileCheck %s --check-prefix=SI-CHECK ; RUN: llc < %s -march=r600 --mcpu=SI -verify-machineinstrs| FileCheck %s --check-prefix=SI-CHECK
; R600-CHECK-LABEL: @sqrt_f32 ; R600-CHECK-LABEL: @sqrt_f32
; R600-CHECK: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[2].Z ; R600-CHECK: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[2].Z

View File

@ -1,4 +1,4 @@
; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
; SI-LABEL: @global_copy_i1_to_i1 ; SI-LABEL: @global_copy_i1_to_i1

View File

@ -1,4 +1,4 @@
; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
; SI-LABEL: @local_i32_load ; SI-LABEL: @local_i32_load
; SI: DS_READ_B32 [[REG:v[0-9]+]], v{{[0-9]+}}, 0x1c, [M0] ; SI: DS_READ_B32 [[REG:v[0-9]+]], v{{[0-9]+}}, 0x1c, [M0]

View File

@ -1,5 +1,5 @@
; RUN: opt -basicaa -loop-idiom -S < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600 --check-prefix=FUNC %s ; RUN: opt -basicaa -loop-idiom -S < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600 --check-prefix=FUNC %s
; RUN: opt -basicaa -loop-idiom -S < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI --check-prefix=FUNC %s ; RUN: opt -basicaa -loop-idiom -S < %s -march=r600 -mcpu=SI -verify-machineinstrs| FileCheck --check-prefix=SI --check-prefix=FUNC %s
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
target triple = "r600--" target triple = "r600--"

View File

@ -1,4 +1,4 @@
; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
declare i32 @llvm.SI.tid() nounwind readnone declare i32 @llvm.SI.tid() nounwind readnone

View File

@ -1,6 +1,6 @@
; XFAIL: * ; XFAIL: *
; REQUIRES: asserts ; REQUIRES: asserts
; RUN: llc -O0 -march=r600 -mcpu=SI < %s | FileCheck %s -check-prefix=SI ; RUN: llc -O0 -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck %s -check-prefix=SI
declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate

View File

@ -1,5 +1,5 @@
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600 --check-prefix=FUNC %s ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600 --check-prefix=FUNC %s
;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI --check-prefix=FUNC %s ;RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs| FileCheck --check-prefix=SI --check-prefix=FUNC %s
; FUNC-LABEL: @setcc_v2i32 ; FUNC-LABEL: @setcc_v2i32
; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW]}}, KC0[3].X, KC0[3].Z ; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW]}}, KC0[3].X, KC0[3].Z

View File

@ -1,4 +1,4 @@
;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI --check-prefix=FUNC %s ;RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs| FileCheck --check-prefix=SI --check-prefix=FUNC %s
; XXX: Merge this into setcc, once R600 supports 64-bit operations ; XXX: Merge this into setcc, once R600 supports 64-bit operations

View File

@ -1,4 +1,4 @@
; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
; Copy VGPR -> SGPR used twice as an instruction operand, which is then ; Copy VGPR -> SGPR used twice as an instruction operand, which is then
; used in an REG_SEQUENCE that also needs to be handled. ; used in an REG_SEQUENCE that also needs to be handled.

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s ; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s
; This test checks that no VGPR to SGPR copies are created by the register ; This test checks that no VGPR to SGPR copies are created by the register
; allocator. ; allocator.

View File

@ -1,6 +1,6 @@
; REQUIRES: asserts ; REQUIRES: asserts
; XFAIL: * ; XFAIL: *
; RUN: llc -march=r600 -mcpu=SI -asm-verbose=false < %s | FileCheck %s ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs -asm-verbose=false < %s | FileCheck %s
define void @test(i32 addrspace(1)* %g, i8 addrspace(3)* %l, i32 %x) nounwind { define void @test(i32 addrspace(1)* %g, i8 addrspace(3)* %l, i32 %x) nounwind {

View File

@ -1,5 +1,5 @@
; XFAIL: * ; XFAIL: *
; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI
; SI-LABEL: @global_store_v3i64: ; SI-LABEL: @global_store_v3i64:
; SI: BUFFER_STORE_DWORDX4 ; SI: BUFFER_STORE_DWORDX4

View File

@ -1,6 +1,6 @@
; REQUIRES: asserts ; REQUIRES: asserts
; XFAIL: * ; XFAIL: *
; RUN: llc -march=r600 -mcpu=SI < %s ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s
define void @store_vector_ptrs(<4 x i32*>* %out, <4 x [1024 x i32]*> %array) nounwind { define void @store_vector_ptrs(<4 x i32*>* %out, <4 x [1024 x i32]*> %array) nounwind {
%p = getelementptr <4 x [1024 x i32]*> %array, <4 x i16> zeroinitializer, <4 x i16> <i16 16, i16 16, i16 16, i16 16> %p = getelementptr <4 x [1024 x i32]*> %array, <4 x i16> zeroinitializer, <4 x i16> <i16 16, i16 16, i16 16, i16 16>

View File

@ -1,4 +1,4 @@
; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
; SI-LABEL: @global_truncstore_i32_to_i1 ; SI-LABEL: @global_truncstore_i32_to_i1

View File

@ -1,4 +1,4 @@
; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone

View File

@ -1,4 +1,4 @@
;XUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI --check-prefix=FUNC %s ;XUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs| FileCheck --check-prefix=SI --check-prefix=FUNC %s
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG --check-prefix=FUNC %s ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG --check-prefix=FUNC %s
;FUNC-LABEL: @test_udiv ;FUNC-LABEL: @test_udiv

View File

@ -1,4 +1,4 @@
; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
; SI-LABEL: @unaligned_load_store_i32: ; SI-LABEL: @unaligned_load_store_i32:
; DS_READ_U32 {{v[0-9]+}}, 0, [[REG]] ; DS_READ_U32 {{v[0-9]+}}, 0, [[REG]]