diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index a4c808ce20a..8c686c91502 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -444,19 +444,48 @@ SDValue SITargetLowering::LowerFormalArguments( return Chain; } +/// Usually ISel will insert a copy between terminator instructions that output +/// a value and the S_BRANCH* at the end of the block. This causes +/// MachineBasicBlock::getFirstTerminator() to return the incorrect value, +/// so we want to make sure there are no copies between terminators at the +/// end of blocks. +static void LowerTerminatorWithOutput(unsigned Opcode, MachineBasicBlock *BB, + MachineInstr *MI, + const TargetInstrInfo *TII, + MachineRegisterInfo &MRI) { + unsigned DstReg = MI->getOperand(0).getReg(); + // Usually ISel will insert a copy between the SI_IF_NON_TERM instruction + // and the S_BRANCH* terminator. We want to replace SI_IF_NON_TERM with + // SI_IF and we can't have any instructions between S_BRANCH* and SI_IF, + // since they are both terminators. + assert(MRI.hasOneUse(DstReg)); + MachineOperand &Use = *MRI.use_begin(DstReg); + MachineInstr *UseMI = Use.getParent(); + assert(UseMI->getOpcode() == AMDGPU::COPY); + + MRI.replaceRegWith(UseMI->getOperand(0).getReg(), DstReg); + UseMI->eraseFromParent(); + BuildMI(*BB, BB->getFirstTerminator(), MI->getDebugLoc(), + TII->get(Opcode)) + .addOperand(MI->getOperand(0)) + .addOperand(MI->getOperand(1)) + .addOperand(MI->getOperand(2)); + MI->eraseFromParent(); +} + MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( MachineInstr * MI, MachineBasicBlock * BB) const { MachineBasicBlock::iterator I = *MI; + const SIInstrInfo *TII = + static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo()); + MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); switch (MI->getOpcode()) { default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); case AMDGPU::BRANCH: return BB; case 
AMDGPU::SI_ADDR64_RSRC: { - const SIInstrInfo *TII = - static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo()); - MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); unsigned SuperReg = MI->getOperand(0).getReg(); unsigned SubRegLo = MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass); unsigned SubRegHi = MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass); @@ -481,9 +510,13 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( MI->eraseFromParent(); break; } - case AMDGPU::V_SUB_F64: { - const SIInstrInfo *TII = - static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo()); + case AMDGPU::SI_IF_NON_TERM: + LowerTerminatorWithOutput(AMDGPU::SI_IF, BB, MI, TII, MRI); + break; + case AMDGPU::SI_ELSE_NON_TERM: + LowerTerminatorWithOutput(AMDGPU::SI_ELSE, BB, MI, TII, MRI); + break; + case AMDGPU::V_SUB_F64: BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_ADD_F64), MI->getOperand(0).getReg()) .addReg(MI->getOperand(1).getReg()) @@ -495,11 +528,9 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( .addImm(2); /* NEG */ MI->eraseFromParent(); break; - } + case AMDGPU::SI_RegisterStorePseudo: { MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); - const SIInstrInfo *TII = - static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo()); unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); MachineInstrBuilder MIB = BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::SI_RegisterStore), diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index a8aefc22871..00f9be61e24 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1411,21 +1411,38 @@ def LOAD_CONST : AMDGPUShaderInst < let mayLoad = 1, mayStore = 1, hasSideEffects = 1, Uses = [EXEC], Defs = [EXEC] in { +let usesCustomInserter = 1 in { + +def SI_IF_NON_TERM : InstSI < + (outs SReg_64:$dst), + (ins SReg_64:$vcc, brtarget:$target), "", + [(set i64:$dst, (int_SI_if i1:$vcc, bb:$target))] +>; + +def SI_ELSE_NON_TERM : InstSI < 
+ (outs SReg_64:$dst), + (ins SReg_64:$src, brtarget:$target), + "", + [(set i64:$dst, (int_SI_else i64:$src, bb:$target))] +> { + let Constraints = "$src = $dst"; +} + +} // usesCustomInserter = 1 + let isBranch = 1, isTerminator = 1 in { -def SI_IF : InstSI < +def SI_IF: InstSI < (outs SReg_64:$dst), (ins SReg_64:$vcc, brtarget:$target), - "SI_IF $dst, $vcc, $target", - [(set i64:$dst, (int_SI_if i1:$vcc, bb:$target))] + "", [] >; def SI_ELSE : InstSI < (outs SReg_64:$dst), (ins SReg_64:$src, brtarget:$target), - "SI_ELSE $dst, $src, $target", - [(set i64:$dst, (int_SI_else i64:$src, bb:$target))]> { - + "", [] +> { let Constraints = "$src = $dst"; } diff --git a/test/CodeGen/R600/add_i64.ll b/test/CodeGen/R600/add_i64.ll index 7081b077d0c..c9eaedae4a3 100644 --- a/test/CodeGen/R600/add_i64.ll +++ b/test/CodeGen/R600/add_i64.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s declare i32 @llvm.r600.read.tidig.x() readnone diff --git a/test/CodeGen/R600/address-space.ll b/test/CodeGen/R600/address-space.ll index 9ebf3fc07b8..f75a8ac5e6a 100644 --- a/test/CodeGen/R600/address-space.ll +++ b/test/CodeGen/R600/address-space.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck %s +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck %s ; Test that codegenprepare understands address space sizes diff --git a/test/CodeGen/R600/array-ptr-calc-i64.ll b/test/CodeGen/R600/array-ptr-calc-i64.ll index 652bbfe2a41..e254c5f6463 100644 --- a/test/CodeGen/R600/array-ptr-calc-i64.ll +++ b/test/CodeGen/R600/array-ptr-calc-i64.ll @@ -1,5 +1,5 @@ ; XFAIL: * -; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI %s +; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs| FileCheck --check-prefix=SI %s declare i32 @llvm.SI.tid() readnone diff --git a/test/CodeGen/R600/call.ll b/test/CodeGen/R600/call.ll index 
c8350b85f44..d80347490b3 100644 --- a/test/CodeGen/R600/call.ll +++ b/test/CodeGen/R600/call.ll @@ -1,4 +1,4 @@ -; RUN: not llc -march=r600 -mcpu=SI < %s 2>&1 | FileCheck %s +; RUN: not llc -march=r600 -mcpu=SI -verify-machineinstrs< %s 2>&1 | FileCheck %s ; RUN: not llc -march=r600 -mcpu=cypress < %s 2>&1 | FileCheck %s ; CHECK: error: unsupported call to function defined_function in test_call diff --git a/test/CodeGen/R600/extload.ll b/test/CodeGen/R600/extload.ll index daa76857a14..dc056e0ecdd 100644 --- a/test/CodeGen/R600/extload.ll +++ b/test/CodeGen/R600/extload.ll @@ -1,5 +1,5 @@ ; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; FUNC-LABEL: @anyext_load_i8: ; EG: AND_INT diff --git a/test/CodeGen/R600/extract_vector_elt_i16.ll b/test/CodeGen/R600/extract_vector_elt_i16.ll index e1b038a139e..5cd1b04bd1d 100644 --- a/test/CodeGen/R600/extract_vector_elt_i16.ll +++ b/test/CodeGen/R600/extract_vector_elt_i16.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; FUNC-LABEL: @extract_vector_elt_v2i16 ; SI: BUFFER_LOAD_USHORT diff --git a/test/CodeGen/R600/gep-address-space.ll b/test/CodeGen/R600/gep-address-space.ll index b36f6122eea..ab2c0bf92fe 100644 --- a/test/CodeGen/R600/gep-address-space.ll +++ b/test/CodeGen/R600/gep-address-space.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck %s +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck %s define void @use_gep_address_space([1024 x i32] addrspace(3)* %array) nounwind { ; CHECK-LABEL: @use_gep_address_space: diff --git a/test/CodeGen/R600/insert_vector_elt_f64.ll 
b/test/CodeGen/R600/insert_vector_elt_f64.ll index e334be17491..595bc59655a 100644 --- a/test/CodeGen/R600/insert_vector_elt_f64.ll +++ b/test/CodeGen/R600/insert_vector_elt_f64.ll @@ -1,6 +1,6 @@ ; REQUIRES: asserts ; XFAIL: * -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s ; SI-LABEL: @dynamic_insertelement_v2f64: diff --git a/test/CodeGen/R600/llvm.sqrt.ll b/test/CodeGen/R600/llvm.sqrt.ll index 0d0d1861899..4eee37ffbe2 100644 --- a/test/CodeGen/R600/llvm.sqrt.ll +++ b/test/CodeGen/R600/llvm.sqrt.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=r600 --mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK -; RUN: llc < %s -march=r600 --mcpu=SI | FileCheck %s --check-prefix=SI-CHECK +; RUN: llc < %s -march=r600 --mcpu=SI -verify-machineinstrs| FileCheck %s --check-prefix=SI-CHECK ; R600-CHECK-LABEL: @sqrt_f32 ; R600-CHECK: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[2].Z diff --git a/test/CodeGen/R600/load-i1.ll b/test/CodeGen/R600/load-i1.ll index 685ceb2e60a..9ba81b85f59 100644 --- a/test/CodeGen/R600/load-i1.ll +++ b/test/CodeGen/R600/load-i1.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s ; SI-LABEL: @global_copy_i1_to_i1 diff --git a/test/CodeGen/R600/local-64.ll b/test/CodeGen/R600/local-64.ll index 38e5289f572..c52b41bb1b5 100644 --- a/test/CodeGen/R600/local-64.ll +++ b/test/CodeGen/R600/local-64.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s ; SI-LABEL: @local_i32_load ; SI: DS_READ_B32 [[REG:v[0-9]+]], v{{[0-9]+}}, 0x1c, [M0] diff --git a/test/CodeGen/R600/loop-idiom.ll b/test/CodeGen/R600/loop-idiom.ll index 8a9cba2796c..128f661077e 100644 --- a/test/CodeGen/R600/loop-idiom.ll +++ 
b/test/CodeGen/R600/loop-idiom.ll @@ -1,5 +1,5 @@ ; RUN: opt -basicaa -loop-idiom -S < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600 --check-prefix=FUNC %s -; RUN: opt -basicaa -loop-idiom -S < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI --check-prefix=FUNC %s +; RUN: opt -basicaa -loop-idiom -S < %s -march=r600 -mcpu=SI -verify-machineinstrs| FileCheck --check-prefix=SI --check-prefix=FUNC %s target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "r600--" diff --git a/test/CodeGen/R600/register-count-comments.ll b/test/CodeGen/R600/register-count-comments.ll index a64b2804bde..329077cde57 100644 --- a/test/CodeGen/R600/register-count-comments.ll +++ b/test/CodeGen/R600/register-count-comments.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s declare i32 @llvm.SI.tid() nounwind readnone diff --git a/test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll b/test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll index 2a286d1b47b..3d2142d53ec 100644 --- a/test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll +++ b/test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll @@ -1,6 +1,6 @@ ; XFAIL: * ; REQUIRES: asserts -; RUN: llc -O0 -march=r600 -mcpu=SI < %s | FileCheck %s -check-prefix=SI +; RUN: llc -O0 -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck %s -check-prefix=SI declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate diff --git a/test/CodeGen/R600/setcc.ll b/test/CodeGen/R600/setcc.ll index 8d34c4ad4fe..ad72732cab0 100644 --- a/test/CodeGen/R600/setcc.ll +++ b/test/CodeGen/R600/setcc.ll @@ -1,5 +1,5 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600 --check-prefix=FUNC %s -;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck 
--check-prefix=SI --check-prefix=FUNC %s +;RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs| FileCheck --check-prefix=SI --check-prefix=FUNC %s ; FUNC-LABEL: @setcc_v2i32 ; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW]}}, KC0[3].X, KC0[3].Z diff --git a/test/CodeGen/R600/setcc64.ll b/test/CodeGen/R600/setcc64.ll index 9202fc01f55..c137125183a 100644 --- a/test/CodeGen/R600/setcc64.ll +++ b/test/CodeGen/R600/setcc64.ll @@ -1,4 +1,4 @@ -;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI --check-prefix=FUNC %s +;RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs| FileCheck --check-prefix=SI --check-prefix=FUNC %s ; XXX: Merge this into setcc, once R600 supports 64-bit operations diff --git a/test/CodeGen/R600/sgpr-copy-duplicate-operand.ll b/test/CodeGen/R600/sgpr-copy-duplicate-operand.ll index d74161bf6dc..9d8a623125f 100644 --- a/test/CodeGen/R600/sgpr-copy-duplicate-operand.ll +++ b/test/CodeGen/R600/sgpr-copy-duplicate-operand.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s ; Copy VGPR -> SGPR used twice as an instruction operand, which is then ; used in an REG_SEQUENCE that also needs to be handled. diff --git a/test/CodeGen/R600/sgpr-copy.ll b/test/CodeGen/R600/sgpr-copy.ll index 5472c1bb1ca..c581d86b99b 100644 --- a/test/CodeGen/R600/sgpr-copy.ll +++ b/test/CodeGen/R600/sgpr-copy.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s +; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s ; This test checks that no VGPR to SGPR copies are created by the register ; allocator. 
diff --git a/test/CodeGen/R600/si-annotate-cf-assertion.ll b/test/CodeGen/R600/si-annotate-cf-assertion.ll index cd3ba2b222d..daa4667150b 100644 --- a/test/CodeGen/R600/si-annotate-cf-assertion.ll +++ b/test/CodeGen/R600/si-annotate-cf-assertion.ll @@ -1,6 +1,6 @@ ; REQUIRES: asserts ; XFAIL: * -; RUN: llc -march=r600 -mcpu=SI -asm-verbose=false < %s | FileCheck %s +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs -asm-verbose=false < %s | FileCheck %s define void @test(i32 addrspace(1)* %g, i8 addrspace(3)* %l, i32 %x) nounwind { diff --git a/test/CodeGen/R600/store-v3i64.ll b/test/CodeGen/R600/store-v3i64.ll index 58229f60482..58d28b567bd 100644 --- a/test/CodeGen/R600/store-v3i64.ll +++ b/test/CodeGen/R600/store-v3i64.ll @@ -1,5 +1,5 @@ ; XFAIL: * -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI ; SI-LABEL: @global_store_v3i64: ; SI: BUFFER_STORE_DWORDX4 diff --git a/test/CodeGen/R600/store-vector-ptrs.ll b/test/CodeGen/R600/store-vector-ptrs.ll index 3af7d919c6f..41c5edc280d 100644 --- a/test/CodeGen/R600/store-vector-ptrs.ll +++ b/test/CodeGen/R600/store-vector-ptrs.ll @@ -1,6 +1,6 @@ ; REQUIRES: asserts ; XFAIL: * -; RUN: llc -march=r600 -mcpu=SI < %s +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s define void @store_vector_ptrs(<4 x i32*>* %out, <4 x [1024 x i32]*> %array) nounwind { %p = getelementptr <4 x [1024 x i32]*> %array, <4 x i16> zeroinitializer, <4 x i16> diff --git a/test/CodeGen/R600/trunc-store-i1.ll b/test/CodeGen/R600/trunc-store-i1.ll index a88894325b6..a3975c8b8e4 100644 --- a/test/CodeGen/R600/trunc-store-i1.ll +++ b/test/CodeGen/R600/trunc-store-i1.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s ; SI-LABEL: @global_truncstore_i32_to_i1 diff --git a/test/CodeGen/R600/uaddo.ll 
b/test/CodeGen/R600/uaddo.ll index 4f24c85f1a0..3b69687b362 100644 --- a/test/CodeGen/R600/uaddo.ll +++ b/test/CodeGen/R600/uaddo.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone diff --git a/test/CodeGen/R600/udivrem64.ll b/test/CodeGen/R600/udivrem64.ll index 3cdbb691e10..b3caebf123d 100644 --- a/test/CodeGen/R600/udivrem64.ll +++ b/test/CodeGen/R600/udivrem64.ll @@ -1,4 +1,4 @@ -;XUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI --check-prefix=FUNC %s +;XUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs| FileCheck --check-prefix=SI --check-prefix=FUNC %s ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG --check-prefix=FUNC %s ;FUNC-LABEL: @test_udiv diff --git a/test/CodeGen/R600/unaligned-load-store.ll b/test/CodeGen/R600/unaligned-load-store.ll index 2824ff8a88c..4df69d1e5f1 100644 --- a/test/CodeGen/R600/unaligned-load-store.ll +++ b/test/CodeGen/R600/unaligned-load-store.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s ; SI-LABEL: @unaligned_load_store_i32: ; DS_READ_U32 {{v[0-9]+}}, 0, [[REG]]