AMDGPU: Change insertion point of si_mask_branch

Insert before the skip branch if one is created.
This is a somewhat more natural placement relative
to the skip branches, and makes it possible to implement
analyzeBranch for skip blocks.

The test changes are mostly due to a quirk where
the block label is not emitted if there is a terminator
that is not also a branch.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@278273 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Matt Arsenault 2016-08-10 19:11:42 +00:00
parent 898f0e0994
commit 34c6b123f7
7 changed files with 49 additions and 29 deletions

View File

@@ -1777,9 +1777,9 @@ let hasSideEffects = 1 in {
// replaced with exec mask operations.
def SI_MASK_BRANCH : PseudoInstSI <
(outs), (ins brtarget:$target, SReg_64:$dst)> {
let isBranch = 1;
let isBranch = 0;
let isTerminator = 1;
let isBarrier = 1;
let isBarrier = 0;
let SALU = 1;
}

View File

@@ -80,7 +80,7 @@ private:
bool shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To);
void Skip(MachineInstr &From, MachineOperand &To);
MachineInstr *Skip(MachineInstr &From, MachineOperand &To);
bool skipIfDead(MachineInstr &MI, MachineBasicBlock &NextBB);
void If(MachineInstr &MI);
@@ -182,14 +182,15 @@ bool SILowerControlFlow::shouldSkip(MachineBasicBlock *From,
return false;
}
void SILowerControlFlow::Skip(MachineInstr &From, MachineOperand &To) {
MachineInstr *SILowerControlFlow::Skip(MachineInstr &From, MachineOperand &To) {
if (!shouldSkip(*From.getParent()->succ_begin(), To.getMBB()))
return;
return nullptr;
DebugLoc DL = From.getDebugLoc();
BuildMI(*From.getParent(), &From, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
const DebugLoc &DL = From.getDebugLoc();
MachineInstr *Skip =
BuildMI(*From.getParent(), &From, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
.addOperand(To);
return Skip;
}
bool SILowerControlFlow::skipIfDead(MachineInstr &MI, MachineBasicBlock &NextBB) {
@@ -242,10 +243,13 @@ void SILowerControlFlow::If(MachineInstr &MI) {
.addReg(AMDGPU::EXEC)
.addReg(Reg);
Skip(MI, MI.getOperand(2));
MachineInstr *SkipInst = Skip(MI, MI.getOperand(2));
// Insert before the new branch instruction.
MachineInstr *InsPt = SkipInst ? SkipInst : &MI;
// Insert a pseudo terminator to help keep the verifier happy.
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::SI_MASK_BRANCH))
BuildMI(MBB, InsPt, DL, TII->get(AMDGPU::SI_MASK_BRANCH))
.addOperand(MI.getOperand(2))
.addReg(Reg);
@@ -275,10 +279,13 @@ void SILowerControlFlow::Else(MachineInstr &MI) {
.addReg(AMDGPU::EXEC)
.addReg(Dst);
Skip(MI, MI.getOperand(2));
MachineInstr *SkipInst = Skip(MI, MI.getOperand(2));
// Insert before the new branch instruction.
MachineInstr *InsPt = SkipInst ? SkipInst : &MI;
// Insert a pseudo terminator to help keep the verifier happy.
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::SI_MASK_BRANCH))
BuildMI(MBB, InsPt, DL, TII->get(AMDGPU::SI_MASK_BRANCH))
.addOperand(MI.getOperand(2))
.addReg(Dst);

View File

@@ -4,7 +4,8 @@ declare i32 @llvm.amdgcn.workitem.id.x() #0
; GCN-LABEL: {{^}}convergent_inlineasm:
; GCN: BB#0:
; GCN: v_cmp_ne_i32_e64
; GCN: BB#1:
; GCN: ; mask branch
; GCN: BB{{[0-9]+_[0-9]+}}:
define void @convergent_inlineasm(i64 addrspace(1)* nocapture %arg) {
bb:
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
@@ -22,9 +23,12 @@ bb5: ; preds = %bb3, %bb
}
; GCN-LABEL: {{^}}nonconvergent_inlineasm:
; GCN: BB#1:
; GCN: ; mask branch
; GCN: BB{{[0-9]+_[0-9]+}}:
; GCN: v_cmp_ne_i32_e64
; GCN: BB1_2:
; GCN: BB{{[0-9]+_[0-9]+}}:
define void @nonconvergent_inlineasm(i64 addrspace(1)* nocapture %arg) {
bb:
%tmp = call i32 @llvm.amdgcn.workitem.id.x()

View File

@@ -202,8 +202,11 @@ exit:
; CHECK: v_cmp_eq_i32_e32 vcc, 0, v0
; CHECK-NEXT: s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9]+:[0-9]+\]]], vcc
; CHECK-NEXT: s_xor_b64 [[SAVEEXEC]], exec, [[SAVEEXEC]]
; CHECK-NEXT: s_cbranch_execz [[EXIT:BB[0-9]+_[0-9]+]]
; CHECK-NEXT: ; mask branch [[EXIT]]
; CHECK-NEXT: ; mask branch [[EXIT:BB[0-9]+_[0-9]+]]
; CHECK-NEXT: s_cbranch_execz [[EXIT]]
; CHECK: {{BB[0-9]+_[0-9]+}}: ; %bb.preheader
; CHECK: s_mov_b32
; CHECK: [[LOOP_BB:BB[0-9]+_[0-9]+]]:
@@ -353,7 +356,7 @@ bb7: ; preds = %bb4
; CHECK: mask branch [[END:BB[0-9]+_[0-9]+]]
; CHECK-NOT: branch
; CHECK: ; BB#3: ; %bb8
; CHECK: BB{{[0-9]+_[0-9]+}}: ; %bb8
; CHECK: buffer_store_dword
; CHECK: [[END]]:
@@ -387,4 +390,4 @@ declare <4 x float> @llvm.SI.image.sample.c.v4i32(<4 x i32>, <8 x i32>, <4 x i32
declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) nounwind
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
attributes #1 = { nounwind readnone }

View File

@@ -5,6 +5,11 @@
; CHECK-LABEL: {{^}}test1:
; CHECK: v_cmp_ne_i32_e32 vcc, 0
; CHECK: s_and_saveexec_b64
; CHECK-NEXT: s_xor_b64
; CHECK-NEXT: ; mask branch
; CHECK-NEXT: s_cbranch_execz
; CHECK-NEXT: BB{{[0-9]+_[0-9]+}}: ; %loop_body.preheader
; CHECK: [[LOOP_BODY_LABEL:BB[0-9]+_[0-9]+]]:
; CHECK: s_and_b64 vcc, exec, vcc
@@ -30,10 +35,11 @@ out:
ret void
}
;CHECK-LABEL: {{^}}test2:
;CHECK: s_and_saveexec_b64
;CHECK: s_xor_b64
;CHECK-NEXT: s_cbranch_execz
; CHECK-LABEL: {{^}}test2:
; CHECK: s_and_saveexec_b64
; CHECK-NEXT: s_xor_b64
; CHECK-NEXT: ; mask branch
; CHECK-NEXT: s_cbranch_execz
define void @test2(i32 addrspace(1)* %out, i32 %a, i32 %b) {
main_body:
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1

View File

@@ -47,7 +47,7 @@ end:
; SI: s_and_saveexec_b64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], vcc
; SI: s_xor_b64 [[BR_SREG]], exec, [[BR_SREG]]
; SI: ; BB#1
; SI: BB{{[0-9]+_[0-9]+}}:
; SI: buffer_store_dword
; SI: s_endpgm
@@ -68,7 +68,7 @@ exit:
ret void
}
; SI-LABEL: @simple_test_v_loop
; SI-LABEL: {{^}}simple_test_v_loop:
; SI: v_cmp_ne_i32_e32 vcc, 0, v{{[0-9]+}}
; SI: s_and_saveexec_b64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], vcc
; SI: s_xor_b64 [[BR_SREG]], exec, [[BR_SREG]]
@@ -106,7 +106,7 @@ exit:
ret void
}
; SI-LABEL: @multi_vcond_loop
; SI-LABEL: {{^}}multi_vcond_loop:
; Load loop limit from buffer
; Branch to exit if uniformly not taken
@@ -118,7 +118,7 @@ exit:
; SI: s_cbranch_execz [[LABEL_EXIT:BB[0-9]+_[0-9]+]]
; Initialize inner condition to false
; SI: ; BB#1:
; SI: BB{{[0-9]+_[0-9]+}}: ; %bb10.preheader
; SI: s_mov_b64 [[ZERO:s\[[0-9]+:[0-9]+\]]], 0{{$}}
; SI: s_mov_b64 [[COND_STATE:s\[[0-9]+:[0-9]+\]]], [[ZERO]]
@@ -133,7 +133,7 @@ exit:
; SI: s_xor_b64 [[ORNEG2]], exec, [[ORNEG2]]
; SI: s_cbranch_execz [[LABEL_FLOW:BB[0-9]+_[0-9]+]]
; SI: BB#3:
; SI: BB{{[0-9]+_[0-9]+}}: ; %bb20
; SI: buffer_store_dword
; SI: v_cmp_ge_i64_e32 [[CMP:s\[[0-9]+:[0-9]+\]|vcc]]
; SI: s_or_b64 [[TMP:s\[[0-9]+:[0-9]+\]]], [[CMP]], [[COND_STATE]]

View File

@@ -123,7 +123,7 @@ END:
;CHECK-NEXT: s_and_b64 [[SAVED]], exec, [[SAVED]]
;CHECK-NEXT: s_xor_b64 exec, exec, [[SAVED]]
;CHECK-NEXT: mask branch [[END_BB:BB[0-9]+_[0-9]+]]
;CHECK-NEXT: ; BB#3: ; %ELSE
;CHECK-NEXT: BB{{[0-9]+_[0-9]+}}: ; %ELSE
;CHECK: store_dword
;CHECK: [[END_BB]]: ; %END
;CHECK: s_or_b64 exec, exec,