Mirror of https://github.com/RPCS3/llvm-mirror.git
Synced 2025-02-22 13:32:09 +00:00
[AMDGPU] Preserve inverted bit in SI_IF in presence of SI_KILL
If an SI_KILL is between the SI_IF and its SI_END_CF, we need to preserve the bits actually flipped by the if rather than restoring the original exec mask.

Differential Revision: https://reviews.llvm.org/D36299

llvm-svn: 310031
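For context, a schematic of the two SI_IF lowerings involved (register names s[0:1] and vcc are illustrative, not taken from this patch): the conservative form saves only the lanes the if turned off, so the s_or_b64 emitted for SI_END_CF cannot re-enable lanes that a kill cleared in between, whereas the "simple if" form saves the whole exec mask and would resurrect them.

; Conservative SI_IF lowering: save only the flipped bits.
s_and_saveexec_b64 s[0:1], vcc    ; s[0:1] = exec, exec = exec & vcc
s_xor_b64 s[0:1], exec, s[0:1]    ; s[0:1] = lanes the if turned off
; ... then-block; a kill here may clear more lanes from exec ...
s_or_b64 exec, exec, s[0:1]       ; SI_END_CF: re-enable only the lanes the
                                  ; if turned off; killed lanes stay off

; "Simple if" form (saved reg only used by SI_END_CF): no s_xor_b64,
; the saved register holds the full original exec mask.
s_and_saveexec_b64 s[0:1], vcc    ; s[0:1] = original exec
; ... then-block containing a kill ...
s_or_b64 exec, exec, s[0:1]       ; SI_END_CF: restores the original exec,
                                  ; which would also re-enable killed lanes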
parent 82dfccec6e
commit 9cdd47faf1
@@ -134,6 +134,38 @@ static void setImpSCCDefDead(MachineInstr &MI, bool IsDead) {

char &llvm::SILowerControlFlowID = SILowerControlFlow::ID;

static bool isSimpleIf(const MachineInstr &MI, const MachineRegisterInfo *MRI) {
  unsigned SaveExecReg = MI.getOperand(0).getReg();
  auto U = MRI->use_instr_nodbg_begin(SaveExecReg);

  if (U == MRI->use_instr_nodbg_end() ||
      std::next(U) != MRI->use_instr_nodbg_end() ||
      U->getOpcode() != AMDGPU::SI_END_CF)
    return false;

  // Check for SI_KILL_TERMINATOR on path from if to endif.
  // If there is any such terminator, simplifications are not safe.
  auto SMBB = MI.getParent();
  auto EMBB = U->getParent();
  DenseSet<const MachineBasicBlock*> Visited;
  SmallVector<MachineBasicBlock*, 4> Worklist(SMBB->succ_begin(),
                                              SMBB->succ_end());

  while (!Worklist.empty()) {
    MachineBasicBlock *MBB = Worklist.pop_back_val();

    if (MBB == EMBB || !Visited.insert(MBB).second)
      continue;
    for (auto &Term : MBB->terminators())
      if (Term.getOpcode() == AMDGPU::SI_KILL_TERMINATOR)
        return false;

    Worklist.append(MBB->succ_begin(), MBB->succ_end());
  }

  return true;
}

void SILowerControlFlow::emitIf(MachineInstr &MI) {
  MachineBasicBlock &MBB = *MI.getParent();
  const DebugLoc &DL = MI.getDebugLoc();
@@ -152,11 +184,7 @@ void SILowerControlFlow::emitIf(MachineInstr &MI) {
  // If there is only one use of save exec register and that use is SI_END_CF,
  // we can optimize SI_IF by returning the full saved exec mask instead of
  // just cleared bits.
  bool SimpleIf = false;
  auto U = MRI->use_instr_nodbg_begin(SaveExecReg);
  SimpleIf = U != MRI->use_instr_nodbg_end() &&
             std::next(U) == MRI->use_instr_nodbg_end() &&
             U->getOpcode() == AMDGPU::SI_END_CF;
  bool SimpleIf = isSimpleIf(MI, MRI);

  // Add an implicit def of exec to discourage scheduling VALU after this which
  // will interfere with trying to form s_and_saveexec_b64 later.
test/CodeGen/AMDGPU/si-lower-control-flow-kill.ll (new file, 71 lines)
@@ -0,0 +1,71 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; GCN-LABEL: {{^}}if_with_kill:
; GCN: s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9:]+\]]],
; GCN-NEXT: s_xor_b64 s[{{[0-9:]+}}], exec, [[SAVEEXEC]]
define amdgpu_ps void @if_with_kill(i32 %arg) {
.entry:
  %cmp = icmp eq i32 %arg, 32
  br i1 %cmp, label %then, label %endif

then:
  tail call void @llvm.AMDGPU.kilp()
  br label %endif

endif:
  ret void
}

; GCN-LABEL: {{^}}if_with_loop_kill_after:
; GCN: s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9:]+\]]],
; GCN-NEXT: s_xor_b64 s[{{[0-9:]+}}], exec, [[SAVEEXEC]]
define amdgpu_ps void @if_with_loop_kill_after(i32 %arg) {
.entry:
  %cmp = icmp eq i32 %arg, 32
  br i1 %cmp, label %then, label %endif

then:
  %sub = sub i32 %arg, 1
  br label %loop

loop:
  %ind = phi i32 [%sub, %then], [%dec, %loop]
  %dec = sub i32 %ind, 1
  %cc = icmp ne i32 %ind, 0
  br i1 %cc, label %loop, label %break

break:
  tail call void @llvm.AMDGPU.kilp()
  br label %endif

endif:
  ret void
}

; GCN-LABEL: {{^}}if_with_kill_inside_loop:
; GCN: s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9:]+\]]],
; GCN-NEXT: s_xor_b64 s[{{[0-9:]+}}], exec, [[SAVEEXEC]]
define amdgpu_ps void @if_with_kill_inside_loop(i32 %arg) {
.entry:
  %cmp = icmp eq i32 %arg, 32
  br i1 %cmp, label %then, label %endif

then:
  %sub = sub i32 %arg, 1
  br label %loop

loop:
  %ind = phi i32 [%sub, %then], [%dec, %loop]
  %dec = sub i32 %ind, 1
  %cc = icmp ne i32 %ind, 0
  tail call void @llvm.AMDGPU.kilp()
  br i1 %cc, label %loop, label %break

break:
  br label %endif

endif:
  ret void
}

declare void @llvm.AMDGPU.kilp()
@@ -202,6 +202,7 @@ exit:
; CHECK-LABEL: {{^}}test_kill_divergent_loop:
; CHECK: v_cmp_eq_u32_e32 vcc, 0, v0
; CHECK-NEXT: s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9]+:[0-9]+\]]], vcc
; CHECK-NEXT: s_xor_b64 [[SAVEEXEC]], exec, [[SAVEEXEC]]
; CHECK-NEXT: ; mask branch [[EXIT:BB[0-9]+_[0-9]+]]
; CHECK-NEXT: s_cbranch_execz [[EXIT]]

@@ -336,6 +337,7 @@ bb7: ; preds = %bb4
; CHECK-LABEL: {{^}}if_after_kill_block:
; CHECK: ; BB#0:
; CHECK: s_and_saveexec_b64
; CHECK: s_xor_b64
; CHECK-NEXT: mask branch [[BB4:BB[0-9]+_[0-9]+]]

; CHECK: v_cmpx_le_f32_e32 vcc, 0,