mirror of
https://github.com/RPCSX/llvm.git
synced 2025-02-26 16:05:54 +00:00
Only unswitch loops with uniform conditions
Loop unswitching can be extremely harmful for a SIMT target. If the hoisted condition is not uniform, a SIMT machine will execute both clones of the loop sequentially. Therefore LoopUnswitch checks whether the condition is non-divergent. Since DivergenceAnalysis adds an expensive PostDominatorTree analysis that is not needed for non-SIMT targets, a new option is added to avoid unneeded analysis initialization. The method getAnalysisUsage is called when TargetTransformInfo is not yet available, so we cannot use it here. For that reason a new field, DivergentTarget, is added to PassManagerBuilder to control the behavior, and this field is set from the target. Differential Revision: https://reviews.llvm.org/D30796 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@298104 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
5dfb7d0cfe
commit
17dcd3dc69
@ -153,6 +153,7 @@ public:
|
||||
bool PrepareForLTO;
|
||||
bool PrepareForThinLTO;
|
||||
bool PerformThinLTO;
|
||||
bool DivergentTarget;
|
||||
|
||||
/// Enable profile instrumentation pass.
|
||||
bool EnablePGOInstrGen;
|
||||
|
@ -169,7 +169,8 @@ Pass *createLoopStrengthReducePass();
|
||||
//
|
||||
// LoopUnswitch - This pass is a simple loop unswitching pass.
|
||||
//
|
||||
Pass *createLoopUnswitchPass(bool OptimizeForSize = false);
|
||||
Pass *createLoopUnswitchPass(bool OptimizeForSize = false,
|
||||
bool hasBranchDivergence = false);
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
|
@ -216,6 +216,8 @@ StringRef AMDGPUTargetMachine::getFeatureString(const Function &F) const {
|
||||
}
|
||||
|
||||
void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
|
||||
Builder.DivergentTarget = true;
|
||||
|
||||
bool Internalize = InternalizeSymbols &&
|
||||
(getOptLevel() > CodeGenOpt::None) &&
|
||||
(getTargetTriple().getArch() == Triple::amdgcn);
|
||||
|
@ -168,6 +168,7 @@ PassManagerBuilder::PassManagerBuilder() {
|
||||
PGOInstrUse = RunPGOInstrUse;
|
||||
PrepareForThinLTO = EnablePrepareForThinLTO;
|
||||
PerformThinLTO = false;
|
||||
DivergentTarget = false;
|
||||
}
|
||||
|
||||
PassManagerBuilder::~PassManagerBuilder() {
|
||||
@ -307,7 +308,7 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
|
||||
// Rotate Loop - disable header duplication at -Oz
|
||||
MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
|
||||
MPM.add(createLICMPass()); // Hoist loop invariants
|
||||
MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3));
|
||||
MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
|
||||
MPM.add(createCFGSimplificationPass());
|
||||
addInstructionCombiningPass(MPM);
|
||||
MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars
|
||||
@ -588,7 +589,7 @@ void PassManagerBuilder::populateModulePassManager(
|
||||
MPM.add(createCorrelatedValuePropagationPass());
|
||||
addInstructionCombiningPass(MPM);
|
||||
MPM.add(createLICMPass());
|
||||
MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3));
|
||||
MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
|
||||
MPM.add(createCFGSimplificationPass());
|
||||
addInstructionCombiningPass(MPM);
|
||||
}
|
||||
|
@ -33,6 +33,7 @@
|
||||
#include "llvm/Analysis/GlobalsModRef.h"
|
||||
#include "llvm/Analysis/AssumptionCache.h"
|
||||
#include "llvm/Analysis/CodeMetrics.h"
|
||||
#include "llvm/Analysis/DivergenceAnalysis.h"
|
||||
#include "llvm/Analysis/InstructionSimplify.h"
|
||||
#include "llvm/Analysis/LoopInfo.h"
|
||||
#include "llvm/Analysis/LoopPass.h"
|
||||
@ -180,12 +181,14 @@ namespace {
|
||||
// NewBlocks contained cloned copy of basic blocks from LoopBlocks.
|
||||
std::vector<BasicBlock*> NewBlocks;
|
||||
|
||||
bool hasBranchDivergence;
|
||||
|
||||
public:
|
||||
static char ID; // Pass ID, replacement for typeid
|
||||
explicit LoopUnswitch(bool Os = false) :
|
||||
explicit LoopUnswitch(bool Os = false, bool hasBranchDivergence = false) :
|
||||
LoopPass(ID), OptimizeForSize(Os), redoLoop(false),
|
||||
currentLoop(nullptr), DT(nullptr), loopHeader(nullptr),
|
||||
loopPreheader(nullptr) {
|
||||
loopPreheader(nullptr), hasBranchDivergence(hasBranchDivergence) {
|
||||
initializeLoopUnswitchPass(*PassRegistry::getPassRegistry());
|
||||
}
|
||||
|
||||
@ -198,6 +201,8 @@ namespace {
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||
AU.addRequired<AssumptionCacheTracker>();
|
||||
AU.addRequired<TargetTransformInfoWrapperPass>();
|
||||
if (hasBranchDivergence)
|
||||
AU.addRequired<DivergenceAnalysis>();
|
||||
getLoopAnalysisUsage(AU);
|
||||
}
|
||||
|
||||
@ -367,11 +372,12 @@ INITIALIZE_PASS_BEGIN(LoopUnswitch, "loop-unswitch", "Unswitch loops",
|
||||
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
|
||||
INITIALIZE_PASS_DEPENDENCY(LoopPass)
|
||||
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
|
||||
INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis)
|
||||
INITIALIZE_PASS_END(LoopUnswitch, "loop-unswitch", "Unswitch loops",
|
||||
false, false)
|
||||
|
||||
Pass *llvm::createLoopUnswitchPass(bool Os) {
|
||||
return new LoopUnswitch(Os);
|
||||
Pass *llvm::createLoopUnswitchPass(bool Os, bool hasBranchDivergence) {
|
||||
return new LoopUnswitch(Os, hasBranchDivergence);
|
||||
}
|
||||
|
||||
/// Operator chain lattice.
|
||||
@ -808,6 +814,15 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val,
|
||||
<< ". Cost too high.\n");
|
||||
return false;
|
||||
}
|
||||
if (hasBranchDivergence &&
|
||||
getAnalysis<DivergenceAnalysis>().isDivergent(LoopCond)) {
|
||||
DEBUG(dbgs() << "NOT unswitching loop %"
|
||||
<< currentLoop->getHeader()->getName()
|
||||
<< " at non-trivial condition '" << *Val
|
||||
<< "' == " << *LoopCond << "\n"
|
||||
<< ". Condition is divergent.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
UnswitchNontrivialCondition(LoopCond, Val, currentLoop, TI);
|
||||
return true;
|
||||
|
85
test/Transforms/LoopUnswitch/AMDGPU/divergent-unswitch.ll
Normal file
85
test/Transforms/LoopUnswitch/AMDGPU/divergent-unswitch.ll
Normal file
@ -0,0 +1,85 @@
|
||||
; RUN: opt -mtriple=amdgcn-- -O3 -S %s | FileCheck %s
|
||||
|
||||
; Check that loop unswitching happened and the condition was hoisted out of the loop.
|
||||
; Condition is uniform so all targets should perform unswitching.
|
||||
|
||||
; CHECK-LABEL: {{^}}define void @uniform_unswitch
|
||||
; CHECK: entry:
|
||||
; CHECK-NEXT: [[LOOP_COND:%[a-z0-9]+]] = icmp
|
||||
; CHECK-NEXT: [[IF_COND:%[a-z0-9]+]] = icmp eq i32 %x, 123456
|
||||
; CHECK-NEXT: and i1 [[LOOP_COND]], [[IF_COND]]
|
||||
; CHECK-NEXT: br i1
|
||||
|
||||
define void @uniform_unswitch(i32 * nocapture %out, i32 %n, i32 %x) {
|
||||
entry:
|
||||
%cmp6 = icmp sgt i32 %n, 0
|
||||
br i1 %cmp6, label %for.body.lr.ph, label %for.cond.cleanup
|
||||
|
||||
for.body.lr.ph: ; preds = %entry
|
||||
%cmp1 = icmp eq i32 %x, 123456
|
||||
br label %for.body
|
||||
|
||||
for.cond.cleanup.loopexit: ; preds = %for.inc
|
||||
br label %for.cond.cleanup
|
||||
|
||||
for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
|
||||
ret void
|
||||
|
||||
for.body: ; preds = %for.inc, %for.body.lr.ph
|
||||
%i.07 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
|
||||
br i1 %cmp1, label %if.then, label %for.inc
|
||||
|
||||
if.then: ; preds = %for.body
|
||||
%arrayidx = getelementptr inbounds i32, i32 * %out, i32 %i.07
|
||||
store i32 %i.07, i32 * %arrayidx, align 4
|
||||
br label %for.inc
|
||||
|
||||
for.inc: ; preds = %for.body, %if.then
|
||||
%inc = add nuw nsw i32 %i.07, 1
|
||||
%exitcond = icmp eq i32 %inc, %n
|
||||
br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
|
||||
}
|
||||
|
||||
; Check that loop unswitching does not happen if the condition is divergent.
|
||||
|
||||
; CHECK-LABEL: {{^}}define void @divergent_unswitch
|
||||
; CHECK: entry:
|
||||
; CHECK: icmp
|
||||
; CHECK: [[IF_COND:%[a-z0-9]+]] = icmp {{.*}} 567890
|
||||
; CHECK: br label
|
||||
; CHECK: br i1 [[IF_COND]]
|
||||
|
||||
define void @divergent_unswitch(i32 * nocapture %out, i32 %n) {
|
||||
entry:
|
||||
%cmp9 = icmp sgt i32 %n, 0
|
||||
br i1 %cmp9, label %for.body.lr.ph, label %for.cond.cleanup
|
||||
|
||||
for.body.lr.ph: ; preds = %entry
|
||||
%call = tail call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%cmp2 = icmp eq i32 %call, 567890
|
||||
br label %for.body
|
||||
|
||||
for.cond.cleanup.loopexit: ; preds = %for.inc
|
||||
br label %for.cond.cleanup
|
||||
|
||||
for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
|
||||
ret void
|
||||
|
||||
for.body: ; preds = %for.inc, %for.body.lr.ph
|
||||
%i.010 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
|
||||
br i1 %cmp2, label %if.then, label %for.inc
|
||||
|
||||
if.then: ; preds = %for.body
|
||||
%arrayidx = getelementptr inbounds i32, i32 * %out, i32 %i.010
|
||||
store i32 %i.010, i32 * %arrayidx, align 4
|
||||
br label %for.inc
|
||||
|
||||
for.inc: ; preds = %for.body, %if.then
|
||||
%inc = add nuw nsw i32 %i.010, 1
|
||||
%exitcond = icmp eq i32 %inc, %n
|
||||
br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
|
||||
}
|
||||
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
|
||||
attributes #0 = { nounwind readnone }
|
2
test/Transforms/LoopUnswitch/AMDGPU/lit.local.cfg
Normal file
2
test/Transforms/LoopUnswitch/AMDGPU/lit.local.cfg
Normal file
@ -0,0 +1,2 @@
|
||||
if not 'AMDGPU' in config.root.targets:
|
||||
config.unsupported = True
|
Loading…
x
Reference in New Issue
Block a user