[LoopUnroll] allow customization for new-pass-manager version of LoopUnroll

Unlike its legacy counterpart, the new pass manager's LoopUnrollPass does
not provide any means to select which flavors of unroll to run
(runtime, peeling, partial); it relies on global defaults instead.

In some cases, the ability to run a restricted LoopUnroll that
does more than LoopFullUnroll is needed.

Introduced LoopUnrollOptions to select optional unroll behaviors.
Added 'unroll<peeling;no-runtime>' to PassRegistry mainly for the sake of testing.

Reviewers: chandlerc, tejohnson
Differential Revision: https://reviews.llvm.org/D53440

llvm-svn: 345723
This commit is contained in:
Fedor Sergeev 2018-10-31 14:33:14 +00:00
parent 5297adacba
commit 3984a320d2
6 changed files with 104 additions and 16 deletions

View File

@ -10,6 +10,7 @@
#ifndef LLVM_TRANSFORMS_SCALAR_LOOPUNROLLPASS_H
#define LLVM_TRANSFORMS_SCALAR_LOOPUNROLLPASS_H
#include "llvm/ADT/Optional.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/IR/PassManager.h"
@ -30,16 +31,71 @@ public:
LoopStandardAnalysisResults &AR, LPMUpdater &U);
};
/// A set of parameters used to control various transforms performed by the
/// LoopUnroll pass. Each of the boolean parameters can be set to:
/// true - enabling the transformation.
/// false - disabling the transformation.
/// None - relying on a global default.
///
/// There is also OptLevel parameter, which is used for additional loop unroll
/// tuning.
///
/// Intended use is to create a default object, modify parameters with
/// additional setters and then pass it to LoopUnrollPass.
///
struct LoopUnrollOptions {
Optional<bool> AllowPartial;
Optional<bool> AllowPeeling;
Optional<bool> AllowRuntime;
Optional<bool> AllowUpperBound;
int OptLevel;
LoopUnrollOptions(int OptLevel = 2) : OptLevel(OptLevel) {}
/// Enables or disables partial unrolling. When disabled only full unrolling
/// is allowed.
LoopUnrollOptions &setPartial(bool Partial) {
AllowPartial = Partial;
return *this;
}
/// Enables or disables unrolling of loops with runtime trip count.
LoopUnrollOptions &setRuntime(bool Runtime) {
AllowRuntime = Runtime;
return *this;
}
/// Enables or disables loop peeling.
LoopUnrollOptions &setPeeling(bool Peeling) {
AllowPeeling = Peeling;
return *this;
}
/// Enables or disables the use of trip count upper bound
/// in loop unrolling.
LoopUnrollOptions &setUpperBound(bool UpperBound) {
AllowUpperBound = UpperBound;
return *this;
}
// Sets "optimization level" tuning parameter for loop unrolling.
LoopUnrollOptions &setOptLevel(int O) {
OptLevel = O;
return *this;
}
};
/// Loop unroll pass that will support both full and partial unrolling.
/// It is a function pass to have access to function and module analyses.
/// It will also put loops into canonical form (simplified and LCSSA).
class LoopUnrollPass : public PassInfoMixin<LoopUnrollPass> {
const int OptLevel;
/// Unroll options captured by value when the pass is constructed.
LoopUnrollOptions UnrollOpts;
public:
/// This uses the target information (or flags) to control the thresholds for
/// different unrolling strategies but supports all of them.
explicit LoopUnrollPass(int OptLevel = 2) : OptLevel(OptLevel) {}
explicit LoopUnrollPass(LoopUnrollOptions UnrollOpts = {})
: UnrollOpts(UnrollOpts) {}
/// Runs the unroll pass over the function \p F.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};

View File

@ -830,7 +830,7 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
OptimizePM.addPass(
createFunctionToLoopPassAdaptor(LoopUnrollAndJamPass(Level)));
}
OptimizePM.addPass(LoopUnrollPass(Level));
OptimizePM.addPass(LoopUnrollPass(LoopUnrollOptions(Level)));
OptimizePM.addPass(InstCombinePass());
OptimizePM.addPass(RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
OptimizePM.addPass(createFunctionToLoopPassAdaptor(LICMPass(), DebugLogging));

View File

@ -215,6 +215,7 @@ FUNCTION_PASS("sroa", SROA())
FUNCTION_PASS("tailcallelim", TailCallElimPass())
FUNCTION_PASS("unreachableblockelim", UnreachableBlockElimPass())
FUNCTION_PASS("unroll", LoopUnrollPass())
// Restricted LoopUnroll variant: peeling explicitly enabled, runtime unrolling explicitly disabled.
FUNCTION_PASS("unroll<peeling;no-runtime>",LoopUnrollPass(LoopUnrollOptions().setPeeling(true).setRuntime(false)))
FUNCTION_PASS("verify", VerifierPass())
FUNCTION_PASS("verify<domtree>", DominatorTreeVerifierPass())
FUNCTION_PASS("verify<loops>", LoopVerifierPass())

View File

@ -1333,23 +1333,20 @@ PreservedAnalyses LoopUnrollPass::run(Function &F,
Loop *ParentL = L.getParentLoop();
#endif
// The API here is quite complex to call, but there are only two interesting
// states we support: partial and full (or "simple") unrolling. However, to
// enable these things we actually pass "None" in for the optional to avoid
// providing an explicit choice.
Optional<bool> AllowPartialParam, RuntimeParam, UpperBoundParam,
AllowPeeling;
// Check if the profile summary indicates that the profiled application
// has a huge working set size, in which case we disable peeling to avoid
// bloating it further.
Optional<bool> LocalAllowPeeling = UnrollOpts.AllowPeeling;
if (PSI && PSI->hasHugeWorkingSetSize())
AllowPeeling = false;
LocalAllowPeeling = false;
std::string LoopName = L.getName();
LoopUnrollResult Result =
tryToUnrollLoop(&L, DT, &LI, SE, TTI, AC, ORE,
/*PreserveLCSSA*/ true, OptLevel, /*Count*/ None,
/*Threshold*/ None, AllowPartialParam, RuntimeParam,
UpperBoundParam, AllowPeeling);
// The API here is quite complex to call; some flavors of unrolling can be
// selected at construction time (by setting UnrollOpts).
LoopUnrollResult Result = tryToUnrollLoop(
&L, DT, &LI, SE, TTI, AC, ORE,
/*PreserveLCSSA*/ true, UnrollOpts.OptLevel, /*Count*/ None,
/*Threshold*/ None, UnrollOpts.AllowPartial, UnrollOpts.AllowRuntime,
UnrollOpts.AllowUpperBound, LocalAllowPeeling);
Changed |= Result != LoopUnrollResult::Unmodified;
// The parent must not be damaged by unrolling!

View File

@ -1,4 +1,6 @@
; RUN: opt < %s -S -loop-unroll -unroll-force-peel-count=3 -verify-dom-info -simplifycfg -instcombine | FileCheck %s
; RUN: opt < %s -S -passes='require<opt-remark-emit>,unroll,simplify-cfg,instcombine' -unroll-force-peel-count=3 -verify-dom-info | FileCheck %s
; RUN: opt < %s -S -passes='require<opt-remark-emit>,unroll<peeling;no-runtime>,simplify-cfg,instcombine' -unroll-force-peel-count=3 -verify-dom-info | FileCheck %s
; Basic loop peeling - check that we can peel-off the first 3 loop iterations
; when explicitly requested.

View File

@ -1,8 +1,16 @@
; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=true | FileCheck %s -check-prefixes=EPILOG,COMMON
; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefixes=PROLOG,COMMON
;
; RUN: opt < %s -S -passes='require<opt-remark-emit>,unroll' -unroll-runtime=true -unroll-runtime-epilog=true | FileCheck %s -check-prefixes=EPILOG,COMMON
; RUN: opt < %s -S -passes='require<opt-remark-emit>,unroll' -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefixes=PROLOG,COMMON
;
; Restricted versions of unroll (unroll<peeling;no-runtime>, unroll-full) should not be doing runtime unrolling
; even if it is globally enabled through -unroll-runtime option
;
; RUN: opt < %s -S -passes='require<opt-remark-emit>,unroll<peeling;no-runtime>' -unroll-runtime=true -unroll-runtime-epilog=true | FileCheck %s -check-prefixes=NOEPILOG,COMMON
; RUN: opt < %s -S -passes='require<opt-remark-emit>,unroll<peeling;no-runtime>' -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefixes=NOPROLOG,COMMON
; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll-full)' -unroll-runtime=true -unroll-runtime-epilog=true | FileCheck %s -check-prefixes=NOEPILOG,COMMON
; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll-full)' -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefixes=NOPROLOG,COMMON
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@ -14,22 +22,32 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
; EPILOG: %lcmp.mod = icmp ne i32 %xtraiter, 0
; EPILOG: br i1 %lcmp.mod, label %for.body.epil.preheader, label %for.end.loopexit
; NOEPILOG-NOT: %xtraiter = and i32 %n
; PROLOG: %xtraiter = and i32 %n
; PROLOG: %lcmp.mod = icmp ne i32 %xtraiter, 0
; PROLOG: br i1 %lcmp.mod, label %for.body.prol.preheader, label %for.body.prol.loopexit
; NOPROLOG-NOT: %xtraiter = and i32 %n
; EPILOG: for.body.epil:
; EPILOG: %indvars.iv.epil = phi i64 [ %indvars.iv.next.epil, %for.body.epil ], [ %indvars.iv.unr, %for.body.epil.preheader ]
; EPILOG: %epil.iter.sub = sub i32 %epil.iter, 1
; EPILOG: %epil.iter.cmp = icmp ne i32 %epil.iter.sub, 0
; EPILOG: br i1 %epil.iter.cmp, label %for.body.epil, label %for.end.loopexit.epilog-lcssa, !llvm.loop !0
; NOEPILOG: for.body:
; NOEPILOG-NOT: for.body.epil:
; PROLOG: for.body.prol:
; PROLOG: %indvars.iv.prol = phi i64 [ %indvars.iv.next.prol, %for.body.prol ], [ 0, %for.body.prol.preheader ]
; PROLOG: %prol.iter.sub = sub i32 %prol.iter, 1
; PROLOG: %prol.iter.cmp = icmp ne i32 %prol.iter.sub, 0
; PROLOG: br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.prol.loopexit.unr-lcssa, !llvm.loop !0
; NOPROLOG: for.body:
; NOPROLOG-NOT: for.body.prol:
define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly {
entry:
@ -86,6 +104,8 @@ for.end: ; preds = %for.body
; COMMON-LABEL: @foo(
; EPILOG: bb72.2:
; PROLOG: bb72.2:
; NOEPILOG-NOT: bb72.2:
; NOPROLOG-NOT: bb72.2:
define void @foo(i32 %trips) {
entry:
@ -111,9 +131,15 @@ cond_true138:
; EPILOG: for.body.epil:
; EPILOG: br i1 %epil.iter.cmp, label %for.body.epil, label %for.cond.for.end_crit_edge.epilog-lcssa
; NOEPILOG: for.body:
; NOEPILOG-NOT: for.body.epil:
; PROLOG: for.body.prol:
; PROLOG: br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.prol.loopexit
; NOPROLOG: for.body:
; NOPROLOG-NOT: for.body.prol:
define zeroext i16 @down(i16* nocapture %p, i32 %len) nounwind uwtable readonly {
entry:
%cmp2 = icmp eq i32 %len, 0
@ -146,9 +172,15 @@ for.end: ; preds = %for.cond.for.end_cr
; EPILOG: for.body:
; EPILOG-NOT: for.body.epil:
; NOEPILOG: for.body:
; NOEPILOG-NOT: for.body.epil:
; PROLOG: for.body:
; PROLOG-NOT: for.body.prol:
; NOPROLOG: for.body:
; NOPROLOG-NOT: for.body.prol:
define zeroext i16 @test2(i16* nocapture %p, i32 %len) nounwind uwtable readonly {
entry:
%cmp2 = icmp eq i32 %len, 0