[LoopVectorize] Register cloned assumptions

InstCombine cannot effectively remove redundant assumptions unless they are
registered in the assumption cache.  The vectorizer can create identical
assumptions but doesn't register them with the cache, resulting in
slower compile times because InstCombine tries to reason about a lot
more assumptions.

Fix this by registering the cloned assumptions.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@265800 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
David Majnemer 2016-04-08 16:37:10 +00:00
parent 7500ba0386
commit 951ea8be17
2 changed files with 56 additions and 10 deletions

View File

@ -314,13 +314,13 @@ public:
InnerLoopVectorizer(Loop *OrigLoop, PredicatedScalarEvolution &PSE,
LoopInfo *LI, DominatorTree *DT,
const TargetLibraryInfo *TLI,
const TargetTransformInfo *TTI, unsigned VecWidth,
unsigned UnrollFactor)
const TargetTransformInfo *TTI, AssumptionCache *AC,
unsigned VecWidth, unsigned UnrollFactor)
: OrigLoop(OrigLoop), PSE(PSE), LI(LI), DT(DT), TLI(TLI), TTI(TTI),
VF(VecWidth), UF(UnrollFactor), Builder(PSE.getSE()->getContext()),
Induction(nullptr), OldInduction(nullptr), WidenMap(UnrollFactor),
TripCount(nullptr), VectorTripCount(nullptr), Legal(nullptr),
AddedSafetyChecks(false) {}
AC(AC), VF(VecWidth), UF(UnrollFactor),
Builder(PSE.getSE()->getContext()), Induction(nullptr),
OldInduction(nullptr), WidenMap(UnrollFactor), TripCount(nullptr),
VectorTripCount(nullptr), Legal(nullptr), AddedSafetyChecks(false) {}
// Perform the actual loop widening (vectorization).
// MinimumBitWidths maps scalar integer values to the smallest bitwidth they
@ -524,6 +524,8 @@ protected:
const TargetLibraryInfo *TLI;
/// Target Transform Info.
const TargetTransformInfo *TTI;
/// Assumption Cache.
AssumptionCache *AC;
/// \brief LoopVersioning. It's only set up (non-null) if memchecks were
/// used.
@ -591,8 +593,10 @@ public:
InnerLoopUnroller(Loop *OrigLoop, PredicatedScalarEvolution &PSE,
LoopInfo *LI, DominatorTree *DT,
const TargetLibraryInfo *TLI,
const TargetTransformInfo *TTI, unsigned UnrollFactor)
: InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, 1, UnrollFactor) {}
const TargetTransformInfo *TTI, AssumptionCache *AC,
unsigned UnrollFactor)
: InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, 1,
UnrollFactor) {}
private:
void scalarizeInstruction(Instruction *Instr,
@ -1957,7 +1961,7 @@ struct LoopVectorize : public FunctionPass {
assert(IC > 1 && "interleave count should not be 1 or 0");
// If we decided that it is not legal to vectorize the loop then
// interleave it.
InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, IC);
InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, AC, IC);
Unroller.vectorize(&LVL, CM.MinBWs);
emitOptimizationRemark(F->getContext(), LV_NAME, *F, L->getStartLoc(),
@ -1965,7 +1969,7 @@ struct LoopVectorize : public FunctionPass {
Twine(IC) + ")");
} else {
// If we decided that it is *legal* to vectorize the loop then do it.
InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, VF.Width, IC);
InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, VF.Width, IC);
LB.vectorize(&LVL, CM.MinBWs);
++LoopsVectorized;
@ -2728,6 +2732,11 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr,
// Place the cloned scalar in the new loop.
Builder.Insert(Cloned);
// If we just cloned a new assumption, add it to the assumption cache.
if (auto *II = dyn_cast<IntrinsicInst>(Cloned))
if (II->getIntrinsicID() == Intrinsic::assume)
AC->registerAssumption(II);
// If the original scalar returns a value we need to place it in a vector
// so that future users will be able to use it.
if (!IsVoidRetTy)
@ -6096,6 +6105,11 @@ void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr,
// Place the cloned scalar in the new loop.
Builder.Insert(Cloned);
// If we just cloned a new assumption, add it to the assumption cache.
if (auto *II = dyn_cast<IntrinsicInst>(Cloned))
if (II->getIntrinsicID() == Intrinsic::assume)
AC->registerAssumption(II);
// If the original scalar returns a value we need to place it in a vector
// so that future users will be able to use it.
if (!IsVoidRetTy)

View File

@ -0,0 +1,32 @@
; RUN: opt < %s -loop-vectorize -instcombine -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -S | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Regression test input: a counted loop whose body contains a single
; @llvm.assume call on a condition computed outside the loop. The FileCheck
; directives inside the loop body require that the output of the RUN pipeline
; contains exactly one call to @llvm.assume.
;
; The function has no return instruction: when the inner counter reaches 256
; control branches back to the preheader and the loop starts over.
define void @test1() {
entry:
%alloca = alloca float, align 4
br label %loop_exit.dim.11.critedge
loop_exit.dim.11.critedge: ; preds = %entry
; Compute the assumed condition once, outside the loop: the address of the
; alloca, masked with 4, compares equal to zero.
%ptrint = ptrtoint float* %alloca to i64
%maskedptr = and i64 %ptrint, 4
%maskcond = icmp eq i64 %maskedptr, 0
br label %loop_header.dim.017.preheader
loop_header.dim.017.preheader: ; preds = %loop_body.dim.018, %loop_exit.dim.11.critedge
br label %loop_body.dim.018
loop_body.dim.018: ; preds = %loop_body.dim.018, %loop_header.dim.017.preheader
; Induction variable: starts at 0 on entry from the preheader and takes the
; incremented value %0 on the back edge.
%invar_address.dim.019.0135 = phi i64 [ 0, %loop_header.dim.017.preheader ], [ %0, %loop_body.dim.018 ]
; The assumption is identical on every iteration because %maskcond is defined
; outside the loop.
call void @llvm.assume(i1 %maskcond)
; CHECK: call void @llvm.assume(
; CHECK-NOT: call void @llvm.assume(
%0 = add nuw nsw i64 %invar_address.dim.019.0135, 1
%1 = icmp eq i64 %0, 256
; Leave the inner loop for the preheader once the counter hits 256; otherwise
; take the back edge.
br i1 %1, label %loop_header.dim.017.preheader, label %loop_body.dim.018
}
; Function Attrs: nounwind
declare void @llvm.assume(i1) #0
attributes #0 = { nounwind }