Exploit dereferenceable_or_null attribute in LICM pass

Summary:
Allow hoisting of loads from values marked with the dereferenceable_or_null
attribute. For values marked with this attribute, perform
context-sensitive analysis to determine whether the value is known to be
non-null at the point of use.

Patch by Artur Pilipenko!

Reviewers: hfinkel, sanjoy, reames

Reviewed By: reames

Subscribers: llvm-commits

Differential Revision: http://reviews.llvm.org/D9253

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@237593 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Sanjoy Das 2015-05-18 18:07:00 +00:00
parent 9962fd0e2e
commit 0799eb1140
5 changed files with 237 additions and 43 deletions

View File

@ -219,12 +219,14 @@ namespace llvm {
/// are lifetime markers.
bool onlyUsedByLifetimeMarkers(const Value *V);
/// isDereferenceablePointer - Return true if this is always a dereferenceable
/// pointer.
///
/// Test if this value is always a pointer to allocated and suitably aligned
/// memory for a simple load or store.
bool isDereferenceablePointer(const Value *V, const DataLayout &DL);
/// isDereferenceablePointer - Return true if this is always a dereferenceable
/// pointer. If the context instruction is specified perform context-sensitive
/// analysis and return true if the pointer is dereferenceable at the
/// specified instruction.
bool isDereferenceablePointer(const Value *V, const DataLayout &DL,
const Instruction *CtxI = nullptr,
const DominatorTree *DT = nullptr,
const TargetLibraryInfo *TLI = nullptr);
/// isSafeToSpeculativelyExecute - Return true if the instruction does not
/// have any effects besides calculating the result and does not have
@ -239,18 +241,36 @@ namespace llvm {
/// memory leak. It also returns false for instructions related to control
/// flow, specifically terminators and PHI nodes.
///
/// This method only looks at the instruction itself and its operands, so if
/// this method returns true, it is safe to move the instruction as long as
/// the correct dominance relationships for the operands and users hold.
/// However, this method can return true for instructions that read memory;
/// If the CtxI is specified this method performs context-sensitive analysis
/// and returns true if it is safe to execute the instruction immediately
/// before the CtxI.
///
/// If the CtxI is NOT specified this method only looks at the instruction
/// itself and its operands, so if this method returns true, it is safe to
/// move the instruction as long as the correct dominance relationships for
/// the operands and users hold.
///
/// This method can return true for instructions that read memory;
/// for such instructions, moving them may change the resulting value.
bool isSafeToSpeculativelyExecute(const Value *V);
bool isSafeToSpeculativelyExecute(const Value *V,
const Instruction *CtxI = nullptr,
const DominatorTree *DT = nullptr,
const TargetLibraryInfo *TLI = nullptr);
/// isKnownNonNull - Return true if this pointer couldn't possibly be null by
/// its definition. This returns true for allocas, non-extern-weak globals
/// and byval arguments.
bool isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI = nullptr);
/// isKnownNonNullAt - Return true if this pointer couldn't possibly be null.
/// If the context instruction is specified perform context-sensitive analysis
/// and return true if the pointer couldn't possibly be null at the specified
/// instruction.
bool isKnownNonNullAt(const Value *V,
const Instruction *CtxI = nullptr,
const DominatorTree *DT = nullptr,
const TargetLibraryInfo *TLI = nullptr);
/// Return true if it is valid to use the assumptions provided by an
/// assume intrinsic, I, at the point in the control-flow identified by the
/// context instruction, CxtI.

View File

@ -226,13 +226,13 @@ bool sinkRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *,
/// first order w.r.t the DominatorTree. This allows us to visit definitions
/// before uses, allowing us to hoist a loop body in one pass without iteration.
/// Takes DomTreeNode, AliasAnalysis, LoopInfo, DominatorTree, DataLayout,
/// TargetLibraryInfo, Loop, AliasSet information for all instructions of the
/// TargetLibraryInfo, Loop, AliasSet information for all instructions of the
/// loop and loop safety information as arguments. It returns changed status.
bool hoistRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *,
TargetLibraryInfo *, Loop *, AliasSetTracker *,
LICMSafetyInfo *);
/// \brief Try to promote memory values to scalars by sinking stores out of
/// \brief Try to promote memory values to scalars by sinking stores out of
/// the loop and moving loads to before the loop. We do this by looping over
/// the stores in the loop, looking for stores to Must pointers which are
/// loop invariant. It takes AliasSet, Loop exit blocks vector, loop exit blocks

View File

@ -2864,33 +2864,48 @@ bool llvm::onlyUsedByLifetimeMarkers(const Value *V) {
}
static bool isDereferenceableFromAttribute(const Value *BV, APInt Offset,
Type *Ty, const DataLayout &DL) {
Type *Ty, const DataLayout &DL,
const Instruction *CtxI,
const DominatorTree *DT,
const TargetLibraryInfo *TLI) {
assert(Offset.isNonNegative() && "offset can't be negative");
assert(Ty->isSized() && "must be sized");
APInt DerefBytes(Offset.getBitWidth(), 0);
bool CheckForNonNull = false;
if (const Argument *A = dyn_cast<Argument>(BV)) {
DerefBytes = A->getDereferenceableBytes();
if (!DerefBytes.getBoolValue()) {
DerefBytes = A->getDereferenceableOrNullBytes();
CheckForNonNull = true;
}
} else if (auto CS = ImmutableCallSite(BV)) {
DerefBytes = CS.getDereferenceableBytes(0);
if (!DerefBytes.getBoolValue()) {
DerefBytes = CS.getDereferenceableOrNullBytes(0);
CheckForNonNull = true;
}
}
if (DerefBytes.getBoolValue())
if (DerefBytes.uge(Offset + DL.getTypeStoreSize(Ty)))
return true;
if (!CheckForNonNull || isKnownNonNullAt(BV, CtxI, DT, TLI))
return true;
return false;
}
static bool isDereferenceableFromAttribute(const Value *V,
const DataLayout &DL) {
static bool isDereferenceableFromAttribute(const Value *V, const DataLayout &DL,
const Instruction *CtxI,
const DominatorTree *DT,
const TargetLibraryInfo *TLI) {
Type *VTy = V->getType();
Type *Ty = VTy->getPointerElementType();
if (!Ty->isSized())
return false;
APInt Offset(DL.getTypeStoreSizeInBits(VTy), 0);
return isDereferenceableFromAttribute(V, Offset, Ty, DL);
return isDereferenceableFromAttribute(V, Offset, Ty, DL, CtxI, DT, TLI);
}
/// Return true if Value is always a dereferenceable pointer.
@ -2898,6 +2913,9 @@ static bool isDereferenceableFromAttribute(const Value *V,
/// Test if V is always a pointer to allocated and suitably aligned memory for
/// a simple load or store.
static bool isDereferenceablePointer(const Value *V, const DataLayout &DL,
const Instruction *CtxI,
const DominatorTree *DT,
const TargetLibraryInfo *TLI,
SmallPtrSetImpl<const Value *> &Visited) {
// Note that it is not safe to speculate into a malloc'd region because
// malloc may return null.
@ -2918,7 +2936,8 @@ static bool isDereferenceablePointer(const Value *V, const DataLayout &DL,
if (STy->isSized() && DTy->isSized() &&
(DL.getTypeStoreSize(STy) >= DL.getTypeStoreSize(DTy)) &&
(DL.getABITypeAlignment(STy) >= DL.getABITypeAlignment(DTy)))
return isDereferenceablePointer(BC->getOperand(0), DL, Visited);
return isDereferenceablePointer(BC->getOperand(0), DL, CtxI,
DT, TLI, Visited);
}
// Global variables which can't collapse to null are ok.
@ -2930,7 +2949,7 @@ static bool isDereferenceablePointer(const Value *V, const DataLayout &DL,
if (A->hasByValAttr())
return true;
if (isDereferenceableFromAttribute(V, DL))
if (isDereferenceableFromAttribute(V, DL, CtxI, DT, TLI))
return true;
// For GEPs, determine if the indexing lands within the allocated object.
@ -2938,7 +2957,8 @@ static bool isDereferenceablePointer(const Value *V, const DataLayout &DL,
// Conservatively require that the base pointer be fully dereferenceable.
if (!Visited.insert(GEP->getOperand(0)).second)
return false;
if (!isDereferenceablePointer(GEP->getOperand(0), DL, Visited))
if (!isDereferenceablePointer(GEP->getOperand(0), DL, CtxI,
DT, TLI, Visited))
return false;
// Check the indices.
gep_type_iterator GTI = gep_type_begin(GEP);
@ -2972,18 +2992,22 @@ static bool isDereferenceablePointer(const Value *V, const DataLayout &DL,
if (const IntrinsicInst *I = dyn_cast<IntrinsicInst>(V))
if (I->getIntrinsicID() == Intrinsic::experimental_gc_relocate) {
GCRelocateOperands RelocateInst(I);
return isDereferenceablePointer(RelocateInst.getDerivedPtr(), DL,
Visited);
return isDereferenceablePointer(RelocateInst.getDerivedPtr(), DL, CtxI,
DT, TLI, Visited);
}
if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(V))
return isDereferenceablePointer(ASC->getOperand(0), DL, Visited);
return isDereferenceablePointer(ASC->getOperand(0), DL, CtxI,
DT, TLI, Visited);
// If we don't know, assume the worst.
return false;
}
bool llvm::isDereferenceablePointer(const Value *V, const DataLayout &DL) {
bool llvm::isDereferenceablePointer(const Value *V, const DataLayout &DL,
const Instruction *CtxI,
const DominatorTree *DT,
const TargetLibraryInfo *TLI) {
// When dereferenceability information is provided by a dereferenceable
// attribute, we know exactly how many bytes are dereferenceable. If we can
// determine the exact offset to the attributed variable, we can use that
@ -2995,15 +3019,19 @@ bool llvm::isDereferenceablePointer(const Value *V, const DataLayout &DL) {
const Value *BV = V->stripAndAccumulateInBoundsConstantOffsets(DL, Offset);
if (Offset.isNonNegative())
if (isDereferenceableFromAttribute(BV, Offset, Ty, DL))
if (isDereferenceableFromAttribute(BV, Offset, Ty, DL,
CtxI, DT, TLI))
return true;
}
SmallPtrSet<const Value *, 32> Visited;
return ::isDereferenceablePointer(V, DL, Visited);
return ::isDereferenceablePointer(V, DL, CtxI, DT, TLI, Visited);
}
bool llvm::isSafeToSpeculativelyExecute(const Value *V) {
bool llvm::isSafeToSpeculativelyExecute(const Value *V,
const Instruction *CtxI,
const DominatorTree *DT,
const TargetLibraryInfo *TLI) {
const Operator *Inst = dyn_cast<Operator>(V);
if (!Inst)
return false;
@ -3050,7 +3078,7 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V) {
LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeThread))
return false;
const DataLayout &DL = LI->getModule()->getDataLayout();
return isDereferenceablePointer(LI->getPointerOperand(), DL);
return isDereferenceablePointer(LI->getPointerOperand(), DL, CtxI, DT, TLI);
}
case Instruction::Call: {
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
@ -3141,6 +3169,60 @@ bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) {
return false;
}
static bool isKnownNonNullFromDominatingCondition(const Value *V,
const Instruction *CtxI,
const DominatorTree *DT) {
unsigned NumUsesExplored = 0;
for (auto U : V->users()) {
// Avoid massive lists
if (NumUsesExplored >= DomConditionsMaxUses)
break;
NumUsesExplored++;
// Consider only compare instructions uniquely controlling a branch
const ICmpInst *Cmp = dyn_cast<ICmpInst>(U);
if (!Cmp)
continue;
if (DomConditionsSingleCmpUse && !Cmp->hasOneUse())
continue;
for (auto *CmpU : Cmp->users()) {
const BranchInst *BI = dyn_cast<BranchInst>(CmpU);
if (!BI)
continue;
assert(BI->isConditional() && "uses a comparison!");
BasicBlock *NonNullSuccessor = nullptr;
CmpInst::Predicate Pred;
if (match(const_cast<ICmpInst*>(Cmp),
m_c_ICmp(Pred, m_Specific(V), m_Zero()))) {
if (Pred == ICmpInst::ICMP_EQ)
NonNullSuccessor = BI->getSuccessor(1);
else if (Pred == ICmpInst::ICMP_NE)
NonNullSuccessor = BI->getSuccessor(0);
}
if (NonNullSuccessor) {
BasicBlockEdge Edge(BI->getParent(), NonNullSuccessor);
if (Edge.isSingleEdge() && DT->dominates(Edge, CtxI->getParent()))
return true;
}
}
}
return false;
}
/// Return true if the pointer V cannot be null, optionally taking the
/// program point CtxI into account.
///
/// First asks isKnownNonNull(V, TLI), which does not depend on context. If
/// that fails and both a context instruction and a dominator tree were
/// supplied, falls back to looking for a null check on V that dominates CtxI.
///
/// \param V    The value to query.
/// \param CtxI Instruction at which non-nullness is required; may be null,
///             in which case only the context-free query is performed.
/// \param DT   Dominator tree used for the context-sensitive query; may be
///             null, in which case only the context-free query is performed.
/// \param TLI  Forwarded to isKnownNonNull; may be null.
bool llvm::isKnownNonNullAt(const Value *V, const Instruction *CtxI,
                            const DominatorTree *DT,
                            const TargetLibraryInfo *TLI) {
  if (isKnownNonNull(V, TLI))
    return true;

  // CtxI and DT are defaulted independently, so a caller may pass a context
  // instruction without a dominator tree. The dominating-condition analysis
  // dereferences DT, so it can only run when both are available.
  if (!CtxI || !DT)
    return false;

  return ::isKnownNonNullFromDominatingCondition(V, CtxI, DT);
}
OverflowResult llvm::computeOverflowForUnsignedMul(Value *LHS, Value *RHS,
const DataLayout &DL,
AssumptionCache *AC,

View File

@ -82,6 +82,7 @@ static bool isGuaranteedToExecute(const Instruction &Inst,
const LICMSafetyInfo *SafetyInfo);
static bool isSafeToExecuteUnconditionally(const Instruction &Inst,
const DominatorTree *DT,
const TargetLibraryInfo *TLI,
const Loop *CurLoop,
const LICMSafetyInfo *SafetyInfo);
static bool pointerInvalidatedByLoop(Value *V, uint64_t Size,
@ -92,8 +93,8 @@ static Instruction *CloneInstructionInExitBlock(const Instruction &I,
PHINode &PN,
const LoopInfo *LI);
static bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA,
DominatorTree *DT, Loop *CurLoop,
AliasSetTracker *CurAST,
DominatorTree *DT, TargetLibraryInfo *TLI,
Loop *CurLoop, AliasSetTracker *CurAST,
LICMSafetyInfo *SafetyInfo);
namespace {
@ -337,7 +338,7 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
// operands of the instruction are loop invariant.
//
if (isNotUsedInLoop(I, CurLoop) &&
canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo)) {
canSinkOrHoistInst(I, AA, DT, TLI, CurLoop, CurAST, SafetyInfo)) {
++II;
Changed |= sink(I, LI, DT, CurLoop, CurAST);
}
@ -386,8 +387,8 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
// is safe to hoist the instruction.
//
if (CurLoop->hasLoopInvariantOperands(&I) &&
canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo) &&
isSafeToExecuteUnconditionally(I, DT, CurLoop, SafetyInfo))
canSinkOrHoistInst(I, AA, DT, TLI, CurLoop, CurAST, SafetyInfo) &&
isSafeToExecuteUnconditionally(I, DT, TLI, CurLoop, SafetyInfo))
Changed |= hoist(I, CurLoop->getLoopPreheader());
}
@ -425,8 +426,8 @@ void llvm::computeLICMSafetyInfo(LICMSafetyInfo * SafetyInfo, Loop * CurLoop) {
/// instruction.
///
bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA, DominatorTree *DT,
Loop *CurLoop, AliasSetTracker *CurAST,
LICMSafetyInfo *SafetyInfo) {
TargetLibraryInfo *TLI, Loop *CurLoop,
AliasSetTracker *CurAST, LICMSafetyInfo *SafetyInfo) {
// Loads have extra constraints we have to verify before we can hoist them.
if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
if (!LI->isUnordered())
@ -486,7 +487,7 @@ bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA, DominatorTree *DT,
!isa<InsertValueInst>(I))
return false;
return isSafeToExecuteUnconditionally(I, DT, CurLoop, SafetyInfo);
return isSafeToExecuteUnconditionally(I, DT, TLI, CurLoop, SafetyInfo);
}
/// Returns true if a PHINode is a trivially replaceable with an
@ -639,15 +640,16 @@ static bool hoist(Instruction &I, BasicBlock *Preheader) {
return true;
}
/// Only sink or hoist an instruction if it is not a trapping instruction
/// Only sink or hoist an instruction if it is not a trapping instruction,
/// or if the instruction is known not to trap when moved to the preheader.
/// or if it is a trapping instruction and is guaranteed to execute.
///
static bool isSafeToExecuteUnconditionally(const Instruction &Inst,
static bool isSafeToExecuteUnconditionally(const Instruction &Inst,
const DominatorTree *DT,
const TargetLibraryInfo *TLI,
const Loop *CurLoop,
const LICMSafetyInfo *SafetyInfo) {
// If it is not a trapping instruction, it is always safe to hoist.
if (isSafeToSpeculativelyExecute(&Inst))
const Instruction *CtxI = CurLoop->getLoopPreheader()->getTerminator();
if (isSafeToSpeculativelyExecute(&Inst, CtxI, DT, TLI))
return true;
return isGuaranteedToExecute(Inst, DT, CurLoop, SafetyInfo);

View File

@ -164,5 +164,95 @@ for.end: ; preds = %for.inc, %entry
ret void
}
; This test represents the following function:
; void test5(int * __restrict__ a, int *b, int &c, int n) {
; if (c != null)
; for (int i = 0; i < n; ++i)
; if (a[i] > 0)
; a[i] = c*b[i];
; }
; and we want to hoist the load of %c out of the loop. This can be done only
; because the dereferenceable_or_null attribute is on %c and there is a null
; check on %c.
; The checks below expect the load of %c to appear before the for.body label.
; CHECK-LABEL: @test5
; CHECK: load i32, i32* %c, align 4
; CHECK: for.body:
define void @test5(i32* noalias %a, i32* %b, i32* dereferenceable_or_null(4) %c, i32 %n) #0 {
entry:
; Null check on %c: the loop is only reachable through the %not.null edge.
%not_null = icmp ne i32* %c, null
br i1 %not_null, label %not.null, label %for.end
not.null:
; Skip the loop entirely when %n is not positive.
%cmp11 = icmp sgt i32 %n, 0
br i1 %cmp11, label %for.body, label %for.end
for.body: ; preds = %not.null, %for.inc
%indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %not.null ]
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
%0 = load i32, i32* %arrayidx, align 4
%cmp1 = icmp sgt i32 %0, 0
br i1 %cmp1, label %if.then, label %for.inc
if.then: ; preds = %for.body
; Conditionally executed, loop-invariant load of %c: the hoisting candidate.
%1 = load i32, i32* %c, align 4
%arrayidx3 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
%2 = load i32, i32* %arrayidx3, align 4
%mul = mul nsw i32 %2, %1
store i32 %mul, i32* %arrayidx, align 4
br label %for.inc
for.inc: ; preds = %for.body, %if.then
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, %n
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.inc, %entry, %not.null
ret void
}
; This is the same as @test5, but without the null check on %c.
; Without this check, we should not hoist the load of %c.
; This test case has an icmp on c but the use of this comparison is
; not a branch.
; The checks below expect the load of %c to remain after the if.then label.
; CHECK-LABEL: @test6
; CHECK: if.then:
; CHECK: load i32, i32* %c, align 4
define i1 @test6(i32* noalias %a, i32* %b, i32* dereferenceable_or_null(4) %c, i32 %n) #0 {
entry:
; %not_null is computed here but only returned at for.end; no branch uses it.
%not_null = icmp ne i32* %c, null
%cmp11 = icmp sgt i32 %n, 0
br i1 %cmp11, label %for.body, label %for.end
for.body: ; preds = %entry, %for.inc
%indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
%0 = load i32, i32* %arrayidx, align 4
%cmp1 = icmp sgt i32 %0, 0
br i1 %cmp1, label %if.then, label %for.inc
if.then: ; preds = %for.body
; Conditionally executed load of %c with no guarding null check.
%1 = load i32, i32* %c, align 4
%arrayidx3 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
%2 = load i32, i32* %arrayidx3, align 4
%mul = mul nsw i32 %2, %1
store i32 %mul, i32* %arrayidx, align 4
br label %for.inc
for.inc: ; preds = %for.body, %if.then
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, %n
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.inc, %entry
ret i1 %not_null
}
attributes #0 = { nounwind uwtable }