Fold compares irrespective of whether allocation can be elided

Summary
When a non-escaping pointer is compared to a global value, the
comparison can be folded even if the corresponding malloc/allocation
call cannot be elided.
We need to make sure the global value is known to be non-null, since
comparisons to null cannot be folded (the allocation itself may return null).

In the future, we should also handle cases where the comparison
instruction dominates the pointer escape.
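
For illustration only (not part of this commit), a minimal IR sketch of the pattern that now folds, modeled on the new tests below; the @g, @f2, and !0 names are made up:

@g = external global i32*

declare noalias i8* @malloc(i64)
declare void @f2()

define i1 @example() {
  %m = call i8* @malloc(i64 24)
  %bc = bitcast i8* %m to i32*
  ; !nonnull makes the loaded pointer known non-null.
  %lgp = load i32*, i32** @g, align 8, !nonnull !0
  ; %m never escapes (a deopt bundle operand does not capture it), so even
  ; though the malloc call must stay, the icmp simplifies to false.
  %cmp = icmp eq i32* %bc, %lgp
  tail call void @f2() [ "deopt"(i8* %m) ]
  ret i1 %cmp
}

!0 = !{}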

Reviewers: sanjoy
Subscribers: s.egerton, llvm-commits

Differential Revision: http://reviews.llvm.org/D19549

llvm-svn: 268390
Anna Thomas 2016-05-03 14:58:21 +00:00
parent c0965c3577
commit 2567f150e9
3 changed files with 111 additions and 15 deletions


@@ -300,7 +300,7 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker) {
           Worklist.push_back(&UU);
       }
       break;
-    case Instruction::ICmp:
+    case Instruction::ICmp: {
       // Don't count comparisons of a no-alias return value against null as
       // captures. This allows us to ignore comparisons of malloc results
       // with null, for example.
@@ -309,11 +309,19 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker) {
       if (CPN->getType()->getAddressSpace() == 0)
         if (isNoAliasCall(V->stripPointerCasts()))
           break;
+      // Comparison against value stored in global variable. Given the pointer
+      // does not escape, its value cannot be guessed and stored separately in a
+      // global variable.
+      unsigned OtherIndex = (I->getOperand(0) == V) ? 1 : 0;
+      auto *LI = dyn_cast<LoadInst>(I->getOperand(OtherIndex));
+      if (LI && isa<GlobalVariable>(LI->getPointerOperand()))
+        break;
       // Otherwise, be conservative. There are crazy ways to capture pointers
       // using comparisons.
       if (Tracker->captured(U))
         return;
       break;
+    }
     default:
       // Something else - be conservative and say it is captured.
       if (Tracker->captured(U))


@@ -21,6 +21,7 @@
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CaptureTracking.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/ValueTracking.h"
@@ -1922,10 +1923,10 @@ static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred,
 // If the C and C++ standards are ever made sufficiently restrictive in this
 // area, it may be possible to update LLVM's semantics accordingly and reinstate
 // this optimization.
-static Constant *computePointerICmp(const DataLayout &DL,
-                                    const TargetLibraryInfo *TLI,
-                                    CmpInst::Predicate Pred, Value *LHS,
-                                    Value *RHS) {
+static Constant *
+computePointerICmp(const DataLayout &DL, const TargetLibraryInfo *TLI,
+                   const DominatorTree *DT, CmpInst::Predicate Pred,
+                   const Instruction *CxtI, Value *LHS, Value *RHS) {
   // First, skip past any trivial no-ops.
   LHS = LHS->stripPointerCasts();
   RHS = RHS->stripPointerCasts();
@@ -2081,6 +2082,21 @@ static Constant *computePointerICmp(const DataLayout &DL,
         (IsNAC(RHSUObjs) && IsAllocDisjoint(LHSUObjs)))
       return ConstantInt::get(GetCompareTy(LHS),
                               !CmpInst::isTrueWhenEqual(Pred));
+
+    // Fold comparisons for non-escaping pointer even if the allocation call
+    // cannot be elided. We cannot fold malloc comparison to null. Also, the
+    // dynamic allocation call could be either of the operands.
+    Value *MI = nullptr;
+    if (isAllocLikeFn(LHS, TLI) && llvm::isKnownNonNullAt(RHS, CxtI, DT, TLI))
+      MI = LHS;
+    else if (isAllocLikeFn(RHS, TLI) &&
+             llvm::isKnownNonNullAt(LHS, CxtI, DT, TLI))
+      MI = RHS;
+    // FIXME: We should also fold the compare when the pointer escapes, but the
+    // compare dominates the pointer escape
+    if (MI && !PointerMayBeCaptured(MI, true, true))
+      return ConstantInt::get(GetCompareTy(LHS),
+                              CmpInst::isFalseWhenEqual(Pred));
   }
 
   // Otherwise, fail.
@@ -3027,7 +3043,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
   // Simplify comparisons of related pointers using a powerful, recursive
   // GEP-walk when we have target data available..
   if (LHS->getType()->isPointerTy())
-    if (Constant *C = computePointerICmp(Q.DL, Q.TLI, Pred, LHS, RHS))
+    if (auto *C = computePointerICmp(Q.DL, Q.TLI, Q.DT, Pred, Q.CxtI, LHS, RHS))
       return C;
 
   if (GetElementPtrInst *GLHS = dyn_cast<GetElementPtrInst>(LHS)) {


@@ -21,24 +21,66 @@ define i1 @compare_global_trivialne() {
   %cmp = icmp ne i32* %bc, %lgp
   ret i1 %cmp
 ; CHECK-LABEL: compare_global_trivialne
-; CHECK: ret i1 true 
+; CHECK: ret i1 true
 }
 
 ; Although the %m is marked nocapture in the deopt operand in call to function f,
 ; we cannot remove the alloc site: call to malloc
-; FIXME: The comparison should fold to false irrespective of whether the call to malloc can be elided or not
+; The comparison should fold to false irrespective of whether the call to malloc can be elided or not
 declare void @f()
-define i32 @compare_and_call_with_deopt() {
+define i1 @compare_and_call_with_deopt() {
 ; CHECK-LABEL: compare_and_call_with_deopt
   %m = call i8* @malloc(i64 24)
   %bc = bitcast i8* %m to i32*
-  %lgp = load i32*, i32** @gp, align 8
-  %cmp = icmp eq i32* %bc, %lgp
-  %rt = zext i1 %cmp to i32
+  %lgp = load i32*, i32** @gp, align 8, !nonnull !0
+  %cmp = icmp eq i32* %lgp, %bc
   tail call void @f() [ "deopt"(i8* %m) ]
-  ret i32 %rt
-; CHECK: ret i32 %rt
+  ret i1 %cmp
+; CHECK: ret i1 false
 }
 
+; Same function as above with deopt operand in function f, but comparison is NE
+define i1 @compare_ne_and_call_with_deopt() {
+; CHECK-LABEL: compare_ne_and_call_with_deopt
+  %m = call i8* @malloc(i64 24)
+  %bc = bitcast i8* %m to i32*
+  %lgp = load i32*, i32** @gp, align 8, !nonnull !0
+  %cmp = icmp ne i32* %lgp, %bc
+  tail call void @f() [ "deopt"(i8* %m) ]
+  ret i1 %cmp
+; CHECK: ret i1 true
+}
+
+; Same function as above, but global not marked nonnull, and we cannot fold the comparison
+define i1 @compare_ne_global_maybe_null() {
+; CHECK-LABEL: compare_ne_global_maybe_null
+  %m = call i8* @malloc(i64 24)
+  %bc = bitcast i8* %m to i32*
+  %lgp = load i32*, i32** @gp
+  %cmp = icmp ne i32* %lgp, %bc
+  tail call void @f() [ "deopt"(i8* %m) ]
+  ret i1 %cmp
+; CHECK: ret i1 %cmp
+}
+
+; FIXME: The comparison should fold to false since %m escapes (call to function escape)
+; after the comparison.
+declare void @escape(i8*)
+define i1 @compare_and_call_after() {
+; CHECK-LABEL: compare_and_call_after
+  %m = call i8* @malloc(i64 24)
+  %bc = bitcast i8* %m to i32*
+  %lgp = load i32*, i32** @gp, align 8, !nonnull !0
+  %cmp = icmp eq i32* %bc, %lgp
+  br i1 %cmp, label %escape_call, label %just_return
+
+escape_call:
+  call void @escape(i8* %m)
+  ret i1 true
+
+just_return:
+  ret i1 %cmp
+}
+
 define i1 @compare_distinct_mallocs() {
@@ -63,7 +105,7 @@ define i1 @compare_samepointer_under_bitcast() {
 }
 
 ; the compare is folded to true since the folding compare looks through bitcasts.
-; call to malloc and the bitcast instructions are elided after that since there are no uses of the malloc
+; The malloc call for %m cannot be elided since it is used in the call to function f.
 define i1 @compare_samepointer_escaped() {
   %m = call i8* @malloc(i64 4)
   %bc = bitcast i8* %m to i32*
@@ -77,6 +119,34 @@ define i1 @compare_samepointer_escaped() {
 ; CHECK: ret i1 true
 }
 
+; Technically, we can fold the %cmp2 comparison, even though %m escapes through
+; the ret statement since `ret` terminates the function and we cannot reach from
+; the ret to cmp.
+; FIXME: Folding this %cmp2 when %m escapes through ret could be an issue with
+; cross-threading data dependencies since we do not make the distinction between
+; atomic and non-atomic loads in capture tracking.
+define i8* @compare_ret_escape(i8* %c) {
+  %m = call i8* @malloc(i64 4)
+  %n = call i8* @malloc(i64 4)
+  %cmp = icmp eq i8* %n, %c
+  br i1 %cmp, label %retst, label %chk
+
+retst:
+  ret i8* %m
+
+chk:
+  %bc = bitcast i8* %m to i32*
+  %lgp = load i32*, i32** @gp, align 8, !nonnull !0
+  %cmp2 = icmp eq i32* %bc, %lgp
+  br i1 %cmp2, label %retst, label %chk2
+
+chk2:
+  ret i8* %n
+; CHECK-LABEL: compare_ret_escape
+; CHECK: %cmp = icmp eq i8* %n, %c
+; CHECK: %cmp2 = icmp eq i32* %bc, %lgp
+}
+
 ; The malloc call for %m cannot be elided since it is used in the call to function f.
 ; However, the cmp can be folded to true as %n doesnt escape and %m, %n are distinct allocations
 define i1 @compare_distinct_pointer_escape() {
@@ -90,3 +160,5 @@ define i1 @compare_distinct_pointer_escape() {
 ; CHECK-NEXT: tail call void @f() [ "deopt"(i8* %m) ]
 ; CHECK-NEXT: ret i1 true
 }
+
+!0 = !{}