Fold compares irrespective of whether allocation can be elided
Summary:
When a non-escaping pointer is compared to a global value, the comparison can be folded even if the corresponding malloc/allocation call cannot be elided. We need to make sure the global value is not null, since comparisons to null cannot be folded. In the future, we should also handle cases where the comparison instruction dominates the pointer escape.

Reviewers: sanjoy

Subscribers: s.egerton, llvm-commits

Differential Revision: http://reviews.llvm.org/D19549

llvm-svn: 268390
This commit is contained in:
parent c0965c3577
commit 2567f150e9
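To make the effect concrete before the diff, here is a small illustrative IR sketch adapted from the tests added below (the function name is made up for illustration; @gp, the !nonnull !0 metadata, and the "deopt" bundle mirror the test file). With this change, %cmp folds to false even though the call to @malloc cannot be removed:

@gp = global i32* null, align 8
declare i8* @malloc(i64)
declare void @f()

define i1 @example_compare_with_unremovable_malloc() {
  %m = call i8* @malloc(i64 24)
  %bc = bitcast i8* %m to i32*
  %lgp = load i32*, i32** @gp, align 8, !nonnull !0
  ; %m never escapes (the "deopt" bundle operand is nocapture) and %lgp is
  ; known non-null, so the comparison can be folded to false even though the
  ; malloc must stay for the deopt bundle below.
  %cmp = icmp eq i32* %bc, %lgp
  tail call void @f() [ "deopt"(i8* %m) ]
  ret i1 %cmp
}

!0 = !{}

The two requirements are that the allocated pointer does not escape (capture tracking now ignores the comparison against the value loaded from the global) and that the loaded global value is known non-null.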
@@ -300,7 +300,7 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker) {
           Worklist.push_back(&UU);
       }
       break;
-    case Instruction::ICmp:
+    case Instruction::ICmp: {
       // Don't count comparisons of a no-alias return value against null as
       // captures. This allows us to ignore comparisons of malloc results
       // with null, for example.
@@ -309,11 +309,19 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker) {
         if (CPN->getType()->getAddressSpace() == 0)
           if (isNoAliasCall(V->stripPointerCasts()))
             break;
+      // Comparison against value stored in global variable. Given the pointer
+      // does not escape, its value cannot be guessed and stored separately in a
+      // global variable.
+      unsigned OtherIndex = (I->getOperand(0) == V) ? 1 : 0;
+      auto *LI = dyn_cast<LoadInst>(I->getOperand(OtherIndex));
+      if (LI && isa<GlobalVariable>(LI->getPointerOperand()))
+        break;
       // Otherwise, be conservative. There are crazy ways to capture pointers
       // using comparisons.
       if (Tracker->captured(U))
         return;
       break;
+    }
     default:
       // Something else - be conservative and say it is captured.
       if (Tracker->captured(U))
@@ -21,6 +21,7 @@
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CaptureTracking.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/ValueTracking.h"
@@ -1922,10 +1923,10 @@ static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred,
 // If the C and C++ standards are ever made sufficiently restrictive in this
 // area, it may be possible to update LLVM's semantics accordingly and reinstate
 // this optimization.
-static Constant *computePointerICmp(const DataLayout &DL,
-                                    const TargetLibraryInfo *TLI,
-                                    CmpInst::Predicate Pred, Value *LHS,
-                                    Value *RHS) {
+static Constant *
+computePointerICmp(const DataLayout &DL, const TargetLibraryInfo *TLI,
+                   const DominatorTree *DT, CmpInst::Predicate Pred,
+                   const Instruction *CxtI, Value *LHS, Value *RHS) {
   // First, skip past any trivial no-ops.
   LHS = LHS->stripPointerCasts();
   RHS = RHS->stripPointerCasts();
@@ -2081,6 +2082,21 @@ static Constant *computePointerICmp(const DataLayout &DL,
         (IsNAC(RHSUObjs) && IsAllocDisjoint(LHSUObjs)))
       return ConstantInt::get(GetCompareTy(LHS),
                               !CmpInst::isTrueWhenEqual(Pred));
+
+    // Fold comparisons for non-escaping pointer even if the allocation call
+    // cannot be elided. We cannot fold malloc comparison to null. Also, the
+    // dynamic allocation call could be either of the operands.
+    Value *MI = nullptr;
+    if (isAllocLikeFn(LHS, TLI) && llvm::isKnownNonNullAt(RHS, CxtI, DT, TLI))
+      MI = LHS;
+    else if (isAllocLikeFn(RHS, TLI) &&
+             llvm::isKnownNonNullAt(LHS, CxtI, DT, TLI))
+      MI = RHS;
+    // FIXME: We should also fold the compare when the pointer escapes, but the
+    // compare dominates the pointer escape
+    if (MI && !PointerMayBeCaptured(MI, true, true))
+      return ConstantInt::get(GetCompareTy(LHS),
+                              CmpInst::isFalseWhenEqual(Pred));
   }

   // Otherwise, fail.
@@ -3027,7 +3043,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
   // Simplify comparisons of related pointers using a powerful, recursive
   // GEP-walk when we have target data available..
   if (LHS->getType()->isPointerTy())
-    if (Constant *C = computePointerICmp(Q.DL, Q.TLI, Pred, LHS, RHS))
+    if (auto *C = computePointerICmp(Q.DL, Q.TLI, Q.DT, Pred, Q.CxtI, LHS, RHS))
       return C;

   if (GetElementPtrInst *GLHS = dyn_cast<GetElementPtrInst>(LHS)) {
@@ -21,24 +21,66 @@ define i1 @compare_global_trivialne() {
   %cmp = icmp ne i32* %bc, %lgp
   ret i1 %cmp
 ; CHECK-LABEL: compare_global_trivialne
-; CHECK: ret i1 true
+; CHECK: ret i1 true
 }


 ; Although the %m is marked nocapture in the deopt operand in call to function f,
 ; we cannot remove the alloc site: call to malloc
-; FIXME: The comparison should fold to false irrespective of whether the call to malloc can be elided or not
+; The comparison should fold to false irrespective of whether the call to malloc can be elided or not
 declare void @f()
-define i32 @compare_and_call_with_deopt() {
+define i1 @compare_and_call_with_deopt() {
 ; CHECK-LABEL: compare_and_call_with_deopt
   %m = call i8* @malloc(i64 24)
   %bc = bitcast i8* %m to i32*
-  %lgp = load i32*, i32** @gp, align 8
-  %cmp = icmp eq i32* %bc, %lgp
-  %rt = zext i1 %cmp to i32
+  %lgp = load i32*, i32** @gp, align 8, !nonnull !0
+  %cmp = icmp eq i32* %lgp, %bc
   tail call void @f() [ "deopt"(i8* %m) ]
-  ret i32 %rt
-; CHECK: ret i32 %rt
+  ret i1 %cmp
+; CHECK: ret i1 false
 }

+; Same function as above with deopt operand in function f, but comparison is NE
+define i1 @compare_ne_and_call_with_deopt() {
+; CHECK-LABEL: compare_ne_and_call_with_deopt
+  %m = call i8* @malloc(i64 24)
+  %bc = bitcast i8* %m to i32*
+  %lgp = load i32*, i32** @gp, align 8, !nonnull !0
+  %cmp = icmp ne i32* %lgp, %bc
+  tail call void @f() [ "deopt"(i8* %m) ]
+  ret i1 %cmp
+; CHECK: ret i1 true
+}
+
+; Same function as above, but global not marked nonnull, and we cannot fold the comparison
+define i1 @compare_ne_global_maybe_null() {
+; CHECK-LABEL: compare_ne_global_maybe_null
+  %m = call i8* @malloc(i64 24)
+  %bc = bitcast i8* %m to i32*
+  %lgp = load i32*, i32** @gp
+  %cmp = icmp ne i32* %lgp, %bc
+  tail call void @f() [ "deopt"(i8* %m) ]
+  ret i1 %cmp
+; CHECK: ret i1 %cmp
+}
+
+; FIXME: The comparison should fold to false since %m escapes (call to function escape)
+; after the comparison.
+declare void @escape(i8*)
+define i1 @compare_and_call_after() {
+; CHECK-LABEL: compare_and_call_after
+  %m = call i8* @malloc(i64 24)
+  %bc = bitcast i8* %m to i32*
+  %lgp = load i32*, i32** @gp, align 8, !nonnull !0
+  %cmp = icmp eq i32* %bc, %lgp
+  br i1 %cmp, label %escape_call, label %just_return
+
+escape_call:
+  call void @escape(i8* %m)
+  ret i1 true
+
+just_return:
+  ret i1 %cmp
+}
+
 define i1 @compare_distinct_mallocs() {
@@ -63,7 +105,7 @@ define i1 @compare_samepointer_under_bitcast() {
 }

 ; the compare is folded to true since the folding compare looks through bitcasts.
-; call to malloc and the bitcast instructions are elided after that since there are no uses of the malloc
+; The malloc call for %m cannot be elided since it is used in the call to function f.
 define i1 @compare_samepointer_escaped() {
   %m = call i8* @malloc(i64 4)
   %bc = bitcast i8* %m to i32*
@@ -77,6 +119,34 @@ define i1 @compare_samepointer_escaped() {
 ; CHECK: ret i1 true
 }

+; Technically, we can fold the %cmp2 comparison, even though %m escapes through
+; the ret statement since `ret` terminates the function and we cannot reach from
+; the ret to cmp.
+; FIXME: Folding this %cmp2 when %m escapes through ret could be an issue with
+; cross-threading data dependencies since we do not make the distinction between
+; atomic and non-atomic loads in capture tracking.
+define i8* @compare_ret_escape(i8* %c) {
+  %m = call i8* @malloc(i64 4)
+  %n = call i8* @malloc(i64 4)
+  %cmp = icmp eq i8* %n, %c
+  br i1 %cmp, label %retst, label %chk
+
+retst:
+  ret i8* %m
+
+chk:
+  %bc = bitcast i8* %m to i32*
+  %lgp = load i32*, i32** @gp, align 8, !nonnull !0
+  %cmp2 = icmp eq i32* %bc, %lgp
+  br i1 %cmp2, label %retst, label %chk2
+
+chk2:
+  ret i8* %n
+; CHECK-LABEL: compare_ret_escape
+; CHECK: %cmp = icmp eq i8* %n, %c
+; CHECK: %cmp2 = icmp eq i32* %bc, %lgp
+}
+
 ; The malloc call for %m cannot be elided since it is used in the call to function f.
 ; However, the cmp can be folded to true as %n doesn't escape and %m, %n are distinct allocations
 define i1 @compare_distinct_pointer_escape() {
@@ -90,3 +160,5 @@ define i1 @compare_distinct_pointer_escape() {
 ; CHECK-NEXT: tail call void @f() [ "deopt"(i8* %m) ]
 ; CHECK-NEXT: ret i1 true
 }
+
+!0 = !{}