[InstCombine] Look through PHIs, GEPs, IntToPtrs and PtrToInts to expose more constants when comparing GEPs

Summary:
When comparing two GEP instructions which have the same base pointer
and one of them has a constant index, it is possible to only compare
indices, transforming it to a compare with a constant. This removes
one use for the GEP instruction with the constant index, can reduce
register pressure and can sometimes lead to removing the comparisson
entirely.

InstCombine was already doing this when comparing two GEPs if the
base pointers were the same. However, in the case where we have
complex pointer arithmetic (GEPs applied to GEPs, PHIs of GEPs,
conversions to or from integers, etc) the value of the original
base pointer will be hidden to the optimizer and this transformation
will be disabled.

This change detects when the two sides of the comparison can be
expressed as GEPs with the same base pointer, even if they don't
appear as such in the IR. The transformation will convert all the
pointer arithmetic to arithmetic done on indices and all the
relevant uses of GEPs to GEPs with a common base pointer. The
GEP comparison will be converted to a comparison done on indices.

Reviewers: majnemer, jmolloy

Subscribers: hfinkel, jevinskie, jmolloy, aadg, llvm-commits

Differential Revision: http://reviews.llvm.org/D15146

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@257064 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Silviu Baranga 2016-01-07 14:56:08 +00:00
parent 83fc76e8eb
commit f3ba9f9b6a
2 changed files with 422 additions and 3 deletions

View File

@ -13,6 +13,7 @@
#include "InstCombineInternal.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
@ -595,6 +596,320 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC,
return IC.Builder->CreateAdd(VariableIdx, OffsetVal, "offset");
}
/// Returns true if we can rewrite Start as a GEP with pointer Base
/// and some integer offset. The nodes that need to be re-written
/// for this transformation will be added to Explored.
static bool canRewriteGEPAsOffset(Value *Start, Value *Base,
const DataLayout &DL,
SetVector<Value *> &Explored) {
SmallVector<Value *, 16> WorkList(1, Start);
Explored.insert(Base);
// The following traversal gives us an order which can be used
// when doing the final transformation. Since in the final
// transformation we create the PHI replacement instructions first,
// we don't have to get them in any particular order.
//
// However, for other instructions we will have to traverse the
// operands of an instruction first, which means that we have to
// do a post-order traversal.
while (!WorkList.empty()) {
SetVector<PHINode *> PHIs;
while (!WorkList.empty()) {
if (Explored.size() >= 100)
return false;
Value *V = WorkList.back();
if (Explored.count(V) != 0) {
WorkList.pop_back();
continue;
}
if (!isa<IntToPtrInst>(V) && !isa<PtrToIntInst>(V) &&
!isa<GEPOperator>(V) && !isa<PHINode>(V))
// We've found some value that we can't explore which is different from
// the base. Therefore we can't do this transformation.
return false;
if (isa<IntToPtrInst>(V) || isa<PtrToIntInst>(V)) {
auto *CI = dyn_cast<CastInst>(V);
if (!CI->isNoopCast(DL))
return nullptr;
if (Explored.count(CI->getOperand(0)) == 0)
WorkList.push_back(CI->getOperand(0));
}
if (auto *GEP = dyn_cast<GEPOperator>(V)) {
// We're limiting the GEP to having one index. This will preserve
// the original pointer type. We could handle more cases in the
// future.
if (GEP->getNumIndices() != 1 || !GEP->isInBounds())
return nullptr;
if (Explored.count(GEP->getOperand(0)) == 0)
WorkList.push_back(GEP->getOperand(0));
}
if (WorkList.back() == V) {
WorkList.pop_back();
// We've finished visiting this node, mark it as such.
Explored.insert(V);
}
if (auto *PN = dyn_cast<PHINode>(V)) {
Explored.insert(PN);
PHIs.insert(PN);
}
}
// Explore the PHI nodes further.
for (auto *PN : PHIs)
for (Value *Op : PN->incoming_values())
if (Explored.count(Op) == 0)
WorkList.push_back(Op);
}
// Make sure that we can do this. Since we can't insert GEPs in a basic
// block before a PHI node, we can't easily do this transformation if
// we have PHI node users of transformed instructions.
for (Value *Val : Explored) {
for (Value *Use : Val->uses()) {
auto *PHI = dyn_cast<PHINode>(Use);
auto *Inst = dyn_cast<Instruction>(Val);
if (Inst == Base || Inst == PHI || !Inst || !PHI ||
Explored.count(PHI) == 0)
continue;
if (PHI->getParent() == Inst->getParent())
return false;
}
}
return true;
}
// Sets the appropriate insert point on Builder where we can add
// a replacement Instruction for V (if that is possible).
static void setInsertionPoint(IRBuilder<> &Builder, Value *V,
bool Before = true) {
if (auto *PHI = dyn_cast<PHINode>(V)) {
Builder.SetInsertPoint(&*PHI->getParent()->getFirstInsertionPt());
return;
}
if (auto *I = dyn_cast<Instruction>(V)) {
if (!Before)
I = &*std::next(I->getIterator());
Builder.SetInsertPoint(I);
return;
}
if (auto *A = dyn_cast<Argument>(V)) {
// Set the insertion point in the entry block.
BasicBlock &Entry = A->getParent()->getEntryBlock();
Builder.SetInsertPoint(&*Entry.getFirstInsertionPt());
return;
}
// Otherwise, this is a constant and we don't need to set a new
// insertion point.
assert(isa<ConstantExpr>(V) && "Setting insertion point for unknown value!");
}
/// Returns a re-written value of Start as an indexed GEP using Base as a
/// pointer.
static Value *rewriteGEPAsOffset(Value *Start, Value *Base,
const DataLayout &DL,
SetVector<Value *> &Explored) {
// Perform all the substitutions. This is a bit tricky because we can
// have cycles in our use-def chains.
// 1. Create the PHI nodes without any incoming values.
// 2. Create all the other values.
// 3. Add the edges for the PHI nodes.
// 4. Emit GEPs to get the original pointers.
// 5. Remove the original instructions.
Type *IndexType = IntegerType::get(
Base->getContext(), DL.getPointerTypeSizeInBits(Start->getType()));
DenseMap<Value *, Value *> NewInsts;
NewInsts[Base] = ConstantInt::getNullValue(IndexType);
// Create the new PHI nodes, without adding any incoming values.
for (Value *Val : Explored) {
if (Val == Base)
continue;
// Create empty phi nodes. This avoids cyclic dependencies when creating
// the remaining instructions.
if (auto *PHI = dyn_cast<PHINode>(Val))
NewInsts[PHI] = PHINode::Create(IndexType, PHI->getNumIncomingValues(),
PHI->getName() + ".idx", PHI);
}
IRBuilder<> Builder(Base->getContext());
// Create all the other instructions.
for (Value *Val : Explored) {
if (NewInsts.find(Val) != NewInsts.end())
continue;
if (auto *CI = dyn_cast<CastInst>(Val)) {
NewInsts[CI] = NewInsts[CI->getOperand(0)];
continue;
}
if (auto *GEP = dyn_cast<GEPOperator>(Val)) {
Value *Index = NewInsts[GEP->getOperand(1)]
? NewInsts[GEP->getOperand(1)]
: GEP->getOperand(1);
setInsertionPoint(Builder, GEP);
// Indices might need to be sign extended. GEPs will magically do
// this, but we need to do it ourselves here.
if (Index->getType()->getScalarSizeInBits() !=
NewInsts[GEP->getOperand(0)]->getType()->getScalarSizeInBits()) {
Index = Builder.CreateSExtOrTrunc(
Index, NewInsts[GEP->getOperand(0)]->getType(),
GEP->getOperand(0)->getName() + ".sext");
}
NewInsts[GEP] =
Builder.CreateAdd(NewInsts[GEP->getOperand(0)], Index,
GEP->getOperand(0)->getName() + ".add");
continue;
}
if (isa<PHINode>(Val))
continue;
llvm_unreachable("Unexpected instruction type");
}
// Add the incoming values to the PHI nodes.
for (Value *Val : Explored) {
if (Val == Base)
continue;
// All the instructions have been created, we can now add edges to the
// phi nodes.
if (auto *PHI = dyn_cast<PHINode>(Val)) {
PHINode *NewPhi = static_cast<PHINode *>(NewInsts[PHI]);
for (unsigned I = 0, E = PHI->getNumIncomingValues(); I < E; ++I) {
Value *NewIncoming = PHI->getIncomingValue(I);
if (NewInsts.find(NewIncoming) != NewInsts.end())
NewIncoming = NewInsts[NewIncoming];
NewPhi->addIncoming(NewIncoming, PHI->getIncomingBlock(I));
}
}
}
// If required, create a inttoptr instruction.
Value *NewBase = Base;
setInsertionPoint(Builder, Base, false);
if (!Base->getType()->isPointerTy())
NewBase = Builder.CreateBitOrPointerCast(Base, Start->getType(),
Start->getName() + "to.ptr");
for (Value *Val : Explored) {
if (Val == Base)
continue;
// Depending on the type, for external users we have to emit
// a GEP or a GEP + ptrtoint.
if (isa<Instruction>(NewInsts[Val]))
setInsertionPoint(Builder, NewInsts[Val], false);
else
setInsertionPoint(Builder, NewBase, false);
Value *GEP =
Builder.CreateInBoundsGEP(Start->getType()->getPointerElementType(),
NewBase, {NewInsts[Val]},
Val->getName() + ".ptr");
if (!Val->getType()->isPointerTy()) {
Value *Cast = Builder.CreatePointerCast(GEP, Val->getType(),
Val->getName() + ".conv");
GEP = Cast;
}
Val->replaceAllUsesWith(GEP);
}
return NewInsts[Start];
}
/// Looks through GEPs, IntToPtrInsts and PtrToIntInsts in order to express
/// the input Value as a GEP constant indexed GEP. Returns a pair containing
/// the GEPs Pointer and Index.
static std::pair<Value *, Value *>
getAsConstantIndexedAddress(Value *V, const DataLayout &DL) {
Type *IndexType =
IntegerType::get(V->getContext(),
DL.getPointerTypeSizeInBits(V->getType()));
Constant *Index = ConstantInt::getNullValue(IndexType);
while (true) {
if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
// We accept only inbouds GEPs here to exclude the possibility of
// overflow.
if (!GEP->isInBounds())
break;
if (GEP->hasAllConstantIndices() && GEP->getNumIndices() == 1) {
V = GEP->getOperand(0);
Constant *GEPIndex = static_cast<Constant *>(GEP->getOperand(1));
Index = ConstantExpr::getAdd(
Index, ConstantExpr::getSExtOrBitCast(GEPIndex, IndexType));
continue;
}
break;
}
if (auto *CI = dyn_cast<IntToPtrInst>(V)) {
if (!CI->isNoopCast(DL))
break;
V = CI->getOperand(0);
continue;
}
if (auto *CI = dyn_cast<PtrToIntInst>(V)) {
if (!CI->isNoopCast(DL))
break;
V = CI->getOperand(0);
continue;
}
break;
}
return {V, Index};
}
// Converts (CMP GEPLHS, RHS) if this change would make RHS a constant.
// We can look through PHIs, GEPs and casts in order to determine a
// common base between GEPLHS and RHS.
static Instruction *transformToIndexedCompare(GEPOperator *GEPLHS, Value *RHS,
ICmpInst::Predicate Cond,
const DataLayout &DL) {
if (!GEPLHS->hasAllConstantIndices())
return nullptr;
Value *PtrBase, *Index;
std::tie(PtrBase, Index) = getAsConstantIndexedAddress(GEPLHS, DL);
// The set of nodes that will take part in this transformation.
SetVector<Value *> Nodes;
if (!canRewriteGEPAsOffset(RHS, PtrBase, DL, Nodes))
return nullptr;
// We know we can re-write this as
// ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2)
// Since we've only looked through inbouds GEPs we know that we
// can't have overflow on either side. We can therefore re-write
// this as:
// OFFSET1 cmp OFFSET2
Value *NewRHS = rewriteGEPAsOffset(RHS, PtrBase, DL, Nodes);
// RewriteGEPAsOffset has replaced RHS and all of its uses with a re-written
// GEP having PtrBase as the pointer base, and has returned in NewRHS the
// offset. Since Index is the offset of LHS to the base pointer, we will now
// compare the offsets instead of comparing the pointers.
return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Index, NewRHS);
}
/// FoldGEPICmp - Fold comparisons between a GEP instruction and something
/// else. At this point we know that the GEP is on the LHS of the comparison.
Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
@ -674,8 +989,9 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
}
// Otherwise, the base pointers are different and the indices are
// different, bail out.
return nullptr;
// different. Try convert this to an indexed compare by looking through
// PHIs/casts.
return transformToIndexedCompare(GEPLHS, RHS, Cond, DL);
}
// If one of the GEPs has all zero indices, recurse.
@ -727,7 +1043,10 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
return new ICmpInst(ICmpInst::getSignedPredicate(Cond), L, R);
}
}
return nullptr;
// Try convert this to an indexed compare by looking through PHIs/casts as a
// last resort.
return transformToIndexedCompare(GEPLHS, RHS, Cond, DL);
}
Instruction *InstCombiner::FoldAllocaCmp(ICmpInst &ICI, AllocaInst *Alloca,

View File

@ -0,0 +1,100 @@
; RUN: opt -instcombine -S < %s | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
define i32 *@test1(i32* %A, i32 %Offset) {
entry:
%tmp = getelementptr inbounds i32, i32* %A, i32 %Offset
br label %bb
bb:
%RHS = phi i32* [ %RHS.next, %bb ], [ %tmp, %entry ]
%LHS = getelementptr inbounds i32, i32* %A, i32 100
%RHS.next = getelementptr inbounds i32, i32* %RHS, i64 1
%cond = icmp ult i32 * %LHS, %RHS
br i1 %cond, label %bb2, label %bb
bb2:
ret i32* %RHS
; CHECK-LABEL: @test1(
; CHECK: %[[INDEX:[0-9A-Za-z.]+]] = phi i32 [ %[[ADD:[0-9A-Za-z.]+]], %bb ], [ %Offset, %entry ]
; CHECK: %[[ADD]] = add i32 %[[INDEX]], 1
; CHECK: %cond = icmp sgt i32 %[[INDEX]], 100
; CHECK: br i1 %cond, label %bb2, label %bb
; CHECK: %[[PTR:[0-9A-Za-z.]+]] = getelementptr inbounds i32, i32* %A, i32 %[[INDEX]]
; CHECK: ret i32* %[[PTR]]
}
define i32 *@test2(i32 %A, i32 %Offset) {
entry:
%A.ptr = inttoptr i32 %A to i32*
%tmp = getelementptr inbounds i32, i32* %A.ptr, i32 %Offset
br label %bb
bb:
%RHS = phi i32* [ %RHS.next, %bb ], [ %tmp, %entry ]
%LHS = getelementptr inbounds i32, i32* %A.ptr, i32 100
%RHS.next = getelementptr inbounds i32, i32* %RHS, i64 1
%cmp0 = ptrtoint i32 *%LHS to i32
%cmp1 = ptrtoint i32 *%RHS to i32
%cond = icmp ult i32 %cmp0, %cmp1
br i1 %cond, label %bb2, label %bb
bb2:
ret i32* %RHS
; CHECK-LABEL: @test2(
; CHECK: %[[TOPTR:[0-9A-Za-z.]+]] = inttoptr i32 %[[ADD:[0-9A-Za-z.]+]] to i32*
; CHECK: %[[INDEX:[0-9A-Za-z.]+]] = phi i32 [ %[[ADD:[0-9A-Za-z.]+]], %bb ], [ %Offset, %entry ]
; CHECK: %[[ADD]] = add i32 %[[INDEX]], 1
; CHECK: %cond = icmp sgt i32 %[[INDEX]], 100
; CHECK: br i1 %cond, label %bb2, label %bb
; CHECK: %[[PTR:[0-9A-Za-z.]+]] = getelementptr inbounds i32, i32* %[[TOPTR]], i32 %[[INDEX]]
; CHECK: ret i32* %[[PTR]]
}
; Perform the transformation only if we know that the GEPs used are inbounds.
define i32 *@test3(i32* %A, i32 %Offset) {
entry:
%tmp = getelementptr i32, i32* %A, i32 %Offset
br label %bb
bb:
%RHS = phi i32* [ %RHS.next, %bb ], [ %tmp, %entry ]
%LHS = getelementptr i32, i32* %A, i32 100
%RHS.next = getelementptr i32, i32* %RHS, i64 1
%cond = icmp ult i32 * %LHS, %RHS
br i1 %cond, label %bb2, label %bb
bb2:
ret i32* %RHS
; CHECK-LABEL: @test3(
; CHECK-NOT: %cond = icmp sgt i32 %{{[0-9A-Za-z.]+}}, 100
}
; An inttoptr that requires an extension or truncation will be opaque when determining
; the base pointer. In this case we can still perform the transformation by considering
; A.ptr as being the base pointer.
define i32 *@test4(i16 %A, i32 %Offset) {
entry:
%A.ptr = inttoptr i16 %A to i32*
%tmp = getelementptr inbounds i32, i32* %A.ptr, i32 %Offset
br label %bb
bb:
%RHS = phi i32* [ %RHS.next, %bb ], [ %tmp, %entry ]
%LHS = getelementptr inbounds i32, i32* %A.ptr, i32 100
%RHS.next = getelementptr inbounds i32, i32* %RHS, i64 1
%cmp0 = ptrtoint i32 *%LHS to i32
%cmp1 = ptrtoint i32 *%RHS to i32
%cond = icmp ult i32 %cmp0, %cmp1
br i1 %cond, label %bb2, label %bb
bb2:
ret i32* %RHS
; CHECK-LABEL: @test4(
; CHECK: %cond = icmp sgt i32 %{{[0-9A-Za-z.]+}}, 100
}