diff --git a/include/llvm/Analysis/InlineCost.h b/include/llvm/Analysis/InlineCost.h
index e3d526e7431..f8bb18d0a39 100644
--- a/include/llvm/Analysis/InlineCost.h
+++ b/include/llvm/Analysis/InlineCost.h
@@ -110,6 +110,10 @@ namespace llvm {
     /// entry here.
     std::vector<ArgInfo> ArgumentWeights;
 
+    /// PointerArgPairWeights - Weights to use when giving an inline bonus to
+    /// a call site due to correlated pairs of pointers.
+    DenseMap<std::pair<unsigned, unsigned>, unsigned> PointerArgPairWeights;
+
     /// countCodeReductionForConstant - Figure out an approximation for how
     /// many instructions will be constant folded if the specified value is
     /// constant.
@@ -122,6 +126,18 @@ namespace llvm {
     unsigned countCodeReductionForAlloca(const CodeMetrics &Metrics,
                                          Value *V);
 
+    /// countCodeReductionForPointerPair - Count the bonus to apply to an
+    /// inline call site where a pair of arguments are pointers and one
+    /// argument is a constant offset from the other. The idea is to
+    /// recognize a common C++ idiom where a begin and end iterator are
+    /// actually pointers, and many operations on the pair of them will be
+    /// constants if the function is called with arguments that have
+    /// a constant offset.
+    void countCodeReductionForPointerPair(
+        const CodeMetrics &Metrics,
+        DenseMap<Value *, unsigned> &PointerArgs,
+        Value *V, unsigned ArgIdx);
+
     /// analyzeFunction - Add information about the specified function
     /// to the current structure.
     void analyzeFunction(Function *F, const TargetData *TD);
diff --git a/include/llvm/Value.h b/include/llvm/Value.h
index 9896bc76930..380af0b60a4 100644
--- a/include/llvm/Value.h
+++ b/include/llvm/Value.h
@@ -271,13 +271,13 @@ public:
     return const_cast<Value*>(this)->stripPointerCasts();
   }
 
-  /// stripConstantOffsets - This method strips off unneeded pointer casts and
+  /// stripInBoundsConstantOffsets - This method strips off unneeded pointer casts and
   /// all-constant GEPs from the specified value, returning the original
   /// pointer value. If this is called on a non-pointer value, it returns
   /// 'this'.
-  Value *stripConstantOffsets();
-  const Value *stripConstantOffsets() const {
-    return const_cast<Value*>(this)->stripConstantOffsets();
+  Value *stripInBoundsConstantOffsets();
+  const Value *stripInBoundsConstantOffsets() const {
+    return const_cast<Value*>(this)->stripInBoundsConstantOffsets();
   }
 
   /// stripInBoundsOffsets - This method strips off unneeded pointer casts and
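
For context on the idiom the new PointerArgPairWeights bonus targets, here is a
minimal C++ sketch (illustrative only, not part of the patch): a begin/end
callee looks expensive in isolation, but when a caller passes two pointers a
constant in-bounds offset apart, the comparisons and subtraction on the pair
fold to constants after inlining.

    // Callee: operates on a [begin, end) pointer pair, the shape STL
    // algorithms take when their iterators are raw pointers.
    static int sum(const int *begin, const int *end) {
      int total = 0;
      for (const int *it = begin; it != end; ++it) // trip count is end - begin
        total += *it;
      return total;
    }

    int caller() {
      int buf[8] = {1, 2, 3, 4, 5, 6, 7, 8};
      // buf and buf + 8 differ by a constant in-bounds offset, so once sum is
      // inlined here the loop bound becomes the constant 8.
      return sum(buf, buf + 8);
    }
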
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index f8d49a22a7f..312973f4a9b 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -381,6 +381,67 @@ unsigned InlineCostAnalyzer::FunctionInfo::countCodeReductionForAlloca(
   return Reduction + (CanSROAAlloca ? SROAReduction : 0);
 }
 
+void InlineCostAnalyzer::FunctionInfo::countCodeReductionForPointerPair(
+    const CodeMetrics &Metrics, DenseMap<Value *, unsigned> &PointerArgs,
+    Value *V, unsigned ArgIdx) {
+  SmallVector<Value *, 4> Worklist;
+  Worklist.push_back(V);
+  do {
+    Value *V = Worklist.pop_back_val();
+    for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
+         UI != E; ++UI) {
+      Instruction *I = cast<Instruction>(*UI);
+
+      if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
+        // If the GEP has variable indices, we won't be able to do much with it.
+        if (!GEP->hasAllConstantIndices())
+          continue;
+        // Unless the GEP is in-bounds, some comparisons will be non-constant.
+        // Fortunately, the real-world cases where this occurs use in-bounds
+        // GEPs, and so we restrict the optimization to them here.
+        if (!GEP->isInBounds())
+          continue;
+
+        // Constant indices just change the constant offset. Add the resulting
+        // value both to our worklist for this argument, and to the set of
+        // viable paired values with future arguments.
+        PointerArgs[GEP] = ArgIdx;
+        Worklist.push_back(GEP);
+        continue;
+      }
+
+      // Track pointers through casts. Even when the result is not a pointer, it
+      // remains a constant relative to constants derived from other constant
+      // pointers.
+      if (CastInst *CI = dyn_cast<CastInst>(I)) {
+        PointerArgs[CI] = ArgIdx;
+        Worklist.push_back(CI);
+        continue;
+      }
+
+      // There are two instructions which produce a strict constant value when
+      // applied to two related pointer values. Ignore everything else.
+      if (!isa<ICmpInst>(I) && I->getOpcode() != Instruction::Sub)
+        continue;
+      assert(I->getNumOperands() == 2);
+
+      // Ensure that the two operands are in our set of potentially paired
+      // pointers (or are derived from them).
+      Value *OtherArg = I->getOperand(0);
+      if (OtherArg == V)
+        OtherArg = I->getOperand(1);
+      DenseMap<Value *, unsigned>::const_iterator ArgIt
+        = PointerArgs.find(OtherArg);
+      if (ArgIt == PointerArgs.end())
+        continue;
+      assert(ArgIt->second < ArgIdx);
+
+      PointerArgPairWeights[std::make_pair(ArgIt->second, ArgIdx)]
+        += countCodeReductionForConstant(Metrics, I);
+    }
+  } while (!Worklist.empty());
+}
+
 /// analyzeFunction - Fill in the current structure with information gleaned
 /// from the specified function.
 void CodeMetrics::analyzeFunction(Function *F, const TargetData *TD) {
@@ -409,12 +470,25 @@ void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F,
   if (Metrics.NumRets==1)
     --Metrics.NumInsts;
 
-  // Check out all of the arguments to the function, figuring out how much
-  // code can be eliminated if one of the arguments is a constant.
   ArgumentWeights.reserve(F->arg_size());
-  for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I)
+  DenseMap<Value *, unsigned> PointerArgs;
+  unsigned ArgIdx = 0;
+  for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
+       ++I, ++ArgIdx) {
+    // Count how much code can be eliminated if one of the arguments is
+    // a constant or an alloca.
     ArgumentWeights.push_back(ArgInfo(countCodeReductionForConstant(Metrics, I),
                                       countCodeReductionForAlloca(Metrics, I)));
+
+    // If the argument is a pointer, also check for pairs of pointers where
+    // knowing a fixed offset between them allows simplification. This pattern
+    // arises mostly due to STL algorithm patterns where pointers are used as
+    // random access iterators.
+    if (!I->getType()->isPointerTy())
+      continue;
+    PointerArgs[I] = ArgIdx;
+    countCodeReductionForPointerPair(Metrics, PointerArgs, I, ArgIdx);
+  }
 }
 
 /// NeverInline - returns true if the function should never be inlined into
@@ -563,6 +637,15 @@ int InlineCostAnalyzer::getInlineSize(CallSite CS, Function *Callee) {
       InlineCost -= CalleeFI->ArgumentWeights[ArgNo].ConstantWeight;
   }
 
+  const DenseMap<std::pair<unsigned, unsigned>, unsigned> &ArgPairWeights
+    = CalleeFI->PointerArgPairWeights;
+  for (DenseMap<std::pair<unsigned, unsigned>, unsigned>::const_iterator I
+         = ArgPairWeights.begin(), E = ArgPairWeights.end();
+       I != E; ++I)
+    if (CS.getArgument(I->first.first)->stripInBoundsConstantOffsets() ==
+        CS.getArgument(I->first.second)->stripInBoundsConstantOffsets())
+      InlineCost -= I->second;
+
   // Each argument passed in has a cost at both the caller and the callee
   // sides. Measurements show that each argument costs about the same as an
   // instruction.
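
The pair check that getInlineSize performs reduces to the equality test
sketched below. The helper name is hypothetical; CallSite::getArgument and the
renamed Value::stripInBoundsConstantOffsets are the in-tree APIs the hunk
above uses, and the header paths are assumed for this vintage of the tree.

    #include "llvm/Support/CallSite.h"
    #include "llvm/Value.h"
    using namespace llvm;

    // Hypothetical helper: a recorded (first, second) argument pair earns its
    // bonus when both call-site operands strip back to the same base pointer
    // through pointer casts and all-constant in-bounds GEPs.
    static bool argsAreConstantOffsetPair(CallSite CS, unsigned First,
                                          unsigned Second) {
      return CS.getArgument(First)->stripInBoundsConstantOffsets() ==
             CS.getArgument(Second)->stripInBoundsConstantOffsets();
    }
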
diff --git a/lib/VMCore/Value.cpp b/lib/VMCore/Value.cpp
index 40b2d95e3d0..41cc38c90af 100644
--- a/lib/VMCore/Value.cpp
+++ b/lib/VMCore/Value.cpp
@@ -321,9 +321,8 @@ namespace {
 // Various metrics for how much to strip off of pointers.
 enum PointerStripKind {
   PSK_ZeroIndices,
-  PSK_ConstantIndices,
-  PSK_InBounds,
-  PSK_All
+  PSK_InBoundsConstantIndices,
+  PSK_InBounds
 };
 
 template <PointerStripKind StripKind>
@@ -343,16 +342,14 @@ static Value *stripPointerCastsAndOffsets(Value *V) {
         if (!GEP->hasAllZeroIndices())
           return V;
         break;
-      case PSK_ConstantIndices:
+      case PSK_InBoundsConstantIndices:
         if (!GEP->hasAllConstantIndices())
           return V;
-        break;
+        // fallthrough
       case PSK_InBounds:
         if (!GEP->isInBounds())
           return V;
         break;
-      case PSK_All:
-        break;
       }
       V = GEP->getPointerOperand();
     } else if (Operator::getOpcode(V) == Instruction::BitCast) {
@@ -375,8 +372,8 @@ Value *Value::stripPointerCasts() {
   return stripPointerCastsAndOffsets<PSK_ZeroIndices>(this);
 }
 
-Value *Value::stripConstantOffsets() {
-  return stripPointerCastsAndOffsets<PSK_ConstantIndices>(this);
+Value *Value::stripInBoundsConstantOffsets() {
+  return stripPointerCastsAndOffsets<PSK_InBoundsConstantIndices>(this);
 }
 
 Value *Value::stripInBoundsOffsets() {
diff --git a/test/Transforms/Inline/ptr-diff.ll b/test/Transforms/Inline/ptr-diff.ll
new file mode 100644
index 00000000000..0b431d6d90e
--- /dev/null
+++ b/test/Transforms/Inline/ptr-diff.ll
@@ -0,0 +1,56 @@
+; RUN: opt -inline < %s -S -o - -inline-threshold=10 | FileCheck %s
+
+define i32 @outer1() {
+; CHECK: @outer1
+; CHECK-NOT: call
+; CHECK: ret i32
+
+  %ptr = alloca i32
+  %ptr1 = getelementptr inbounds i32* %ptr, i32 0
+  %ptr2 = getelementptr inbounds i32* %ptr, i32 42
+  %result = call i32 @inner1(i32* %ptr1, i32* %ptr2)
+  ret i32 %result
+}
+
+define i32 @inner1(i32* %begin, i32* %end) {
+  %begin.i = ptrtoint i32* %begin to i32
+  %end.i = ptrtoint i32* %end to i32
+  %distance = sub i32 %end.i, %begin.i
+  %icmp = icmp sle i32 %distance, 42
+  br i1 %icmp, label %then, label %else
+
+then:
+  ret i32 3
+
+else:
+  %t = load i32* %begin
+  ret i32 %t
+}
+
+define i32 @outer2(i32* %ptr) {
+; Test that a non-inbounds GEP disables this -- it isn't safe in general as
+; wrapping changes the behavior of lessthan and greaterthan comparisons.
+; CHECK: @outer2
+; CHECK: call i32 @inner2
+; CHECK: ret i32
+
+  %ptr1 = getelementptr i32* %ptr, i32 0
+  %ptr2 = getelementptr i32* %ptr, i32 42
+  %result = call i32 @inner2(i32* %ptr1, i32* %ptr2)
+  ret i32 %result
+}
+
+define i32 @inner2(i32* %begin, i32* %end) {
+  %begin.i = ptrtoint i32* %begin to i32
+  %end.i = ptrtoint i32* %end to i32
+  %distance = sub i32 %end.i, %begin.i
+  %icmp = icmp sle i32 %distance, 42
+  br i1 %icmp, label %then, label %else
+
+then:
+  ret i32 3
+
+else:
+  %t = load i32* %begin
+  ret i32 %t
+}
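
The reworked switch in Value.cpp is easiest to read as a predicate. The
standalone sketch below is simplified and not the tree's code: it mirrors the
patch's file-local PointerStripKind enum and uses the existing GEPOperator
accessors to show what the new fallthrough buys. PSK_InBoundsConstantIndices
strips a GEP only when its indices are all constant and it is also marked
inbounds, which is exactly why the non-inbounds GEPs in @outer2 above keep
@inner2 from receiving the pair bonus.

    #include "llvm/Operator.h"
    using namespace llvm;

    enum PointerStripKind { // mirrors the patch's file-local enum
      PSK_ZeroIndices,
      PSK_InBoundsConstantIndices,
      PSK_InBounds
    };

    // Simplified per-GEP decision from stripPointerCastsAndOffsets: may this
    // GEP be looked through for the given strip kind?
    static bool shouldStrip(const GEPOperator *GEP, PointerStripKind Kind) {
      switch (Kind) {
      case PSK_ZeroIndices:
        return GEP->hasAllZeroIndices();
      case PSK_InBoundsConstantIndices:
        if (!GEP->hasAllConstantIndices())
          return false;
        // fallthrough: constant indices alone are not enough...
      case PSK_InBounds:
        return GEP->isInBounds(); // ...the GEP must also be in-bounds.
      }
      return false;
    }
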