diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp index de89fea1193..685050765a7 100644 --- a/lib/Analysis/InlineCost.cpp +++ b/lib/Analysis/InlineCost.cpp @@ -96,6 +96,7 @@ class CallAnalyzer : public InstVisitor { int InstructionCost); bool isGEPOffsetConstant(GetElementPtrInst &GEP); bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset); + bool simplifyCallSite(Function *F, CallSite CS); ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V); // Custom analysis routines. @@ -124,6 +125,8 @@ class CallAnalyzer : public InstVisitor { bool visitBinaryOperator(BinaryOperator &I); bool visitLoad(LoadInst &I); bool visitStore(StoreInst &I); + bool visitExtractValue(ExtractValueInst &I); + bool visitInsertValue(InsertValueInst &I); bool visitCallSite(CallSite CS); public: @@ -610,6 +613,73 @@ bool CallAnalyzer::visitStore(StoreInst &I) { return false; } +bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) { + // Constant folding for extract value is trivial. + Constant *C = dyn_cast(I.getAggregateOperand()); + if (!C) + C = SimplifiedValues.lookup(I.getAggregateOperand()); + if (C) { + SimplifiedValues[&I] = ConstantExpr::getExtractValue(C, I.getIndices()); + return true; + } + + // SROA can look through these but give them a cost. + return false; +} + +bool CallAnalyzer::visitInsertValue(InsertValueInst &I) { + // Constant folding for insert value is trivial. + Constant *AggC = dyn_cast(I.getAggregateOperand()); + if (!AggC) + AggC = SimplifiedValues.lookup(I.getAggregateOperand()); + Constant *InsertedC = dyn_cast(I.getInsertedValueOperand()); + if (!InsertedC) + InsertedC = SimplifiedValues.lookup(I.getInsertedValueOperand()); + if (AggC && InsertedC) { + SimplifiedValues[&I] = ConstantExpr::getInsertValue(AggC, InsertedC, + I.getIndices()); + return true; + } + + // SROA can look through these but give them a cost. + return false; +} + +/// \brief Try to simplify a call site. +/// +/// Takes a concrete function and callsite and tries to actually simplify it by +/// analyzing the arguments and call itself with instsimplify. Returns true if +/// it has simplified the callsite to some other entity (a constant), making it +/// free. +bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) { + // FIXME: Using the instsimplify logic directly for this is inefficient + // because we have to continually rebuild the argument list even when no + // simplifications can be performed. Until that is fixed with remapping + // inside of instsimplify, directly constant fold calls here. + if (!canConstantFoldCallTo(F)) + return false; + + // Try to re-map the arguments to constants. + SmallVector ConstantArgs; + ConstantArgs.reserve(CS.arg_size()); + for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); + I != E; ++I) { + Constant *C = dyn_cast(*I); + if (!C) + C = dyn_cast_or_null(SimplifiedValues.lookup(*I)); + if (!C) + return false; // This argument doesn't map to a constant. + + ConstantArgs.push_back(C); + } + if (Constant *C = ConstantFoldCall(F, ConstantArgs)) { + SimplifiedValues[CS.getInstruction()] = C; + return true; + } + + return false; +} + bool CallAnalyzer::visitCallSite(CallSite CS) { if (CS.isCall() && cast(CS.getInstruction())->canReturnTwice() && !F.getFnAttributes().hasAttribute(Attribute::ReturnsTwice)) { @@ -621,20 +691,26 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { cast(CS.getInstruction())->hasFnAttr(Attribute::NoDuplicate)) ContainsNoDuplicateCall = true; - if (IntrinsicInst *II = dyn_cast(CS.getInstruction())) { - switch (II->getIntrinsicID()) { - default: - return Base::visitCallSite(CS); - - case Intrinsic::memset: - case Intrinsic::memcpy: - case Intrinsic::memmove: - // SROA can usually chew through these intrinsics, but they aren't free. - return false; - } - } - if (Function *F = CS.getCalledFunction()) { + // When we have a concrete function, first try to simplify it directly. + if (simplifyCallSite(F, CS)) + return true; + + // Next check if it is an intrinsic we know about. + // FIXME: Lift this into part of the InstVisitor. + if (IntrinsicInst *II = dyn_cast(CS.getInstruction())) { + switch (II->getIntrinsicID()) { + default: + return Base::visitCallSite(CS); + + case Intrinsic::memset: + case Intrinsic::memcpy: + case Intrinsic::memmove: + // SROA can usually chew through these intrinsics, but they aren't free. + return false; + } + } + if (F == CS.getInstruction()->getParent()->getParent()) { // This flag will fully abort the analysis, so don't bother with anything // else. diff --git a/test/Transforms/Inline/inline_constprop.ll b/test/Transforms/Inline/inline_constprop.ll index 0b48a7282f4..b2a14fe0b71 100644 --- a/test/Transforms/Inline/inline_constprop.ll +++ b/test/Transforms/Inline/inline_constprop.ll @@ -111,6 +111,44 @@ bb.false: ret i32 %sub } +declare {i8, i1} @llvm.uadd.with.overflow.i8(i8 %a, i8 %b) + +define i8 @caller4(i8 %z) { +; Check that we can constant fold through intrinsics such as the +; overflow-detecting arithmetic instrinsics. These are particularly important +; as they are used heavily in standard library code and generic C++ code where +; the arguments are oftent constant but complete generality is required. +; +; CHECK: @caller4 +; CHECK-NOT: call +; CHECK: ret i8 -1 + +entry: + %x = call i8 @callee4(i8 254, i8 14, i8 %z) + ret i8 %x +} + +define i8 @callee4(i8 %x, i8 %y, i8 %z) { + %uadd = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 %x, i8 %y) + %o = extractvalue {i8, i1} %uadd, 1 + br i1 %o, label %bb.true, label %bb.false + +bb.true: + ret i8 -1 + +bb.false: + ; This block musn't be counted in the inline cost. + %z1 = add i8 %z, 1 + %z2 = add i8 %z1, 1 + %z3 = add i8 %z2, 1 + %z4 = add i8 %z3, 1 + %z5 = add i8 %z4, 1 + %z6 = add i8 %z5, 1 + %z7 = add i8 %z6, 1 + %z8 = add i8 %z7, 1 + ret i8 %z8 +} + define i32 @PR13412.main() { ; This is a somewhat complicated three layer subprogram that was reported to