diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp index cb9e5526fc4..b478b503ba9 100644 --- a/lib/Analysis/InlineCost.cpp +++ b/lib/Analysis/InlineCost.cpp @@ -159,10 +159,18 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) { // it. This is a hack because we depend on the user marking their local // variables as volatile if they are live across a setjmp call, and they // probably won't do this in callers. - if (Function *F = CS.getCalledFunction()) + if (Function *F = CS.getCalledFunction()) { if (F->isDeclaration() && (F->getName() == "setjmp" || F->getName() == "_setjmp")) NeverInline = true; + + // If this call is to the function itself, then the function is recursive. + // Inlining it into other functions is a bad idea, because this is + // basically just a form of loop peeling, and our metrics aren't useful + // for that case. + if (F == BB->getParent()) + NeverInline = true; + } if (!isa(II) && !callIsSmall(CS.getCalledFunction())) { // Each argument to a call takes on average one instruction to set up. diff --git a/test/Transforms/Inline/2009-01-12-RecursiveInline.ll b/test/Transforms/Inline/2009-01-12-RecursiveInline.ll deleted file mode 100644 index 1a3325a68b6..00000000000 --- a/test/Transforms/Inline/2009-01-12-RecursiveInline.ll +++ /dev/null @@ -1,92 +0,0 @@ -; RUN: opt < %s -inline -S | grep {call.*fib} | count 4 -; First call to fib from fib is inlined, producing 2 instead of 1, total 3. -; Second call to fib from fib is not inlined because new body of fib exceeds -; inlining limit of 200. Plus call in main = 4 total. 
- -; ModuleID = '' -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" -target triple = "i386-apple-darwin9.6" -@"\01LC" = internal constant [5 x i8] c"%ld\0A\00" ; <[5 x i8]*> [#uses=1] - -define i32 @fib(i32 %n) nounwind { -entry: - %n_addr = alloca i32 ; [#uses=4] - %retval = alloca i32 ; [#uses=2] - %0 = alloca i32 ; [#uses=3] - %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] - store i32 %n, i32* %n_addr - %1 = load i32* %n_addr, align 4 ; [#uses=1] - %2 = icmp ule i32 %1, 1 ; [#uses=1] - br i1 %2, label %bb, label %bb1 - -bb: ; preds = %entry - store i32 1, i32* %0, align 4 - br label %bb2 - -bb1: ; preds = %entry - %3 = load i32* %n_addr, align 4 ; [#uses=1] - %4 = sub i32 %3, 2 ; [#uses=1] - %5 = call i32 @fib(i32 %4) nounwind ; [#uses=1] - %6 = load i32* %n_addr, align 4 ; [#uses=1] - %7 = sub i32 %6, 1 ; [#uses=1] - %8 = call i32 @fib(i32 %7) nounwind ; [#uses=1] - %9 = add i32 %5, %8 ; [#uses=1] - store i32 %9, i32* %0, align 4 - br label %bb2 - -bb2: ; preds = %bb1, %bb - %10 = load i32* %0, align 4 ; [#uses=1] - store i32 %10, i32* %retval, align 4 - br label %return - -return: ; preds = %bb2 - %retval3 = load i32* %retval ; [#uses=1] - ret i32 %retval3 -} - -define i32 @main(i32 %argc, i8** %argv) nounwind { -entry: - %argc_addr = alloca i32 ; [#uses=2] - %argv_addr = alloca i8** ; [#uses=2] - %retval = alloca i32 ; [#uses=2] - %N = alloca i32 ; [#uses=2] - %0 = alloca i32 ; [#uses=2] - %iftmp.0 = alloca i32 ; [#uses=3] - %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] - store i32 %argc, i32* %argc_addr - store i8** %argv, i8*** %argv_addr - %1 = load i32* %argc_addr, align 4 ; [#uses=1] - %2 = icmp eq i32 %1, 2 ; [#uses=1] - br i1 %2, label %bb, label %bb1 - -bb: ; preds = %entry - %3 = load i8*** %argv_addr, align 4 ; [#uses=1] - %4 = getelementptr i8** %3, i32 1 ; [#uses=1] - %5 = load i8** %4, align 4 ; [#uses=1] - %6 = call i32 @atoi(i8* %5) nounwind ; 
[#uses=1] - store i32 %6, i32* %iftmp.0, align 4 - br label %bb2 - -bb1: ; preds = %entry - store i32 43, i32* %iftmp.0, align 4 - br label %bb2 - -bb2: ; preds = %bb1, %bb - %7 = load i32* %iftmp.0, align 4 ; [#uses=1] - store i32 %7, i32* %N, align 4 - %8 = load i32* %N, align 4 ; [#uses=1] - %9 = call i32 @fib(i32 %8) nounwind ; [#uses=1] - %10 = call i32 (i8*, ...)* @printf(i8* getelementptr ([5 x i8]* @"\01LC", i32 0, i32 0), i32 %9) nounwind ; [#uses=0] - store i32 0, i32* %0, align 4 - %11 = load i32* %0, align 4 ; [#uses=1] - store i32 %11, i32* %retval, align 4 - br label %return - -return: ; preds = %bb2 - %retval3 = load i32* %retval ; [#uses=1] - ret i32 %retval3 -} - -declare i32 @atoi(i8*) - -declare i32 @printf(i8*, ...) nounwind diff --git a/test/Transforms/Inline/noinline-recursive-fn.ll b/test/Transforms/Inline/noinline-recursive-fn.ll new file mode 100644 index 00000000000..dcae0243300 --- /dev/null +++ b/test/Transforms/Inline/noinline-recursive-fn.ll @@ -0,0 +1,32 @@ +; The inliner should never inline recursive functions into other functions. +; This effectively is just peeling off the first iteration of a loop, and the +; inliner heuristics are not set up for this. + +; RUN: opt -inline %s -S | grep "call void @foo(i32 42)" + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.3" + +@g = common global i32 0 ; [#uses=1] + +define internal void @foo(i32 %x) nounwind ssp { +entry: + %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] + %0 = icmp slt i32 %x, 0 ; [#uses=1] + br i1 %0, label %return, label %bb + +bb: ; preds = %entry + %1 = sub nsw i32 %x, 1 ; [#uses=1] + call void @foo(i32 %1) nounwind ssp + volatile store i32 1, i32* @g, align 4 + ret void + +return: ; preds = %entry + ret void +} + +define void @bonk() nounwind ssp { +entry: + call void @foo(i32 42) nounwind ssp + ret void +}