diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index f20f7dc5d6d..f0a661cb699 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -132,8 +132,10 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { unsigned LoopSize = ApproximateLoopSize(L, NumCalls); DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n"); if (NumCalls != 0) { - DEBUG(dbgs() << " Not unrolling loop with function calls.\n"); - return false; + // Calls no longer veto unrolling: a really small loop may still profit, + // so compare against OptSizeUnrollThreshold (assigned unconditionally). + DEBUG(dbgs() <<" Using lower threshold for loop with function calls.\n"); + CurrentThreshold = OptSizeUnrollThreshold; } uint64_t Size = (uint64_t)LoopSize*Count; if (TripCount != 1 && Size > CurrentThreshold) { diff --git a/test/Transforms/LoopUnroll/call.ll b/test/Transforms/LoopUnroll/call.ll new file mode 100644 index 00000000000..5dcb3bc2af4 --- /dev/null +++ b/test/Transforms/LoopUnroll/call.ll @@ -0,0 +1,51 @@ +; RUN: opt < %s -S -loop-unroll | FileCheck %s + +@id = internal global i32 0 +@val = internal global [4 x i32] zeroinitializer, align 16 + +; CHECK: @test +define i32 @test(i32 %k) nounwind ssp { +; CHECK-NOT: call i32 @test(i32 %t.06) +; CHECK: call i32 @test(i32 0) +; CHECK-NOT: call i32 @test(i32 %t.06) +; CHECK: call i32 @test(i32 1) +; CHECK-NOT: call i32 @test(i32 %t.06) +; CHECK: call i32 @test(i32 2) +; CHECK-NOT: call i32 @test(i32 %t.06) +; CHECK: call i32 @test(i32 3) +; CHECK-NOT: call i32 @test(i32 %t.06) + +bb.nph: + %0 = load i32* @id, align 4 + %1 = add nsw i32 %0, 1 + store i32 %1, i32* @id, align 4 + %2 = sext i32 %k to i64 + %3 = getelementptr inbounds [4 x i32]* @val, i64 0, i64 %2 + store i32 %1, i32* %3, align 4 + br label %bb + +bb: ; preds = %bb2, %bb.nph + %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %bb2 ] + %scevgep = getelementptr [4 x i32]* @val, i64 0, i64 %indvar + %4 = load i32* 
%scevgep, align 4 + %5 = icmp eq i32 %4, 0 + br i1 %5, label %bb1, label %bb2 + +bb1: ; preds = %bb + %t.06 = trunc i64 %indvar to i32 + %6 = tail call i32 @test(i32 %t.06) nounwind + br label %bb2 + +bb2: ; preds = %bb1, %bb + %indvar.next = add i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 4 + br i1 %exitcond, label %bb4, label %bb + +bb4: ; preds = %bb2 + %.pre = load i32* @id, align 4 + %7 = add nsw i32 %.pre, -1 + store i32 %7, i32* @id, align 4 + store i32 0, i32* %3, align 4 + ret i32 undef +; CHECK: } +}