diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 50258c6f85e..216ba6578ed 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -55,7 +55,7 @@ STATISTIC(NumTailCalls, "Number of tail calls"); static cl::opt EnableARMTailCalls("arm-tail-calls", cl::Hidden, cl::desc("Generate tail calls (TEMPORARY OPTION)."), - cl::init(false)); + cl::init(true)); static cl::opt EnableARMLongCalls("arm-long-calls", cl::Hidden, diff --git a/test/CodeGen/ARM/call-tc.ll b/test/CodeGen/ARM/call-tc.ll new file mode 100644 index 00000000000..8103fab2092 --- /dev/null +++ b/test/CodeGen/ARM/call-tc.ll @@ -0,0 +1,36 @@ +; RUN: llc < %s -march=arm | FileCheck %s -check-prefix=CHECKV4 +; RUN: llc < %s -march=arm -mattr=+v5t | FileCheck %s -check-prefix=CHECKV5 +; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi\ +; RUN: -relocation-model=pic | FileCheck %s -check-prefix=CHECKELF + +@t = weak global i32 ()* null ; [#uses=1] + +declare void @g(i32, i32, i32, i32) + +define void @f() { +; CHECKELF: PLT + call void @g( i32 1, i32 2, i32 3, i32 4 ) + ret void +} + +define void @g.upgrd.1() { +; CHECKV4: bx r0 @ TAILCALL +; CHECKV5: bx r0 @ TAILCALL + %tmp = load i32 ()** @t ; [#uses=1] + %tmp.upgrd.2 = tail call i32 %tmp( ) ; [#uses=0] + ret void +} + +define i32* @m_231b(i32, i32, i32*, i32*, i32*) nounwind { +; CHECKV4: m_231b +; CHECKV4: bx r{{.*}} +BB0: + %5 = inttoptr i32 %0 to i32* ; [#uses=1] + %t35 = volatile load i32* %5 ; [#uses=1] + %6 = inttoptr i32 %t35 to i32** ; [#uses=1] + %7 = getelementptr i32** %6, i32 86 ; [#uses=1] + %8 = load i32** %7 ; [#uses=1] + %9 = bitcast i32* %8 to i32* (i32, i32*, i32, i32*, i32*, i32*)* ; [#uses=1] + %10 = call i32* %9(i32 %0, i32* null, i32 %1, i32* %2, i32* %3, i32* %4) ; [#uses=1] + ret i32* %10 +} diff --git a/test/CodeGen/ARM/ifcvt6-tc.ll b/test/CodeGen/ARM/ifcvt6-tc.ll new file mode 100644 index 00000000000..5b28804f380 --- /dev/null +++ 
b/test/CodeGen/ARM/ifcvt6-tc.ll @@ -0,0 +1,23 @@ +; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \ +; RUN: grep cmpne | count 1 +; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \ +; RUN: grep bhi | count 1 +; Here, tail call wins over eliminating branches. It is 1 fewer instruction +; and removes all stack accesses, so seems like a win. + +define void @foo(i32 %X, i32 %Y) { +entry: + %tmp1 = icmp ult i32 %X, 4 ; [#uses=1] + %tmp4 = icmp eq i32 %Y, 0 ; [#uses=1] + %tmp7 = or i1 %tmp4, %tmp1 ; [#uses=1] + br i1 %tmp7, label %cond_true, label %UnifiedReturnBlock + +cond_true: ; preds = %entry + %tmp10 = tail call i32 (...)* @bar( ) ; [#uses=0] + ret void + +UnifiedReturnBlock: ; preds = %entry + ret void +} + +declare i32 @bar(...) diff --git a/test/CodeGen/ARM/insn-sched1-tc.ll b/test/CodeGen/ARM/insn-sched1-tc.ll new file mode 100644 index 00000000000..c457c8c5a55 --- /dev/null +++ b/test/CodeGen/ARM/insn-sched1-tc.ll @@ -0,0 +1,11 @@ +; RUN: llc < %s -march=arm -mattr=+v6 +; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6 |\ +; RUN: grep mov | count 2 + +define i32 @test(i32 %x) { + %tmp = trunc i32 %x to i16 ; [#uses=1] + %tmp2 = tail call i32 @f( i32 1, i16 %tmp ) ; [#uses=1] + ret i32 %tmp2 +} + +declare i32 @f(i32, i16) diff --git a/test/CodeGen/ARM/ldm-tc.ll b/test/CodeGen/ARM/ldm-tc.ll new file mode 100644 index 00000000000..3819192429e --- /dev/null +++ b/test/CodeGen/ARM/ldm-tc.ll @@ -0,0 +1,37 @@ +; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s + +@X = external global [0 x i32] ; <[0 x i32]*> [#uses=5] + +define i32 @t1() { +; CHECK: t1: +; CHECK: ldmia + %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 0) ; [#uses=1] + %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; [#uses=1] + %tmp4 = tail call i32 @f1( i32 %tmp, i32 %tmp3 ) ; [#uses=1] + ret i32 %tmp4 +} + +define i32 @t2() { +; CHECK: t2: +; CHECK: ldmia + %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; [#uses=1] + %tmp3 = load i32* 
getelementptr ([0 x i32]* @X, i32 0, i32 3) ; [#uses=1] + %tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 4) ; [#uses=1] + %tmp6 = tail call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 ) ; [#uses=1] + ret i32 %tmp6 +} + +define i32 @t3() { +; CHECK: t3: +; CHECK: ldmib +; CHECK: b.w _f2 @ TAILCALL + %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; [#uses=1] + %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; [#uses=1] + %tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3) ; [#uses=1] + %tmp6 = tail call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 ) ; [#uses=1] + ret i32 %tmp6 +} + +declare i32 @f1(i32, i32) + +declare i32 @f2(i32, i32, i32) diff --git a/test/CodeGen/Thumb2/thumb2-call-tc.ll b/test/CodeGen/Thumb2/thumb2-call-tc.ll new file mode 100644 index 00000000000..d31ae0cc455 --- /dev/null +++ b/test/CodeGen/Thumb2/thumb2-call-tc.ll @@ -0,0 +1,27 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+thumb2 | FileCheck %s -check-prefix=DARWIN +; RUN: llc < %s -mtriple=thumbv7-linux -mattr=+thumb2 | FileCheck %s -check-prefix=LINUX + +@t = weak global i32 ()* null ; [#uses=1] + +declare void @g(i32, i32, i32, i32) + +define void @f() { +; DARWIN: f: +; DARWIN: blx _g + +; LINUX: f: +; LINUX: bl g + call void @g( i32 1, i32 2, i32 3, i32 4 ) + ret void +} + +define void @h() { +; DARWIN: h: +; DARWIN: bx r0 @ TAILCALL + +; LINUX: h: +; LINUX: bx r0 @ TAILCALL + %tmp = load i32 ()** @t ; [#uses=1] + %tmp.upgrd.2 = tail call i32 %tmp( ) ; [#uses=0] + ret void +} diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll b/test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll new file mode 100644 index 00000000000..c0244154771 --- /dev/null +++ b/test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll @@ -0,0 +1,86 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s + +define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { +; CHECK: t1: +; CHECK: it ne +; CHECK: cmpne + switch i32 %c, label %cond_next [ + i32 1, label %cond_true + i32 7, 
label %cond_true + ] + +cond_true: + %tmp12 = add i32 %a, 1 + %tmp1518 = add i32 %tmp12, %b + ret i32 %tmp1518 + +cond_next: + %tmp15 = add i32 %b, %a + ret i32 %tmp15 +} + +; FIXME: Check the number of unconditional branches after adding branch folding post ifcvt. +define i32 @t2(i32 %a, i32 %b) nounwind { +entry: +; CHECK: t2: +; CHECK: ite gt +; CHECK: subgt +; CHECK: suble + %tmp1434 = icmp eq i32 %a, %b ; [#uses=1] + br i1 %tmp1434, label %bb17, label %bb.outer + +bb.outer: ; preds = %cond_false, %entry + %b_addr.021.0.ph = phi i32 [ %b, %entry ], [ %tmp10, %cond_false ] ; [#uses=5] + %a_addr.026.0.ph = phi i32 [ %a, %entry ], [ %a_addr.026.0, %cond_false ] ; [#uses=1] + br label %bb + +bb: ; preds = %cond_true, %bb.outer + %indvar = phi i32 [ 0, %bb.outer ], [ %indvar.next, %cond_true ] ; [#uses=2] + %tmp. = sub i32 0, %b_addr.021.0.ph ; [#uses=1] + %tmp.40 = mul i32 %indvar, %tmp. ; [#uses=1] + %a_addr.026.0 = add i32 %tmp.40, %a_addr.026.0.ph ; [#uses=6] + %tmp3 = icmp sgt i32 %a_addr.026.0, %b_addr.021.0.ph ; [#uses=1] + br i1 %tmp3, label %cond_true, label %cond_false + +cond_true: ; preds = %bb + %tmp7 = sub i32 %a_addr.026.0, %b_addr.021.0.ph ; [#uses=2] + %tmp1437 = icmp eq i32 %tmp7, %b_addr.021.0.ph ; [#uses=1] + %indvar.next = add i32 %indvar, 1 ; [#uses=1] + br i1 %tmp1437, label %bb17, label %bb + +cond_false: ; preds = %bb + %tmp10 = sub i32 %b_addr.021.0.ph, %a_addr.026.0 ; [#uses=2] + %tmp14 = icmp eq i32 %a_addr.026.0, %tmp10 ; [#uses=1] + br i1 %tmp14, label %bb17, label %bb.outer + +bb17: ; preds = %cond_false, %cond_true, %entry + %a_addr.026.1 = phi i32 [ %a, %entry ], [ %tmp7, %cond_true ], [ %a_addr.026.0, %cond_false ] ; [#uses=1] + ret i32 %a_addr.026.1 +} + +@x = external global i32* ; [#uses=1] + +define void @foo(i32 %a) nounwind { +entry: + %tmp = load i32** @x ; [#uses=1] + store i32 %a, i32* %tmp + ret void +} + +; Tail call prevents use of ifcvt in this one. Seems like a win though. 
+define void @t3(i32 %a, i32 %b) nounwind { +entry: +; CHECK: t3: +; CHECK-NOT: it lt +; CHECK-NOT: poplt +; CHECK: b.w _foo @ TAILCALL + %tmp1 = icmp sgt i32 %a, 10 ; [#uses=1] + br i1 %tmp1, label %cond_true, label %UnifiedReturnBlock + +cond_true: ; preds = %entry + tail call void @foo( i32 %b ) + ret void + +UnifiedReturnBlock: ; preds = %entry + ret void +}