mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-26 06:04:47 +00:00
Enable tail calls on ARM by default, with some
basic tests. This has been well tested on Darwin but not elsewhere. It should work provided the linker correctly resolves B.W <label in other function>, a construct it has not seen before, at least from llvm-based compilers. I'm leaving the arm-tail-calls switch in until I see whether there are any problems because of that; it might need to be disabled for some environments. llvm-svn: 106299
This commit is contained in:
parent
f03db5dcc6
commit
a441c8fd45
@ -55,7 +55,7 @@ STATISTIC(NumTailCalls, "Number of tail calls");
|
||||
static cl::opt<bool>
|
||||
EnableARMTailCalls("arm-tail-calls", cl::Hidden,
|
||||
cl::desc("Generate tail calls (TEMPORARY OPTION)."),
|
||||
cl::init(false));
|
||||
cl::init(true));
|
||||
|
||||
static cl::opt<bool>
|
||||
EnableARMLongCalls("arm-long-calls", cl::Hidden,
|
||||
|
36
test/CodeGen/ARM/call-tc.ll
Normal file
36
test/CodeGen/ARM/call-tc.ll
Normal file
@ -0,0 +1,36 @@
|
||||
; RUN: llc < %s -march=arm | FileCheck %s -check-prefix=CHECKV4
|
||||
; RUN: llc < %s -march=arm -mattr=+v5t | FileCheck %s -check-prefix=CHECKV5
|
||||
; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi\
|
||||
; RUN: -relocation-model=pic | FileCheck %s -check-prefix=CHECKELF
|
||||
|
||||
@t = weak global i32 ()* null ; <i32 ()**> [#uses=1]
|
||||
|
||||
declare void @g(i32, i32, i32, i32)
|
||||
|
||||
define void @f() {
|
||||
; CHECKELF: PLT
|
||||
call void @g( i32 1, i32 2, i32 3, i32 4 )
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @g.upgrd.1() {
|
||||
; CHECKV4: bx r0 @ TAILCALL
|
||||
; CHECKV5: bx r0 @ TAILCALL
|
||||
%tmp = load i32 ()** @t ; <i32 ()*> [#uses=1]
|
||||
%tmp.upgrd.2 = tail call i32 %tmp( ) ; <i32> [#uses=0]
|
||||
ret void
|
||||
}
|
||||
|
||||
define i32* @m_231b(i32, i32, i32*, i32*, i32*) nounwind {
|
||||
; CHECKV4: m_231b
|
||||
; CHECKV4: bx r{{.*}}
|
||||
BB0:
|
||||
%5 = inttoptr i32 %0 to i32* ; <i32*> [#uses=1]
|
||||
%t35 = volatile load i32* %5 ; <i32> [#uses=1]
|
||||
%6 = inttoptr i32 %t35 to i32** ; <i32**> [#uses=1]
|
||||
%7 = getelementptr i32** %6, i32 86 ; <i32**> [#uses=1]
|
||||
%8 = load i32** %7 ; <i32*> [#uses=1]
|
||||
%9 = bitcast i32* %8 to i32* (i32, i32*, i32, i32*, i32*, i32*)* ; <i32* (i32, i32*, i32, i32*, i32*, i32*)*> [#uses=1]
|
||||
%10 = call i32* %9(i32 %0, i32* null, i32 %1, i32* %2, i32* %3, i32* %4) ; <i32*> [#uses=1]
|
||||
ret i32* %10
|
||||
}
|
23
test/CodeGen/ARM/ifcvt6-tc.ll
Normal file
23
test/CodeGen/ARM/ifcvt6-tc.ll
Normal file
@ -0,0 +1,23 @@
|
||||
; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
|
||||
; RUN: grep cmpne | count 1
|
||||
; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
|
||||
; RUN: grep bhi | count 1
|
||||
; Here, tail call wins over eliminating branches. It is 1 fewer instruction
|
||||
; and removes all stack accesses, so seems like a win.
|
||||
|
||||
define void @foo(i32 %X, i32 %Y) {
|
||||
entry:
|
||||
%tmp1 = icmp ult i32 %X, 4 ; <i1> [#uses=1]
|
||||
%tmp4 = icmp eq i32 %Y, 0 ; <i1> [#uses=1]
|
||||
%tmp7 = or i1 %tmp4, %tmp1 ; <i1> [#uses=1]
|
||||
br i1 %tmp7, label %cond_true, label %UnifiedReturnBlock
|
||||
|
||||
cond_true: ; preds = %entry
|
||||
%tmp10 = tail call i32 (...)* @bar( ) ; <i32> [#uses=0]
|
||||
ret void
|
||||
|
||||
UnifiedReturnBlock: ; preds = %entry
|
||||
ret void
|
||||
}
|
||||
|
||||
declare i32 @bar(...)
|
11
test/CodeGen/ARM/insn-sched1-tc.ll
Normal file
11
test/CodeGen/ARM/insn-sched1-tc.ll
Normal file
@ -0,0 +1,11 @@
|
||||
; RUN: llc < %s -march=arm -mattr=+v6
|
||||
; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6 |\
|
||||
; RUN: grep mov | count 2
|
||||
|
||||
define i32 @test(i32 %x) {
|
||||
%tmp = trunc i32 %x to i16 ; <i16> [#uses=1]
|
||||
%tmp2 = tail call i32 @f( i32 1, i16 %tmp ) ; <i32> [#uses=1]
|
||||
ret i32 %tmp2
|
||||
}
|
||||
|
||||
declare i32 @f(i32, i16)
|
37
test/CodeGen/ARM/ldm-tc.ll
Normal file
37
test/CodeGen/ARM/ldm-tc.ll
Normal file
@ -0,0 +1,37 @@
|
||||
; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s
|
||||
|
||||
@X = external global [0 x i32] ; <[0 x i32]*> [#uses=5]
|
||||
|
||||
define i32 @t1() {
|
||||
; CHECK: t1:
|
||||
; CHECK: ldmia
|
||||
%tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 0) ; <i32> [#uses=1]
|
||||
%tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1]
|
||||
%tmp4 = tail call i32 @f1( i32 %tmp, i32 %tmp3 ) ; <i32> [#uses=1]
|
||||
ret i32 %tmp4
|
||||
}
|
||||
|
||||
define i32 @t2() {
|
||||
; CHECK: t2:
|
||||
; CHECK: ldmia
|
||||
%tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1]
|
||||
%tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3) ; <i32> [#uses=1]
|
||||
%tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 4) ; <i32> [#uses=1]
|
||||
%tmp6 = tail call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 ) ; <i32> [#uses=1]
|
||||
ret i32 %tmp6
|
||||
}
|
||||
|
||||
define i32 @t3() {
|
||||
; CHECK: t3:
|
||||
; CHECK: ldmib
|
||||
; CHECK: b.w _f2 @ TAILCALL
|
||||
%tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1]
|
||||
%tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1]
|
||||
%tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3) ; <i32> [#uses=1]
|
||||
%tmp6 = tail call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 ) ; <i32> [#uses=1]
|
||||
ret i32 %tmp6
|
||||
}
|
||||
|
||||
declare i32 @f1(i32, i32)
|
||||
|
||||
declare i32 @f2(i32, i32, i32)
|
27
test/CodeGen/Thumb2/thumb2-call-tc.ll
Normal file
27
test/CodeGen/Thumb2/thumb2-call-tc.ll
Normal file
@ -0,0 +1,27 @@
|
||||
; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+thumb2 | FileCheck %s -check-prefix=DARWIN
|
||||
; RUN: llc < %s -mtriple=thumbv7-linux -mattr=+thumb2 | FileCheck %s -check-prefix=LINUX
|
||||
|
||||
@t = weak global i32 ()* null ; <i32 ()**> [#uses=1]
|
||||
|
||||
declare void @g(i32, i32, i32, i32)
|
||||
|
||||
define void @f() {
|
||||
; DARWIN: f:
|
||||
; DARWIN: blx _g
|
||||
|
||||
; LINUX: f:
|
||||
; LINUX: bl g
|
||||
call void @g( i32 1, i32 2, i32 3, i32 4 )
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @h() {
|
||||
; DARWIN: h:
|
||||
; DARWIN: bx r0 @ TAILCALL
|
||||
|
||||
; LINUX: h:
|
||||
; LINUX: bx r0 @ TAILCALL
|
||||
%tmp = load i32 ()** @t ; <i32 ()*> [#uses=1]
|
||||
%tmp.upgrd.2 = tail call i32 %tmp( ) ; <i32> [#uses=0]
|
||||
ret void
|
||||
}
|
86
test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll
Normal file
86
test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll
Normal file
@ -0,0 +1,86 @@
|
||||
; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
|
||||
|
||||
define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
|
||||
; CHECK: t1:
|
||||
; CHECK: it ne
|
||||
; CHECK: cmpne
|
||||
switch i32 %c, label %cond_next [
|
||||
i32 1, label %cond_true
|
||||
i32 7, label %cond_true
|
||||
]
|
||||
|
||||
cond_true:
|
||||
%tmp12 = add i32 %a, 1
|
||||
%tmp1518 = add i32 %tmp12, %b
|
||||
ret i32 %tmp1518
|
||||
|
||||
cond_next:
|
||||
%tmp15 = add i32 %b, %a
|
||||
ret i32 %tmp15
|
||||
}
|
||||
|
||||
; FIXME: Check for # of unconditional branch after adding branch folding post ifcvt.
|
||||
define i32 @t2(i32 %a, i32 %b) nounwind {
|
||||
entry:
|
||||
; CHECK: t2:
|
||||
; CHECK: ite gt
|
||||
; CHECK: subgt
|
||||
; CHECK: suble
|
||||
%tmp1434 = icmp eq i32 %a, %b ; <i1> [#uses=1]
|
||||
br i1 %tmp1434, label %bb17, label %bb.outer
|
||||
|
||||
bb.outer: ; preds = %cond_false, %entry
|
||||
%b_addr.021.0.ph = phi i32 [ %b, %entry ], [ %tmp10, %cond_false ] ; <i32> [#uses=5]
|
||||
%a_addr.026.0.ph = phi i32 [ %a, %entry ], [ %a_addr.026.0, %cond_false ] ; <i32> [#uses=1]
|
||||
br label %bb
|
||||
|
||||
bb: ; preds = %cond_true, %bb.outer
|
||||
%indvar = phi i32 [ 0, %bb.outer ], [ %indvar.next, %cond_true ] ; <i32> [#uses=2]
|
||||
%tmp. = sub i32 0, %b_addr.021.0.ph ; <i32> [#uses=1]
|
||||
%tmp.40 = mul i32 %indvar, %tmp. ; <i32> [#uses=1]
|
||||
%a_addr.026.0 = add i32 %tmp.40, %a_addr.026.0.ph ; <i32> [#uses=6]
|
||||
%tmp3 = icmp sgt i32 %a_addr.026.0, %b_addr.021.0.ph ; <i1> [#uses=1]
|
||||
br i1 %tmp3, label %cond_true, label %cond_false
|
||||
|
||||
cond_true: ; preds = %bb
|
||||
%tmp7 = sub i32 %a_addr.026.0, %b_addr.021.0.ph ; <i32> [#uses=2]
|
||||
%tmp1437 = icmp eq i32 %tmp7, %b_addr.021.0.ph ; <i1> [#uses=1]
|
||||
%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1]
|
||||
br i1 %tmp1437, label %bb17, label %bb
|
||||
|
||||
cond_false: ; preds = %bb
|
||||
%tmp10 = sub i32 %b_addr.021.0.ph, %a_addr.026.0 ; <i32> [#uses=2]
|
||||
%tmp14 = icmp eq i32 %a_addr.026.0, %tmp10 ; <i1> [#uses=1]
|
||||
br i1 %tmp14, label %bb17, label %bb.outer
|
||||
|
||||
bb17: ; preds = %cond_false, %cond_true, %entry
|
||||
%a_addr.026.1 = phi i32 [ %a, %entry ], [ %tmp7, %cond_true ], [ %a_addr.026.0, %cond_false ] ; <i32> [#uses=1]
|
||||
ret i32 %a_addr.026.1
|
||||
}
|
||||
|
||||
@x = external global i32* ; <i32**> [#uses=1]
|
||||
|
||||
define void @foo(i32 %a) nounwind {
|
||||
entry:
|
||||
%tmp = load i32** @x ; <i32*> [#uses=1]
|
||||
store i32 %a, i32* %tmp
|
||||
ret void
|
||||
}
|
||||
|
||||
; Tail call prevents use of ifcvt in this one. Seems like a win though.
|
||||
define void @t3(i32 %a, i32 %b) nounwind {
|
||||
entry:
|
||||
; CHECK: t3:
|
||||
; CHECK-NOT: it lt
|
||||
; CHECK-NOT: poplt
|
||||
; CHECK: b.w _foo @ TAILCALL
|
||||
%tmp1 = icmp sgt i32 %a, 10 ; <i1> [#uses=1]
|
||||
br i1 %tmp1, label %cond_true, label %UnifiedReturnBlock
|
||||
|
||||
cond_true: ; preds = %entry
|
||||
tail call void @foo( i32 %b )
|
||||
ret void
|
||||
|
||||
UnifiedReturnBlock: ; preds = %entry
|
||||
ret void
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user