mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-12-14 15:19:33 +00:00
Improve tail call elim to move loads above readonly calls
when it allows forming a tail call. Patch by Frits van Bommel. This implements PR4323. llvm-svn: 73752
This commit is contained in:
parent
3a683c551f
commit
8ddc06469c
@ -52,6 +52,7 @@
|
||||
|
||||
#define DEBUG_TYPE "tailcallelim"
|
||||
#include "llvm/Transforms/Scalar.h"
|
||||
#include "llvm/Transforms/Utils/Local.h"
|
||||
#include "llvm/Constants.h"
|
||||
#include "llvm/DerivedTypes.h"
|
||||
#include "llvm/Function.h"
|
||||
@ -201,8 +202,21 @@ bool TailCallElim::runOnFunction(Function &F) {
|
||||
bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) {
|
||||
// FIXME: We can move load/store/call/free instructions above the call if the
|
||||
// call does not mod/ref the memory location being processed.
|
||||
if (I->mayHaveSideEffects() || isa<LoadInst>(I))
|
||||
if (I->mayHaveSideEffects()) // This also handles volatile loads.
|
||||
return false;
|
||||
|
||||
if (LoadInst* L = dyn_cast<LoadInst>(I)) {
|
||||
// Loads may always be moved above calls without side effects.
|
||||
if (CI->mayHaveSideEffects()) {
|
||||
// Non-volatile loads may be moved above a call with side effects if it
|
||||
// does not write to memory and the load provably won't trap.
|
||||
// FIXME: Writes to memory only matter if they may alias the pointer
|
||||
// being loaded from.
|
||||
if (CI->mayWriteToMemory() ||
|
||||
!isSafeToLoadUnconditionally(L->getPointerOperand(), L))
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Otherwise, if this is a side-effect free instruction, check to make sure
|
||||
// that it does not use the return value of the call. If it doesn't use the
|
||||
|
64
test/Transforms/TailCallElim/dont_reorder_load.ll
Normal file
64
test/Transforms/TailCallElim/dont_reorder_load.ll
Normal file
@ -0,0 +1,64 @@
|
||||
; RUN: llvm-as <%s | opt -tailcallelim | llvm-dis | grep call | count 3
|
||||
; PR4323
|
||||
|
||||
; Several cases where tail call elimination should not move the load above the
|
||||
; call, and thus can't eliminate the tail recursion.
|
||||
|
||||
|
||||
@extern_weak_global = extern_weak global i32 ; <i32*> [#uses=1]
|
||||
|
||||
|
||||
; This load can't be safely moved above the call because the load is from an
|
||||
; extern_weak global and may trap, but the call may unwind before that happens.
|
||||
define fastcc i32 @no_tailrecelim_1(i32* %a_arg, i32 %a_len_arg, i32 %start_arg) readonly {
|
||||
entry:
|
||||
%tmp2 = icmp sge i32 %start_arg, %a_len_arg ; <i1> [#uses=1]
|
||||
br i1 %tmp2, label %if, label %else
|
||||
|
||||
if: ; preds = %entry
|
||||
unwind
|
||||
|
||||
else: ; preds = %entry
|
||||
%tmp7 = add i32 %start_arg, 1 ; <i32> [#uses=1]
|
||||
%tmp8 = call fastcc i32 @no_tailrecelim_1(i32* %a_arg, i32 %a_len_arg, i32 %tmp7) ; <i32> [#uses=1]
|
||||
%tmp9 = load i32* @extern_weak_global ; <i32> [#uses=1]
|
||||
%tmp10 = add i32 %tmp9, %tmp8 ; <i32> [#uses=1]
|
||||
ret i32 %tmp10
|
||||
}
|
||||
|
||||
|
||||
; This load can't be safely moved above the call because the function may write to the pointer.
|
||||
define fastcc i32 @no_tailrecelim_2(i32* %a_arg, i32 %a_len_arg, i32 %start_arg) nounwind {
|
||||
entry:
|
||||
%tmp2 = icmp sge i32 %start_arg, %a_len_arg ; <i1> [#uses=1]
|
||||
br i1 %tmp2, label %if, label %else
|
||||
|
||||
if: ; preds = %entry
|
||||
store i32 1, i32* %a_arg;
|
||||
ret i32 0;
|
||||
|
||||
else: ; preds = %entry
|
||||
%tmp7 = add i32 %start_arg, 1 ; <i32> [#uses=1]
|
||||
%tmp8 = call fastcc i32 @no_tailrecelim_2(i32* %a_arg, i32 %a_len_arg, i32 %tmp7) ; <i32> [#uses=1]
|
||||
%tmp9 = load i32* %a_arg ; <i32> [#uses=1]
|
||||
%tmp10 = add i32 %tmp9, %tmp8 ; <i32> [#uses=1]
|
||||
ret i32 %tmp10
|
||||
}
|
||||
|
||||
; This load can't be safely moved above the call because that would change the
|
||||
; order in which the volatile loads are performed.
|
||||
define fastcc i32 @no_tailrecelim_3(i32* %a_arg, i32 %a_len_arg, i32 %start_arg) nounwind {
|
||||
entry:
|
||||
%tmp2 = icmp sge i32 %start_arg, %a_len_arg ; <i1> [#uses=1]
|
||||
br i1 %tmp2, label %if, label %else
|
||||
|
||||
if: ; preds = %entry
|
||||
ret i32 0;
|
||||
|
||||
else: ; preds = %entry
|
||||
%tmp7 = add i32 %start_arg, 1 ; <i32> [#uses=1]
|
||||
%tmp8 = call fastcc i32 @no_tailrecelim_3(i32* %a_arg, i32 %a_len_arg, i32 %tmp7) ; <i32> [#uses=1]
|
||||
%tmp9 = volatile load i32* %a_arg ; <i32> [#uses=1]
|
||||
%tmp10 = add i32 %tmp9, %tmp8 ; <i32> [#uses=1]
|
||||
ret i32 %tmp10
|
||||
}
|
101
test/Transforms/TailCallElim/reorder_load.ll
Normal file
101
test/Transforms/TailCallElim/reorder_load.ll
Normal file
@ -0,0 +1,101 @@
|
||||
; RUN: llvm-as <%s | opt -tailcallelim | llvm-dis | not grep call
|
||||
; PR4323
|
||||
|
||||
; Several cases where tail call elimination should move the load above the call,
|
||||
; then eliminate the tail recursion.
|
||||
|
||||
|
||||
@global = external global i32 ; <i32*> [#uses=1]
|
||||
@extern_weak_global = extern_weak global i32 ; <i32*> [#uses=1]
|
||||
|
||||
|
||||
; This load can be moved above the call because the function won't write to it
|
||||
; and the call has no side effects.
|
||||
define fastcc i32 @raise_load_1(i32* %a_arg, i32 %a_len_arg, i32 %start_arg) nounwind readonly {
|
||||
entry:
|
||||
%tmp2 = icmp sge i32 %start_arg, %a_len_arg ; <i1> [#uses=1]
|
||||
br i1 %tmp2, label %if, label %else
|
||||
|
||||
if: ; preds = %entry
|
||||
ret i32 0
|
||||
|
||||
else: ; preds = %entry
|
||||
%tmp7 = add i32 %start_arg, 1 ; <i32> [#uses=1]
|
||||
%tmp8 = call fastcc i32 @raise_load_1(i32* %a_arg, i32 %a_len_arg, i32 %tmp7) ; <i32> [#uses=1]
|
||||
%tmp9 = load i32* %a_arg ; <i32> [#uses=1]
|
||||
%tmp10 = add i32 %tmp9, %tmp8 ; <i32> [#uses=1]
|
||||
ret i32 %tmp10
|
||||
}
|
||||
|
||||
|
||||
; This load can be moved above the call because the function won't write to it
|
||||
; and the load provably can't trap.
|
||||
define fastcc i32 @raise_load_2(i32* %a_arg, i32 %a_len_arg, i32 %start_arg) readonly {
|
||||
entry:
|
||||
%tmp2 = icmp sge i32 %start_arg, %a_len_arg ; <i1> [#uses=1]
|
||||
br i1 %tmp2, label %if, label %else
|
||||
|
||||
if: ; preds = %entry
|
||||
ret i32 0
|
||||
|
||||
else: ; preds = %entry
|
||||
%nullcheck = icmp eq i32* %a_arg, null ; <i1> [#uses=1]
|
||||
br i1 %nullcheck, label %unwind, label %recurse
|
||||
|
||||
unwind: ; preds = %else
|
||||
unwind
|
||||
|
||||
recurse: ; preds = %else
|
||||
%tmp7 = add i32 %start_arg, 1 ; <i32> [#uses=1]
|
||||
%tmp8 = call fastcc i32 @raise_load_2(i32* %a_arg, i32 %a_len_arg, i32 %tmp7) ; <i32> [#uses=1]
|
||||
%tmp9 = load i32* @global ; <i32> [#uses=1]
|
||||
%tmp10 = add i32 %tmp9, %tmp8 ; <i32> [#uses=1]
|
||||
ret i32 %tmp10
|
||||
}
|
||||
|
||||
|
||||
; This load can be safely moved above the call (even though it's from an
|
||||
; extern_weak global) because the call has no side effects.
|
||||
define fastcc i32 @raise_load_3(i32* %a_arg, i32 %a_len_arg, i32 %start_arg) nounwind readonly {
|
||||
entry:
|
||||
%tmp2 = icmp sge i32 %start_arg, %a_len_arg ; <i1> [#uses=1]
|
||||
br i1 %tmp2, label %if, label %else
|
||||
|
||||
if: ; preds = %entry
|
||||
ret i32 0
|
||||
|
||||
else: ; preds = %entry
|
||||
%tmp7 = add i32 %start_arg, 1 ; <i32> [#uses=1]
|
||||
%tmp8 = call fastcc i32 @raise_load_3(i32* %a_arg, i32 %a_len_arg, i32 %tmp7) ; <i32> [#uses=1]
|
||||
%tmp9 = load i32* @extern_weak_global ; <i32> [#uses=1]
|
||||
%tmp10 = add i32 %tmp9, %tmp8 ; <i32> [#uses=1]
|
||||
ret i32 %tmp10
|
||||
}
|
||||
|
||||
|
||||
; The second load can be safely moved above the call even though it's from an
|
||||
; unknown pointer (which normally means it might trap) because the first load
|
||||
; proves it doesn't trap.
|
||||
define fastcc i32 @raise_load_4(i32* %a_arg, i32 %a_len_arg, i32 %start_arg) readonly {
|
||||
entry:
|
||||
%tmp2 = icmp sge i32 %start_arg, %a_len_arg ; <i1> [#uses=1]
|
||||
br i1 %tmp2, label %if, label %else
|
||||
|
||||
if: ; preds = %entry
|
||||
ret i32 0
|
||||
|
||||
else: ; preds = %entry
|
||||
%nullcheck = icmp eq i32* %a_arg, null ; <i1> [#uses=1]
|
||||
br i1 %nullcheck, label %unwind, label %recurse
|
||||
|
||||
unwind: ; preds = %else
|
||||
unwind
|
||||
|
||||
recurse: ; preds = %else
|
||||
%tmp7 = add i32 %start_arg, 1 ; <i32> [#uses=1]
|
||||
%first = load i32* %a_arg ; <i32> [#uses=1]
|
||||
%tmp8 = call fastcc i32 @raise_load_4(i32* %a_arg, i32 %first, i32 %tmp7) ; <i32> [#uses=1]
|
||||
%second = load i32* %a_arg ; <i32> [#uses=1]
|
||||
%tmp10 = add i32 %second, %tmp8 ; <i32> [#uses=1]
|
||||
ret i32 %tmp10
|
||||
}
|
Loading…
Reference in New Issue
Block a user