diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp index f5eb7055726..4838d856ae3 100644 --- a/lib/Analysis/Loads.cpp +++ b/lib/Analysis/Loads.cpp @@ -73,11 +73,6 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, Type *BaseType = nullptr; unsigned BaseAlign = 0; if (const AllocaInst *AI = dyn_cast(Base)) { - // Loading directly from an alloca is trivially safe. We can't even look - // through pointer casts here though, as that might change the size loaded. - if (AI == V) - return true; - // An alloca is safe to load from as load as it is suitably aligned. BaseType = AI->getAllocatedType(); BaseAlign = AI->getAlignment(); @@ -86,12 +81,6 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, // overridden. Their size may change or they may be weak and require a test // to determine if they were in fact provided. if (!GV->mayBeOverridden()) { - // Loading directly from the non-overridden global is trivially safe. We - // can't even look through pointer casts here though, as that might change - // the size loaded. 
- if (GV == V) - return true; - BaseType = GV->getType()->getElementType(); BaseAlign = GV->getAlignment(); } diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp index b7580255150..6fe5e188b1a 100644 --- a/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -63,6 +63,7 @@ #include "llvm/IR/CFG.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" @@ -86,6 +87,7 @@ STATISTIC(NumAccumAdded, "Number of accumulators introduced"); namespace { struct TailCallElim : public FunctionPass { const TargetTransformInfo *TTI; + const DataLayout *DL; static char ID; // Pass identification, replacement for typeid TailCallElim() : FunctionPass(ID) { @@ -157,6 +159,8 @@ bool TailCallElim::runOnFunction(Function &F) { if (skipOptnoneFunction(F)) return false; + DL = F.getParent()->getDataLayout(); + bool AllCallsAreTailCalls = false; bool Modified = markTails(F, AllCallsAreTailCalls); if (AllCallsAreTailCalls) @@ -450,7 +454,7 @@ bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) { // being loaded from. if (CI->mayWriteToMemory() || !isSafeToLoadUnconditionally(L->getPointerOperand(), L, - L->getAlignment())) + L->getAlignment(), DL)) return false; } } diff --git a/test/Transforms/InstCombine/load.ll b/test/Transforms/InstCombine/load.ll index c8ce70a5c03..20d40e2ccfd 100644 --- a/test/Transforms/InstCombine/load.ll +++ b/test/Transforms/InstCombine/load.ll @@ -2,6 +2,7 @@ ; This test makes sure that these instructions are properly eliminated. 
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" @X = constant i32 42 ; [#uses=2] @X2 = constant i32 47 ; [#uses=1] diff --git a/test/Transforms/InstCombine/select.ll b/test/Transforms/InstCombine/select.ll index d625f3b1b33..d1d8b888713 100644 --- a/test/Transforms/InstCombine/select.ll +++ b/test/Transforms/InstCombine/select.ll @@ -1236,3 +1236,41 @@ define i32 @test75(i32 %x) { ; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 68, i32 %x ; CHECK-NEXT: ret i32 [[SEL]] } + +@under_aligned = external global i32, align 1 + +define i32 @test76(i1 %flag, i32* %x) { +; The load here must not be speculated around the select. One side of the +; select is trivially dereferenceable but may have a lower alignment than the +; load does. +; CHECK-LABEL: @test76( +; CHECK: store i32 0, i32* %x +; CHECK: %[[P:.*]] = select i1 %flag, i32* @under_aligned, i32* %x +; CHECK: load i32* %[[P]] + + store i32 0, i32* %x + %p = select i1 %flag, i32* @under_aligned, i32* %x + %v = load i32* %p + ret i32 %v +} + +declare void @scribble_on_memory(i32*) + +define i32 @test77(i1 %flag, i32* %x) { +; The load here must not be speculated around the select. One side of the +; select is trivially dereferenceable but may have a lower alignment than the +; load does.
+; CHECK-LABEL: @test77( +; CHECK: %[[A:.*]] = alloca i32, align 1 +; CHECK: call void @scribble_on_memory(i32* %[[A]]) +; CHECK: store i32 0, i32* %x +; CHECK: %[[P:.*]] = select i1 %flag, i32* %[[A]], i32* %x +; CHECK: load i32* %[[P]] + + %under_aligned = alloca i32, align 1 + call void @scribble_on_memory(i32* %under_aligned) + store i32 0, i32* %x + %p = select i1 %flag, i32* %under_aligned, i32* %x + %v = load i32* %p + ret i32 %v +} diff --git a/test/Transforms/TailCallElim/reorder_load.ll b/test/Transforms/TailCallElim/reorder_load.ll index 53c65dab101..2e350d662a3 100644 --- a/test/Transforms/TailCallElim/reorder_load.ll +++ b/test/Transforms/TailCallElim/reorder_load.ll @@ -1,6 +1,8 @@ ; RUN: opt < %s -tailcallelim -S | FileCheck %s ; PR4323 +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + ; Several cases where tail call elimination should move the load above the call, ; then eliminate the tail recursion. @@ -12,6 +14,11 @@ ; This load can be moved above the call because the function won't write to it ; and the call has no side effects. define fastcc i32 @raise_load_1(i32* %a_arg, i32 %a_len_arg, i32 %start_arg) nounwind readonly { +; CHECK-LABEL: @raise_load_1( +; CHECK-NOT: call +; CHECK: load i32* +; CHECK-NOT: call +; CHECK: } entry: %tmp2 = icmp sge i32 %start_arg, %a_len_arg ; [#uses=1] br i1 %tmp2, label %if, label %else @@ -21,7 +28,6 @@ if: ; preds = %entry else: ; preds = %entry %tmp7 = add i32 %start_arg, 1 ; [#uses=1] -; CHECK-NOT: call %tmp8 = call fastcc i32 @raise_load_1(i32* %a_arg, i32 %a_len_arg, i32 %tmp7) ; [#uses=1] %tmp9 = load i32* %a_arg ; [#uses=1] %tmp10 = add i32 %tmp9, %tmp8 ; [#uses=1] @@ -32,6 +38,11 @@ else: ; preds = %entry ; This load can be moved above the call because the function won't write to it ; and the load provably can't trap. 
define fastcc i32 @raise_load_2(i32* %a_arg, i32 %a_len_arg, i32 %start_arg) readonly { +; CHECK-LABEL: @raise_load_2( +; CHECK-NOT: call +; CHECK: load i32* +; CHECK-NOT: call +; CHECK: } entry: %tmp2 = icmp sge i32 %start_arg, %a_len_arg ; [#uses=1] br i1 %tmp2, label %if, label %else @@ -48,7 +59,6 @@ unwind: ; preds = %else recurse: ; preds = %else %tmp7 = add i32 %start_arg, 1 ; [#uses=1] -; CHECK-NOT: call %tmp8 = call fastcc i32 @raise_load_2(i32* %a_arg, i32 %a_len_arg, i32 %tmp7) ; [#uses=1] %tmp9 = load i32* @global ; [#uses=1] %tmp10 = add i32 %tmp9, %tmp8 ; [#uses=1] @@ -59,6 +69,11 @@ recurse: ; preds = %else ; This load can be safely moved above the call (even though it's from an ; extern_weak global) because the call has no side effects. define fastcc i32 @raise_load_3(i32* %a_arg, i32 %a_len_arg, i32 %start_arg) nounwind readonly { +; CHECK-LABEL: @raise_load_3( +; CHECK-NOT: call +; CHECK: load i32* +; CHECK-NOT: call +; CHECK: } entry: %tmp2 = icmp sge i32 %start_arg, %a_len_arg ; [#uses=1] br i1 %tmp2, label %if, label %else @@ -68,7 +83,6 @@ if: ; preds = %entry else: ; preds = %entry %tmp7 = add i32 %start_arg, 1 ; [#uses=1] -; CHECK-NOT: call %tmp8 = call fastcc i32 @raise_load_3(i32* %a_arg, i32 %a_len_arg, i32 %tmp7) ; [#uses=1] %tmp9 = load i32* @extern_weak_global ; [#uses=1] %tmp10 = add i32 %tmp9, %tmp8 ; [#uses=1] @@ -80,6 +94,12 @@ else: ; preds = %entry ; unknown pointer (which normally means it might trap) because the first load ; proves it doesn't trap. 
define fastcc i32 @raise_load_4(i32* %a_arg, i32 %a_len_arg, i32 %start_arg) readonly { +; CHECK-LABEL: @raise_load_4( +; CHECK-NOT: call +; CHECK: load i32* +; CHECK-NEXT: load i32* +; CHECK-NOT: call +; CHECK: } entry: %tmp2 = icmp sge i32 %start_arg, %a_len_arg ; [#uses=1] br i1 %tmp2, label %if, label %else @@ -97,7 +117,6 @@ unwind: ; preds = %else recurse: ; preds = %else %tmp7 = add i32 %start_arg, 1 ; [#uses=1] %first = load i32* %a_arg ; [#uses=1] -; CHECK-NOT: call %tmp8 = call fastcc i32 @raise_load_4(i32* %a_arg, i32 %first, i32 %tmp7) ; [#uses=1] %second = load i32* %a_arg ; [#uses=1] %tmp10 = add i32 %second, %tmp8 ; [#uses=1]