diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 650612144ad..4b7e82c927e 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -752,6 +752,8 @@ namespace {
     bool iterateOnFunction(Function &F);
     Value* CollapsePhi(PHINode* p);
     bool isSafeReplacement(PHINode* p, Instruction* inst);
+    bool valueHasOnlyOneUseAfter(Value* val, MemCpyInst* use,
+                                 Instruction* cutoff);
   };
 
   char GVN::ID = 0;
@@ -1055,22 +1057,32 @@ bool GVN::processLoad(LoadInst* L,
   return deletedLoad;
 }
 
-/// isReturnSlotOptznProfitable - Determine if performing a return slot
-/// fusion with the slot dest is profitable
-static bool isReturnSlotOptznProfitable(Value* dest, MemCpyInst* cpy) {
-  // We currently consider it profitable if dest is otherwise dead.
-  SmallVector<User*, 8> useList(dest->use_begin(), dest->use_end());
+/// valueHasOnlyOneUse - Returns true if a value has only one use after the
+/// cutoff that is in the current same block and is the same as the use
+/// parameter.
+bool GVN::valueHasOnlyOneUseAfter(Value* val, MemCpyInst* use,
+                                  Instruction* cutoff) {
+  DominatorTree& DT = getAnalysis<DominatorTree>();
+
+  SmallVector<User*, 8> useList(val->use_begin(), val->use_end());
   while (!useList.empty()) {
     User* UI = useList.back();
+
     if (isa<GetElementPtrInst>(UI) || isa<BitCastInst>(UI)) {
       useList.pop_back();
       for (User::use_iterator I = UI->use_begin(), E = UI->use_end();
            I != E; ++I)
         useList.push_back(*I);
-    } else if (UI == cpy)
+    } else if (UI == use) {
       useList.pop_back();
-    else
+    } else if (Instruction* inst = dyn_cast<Instruction>(UI)) {
+      if (inst->getParent() == use->getParent() &&
+          (inst == cutoff || !DT.dominates(cutoff, inst))) {
+        useList.pop_back();
+      } else
+        return false;
+    } else
       return false;
   }
 
   return true;
@@ -1123,8 +1135,14 @@ bool GVN::performReturnSlotOptzn(MemCpyInst* cpy, CallInst* C,
   if (TD.getTypeStoreSize(PT->getElementType()) != cpyLength->getZExtValue())
     return false;
 
+  // For safety, we must ensure that the output parameter of the call only has
+  // a single use, the memcpy.  Otherwise this can introduce an invalid
+  // transformation.
+  if (!valueHasOnlyOneUseAfter(CS.getArgument(0), cpy, C))
+    return false;
+
   // We only perform the transformation if it will be profitable.
-  if (!isReturnSlotOptznProfitable(cpyDest, cpy))
+  if (!valueHasOnlyOneUseAfter(cpyDest, cpy, C))
     return false;
 
   // In addition to knowing that the call does not access the return slot
diff --git a/test/Transforms/GVN/2008-02-24-MultipleUseofSRet.ll b/test/Transforms/GVN/2008-02-24-MultipleUseofSRet.ll
new file mode 100644
index 00000000000..797dba2b698
--- /dev/null
+++ b/test/Transforms/GVN/2008-02-24-MultipleUseofSRet.ll
@@ -0,0 +1,32 @@
+; RUN: llvm-as < %s | opt -gvn -dse | llvm-dis | grep {call.*initialize} | grep memtmp | count 1
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i386-pc-linux-gnu"
+
+define internal fastcc void @initialize({ x86_fp80, x86_fp80 }* noalias sret %agg.result) nounwind {
+entry:
+  %agg.result.03 = getelementptr { x86_fp80, x86_fp80 }* %agg.result, i32 0, i32 0    ; <x86_fp80*> [#uses=1]
+  store x86_fp80 0xK00000000000000000000, x86_fp80* %agg.result.03
+  %agg.result.15 = getelementptr { x86_fp80, x86_fp80 }* %agg.result, i32 0, i32 1    ; <x86_fp80*> [#uses=1]
+  store x86_fp80 0xK00000000000000000000, x86_fp80* %agg.result.15
+  ret void
+}
+
+declare fastcc x86_fp80 @passed_uninitialized({ x86_fp80, x86_fp80 }* %x) nounwind
+
+define fastcc void @badly_optimized() nounwind {
+entry:
+  %z = alloca { x86_fp80, x86_fp80 }    ; <{ x86_fp80, x86_fp80 }*> [#uses=2]
+  %tmp = alloca { x86_fp80, x86_fp80 }    ; <{ x86_fp80, x86_fp80 }*> [#uses=2]
+  %memtmp = alloca { x86_fp80, x86_fp80 }, align 8    ; <{ x86_fp80, x86_fp80 }*> [#uses=2]
+  call fastcc void @initialize( { x86_fp80, x86_fp80 }* noalias sret %memtmp )
+  %tmp1 = bitcast { x86_fp80, x86_fp80 }* %tmp to i8*    ; <i8*> [#uses=1]
+  %memtmp2 = bitcast { x86_fp80, x86_fp80 }* %memtmp to i8*    ; <i8*> [#uses=1]
+  call void @llvm.memcpy.i32( i8* %tmp1, i8* %memtmp2, i32 24, i32 8 )
+  %z3 = bitcast { x86_fp80, x86_fp80 }* %z to i8*    ; <i8*> [#uses=1]
+  %tmp4 = bitcast { x86_fp80, x86_fp80 }* %tmp to i8*    ; <i8*> [#uses=1]
+  call void @llvm.memcpy.i32( i8* %z3, i8* %tmp4, i32 24, i32 8 )
+  %tmp5 = call fastcc x86_fp80 @passed_uninitialized( { x86_fp80, x86_fp80 }* %z )    ; <x86_fp80> [#uses=0]
+  ret void
+}
+
+declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind
\ No newline at end of file