diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 650612144ad..4b7e82c927e 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -752,6 +752,8 @@ namespace {
     bool iterateOnFunction(Function &F);
     Value* CollapsePhi(PHINode* p);
     bool isSafeReplacement(PHINode* p, Instruction* inst);
+    bool valueHasOnlyOneUseAfter(Value* val, MemCpyInst* use,
+                                 Instruction* cutoff);
   };
 
   char GVN::ID = 0;
@@ -1055,22 +1057,32 @@ bool GVN::processLoad(LoadInst* L,
   return deletedLoad;
 }
 
-/// isReturnSlotOptznProfitable - Determine if performing a return slot
-/// fusion with the slot dest is profitable
-static bool isReturnSlotOptznProfitable(Value* dest, MemCpyInst* cpy) {
-  // We currently consider it profitable if dest is otherwise dead.
-  SmallVector<User*, 8> useList(dest->use_begin(), dest->use_end());
+/// valueHasOnlyOneUse - Returns true if a value has only one use after the
+/// cutoff that is in the current same block and is the same as the use
+/// parameter.
+bool GVN::valueHasOnlyOneUseAfter(Value* val, MemCpyInst* use,
+                                  Instruction* cutoff) {
+  DominatorTree& DT = getAnalysis<DominatorTree>();
+
+  SmallVector<User*, 8> useList(val->use_begin(), val->use_end());
   while (!useList.empty()) {
     User* UI = useList.back();
+
     if (isa<GetElementPtrInst>(UI) || isa<BitCastInst>(UI)) {
       useList.pop_back();
       for (User::use_iterator I = UI->use_begin(), E = UI->use_end();
            I != E; ++I)
         useList.push_back(*I);
-    } else if (UI == cpy)
+    } else if (UI == use) {
       useList.pop_back();
-    else
+    } else if (Instruction* inst = dyn_cast<Instruction>(UI)) {
+      if (inst->getParent() == use->getParent() &&
+          (inst == cutoff || !DT.dominates(cutoff, inst))) {
+        useList.pop_back();
+      } else
+        return false;
+    } else
       return false;
   }
 
   return true;
@@ -1123,8 +1135,14 @@ bool GVN::performReturnSlotOptzn(MemCpyInst* cpy, CallInst* C,
   if (TD.getTypeStoreSize(PT->getElementType()) != cpyLength->getZExtValue())
     return false;
 
+  // For safety, we must ensure that the output parameter of the call only has
+  // a single use, the memcpy.  Otherwise this can introduce an invalid
+  // transformation.
+  if (!valueHasOnlyOneUseAfter(CS.getArgument(0), cpy, C))
+    return false;
+
   // We only perform the transformation if it will be profitable.
-  if (!isReturnSlotOptznProfitable(cpyDest, cpy))
+  if (!valueHasOnlyOneUseAfter(cpyDest, cpy, C))
     return false;
 
   // In addition to knowing that the call does not access the return slot
diff --git a/test/Transforms/GVN/2008-02-24-MultipleUseofSRet.ll b/test/Transforms/GVN/2008-02-24-MultipleUseofSRet.ll
new file mode 100644
index 00000000000..797dba2b698
--- /dev/null
+++ b/test/Transforms/GVN/2008-02-24-MultipleUseofSRet.ll
@@ -0,0 +1,32 @@
+; RUN: llvm-as < %s | opt -gvn -dse | llvm-dis | grep {call.*initialize} | grep memtmp | count 1
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i386-pc-linux-gnu"
+
+define internal fastcc void @initialize({ x86_fp80, x86_fp80 }* noalias sret %agg.result) nounwind {
+entry:
+  %agg.result.03 = getelementptr { x86_fp80, x86_fp80 }* %agg.result, i32 0, i32 0    ; <x86_fp80*> [#uses=1]
+  store x86_fp80 0xK00000000000000000000, x86_fp80* %agg.result.03
+  %agg.result.15 = getelementptr { x86_fp80, x86_fp80 }* %agg.result, i32 0, i32 1    ; <x86_fp80*> [#uses=1]
+  store x86_fp80 0xK00000000000000000000, x86_fp80* %agg.result.15
+  ret void
+}
+
+declare fastcc x86_fp80 @passed_uninitialized({ x86_fp80, x86_fp80 }* %x) nounwind
+
+define fastcc void @badly_optimized() nounwind {
+entry:
+  %z = alloca { x86_fp80, x86_fp80 }    ; <{ x86_fp80, x86_fp80 }*> [#uses=2]
+  %tmp = alloca { x86_fp80, x86_fp80 }    ; <{ x86_fp80, x86_fp80 }*> [#uses=2]
+  %memtmp = alloca { x86_fp80, x86_fp80 }, align 8    ; <{ x86_fp80, x86_fp80 }*> [#uses=2]
+  call fastcc void @initialize( { x86_fp80, x86_fp80 }* noalias sret %memtmp )
+  %tmp1 = bitcast { x86_fp80, x86_fp80 }* %tmp to i8*    ; <i8*> [#uses=1]
+  %memtmp2 = bitcast { x86_fp80, x86_fp80 }* %memtmp to i8*    ; <i8*> [#uses=1]
+  call void @llvm.memcpy.i32( i8* %tmp1, i8* %memtmp2, i32 24, i32 8 )
+  %z3 = bitcast { x86_fp80, x86_fp80 }* %z to i8*    ; <i8*> [#uses=1]
+  %tmp4 = bitcast { x86_fp80, x86_fp80 }* %tmp to i8*    ; <i8*> [#uses=1]
+  call void @llvm.memcpy.i32( i8* %z3, i8* %tmp4, i32 24, i32 8 )
+  %tmp5 = call fastcc x86_fp80 @passed_uninitialized( { x86_fp80, x86_fp80 }* %z )    ; <x86_fp80> [#uses=0]
+  ret void
+}
+
+declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind
\ No newline at end of file