diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 9ce0046f12b..38746b0dcbe 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1564,7 +1564,7 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
   SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
 
   return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
-                       /*isVolatile*/false, /*AlwaysInline=*/true,
+                       /*isVolatile*/false, /*AlwaysInline=*/false,
                        MachinePointerInfo(), MachinePointerInfo());
 }
 
diff --git a/test/CodeGen/X86/byval-memcpy.ll b/test/CodeGen/X86/byval-memcpy.ll
new file mode 100644
index 00000000000..3f57f509a02
--- /dev/null
+++ b/test/CodeGen/X86/byval-memcpy.ll
@@ -0,0 +1,10 @@
+; Large byval copies should call memcpy ("_memcpy" on Darwin, "memcpy" elsewhere).
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+; RUN: llc < %s -march=x86 | FileCheck %s
+; CHECK: memcpy
+define void @foo([40000 x i32] *%P) nounwind {
+  call void @bar([40000 x i32] * byval align 1 %P)
+  ret void
+}
+
+declare void @bar([40000 x i32] *%P )
diff --git a/test/CodeGen/X86/byval2.ll b/test/CodeGen/X86/byval2.ll
index 71129f5f6c9..0b6dfc9e90c 100644
--- a/test/CodeGen/X86/byval2.ll
+++ b/test/CodeGen/X86/byval2.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86-64 | grep rep.movsq | count 2
-; RUN: llc < %s -march=x86    | grep rep.movsl | count 2
+; RUN: llc < %s -march=x86-64 | egrep {rep.movsq|memcpy} | count 2
+; RUN: llc < %s -march=x86    | egrep {rep.movsl|memcpy} | count 2
 
 %struct.s = type { i64, i64, i64, i64, i64, i64, i64, i64,
                    i64, i64, i64, i64, i64, i64, i64, i64,
diff --git a/test/CodeGen/X86/byval3.ll b/test/CodeGen/X86/byval3.ll
index 504e0bed791..29cb41c176f 100644
--- a/test/CodeGen/X86/byval3.ll
+++ b/test/CodeGen/X86/byval3.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86-64 | grep rep.movsq | count 2
-; RUN: llc < %s -march=x86 | grep rep.movsl | count 2
+; RUN: llc < %s -march=x86-64 | egrep {rep.movsq|memcpy} | count 2
+; RUN: llc < %s -march=x86 | egrep {rep.movsl|memcpy} | count 2
 
 %struct.s = type { i32, i32, i32, i32, i32, i32, i32, i32,
                    i32, i32, i32, i32, i32, i32, i32, i32,
diff --git a/test/CodeGen/X86/byval4.ll b/test/CodeGen/X86/byval4.ll
index 4db9d650b43..bd745f04a04 100644
--- a/test/CodeGen/X86/byval4.ll
+++ b/test/CodeGen/X86/byval4.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86-64 | grep rep.movsq | count 2
-; RUN: llc < %s -march=x86 | grep rep.movsl	 | count 2
+; RUN: llc < %s -march=x86-64 | egrep {rep.movsq|memcpy} | count 2
+; RUN: llc < %s -march=x86 | egrep {rep.movsl|memcpy}	 | count 2
 
 %struct.s = type { i16, i16, i16, i16, i16, i16, i16, i16,
                    i16, i16, i16, i16, i16, i16, i16, i16,
diff --git a/test/CodeGen/X86/byval5.ll b/test/CodeGen/X86/byval5.ll
index 69c115b9732..c13de4b5afc 100644
--- a/test/CodeGen/X86/byval5.ll
+++ b/test/CodeGen/X86/byval5.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86-64 | grep rep.movsq | count 2
-; RUN: llc < %s -march=x86 | grep rep.movsl	 | count 2
+; RUN: llc < %s -march=x86-64 | egrep {rep.movsq|memcpy} | count 2
+; RUN: llc < %s -march=x86 | egrep {rep.movsl|memcpy}	 | count 2
 
 %struct.s = type { i8, i8, i8, i8, i8, i8, i8, i8,
                    i8, i8, i8, i8, i8, i8, i8, i8,
diff --git a/test/CodeGen/X86/byval7.ll b/test/CodeGen/X86/byval7.ll
index 686ed9c74dd..8e3aa0bbdea 100644
--- a/test/CodeGen/X86/byval7.ll
+++ b/test/CodeGen/X86/byval7.ll
@@ -8,10 +8,8 @@ define i32 @main() nounwind  {
 entry:
 ; CHECK: main:
 ; CHECK: movl $1, (%esp)
-; CHECK: leal 16(%esp), %edi
-; CHECK: movl $36, %ecx
-; CHECK: leal 160(%esp), %esi
-; CHECK: rep;movsl
+; CHECK: movl ${{36|144}},
+; CHECK: {{rep;movsl|memcpy}}
 	%s = alloca %struct.S		; <%struct.S*> [#uses=2]
 	%tmp15 = getelementptr %struct.S* %s, i32 0, i32 0		; <<2 x i64>*> [#uses=1]
 	store <2 x i64> < i64 8589934595, i64 1 >, <2 x i64>* %tmp15, align 16
diff --git a/test/CodeGen/X86/tailcallbyval64.ll b/test/CodeGen/X86/tailcallbyval64.ll
index 7c685b85807..b4a2e029865 100644
--- a/test/CodeGen/X86/tailcallbyval64.ll
+++ b/test/CodeGen/X86/tailcallbyval64.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=x86-64  -tailcallopt  | grep TAILCALL
 ; Expect 2 rep;movs because of tail call byval lowering.
-; RUN: llc < %s -march=x86-64  -tailcallopt  | grep rep | wc -l | grep 2
+; RUN: llc < %s -march=x86-64  -tailcallopt  | egrep {rep|memcpy} | wc -l | grep 2
 ; A sequence of copyto/copyfrom virtual registers is used to deal with byval
 ; lowering appearing after moving arguments to registers. The following two
 ; checks verify that the register allocator changes those sequences to direct