diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp index b9c620fddc4..3b3ab2a83a1 100644 --- a/lib/Target/X86/X86SelectionDAGInfo.cpp +++ b/lib/Target/X86/X86SelectionDAGInfo.cpp @@ -200,13 +200,11 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SrcPtrInfo.getAddrSpace() >= 256) return SDValue(); - // ESI might be used as a base pointer, in that case we can't simply overwrite - // the register. Fall back to generic code. + // If ESI is used as a base pointer, we must preserve it when doing rep movs. const X86RegisterInfo *TRI = static_cast(DAG.getTarget().getRegisterInfo()); - if (TRI->hasBasePointer(DAG.getMachineFunction()) && - TRI->getBaseRegister() == X86::ESI) - return SDValue(); + bool PreserveESI = TRI->hasBasePointer(DAG.getMachineFunction()) && + TRI->getBaseRegister() == X86::ESI; MVT AVT; if (Align & 1) @@ -225,27 +223,45 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Count = DAG.getIntPtrConstant(CountVal); unsigned BytesLeft = SizeVal % UBytes; - SDValue InFlag(0, 0); + + if (PreserveESI) { + // Save ESI to a physical register. (We cannot use a virtual register + // because if it is spilled we wouldn't be able to reload it.) + // We don't glue this because the register dependencies are explicit. + Chain = DAG.getCopyToReg(Chain, dl, X86::EDX, + DAG.getRegister(X86::ESI, MVT::i32)); + } + + SDValue InGlue(0, 0); Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX : X86::ECX, - Count, InFlag); - InFlag = Chain.getValue(1); + Count, InGlue); + InGlue = Chain.getValue(1); Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI : X86::EDI, - Dst, InFlag); - InFlag = Chain.getValue(1); + Dst, InGlue); + InGlue = Chain.getValue(1); Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RSI : X86::ESI, - Src, InFlag); - InFlag = Chain.getValue(1); + Src, InGlue); + InGlue = Chain.getValue(1); SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag }; + SDValue Ops[] = { Chain, DAG.getValueType(AVT), InGlue }; + // FIXME: Make X86rep_movs explicitly use FCX, RDI, RSI instead of glue. SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops, array_lengthof(Ops)); + if (PreserveESI) { + InGlue = RepMovs.getValue(1); + RepMovs = DAG.getCopyToReg(RepMovs, dl, X86::ESI, + DAG.getRegister(X86::EDX, MVT::i32), InGlue); + } + SmallVector Results; Results.push_back(RepMovs); + + if (BytesLeft) { // Handle the last 1 - 7 bytes. unsigned Offset = SizeVal - BytesLeft; diff --git a/test/CodeGen/X86/inline-asm-sp-clobber-memcpy.ll b/test/CodeGen/X86/inline-asm-sp-clobber-memcpy.ll index b55571bcba0..f64ccb84683 100644 --- a/test/CodeGen/X86/inline-asm-sp-clobber-memcpy.ll +++ b/test/CodeGen/X86/inline-asm-sp-clobber-memcpy.ll @@ -13,5 +13,7 @@ define void @test1(%struct.foo* nocapture %x, i32 %y, i8* %z) nounwind { ; CHECK-LABEL: test1: ; CHECK: movl %esp, %esi -; CHECK-NOT: rep;movsl +; CHECK: movl %esi, %edx +; CHECK: rep;movsl +; CHECK: movl %edx, %esi } diff --git a/test/CodeGen/X86/stack-align-memcpy.ll b/test/CodeGen/X86/stack-align-memcpy.ll index 0cc3aa84889..d6ea8b31475 100644 --- a/test/CodeGen/X86/stack-align-memcpy.ll +++ b/test/CodeGen/X86/stack-align-memcpy.ll @@ -15,7 +15,9 @@ define void @test1(%struct.foo* nocapture %x, i32 %y) nounwind { ; CHECK-LABEL: test1: ; CHECK: andl $-16, %esp ; CHECK: movl %esp, %esi -; CHECK-NOT: rep;movsl +; CHECK: movl %esi, %edx +; CHECK: rep;movsl +; CHECK: movl %edx, %esi } ; PR19012 @@ -28,7 +30,9 @@ define void @test2(%struct.foo* nocapture %x, i32 %y, i8* %z) nounwind { ; CHECK-LABEL: test2: ; CHECK: movl %esp, %esi -; CHECK-NOT: rep;movsl +; CHECK: movl %esi, %edx +; CHECK: rep;movsl +; CHECK: movl %edx, %esi } ; Check that we do use rep movs if we make the alloca static. @@ -39,5 +43,6 @@ define void @test3(%struct.foo* nocapture %x, i32 %y, i8* %z) nounwind { ret void ; CHECK-LABEL: test3: +; CHECK-NOT: movl %esi, %edx ; CHECK: rep;movsl }