diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index dec8620fd73..17757726a2d 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -29,6 +29,7 @@ bool X86InstrInfo::isMoveInstr(const MachineInstr& MI,
   MachineOpCode oc = MI.getOpcode();
   if (oc == X86::MOV8rr || oc == X86::MOV16rr || oc == X86::MOV32rr ||
       oc == X86::FpMOV  || oc == X86::MOVSSrr || oc == X86::MOVSDrr ||
+      oc == X86::FsMOVAPSrr || oc == X86::FsMOVAPDrr ||
       oc == X86::MOVAPSrr || oc == X86::MOVAPDrr) {
       assert(MI.getNumOperands() == 2 &&
              MI.getOperand(0).isRegister() &&
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index e061c81a9c1..d1d262c6821 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -2487,13 +2487,13 @@ def UCOMISDrm: I<0x2E, MRMSrcMem, (ops FR64:$src1, f64mem:$src2),
                 [(X86cmp FR64:$src1, (loadf64 addr:$src2))]>,
               Requires<[HasSSE2]>, TB, OpSize;
 
-// Pseudo-instructions that map fld0 to xorps/xorpd for sse.
+// Pseudo-instructions that map fld0 to pxor for sse.
 // FIXME: remove when we can teach regalloc that xor reg, reg is ok.
-def FLD0SS : I<0x57, MRMInitReg, (ops FR32:$dst),
-               "xorps $dst, $dst", [(set FR32:$dst, fp32imm0)]>,
+def FLD0SS : I<0xEF, MRMInitReg, (ops FR32:$dst),
+               "pxor $dst, $dst", [(set FR32:$dst, fp32imm0)]>,
               Requires<[HasSSE1]>, TB;
-def FLD0SD : I<0x57, MRMInitReg, (ops FR64:$dst),
-               "xorpd $dst, $dst", [(set FR64:$dst, fp64imm0)]>,
+def FLD0SD : I<0xEF, MRMInitReg, (ops FR64:$dst),
+               "pxor $dst, $dst", [(set FR64:$dst, fp64imm0)]>,
               Requires<[HasSSE2]>, TB, OpSize;
 
 let isTwoAddress = 1 in {
@@ -3033,13 +3033,22 @@ def MOVAPDmr : I<0x29, MRMDestMem, (ops f128mem:$dst, V2F8:$src),
                 "movapd {$src, $dst|$dst, $src}",[]>,
               Requires<[HasSSE2]>, TB, OpSize;
 
-// Pseudo-instructions to load FR32 / FR64 from f128mem using movaps / movapd.
+// Alias instructions to do FR32 / FR64 reg-to-reg copy using movaps / movapd.
 // Upper bits are disregarded.
-def MOVSAPSrm : I<0x28, MRMSrcMem, (ops FR32:$dst, f128mem:$src),
-                "movaps {$src, $dst|$dst, $src}",
-                [(set FR32:$dst, (X86loadpf32 addr:$src))]>,
-               Requires<[HasSSE1]>, TB;
-def MOVSAPDrm : I<0x28, MRMSrcMem, (ops FR64:$dst, f128mem:$src),
+def FsMOVAPSrr : I<0x28, MRMSrcReg, (ops V4F4:$dst, V4F4:$src),
+                "movaps {$src, $dst|$dst, $src}", []>,
+               Requires<[HasSSE1]>, TB;
+def FsMOVAPDrr : I<0x28, MRMSrcReg, (ops V2F8:$dst, V2F8:$src),
+                "movapd {$src, $dst|$dst, $src}", []>,
+               Requires<[HasSSE2]>, TB, OpSize;
+
+// Alias instructions to load FR32 / FR64 from f128mem using movaps / movapd.
+// Upper bits are disregarded.
+def FsMOVAPSrm : I<0x28, MRMSrcMem, (ops FR32:$dst, f128mem:$src),
+                "movaps {$src, $dst|$dst, $src}",
+                [(set FR32:$dst, (X86loadpf32 addr:$src))]>,
+               Requires<[HasSSE1]>, TB;
+def FsMOVAPDrm : I<0x28, MRMSrcMem, (ops FR64:$dst, f128mem:$src),
                 "movapd {$src, $dst|$dst, $src}",
                 [(set FR64:$dst, (X86loadpf64 addr:$src))]>,
               Requires<[HasSSE2]>, TB, OpSize;
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index cf31167061d..2ffa4c7412b 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -114,9 +114,9 @@ void X86RegisterInfo::copyRegToReg(MachineBasicBlock &MBB,
   } else if (RC == &X86::RFPRegClass || RC == &X86::RSTRegClass) {
     Opc = X86::FpMOV;
   } else if (RC == &X86::FR32RegClass || RC == &X86::V4F4RegClass) {
-    Opc = X86::MOVAPSrr;
+    Opc = X86::FsMOVAPSrr;
   } else if (RC == &X86::FR64RegClass || RC == &X86::V2F8RegClass) {
-    Opc = X86::MOVAPDrr;
+    Opc = X86::FsMOVAPDrr;
   } else {
     assert(0 && "Unknown regclass");
     abort();
@@ -313,6 +313,9 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr* MI,
     case X86::CMP8ri: return MakeMIInst(X86::CMP8mi , FrameIndex, MI);
     case X86::CMP16ri: return MakeMIInst(X86::CMP16mi, FrameIndex, MI);
     case X86::CMP32ri: return MakeMIInst(X86::CMP32mi, FrameIndex, MI);
+    // Alias scalar SSE instructions
+    case X86::FsMOVAPSrr: return MakeMRInst(X86::MOVSSmr, FrameIndex, MI);
+    case X86::FsMOVAPDrr: return MakeMRInst(X86::MOVSDmr, FrameIndex, MI);
     // Scalar SSE instructions
     case X86::MOVSSrr: return MakeMRInst(X86::MOVSSmr, FrameIndex, MI);
     case X86::MOVSDrr: return MakeMRInst(X86::MOVSDmr, FrameIndex, MI);
@@ -393,6 +396,9 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr* MI,
     case X86::MOVZX16rr8:return MakeRMInst(X86::MOVZX16rm8 , FrameIndex, MI);
     case X86::MOVZX32rr8:return MakeRMInst(X86::MOVZX32rm8, FrameIndex, MI);
     case X86::MOVZX32rr16:return MakeRMInst(X86::MOVZX32rm16, FrameIndex, MI);
+    // Alias scalar SSE instructions
+    case X86::FsMOVAPSrr:return MakeRMInst(X86::MOVSSrm, FrameIndex, MI);
+    case X86::FsMOVAPDrr:return MakeRMInst(X86::MOVSDrm, FrameIndex, MI);
     // Scalar SSE instructions
     case X86::MOVSSrr: return MakeRMInst(X86::MOVSSrm, FrameIndex, MI);
     case X86::MOVSDrr: return MakeRMInst(X86::MOVSDrm, FrameIndex, MI);