1. Use pxor instead of xorps / xorpd to clear FR32 / FR64 registers. This
   proves to be worth 20% on Ptrdist/ks. Might be related to dependency
   breaking support.
2. Added FsMOVAPSrr and FsMOVAPDrr as aliases to MOVAPSrr and MOVAPDrr. These
   are used for FR32 / FR64 reg-to-reg copies.
3. Tell reg-allocator to generate MOVSSrm / MOVSDrm and MOVSSmr / MOVSDmr to
   spill / restore FsMOVAPSrr and FsMOVAPDrr.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@26241 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Evan Cheng 2006-02-16 22:45:17 +00:00
parent 19ade3bf9c
commit fe5cb19405
3 changed files with 29 additions and 13 deletions

View File

@ -29,6 +29,7 @@ bool X86InstrInfo::isMoveInstr(const MachineInstr& MI,
MachineOpCode oc = MI.getOpcode();
if (oc == X86::MOV8rr || oc == X86::MOV16rr || oc == X86::MOV32rr ||
oc == X86::FpMOV || oc == X86::MOVSSrr || oc == X86::MOVSDrr ||
oc == X86::FsMOVAPSrr || oc == X86::FsMOVAPDrr ||
oc == X86::MOVAPSrr || oc == X86::MOVAPDrr) {
assert(MI.getNumOperands() == 2 &&
MI.getOperand(0).isRegister() &&

View File

@ -2487,13 +2487,13 @@ def UCOMISDrm: I<0x2E, MRMSrcMem, (ops FR64:$src1, f64mem:$src2),
[(X86cmp FR64:$src1, (loadf64 addr:$src2))]>,
Requires<[HasSSE2]>, TB, OpSize;
// Pseudo-instructions that map fld0 to xorps/xorpd for sse.
// Pseudo-instructions that map fld0 to pxor for sse.
// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
def FLD0SS : I<0x57, MRMInitReg, (ops FR32:$dst),
"xorps $dst, $dst", [(set FR32:$dst, fp32imm0)]>,
def FLD0SS : I<0xEF, MRMInitReg, (ops FR32:$dst),
"pxor $dst, $dst", [(set FR32:$dst, fp32imm0)]>,
Requires<[HasSSE1]>, TB;
def FLD0SD : I<0x57, MRMInitReg, (ops FR64:$dst),
"xorpd $dst, $dst", [(set FR64:$dst, fp64imm0)]>,
def FLD0SD : I<0xEF, MRMInitReg, (ops FR64:$dst),
"pxor $dst, $dst", [(set FR64:$dst, fp64imm0)]>,
Requires<[HasSSE2]>, TB, OpSize;
let isTwoAddress = 1 in {
@ -3033,13 +3033,22 @@ def MOVAPDmr : I<0x29, MRMDestMem, (ops f128mem:$dst, V2F8:$src),
"movapd {$src, $dst|$dst, $src}",[]>,
Requires<[HasSSE2]>, TB, OpSize;
// Pseudo-instructions to load FR32 / FR64 from f128mem using movaps / movapd.
// Alias instructions to do FR32 / FR64 reg-to-reg copy using movaps / movapd.
// Upper bits are disregarded.
def MOVSAPSrm : I<0x28, MRMSrcMem, (ops FR32:$dst, f128mem:$src),
"movaps {$src, $dst|$dst, $src}",
[(set FR32:$dst, (X86loadpf32 addr:$src))]>,
Requires<[HasSSE1]>, TB;
def MOVSAPDrm : I<0x28, MRMSrcMem, (ops FR64:$dst, f128mem:$src),
def FsMOVAPSrr : I<0x28, MRMSrcReg, (ops V4F4:$dst, V4F4:$src),
"movaps {$src, $dst|$dst, $src}", []>,
Requires<[HasSSE1]>, TB;
def FsMOVAPDrr : I<0x28, MRMSrcReg, (ops V2F8:$dst, V2F8:$src),
"movapd {$src, $dst|$dst, $src}", []>,
Requires<[HasSSE2]>, TB, OpSize;
// Alias instructions to load FR32 / FR64 from f128mem using movaps / movapd.
// Upper bits are disregarded.
def FsMOVAPSrm : I<0x28, MRMSrcMem, (ops FR32:$dst, f128mem:$src),
"movaps {$src, $dst|$dst, $src}",
[(set FR32:$dst, (X86loadpf32 addr:$src))]>,
Requires<[HasSSE1]>, TB;
def FsMOVAPDrm : I<0x28, MRMSrcMem, (ops FR64:$dst, f128mem:$src),
"movapd {$src, $dst|$dst, $src}",
[(set FR64:$dst, (X86loadpf64 addr:$src))]>,
Requires<[HasSSE2]>, TB, OpSize;

View File

@ -114,9 +114,9 @@ void X86RegisterInfo::copyRegToReg(MachineBasicBlock &MBB,
} else if (RC == &X86::RFPRegClass || RC == &X86::RSTRegClass) {
Opc = X86::FpMOV;
} else if (RC == &X86::FR32RegClass || RC == &X86::V4F4RegClass) {
Opc = X86::MOVAPSrr;
Opc = X86::FsMOVAPSrr;
} else if (RC == &X86::FR64RegClass || RC == &X86::V2F8RegClass) {
Opc = X86::MOVAPDrr;
Opc = X86::FsMOVAPDrr;
} else {
assert(0 && "Unknown regclass");
abort();
@ -313,6 +313,9 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr* MI,
case X86::CMP8ri: return MakeMIInst(X86::CMP8mi , FrameIndex, MI);
case X86::CMP16ri: return MakeMIInst(X86::CMP16mi, FrameIndex, MI);
case X86::CMP32ri: return MakeMIInst(X86::CMP32mi, FrameIndex, MI);
// Alias scalar SSE instructions
case X86::FsMOVAPSrr: return MakeMRInst(X86::MOVSSmr, FrameIndex, MI);
case X86::FsMOVAPDrr: return MakeMRInst(X86::MOVSDmr, FrameIndex, MI);
// Scalar SSE instructions
case X86::MOVSSrr: return MakeMRInst(X86::MOVSSmr, FrameIndex, MI);
case X86::MOVSDrr: return MakeMRInst(X86::MOVSDmr, FrameIndex, MI);
@ -393,6 +396,9 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr* MI,
case X86::MOVZX16rr8:return MakeRMInst(X86::MOVZX16rm8 , FrameIndex, MI);
case X86::MOVZX32rr8:return MakeRMInst(X86::MOVZX32rm8, FrameIndex, MI);
case X86::MOVZX32rr16:return MakeRMInst(X86::MOVZX32rm16, FrameIndex, MI);
// Alias scalar SSE instructions
case X86::FsMOVAPSrr:return MakeRMInst(X86::MOVSSrm, FrameIndex, MI);
case X86::FsMOVAPDrr:return MakeRMInst(X86::MOVSDrm, FrameIndex, MI);
// Scalar SSE instructions
case X86::MOVSSrr: return MakeRMInst(X86::MOVSSrm, FrameIndex, MI);
case X86::MOVSDrr: return MakeRMInst(X86::MOVSDrm, FrameIndex, MI);