Add support for AVX to materialize +0.0 when doing scalar FP.

llvm-svn: 121415
This commit is contained in:
Nate Begeman 2010-12-09 21:43:51 +00:00
parent 4be3b6db5c
commit 4a62a3e229
3 changed files with 12 additions and 2 deletions

View File

@ -2420,9 +2420,11 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
Alignment = 16;
break;
case X86::FsFLD0SD:
case X86::VFsFLD0SD:
Alignment = 8;
break;
case X86::FsFLD0SS:
case X86::VFsFLD0SS:
Alignment = 4;
break;
default:
@ -2486,9 +2488,9 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
MachineConstantPool &MCP = *MF.getConstantPool();
const Type *Ty;
unsigned Opc = LoadMI->getOpcode();
if (Opc == X86::FsFLD0SS)
if (Opc == X86::FsFLD0SS || Opc == X86::VFsFLD0SS)
Ty = Type::getFloatTy(MF.getFunction()->getContext());
else if (Opc == X86::FsFLD0SD)
else if (Opc == X86::FsFLD0SD || Opc == X86::VFsFLD0SD)
Ty = Type::getDoubleTy(MF.getFunction()->getContext());
else if (Opc == X86::AVX_SET0PSY || Opc == X86::AVX_SET0PDY)
Ty = VectorType::get(Type::getFloatTy(MF.getFunction()->getContext()), 8);

View File

@ -1440,6 +1440,12 @@ def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "",
def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "",
[(set FR64:$dst, fpimm0)]>,
Requires<[HasSSE2]>, TB, OpSize;
def VFsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "",
[(set FR32:$dst, fp32imm0)]>,
Requires<[HasAVX]>, TB, OpSize, VEX_4V;
def VFsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "",
[(set FR64:$dst, fpimm0)]>,
Requires<[HasAVX]>, TB, OpSize, VEX_4V;
}
// Alias instruction to do FR32 or FR64 reg-to-reg copy using movaps. Upper

View File

@ -374,6 +374,8 @@ ReSimplify:
case X86::MOV32r0: LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); break;
case X86::FsFLD0SS: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
case X86::FsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
case X86::VFsFLD0SS: LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break;
case X86::VFsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break;
case X86::V_SET0PS: LowerUnaryToTwoAddr(OutMI, X86::XORPSrr); break;
case X86::V_SET0PD: LowerUnaryToTwoAddr(OutMI, X86::XORPDrr); break;
case X86::V_SET0PI: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;