diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index ab7e00f80a3..6bfe9546011 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -3016,11 +3016,33 @@ SDOperand SelectionDAGLegalize::LegalizeOp(SDOperand Op) { } break; + case ISD::FP_ROUND: + if (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)) == + TargetLowering::Expand) { + // The only way we can lower this is to turn it into a TRUNCSTORE, + // LOAD pair, targeting a temporary location (a stack slot). + + // NOTE: there is a choice here between constantly creating new stack + // slots and always reusing the same one. We currently always create + // new ones, as reuse may inhibit scheduling. + MVT::ValueType VT = Op.getValueType(); // result type of the FP_ROUND + const Type *Ty = MVT::getTypeForValueType(VT); + uint64_t TySize = TLI.getTargetData()->getTypeSize(Ty); + unsigned Align = TLI.getTargetData()->getPrefTypeAlignment(Ty); + MachineFunction &MF = DAG.getMachineFunction(); + int SSFI = + MF.getFrameInfo()->CreateStackObject(TySize, Align); + SDOperand StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy()); + Result = DAG.getTruncStore(DAG.getEntryNode(), Node->getOperand(0), + StackSlot, NULL, 0, VT); + Result = DAG.getLoad(VT, Result, StackSlot, NULL, 0); + break; + } + // FALL THROUGH case ISD::ANY_EXTEND: case ISD::ZERO_EXTEND: case ISD::SIGN_EXTEND: case ISD::FP_EXTEND: - case ISD::FP_ROUND: switch (getTypeAction(Node->getOperand(0).getValueType())) { case Expand: assert(0 && "Shouldn't need to expand other operators here!"); case Legal: diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 7a7874b273e..4dadde0dfe5 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -3918,15 +3918,7 @@ TargetLowering::LowerCallTo(SDOperand Chain, const Type *RetTy, Op = DAG.getNode(ExtOp,
getTypeToTransformTo(VT), Op); } else { assert(MVT::isFloatingPoint(VT) && "Not int or FP?"); - // A true promotion would change the size of the argument. - // Instead, pretend this is an int. If FP objects are not - // passed the same as ints, the original type should be Legal - // and we should not get here. - Op = DAG.getNode(ISD::BIT_CONVERT, - VT==MVT::f32 ? MVT::i32 : - (VT==MVT::f64 ? MVT::i64 : - MVT::Other), - Op); + Op = DAG.getNode(ISD::FP_EXTEND, getTypeToTransformTo(VT), Op); } Ops.push_back(Op); Ops.push_back(DAG.getConstant(Flags, MVT::i32)); diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index 0c5ded8ba62..b2e0219b83a 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -324,61 +324,101 @@ static int Lookup(const TableEntry *Table, unsigned N, unsigned Opcode) { // concrete X86 instruction which uses the register stack. // static const TableEntry OpcodeTable[] = { - { X86::FpABS , X86::FABS }, + { X86::FpABS32 , X86::FABS }, + { X86::FpABS64 , X86::FABS }, { X86::FpADD32m , X86::FADD32m }, { X86::FpADD64m , X86::FADD64m }, - { X86::FpCHS , X86::FCHS }, - { X86::FpCMOVB , X86::FCMOVB }, - { X86::FpCMOVBE , X86::FCMOVBE }, - { X86::FpCMOVE , X86::FCMOVE }, - { X86::FpCMOVNB , X86::FCMOVNB }, - { X86::FpCMOVNBE , X86::FCMOVNBE }, - { X86::FpCMOVNE , X86::FCMOVNE }, - { X86::FpCMOVNP , X86::FCMOVNP }, - { X86::FpCMOVP , X86::FCMOVP }, - { X86::FpCOS , X86::FCOS }, + { X86::FpCHS32 , X86::FCHS }, + { X86::FpCHS64 , X86::FCHS }, + { X86::FpCMOVB32 , X86::FCMOVB }, + { X86::FpCMOVB64 , X86::FCMOVB }, + { X86::FpCMOVBE32 , X86::FCMOVBE }, + { X86::FpCMOVBE64 , X86::FCMOVBE }, + { X86::FpCMOVE32 , X86::FCMOVE }, + { X86::FpCMOVE64 , X86::FCMOVE }, + { X86::FpCMOVNB32 , X86::FCMOVNB }, + { X86::FpCMOVNB64 , X86::FCMOVNB }, + { X86::FpCMOVNBE32 , X86::FCMOVNBE }, + { X86::FpCMOVNBE64 , X86::FCMOVNBE }, + { X86::FpCMOVNE32 , X86::FCMOVNE }, + { X86::FpCMOVNE64 , X86::FCMOVNE }, + { 
X86::FpCMOVNP32 , X86::FCMOVNP }, + { X86::FpCMOVNP64 , X86::FCMOVNP }, + { X86::FpCMOVP32 , X86::FCMOVP }, + { X86::FpCMOVP64 , X86::FCMOVP }, + { X86::FpCOS32 , X86::FCOS }, + { X86::FpCOS64 , X86::FCOS }, { X86::FpDIV32m , X86::FDIV32m }, { X86::FpDIV64m , X86::FDIV64m }, { X86::FpDIVR32m , X86::FDIVR32m }, { X86::FpDIVR64m , X86::FDIVR64m }, - { X86::FpIADD16m , X86::FIADD16m }, - { X86::FpIADD32m , X86::FIADD32m }, - { X86::FpIDIV16m , X86::FIDIV16m }, - { X86::FpIDIV32m , X86::FIDIV32m }, - { X86::FpIDIVR16m, X86::FIDIVR16m}, - { X86::FpIDIVR32m, X86::FIDIVR32m}, - { X86::FpILD16m , X86::FILD16m }, - { X86::FpILD32m , X86::FILD32m }, - { X86::FpILD64m , X86::FILD64m }, - { X86::FpIMUL16m , X86::FIMUL16m }, - { X86::FpIMUL32m , X86::FIMUL32m }, - { X86::FpIST16m , X86::FIST16m }, - { X86::FpIST32m , X86::FIST32m }, - { X86::FpIST64m , X86::FISTP64m }, - { X86::FpISTT16m , X86::FISTTP16m}, - { X86::FpISTT32m , X86::FISTTP32m}, - { X86::FpISTT64m , X86::FISTTP64m}, - { X86::FpISUB16m , X86::FISUB16m }, - { X86::FpISUB32m , X86::FISUB32m }, - { X86::FpISUBR16m, X86::FISUBR16m}, - { X86::FpISUBR32m, X86::FISUBR32m}, - { X86::FpLD0 , X86::FLD0 }, - { X86::FpLD1 , X86::FLD1 }, + { X86::FpIADD16m32 , X86::FIADD16m }, + { X86::FpIADD16m64 , X86::FIADD16m }, + { X86::FpIADD32m32 , X86::FIADD32m }, + { X86::FpIADD32m64 , X86::FIADD32m }, + { X86::FpIDIV16m32 , X86::FIDIV16m }, + { X86::FpIDIV16m64 , X86::FIDIV16m }, + { X86::FpIDIV32m32 , X86::FIDIV32m }, + { X86::FpIDIV32m64 , X86::FIDIV32m }, + { X86::FpIDIVR16m32, X86::FIDIVR16m}, + { X86::FpIDIVR16m64, X86::FIDIVR16m}, + { X86::FpIDIVR32m32, X86::FIDIVR32m}, + { X86::FpIDIVR32m64, X86::FIDIVR32m}, + { X86::FpILD16m32 , X86::FILD16m }, + { X86::FpILD16m64 , X86::FILD16m }, + { X86::FpILD32m32 , X86::FILD32m }, + { X86::FpILD32m64 , X86::FILD32m }, + { X86::FpILD64m32 , X86::FILD64m }, + { X86::FpILD64m64 , X86::FILD64m }, + { X86::FpIMUL16m32 , X86::FIMUL16m }, + { X86::FpIMUL16m64 , X86::FIMUL16m }, + { 
X86::FpIMUL32m32 , X86::FIMUL32m }, + { X86::FpIMUL32m64 , X86::FIMUL32m }, + { X86::FpIST16m32 , X86::FIST16m }, + { X86::FpIST16m64 , X86::FIST16m }, + { X86::FpIST32m32 , X86::FIST32m }, + { X86::FpIST32m64 , X86::FIST32m }, + { X86::FpIST64m32 , X86::FISTP64m }, + { X86::FpIST64m64 , X86::FISTP64m }, + { X86::FpISTT16m32 , X86::FISTTP16m}, + { X86::FpISTT16m64 , X86::FISTTP16m}, + { X86::FpISTT32m32 , X86::FISTTP32m}, + { X86::FpISTT32m64 , X86::FISTTP32m}, + { X86::FpISTT64m32 , X86::FISTTP64m}, + { X86::FpISTT64m64 , X86::FISTTP64m}, + { X86::FpISUB16m32 , X86::FISUB16m }, + { X86::FpISUB16m64 , X86::FISUB16m }, + { X86::FpISUB32m32 , X86::FISUB32m }, + { X86::FpISUB32m64 , X86::FISUB32m }, + { X86::FpISUBR16m32, X86::FISUBR16m}, + { X86::FpISUBR16m64, X86::FISUBR16m}, + { X86::FpISUBR32m32, X86::FISUBR32m}, + { X86::FpISUBR32m64, X86::FISUBR32m}, + { X86::FpLD032 , X86::FLD0 }, + { X86::FpLD064 , X86::FLD0 }, + { X86::FpLD132 , X86::FLD1 }, + { X86::FpLD164 , X86::FLD1 }, { X86::FpLD32m , X86::FLD32m }, { X86::FpLD64m , X86::FLD64m }, { X86::FpMUL32m , X86::FMUL32m }, { X86::FpMUL64m , X86::FMUL64m }, - { X86::FpSIN , X86::FSIN }, - { X86::FpSQRT , X86::FSQRT }, + { X86::FpSIN32 , X86::FSIN }, + { X86::FpSIN64 , X86::FSIN }, + { X86::FpSQRT32 , X86::FSQRT }, + { X86::FpSQRT64 , X86::FSQRT }, { X86::FpST32m , X86::FST32m }, { X86::FpST64m , X86::FST64m }, + { X86::FpST64m32 , X86::FST32m }, { X86::FpSUB32m , X86::FSUB32m }, { X86::FpSUB64m , X86::FSUB64m }, { X86::FpSUBR32m , X86::FSUBR32m }, { X86::FpSUBR64m , X86::FSUBR64m }, - { X86::FpTST , X86::FTST }, - { X86::FpUCOMIr , X86::FUCOMIr }, - { X86::FpUCOMr , X86::FUCOMr }, + { X86::FpTST32 , X86::FTST }, + { X86::FpTST64 , X86::FTST }, + { X86::FpUCOMIr32 , X86::FUCOMIr }, + { X86::FpUCOMIr64 , X86::FUCOMIr }, + { X86::FpUCOMr32 , X86::FUCOMr }, + { X86::FpUCOMr64 , X86::FUCOMr }, }; static unsigned getConcreteOpcode(unsigned Opcode) { @@ -510,10 +550,14 @@ void 
FPS::handleOneArgFP(MachineBasicBlock::iterator &I) { // Ditto FISTTP16m, FISTTP32m, FISTTP64m. // if (!KillsSrc && - (MI->getOpcode() == X86::FpIST64m || - MI->getOpcode() == X86::FpISTT16m || - MI->getOpcode() == X86::FpISTT32m || - MI->getOpcode() == X86::FpISTT64m)) { + (MI->getOpcode() == X86::FpIST64m32 || + MI->getOpcode() == X86::FpISTT16m32 || + MI->getOpcode() == X86::FpISTT32m32 || + MI->getOpcode() == X86::FpISTT64m32 || + MI->getOpcode() == X86::FpIST64m64 || + MI->getOpcode() == X86::FpISTT16m64 || + MI->getOpcode() == X86::FpISTT32m64 || + MI->getOpcode() == X86::FpISTT64m64)) { duplicateToTop(Reg, 7 /*temp register*/, I); } else { moveToTop(Reg, I); // Move to the top of the stack... @@ -578,34 +622,50 @@ void FPS::handleOneArgFPRW(MachineBasicBlock::iterator &I) { // ForwardST0Table - Map: A = B op C into: ST(0) = ST(0) op ST(i) static const TableEntry ForwardST0Table[] = { - { X86::FpADD , X86::FADDST0r }, - { X86::FpDIV , X86::FDIVST0r }, - { X86::FpMUL , X86::FMULST0r }, - { X86::FpSUB , X86::FSUBST0r }, + { X86::FpADD32 , X86::FADDST0r }, + { X86::FpADD64 , X86::FADDST0r }, + { X86::FpDIV32 , X86::FDIVST0r }, + { X86::FpDIV64 , X86::FDIVST0r }, + { X86::FpMUL32 , X86::FMULST0r }, + { X86::FpMUL64 , X86::FMULST0r }, + { X86::FpSUB32 , X86::FSUBST0r }, + { X86::FpSUB64 , X86::FSUBST0r }, }; // ReverseST0Table - Map: A = B op C into: ST(0) = ST(i) op ST(0) static const TableEntry ReverseST0Table[] = { - { X86::FpADD , X86::FADDST0r }, // commutative - { X86::FpDIV , X86::FDIVRST0r }, - { X86::FpMUL , X86::FMULST0r }, // commutative - { X86::FpSUB , X86::FSUBRST0r }, + { X86::FpADD32 , X86::FADDST0r }, // commutative + { X86::FpADD64 , X86::FADDST0r }, // commutative + { X86::FpDIV32 , X86::FDIVRST0r }, + { X86::FpDIV64 , X86::FDIVRST0r }, + { X86::FpMUL32 , X86::FMULST0r }, // commutative + { X86::FpMUL64 , X86::FMULST0r }, // commutative + { X86::FpSUB32 , X86::FSUBRST0r }, + { X86::FpSUB64 , X86::FSUBRST0r }, }; // ForwardSTiTable - Map: A = B 
op C into: ST(i) = ST(0) op ST(i) static const TableEntry ForwardSTiTable[] = { - { X86::FpADD , X86::FADDrST0 }, // commutative - { X86::FpDIV , X86::FDIVRrST0 }, - { X86::FpMUL , X86::FMULrST0 }, // commutative - { X86::FpSUB , X86::FSUBRrST0 }, + { X86::FpADD32 , X86::FADDrST0 }, // commutative + { X86::FpADD64 , X86::FADDrST0 }, // commutative + { X86::FpDIV32 , X86::FDIVRrST0 }, + { X86::FpDIV64 , X86::FDIVRrST0 }, + { X86::FpMUL32 , X86::FMULrST0 }, // commutative + { X86::FpMUL64 , X86::FMULrST0 }, // commutative + { X86::FpSUB32 , X86::FSUBRrST0 }, + { X86::FpSUB64 , X86::FSUBRrST0 }, }; // ReverseSTiTable - Map: A = B op C into: ST(i) = ST(i) op ST(0) static const TableEntry ReverseSTiTable[] = { - { X86::FpADD , X86::FADDrST0 }, - { X86::FpDIV , X86::FDIVrST0 }, - { X86::FpMUL , X86::FMULrST0 }, - { X86::FpSUB , X86::FSUBrST0 }, + { X86::FpADD32 , X86::FADDrST0 }, + { X86::FpADD64 , X86::FADDrST0 }, + { X86::FpDIV32 , X86::FDIVrST0 }, + { X86::FpDIV64 , X86::FDIVrST0 }, + { X86::FpMUL32 , X86::FMULrST0 }, + { X86::FpMUL64 , X86::FMULrST0 }, + { X86::FpSUB32 , X86::FSUBrST0 }, + { X86::FpSUB64 , X86::FSUBrST0 }, }; @@ -777,15 +837,20 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { MachineInstr *MI = I; switch (MI->getOpcode()) { default: assert(0 && "Unknown SpecialFP instruction!"); - case X86::FpGETRESULT: // Appears immediately after a call returning FP type! + case X86::FpGETRESULT32: // Appears immediately after a call returning FP type! + case X86::FpGETRESULT64: // Appears immediately after a call returning FP type! assert(StackTop == 0 && "Stack should be empty after a call!"); pushReg(getFPReg(MI->getOperand(0))); break; - case X86::FpSETRESULT: + case X86::FpSETRESULT32: + case X86::FpSETRESULT64: assert(StackTop == 1 && "Stack should have one element on it to return!"); --StackTop; // "Forget" we have something on the top of stack! 
break; - case X86::FpMOV: { + case X86::FpMOV3232: + case X86::FpMOV3264: + case X86::FpMOV6432: + case X86::FpMOV6464: { unsigned SrcReg = getFPReg(MI->getOperand(1)); unsigned DestReg = getFPReg(MI->getOperand(0)); diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 58a72b8cb02..8b1690c05f5 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -492,11 +492,13 @@ void X86DAGToDAGISel::InstructionSelectBasicBlock(SelectionDAG &DAG) { for (MachineBasicBlock::iterator I = MBBI->begin(), E = MBBI->end(); !ContainsFPCode && I != E; ++I) { if (I->getNumOperands() != 0 && I->getOperand(0).isRegister()) { + const TargetRegisterClass *clas; // class of the def under test for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) { if (I->getOperand(op).isRegister() && I->getOperand(op).isDef() && MRegisterInfo::isVirtualRegister(I->getOperand(op).getReg()) && - RegMap->getRegClass(I->getOperand(0).getReg()) == - X86::RFPRegisterClass) { + ((clas = RegMap->getRegClass(I->getOperand(op).getReg())) == + X86::RFP32RegisterClass || + clas == X86::RFP64RegisterClass)) { ContainsFPCode = true; break; } diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 91b0f3fb620..641ccb03dec 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -289,11 +289,14 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) addLegalFPImmediate(+0.0); // xorps / xorpd } else { // Set up the FP register classes.
- addRegisterClass(MVT::f64, X86::RFPRegisterClass); + addRegisterClass(MVT::f64, X86::RFP64RegisterClass); + addRegisterClass(MVT::f32, X86::RFP32RegisterClass); setOperationAction(ISD::UNDEF, MVT::f64, Expand); + setOperationAction(ISD::UNDEF, MVT::f32, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); + setOperationAction(ISD::FP_ROUND, MVT::f32, Expand); if (!UnsafeFPMath) { setOperationAction(ISD::FSIN , MVT::f64 , Expand); @@ -301,6 +304,7 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) } setOperationAction(ISD::ConstantFP, MVT::f64, Expand); + setOperationAction(ISD::ConstantFP, MVT::f32, Expand); addLegalFPImmediate(+0.0); // FLD0 addLegalFPImmediate(+1.0); // FLD1 addLegalFPImmediate(-0.0); // FLD0/FCHS @@ -553,7 +557,7 @@ SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) { MemLoc = DAG.getFrameIndex(SSFI, getPointerTy()); Chain = DAG.getStore(Op.getOperand(0), Value, MemLoc, NULL, 0); } - SDVTList Tys = DAG.getVTList(MVT::f64, MVT::Other); + SDVTList Tys = DAG.getVTList(RVLocs[0].getValVT(), MVT::Other); SDOperand Ops[] = {Chain, MemLoc, DAG.getValueType(RVLocs[0].getValVT())}; Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3); Chain = Value.getValue(1); @@ -604,7 +608,7 @@ LowerCallResult(SDOperand Chain, SDOperand InFlag, SDNode *TheCall, // before the fp stackifier runs. // Copy ST0 into an RFP register with FP_GET_RESULT. 
- SDVTList Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag); + SDVTList Tys = DAG.getVTList(RVLocs[0].getValVT(), MVT::Other, MVT::Flag); SDOperand GROps[] = { Chain, InFlag }; SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, GROps, 2); Chain = RetVal.getValue(1); @@ -626,11 +630,6 @@ LowerCallResult(SDOperand Chain, SDOperand InFlag, SDNode *TheCall, RetVal = DAG.getLoad(RVLocs[0].getValVT(), Chain, StackSlot, NULL, 0); Chain = RetVal.getValue(1); } - - if (RVLocs[0].getValVT() == MVT::f32 && !X86ScalarSSE) - // FIXME: we would really like to remember that this FP_ROUND - // operation is okay to eliminate if we allow excess FP precision. - RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal); ResultVals.push_back(RetVal); } @@ -3252,7 +3251,7 @@ SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) { if (X86ScalarSSE) Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag); else - Tys = DAG.getVTList(MVT::f64, MVT::Other); + Tys = DAG.getVTList(Op.getValueType(), MVT::Other); SmallVector Ops; Ops.push_back(Chain); Ops.push_back(StackSlot); @@ -3307,7 +3306,7 @@ SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) { if (X86ScalarSSE) { assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!"); Chain = DAG.getStore(Chain, Value, StackSlot, NULL, 0); - SDVTList Tys = DAG.getVTList(MVT::f64, MVT::Other); + SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other); SDOperand Ops[] = { Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType()) }; @@ -4437,9 +4436,12 @@ X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI, return BB; } - case X86::FP_TO_INT16_IN_MEM: - case X86::FP_TO_INT32_IN_MEM: - case X86::FP_TO_INT64_IN_MEM: { + case X86::FP32_TO_INT16_IN_MEM: + case X86::FP32_TO_INT32_IN_MEM: + case X86::FP32_TO_INT64_IN_MEM: + case X86::FP64_TO_INT16_IN_MEM: + case X86::FP64_TO_INT32_IN_MEM: + case X86::FP64_TO_INT64_IN_MEM: { // Change the floating point 
control register to use "round towards zero" // mode when truncating to an integer value. MachineFunction *F = BB->getParent(); @@ -4466,9 +4468,12 @@ X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI, unsigned Opc; switch (MI->getOpcode()) { default: assert(0 && "illegal opcode!"); - case X86::FP_TO_INT16_IN_MEM: Opc = X86::FpIST16m; break; - case X86::FP_TO_INT32_IN_MEM: Opc = X86::FpIST32m; break; - case X86::FP_TO_INT64_IN_MEM: Opc = X86::FpIST64m; break; + case X86::FP32_TO_INT16_IN_MEM: Opc = X86::FpIST16m32; break; + case X86::FP32_TO_INT32_IN_MEM: Opc = X86::FpIST32m32; break; + case X86::FP32_TO_INT64_IN_MEM: Opc = X86::FpIST64m32; break; + case X86::FP64_TO_INT16_IN_MEM: Opc = X86::FpIST16m64; break; + case X86::FP64_TO_INT32_IN_MEM: Opc = X86::FpIST32m64; break; + case X86::FP64_TO_INT64_IN_MEM: Opc = X86::FpIST64m64; break; } X86AddressMode AM; diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td index 848d370db4d..128ef7773a8 100644 --- a/lib/Target/X86/X86InstrFPStack.td +++ b/lib/Target/X86/X86InstrFPStack.td @@ -17,13 +17,13 @@ // FPStack specific DAG Nodes. 
//===----------------------------------------------------------------------===// -def SDTX86FpGet : SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>; +def SDTX86FpGet : SDTypeProfile<1, 0, [SDTCisFP<0>]>; def SDTX86FpSet : SDTypeProfile<0, 1, [SDTCisFP<0>]>; -def SDTX86Fld : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, +def SDTX86Fld : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>]>; def SDTX86Fst : SDTypeProfile<0, 3, [SDTCisFP<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>]>; -def SDTX86Fild : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisPtrTy<1>, +def SDTX86Fild : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>]>; def SDTX86FpToIMem: SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>; @@ -50,19 +50,19 @@ def X86fp_to_i64mem : SDNode<"X86ISD::FP_TO_INT64_IN_MEM", SDTX86FpToIMem, // FPStack pattern fragments //===----------------------------------------------------------------------===// -def fp64imm0 : PatLeaf<(f64 fpimm), [{ +def fpimm0 : PatLeaf<(fpimm), [{ return N->isExactlyValue(+0.0); }]>; -def fp64immneg0 : PatLeaf<(f64 fpimm), [{ +def fpimmneg0 : PatLeaf<(fpimm), [{ return N->isExactlyValue(-0.0); }]>; -def fp64imm1 : PatLeaf<(f64 fpimm), [{ +def fpimm1 : PatLeaf<(fpimm), [{ return N->isExactlyValue(+1.0); }]>; -def fp64immneg1 : PatLeaf<(f64 fpimm), [{ +def fpimmneg1 : PatLeaf<(fpimm), [{ return N->isExactlyValue(-1.0); }]>; @@ -70,18 +70,30 @@ def extloadf64f32 : PatFrag<(ops node:$ptr), (f64 (extloadf32 node:$ptr))>; // Some 'special' instructions let usesCustomDAGSchedInserter = 1 in { // Expanded by the scheduler. 
- def FP_TO_INT16_IN_MEM : I<0, Pseudo, - (ops i16mem:$dst, RFP:$src), - "#FP_TO_INT16_IN_MEM PSEUDO!", - [(X86fp_to_i16mem RFP:$src, addr:$dst)]>; - def FP_TO_INT32_IN_MEM : I<0, Pseudo, - (ops i32mem:$dst, RFP:$src), - "#FP_TO_INT32_IN_MEM PSEUDO!", - [(X86fp_to_i32mem RFP:$src, addr:$dst)]>; - def FP_TO_INT64_IN_MEM : I<0, Pseudo, - (ops i64mem:$dst, RFP:$src), - "#FP_TO_INT64_IN_MEM PSEUDO!", - [(X86fp_to_i64mem RFP:$src, addr:$dst)]>; + def FP32_TO_INT16_IN_MEM : I<0, Pseudo, + (ops i16mem:$dst, RFP32:$src), + "#FP32_TO_INT16_IN_MEM PSEUDO!", + [(X86fp_to_i16mem RFP32:$src, addr:$dst)]>; + def FP32_TO_INT32_IN_MEM : I<0, Pseudo, + (ops i32mem:$dst, RFP32:$src), + "#FP32_TO_INT32_IN_MEM PSEUDO!", + [(X86fp_to_i32mem RFP32:$src, addr:$dst)]>; + def FP32_TO_INT64_IN_MEM : I<0, Pseudo, + (ops i64mem:$dst, RFP32:$src), + "#FP32_TO_INT64_IN_MEM PSEUDO!", + [(X86fp_to_i64mem RFP32:$src, addr:$dst)]>; + def FP64_TO_INT16_IN_MEM : I<0, Pseudo, + (ops i16mem:$dst, RFP64:$src), + "#FP64_TO_INT16_IN_MEM PSEUDO!", + [(X86fp_to_i16mem RFP64:$src, addr:$dst)]>; + def FP64_TO_INT32_IN_MEM : I<0, Pseudo, + (ops i32mem:$dst, RFP64:$src), + "#FP64_TO_INT32_IN_MEM PSEUDO!", + [(X86fp_to_i32mem RFP64:$src, addr:$dst)]>; + def FP64_TO_INT64_IN_MEM : I<0, Pseudo, + (ops i64mem:$dst, RFP64:$src), + "#FP64_TO_INT64_IN_MEM PSEUDO!", + [(X86fp_to_i64mem RFP64:$src, addr:$dst)]>; } let isTerminator = 1 in @@ -111,30 +123,47 @@ class FpI_ pattern> } // Random Pseudo Instructions. 
-def FpGETRESULT : FpI_<(ops RFP:$dst), SpecialFP, - [(set RFP:$dst, X86fpget)]>; // FPR = ST(0) +def FpGETRESULT32 : FpI_<(ops RFP32:$dst), SpecialFP, + [(set RFP32:$dst, X86fpget)]>; // FPR = ST(0) -let noResults = 1 in - def FpSETRESULT : FpI_<(ops RFP:$src), SpecialFP, - [(X86fpset RFP:$src)]>, Imp<[], [ST0]>; // ST(0) = FPR +def FpGETRESULT64 : FpI_<(ops RFP64:$dst), SpecialFP, + [(set RFP64:$dst, X86fpget)]>; // FPR = ST(0) +let noResults = 1 in { + def FpSETRESULT32 : FpI_<(ops RFP32:$src), SpecialFP, + [(X86fpset RFP32:$src)]>, Imp<[], [ST0]>; // ST(0) = FPR + + def FpSETRESULT64 : FpI_<(ops RFP64:$src), SpecialFP, + [(X86fpset RFP64:$src)]>, Imp<[], [ST0]>; // ST(0) = FPR +} // FpI - Floating Point Psuedo Instruction template. Predicated on FPStack. class FpI pattern> : FpI_, Requires<[FPStack]>; - -def FpMOV : FpI<(ops RFP:$dst, RFP:$src), SpecialFP, []>; // f1 = fmov f2 +// Register copies. Just copies, the 64->32 version does not truncate. +def FpMOV3232 : FpI<(ops RFP32:$dst, RFP32:$src), SpecialFP, []>; // f1 = fmov f2 +def FpMOV3264 : FpI<(ops RFP64:$dst, RFP32:$src), SpecialFP, []>; // f1 = fmov f2 +def FpMOV6432 : FpI<(ops RFP32:$dst, RFP64:$src), SpecialFP, []>; // f1 = fmov f2 +def FpMOV6464 : FpI<(ops RFP64:$dst, RFP64:$src), SpecialFP, []>; // f1 = fmov f2 // Arithmetic // Add, Sub, Mul, Div. 
-def FpADD : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), TwoArgFP, - [(set RFP:$dst, (fadd RFP:$src1, RFP:$src2))]>; -def FpSUB : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), TwoArgFP, - [(set RFP:$dst, (fsub RFP:$src1, RFP:$src2))]>; -def FpMUL : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), TwoArgFP, - [(set RFP:$dst, (fmul RFP:$src1, RFP:$src2))]>; -def FpDIV : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), TwoArgFP, - [(set RFP:$dst, (fdiv RFP:$src1, RFP:$src2))]>; +def FpADD32 : FpI<(ops RFP32:$dst, RFP32:$src1, RFP32:$src2), TwoArgFP, + [(set RFP32:$dst, (fadd RFP32:$src1, RFP32:$src2))]>; +def FpSUB32 : FpI<(ops RFP32:$dst, RFP32:$src1, RFP32:$src2), TwoArgFP, + [(set RFP32:$dst, (fsub RFP32:$src1, RFP32:$src2))]>; +def FpMUL32 : FpI<(ops RFP32:$dst, RFP32:$src1, RFP32:$src2), TwoArgFP, + [(set RFP32:$dst, (fmul RFP32:$src1, RFP32:$src2))]>; +def FpDIV32 : FpI<(ops RFP32:$dst, RFP32:$src1, RFP32:$src2), TwoArgFP, + [(set RFP32:$dst, (fdiv RFP32:$src1, RFP32:$src2))]>; +def FpADD64 : FpI<(ops RFP64:$dst, RFP64:$src1, RFP64:$src2), TwoArgFP, + [(set RFP64:$dst, (fadd RFP64:$src1, RFP64:$src2))]>; +def FpSUB64 : FpI<(ops RFP64:$dst, RFP64:$src1, RFP64:$src2), TwoArgFP, + [(set RFP64:$dst, (fsub RFP64:$src1, RFP64:$src2))]>; +def FpMUL64 : FpI<(ops RFP64:$dst, RFP64:$src1, RFP64:$src2), TwoArgFP, + [(set RFP64:$dst, (fmul RFP64:$src1, RFP64:$src2))]>; +def FpDIV64 : FpI<(ops RFP64:$dst, RFP64:$src1, RFP64:$src2), TwoArgFP, + [(set RFP64:$dst, (fdiv RFP64:$src1, RFP64:$src2))]>; class FPST0rInst o, string asm> : FPI, D8; @@ -144,47 +173,41 @@ class FPrST0PInst o, string asm> : FPI, DE; // Binary Ops with a memory source. 
-def FpADD32m : FpI<(ops RFP:$dst, RFP:$src1, f32mem:$src2), OneArgFPRW, - [(set RFP:$dst, (fadd RFP:$src1, - (extloadf64f32 addr:$src2)))]>; +def FpADD32m : FpI<(ops RFP32:$dst, RFP32:$src1, f32mem:$src2), OneArgFPRW, + [(set RFP32:$dst, (fadd RFP32:$src1, (loadf32 addr:$src2)))]>; // ST(0) = ST(0) + [mem32] -def FpADD64m : FpI<(ops RFP:$dst, RFP:$src1, f64mem:$src2), OneArgFPRW, - [(set RFP:$dst, (fadd RFP:$src1, (loadf64 addr:$src2)))]>; +def FpADD64m : FpI<(ops RFP64:$dst, RFP64:$src1, f64mem:$src2), OneArgFPRW, + [(set RFP64:$dst, (fadd RFP64:$src1, (loadf64 addr:$src2)))]>; // ST(0) = ST(0) + [mem64] -def FpMUL32m : FpI<(ops RFP:$dst, RFP:$src1, f32mem:$src2), OneArgFPRW, - [(set RFP:$dst, (fmul RFP:$src1, - (extloadf64f32 addr:$src2)))]>; +def FpMUL32m : FpI<(ops RFP32:$dst, RFP32:$src1, f32mem:$src2), OneArgFPRW, + [(set RFP32:$dst, (fmul RFP32:$src1, (loadf32 addr:$src2)))]>; // ST(0) = ST(0) * [mem32] -def FpMUL64m : FpI<(ops RFP:$dst, RFP:$src1, f64mem:$src2), OneArgFPRW, - [(set RFP:$dst, (fmul RFP:$src1, (loadf64 addr:$src2)))]>; +def FpMUL64m : FpI<(ops RFP64:$dst, RFP64:$src1, f64mem:$src2), OneArgFPRW, + [(set RFP64:$dst, (fmul RFP64:$src1, (loadf64 addr:$src2)))]>; // ST(0) = ST(0) * [mem64] -def FpSUB32m : FpI<(ops RFP:$dst, RFP:$src1, f32mem:$src2), OneArgFPRW, - [(set RFP:$dst, (fsub RFP:$src1, - (extloadf64f32 addr:$src2)))]>; +def FpSUB32m : FpI<(ops RFP32:$dst, RFP32:$src1, f32mem:$src2), OneArgFPRW, + [(set RFP32:$dst, (fsub RFP32:$src1, (loadf32 addr:$src2)))]>; // ST(0) = ST(0) - [mem32] -def FpSUB64m : FpI<(ops RFP:$dst, RFP:$src1, f64mem:$src2), OneArgFPRW, - [(set RFP:$dst, (fsub RFP:$src1, (loadf64 addr:$src2)))]>; +def FpSUB64m : FpI<(ops RFP64:$dst, RFP64:$src1, f64mem:$src2), OneArgFPRW, + [(set RFP64:$dst, (fsub RFP64:$src1, (loadf64 addr:$src2)))]>; // ST(0) = ST(0) - [mem64] -def FpSUBR32m : FpI<(ops RFP:$dst, RFP:$src1, f32mem:$src2), OneArgFPRW, - [(set RFP:$dst, (fsub (extloadf64f32 addr:$src2), - RFP:$src1))]>; +def FpSUBR32m 
: FpI<(ops RFP32:$dst, RFP32:$src1, f32mem:$src2), OneArgFPRW, + [(set RFP32:$dst, (fsub (loadf32 addr:$src2), RFP32:$src1))]>; // ST(0) = [mem32] - ST(0) -def FpSUBR64m : FpI<(ops RFP:$dst, RFP:$src1, f64mem:$src2), OneArgFPRW, - [(set RFP:$dst, (fsub (loadf64 addr:$src2), RFP:$src1))]>; +def FpSUBR64m : FpI<(ops RFP64:$dst, RFP64:$src1, f64mem:$src2), OneArgFPRW, + [(set RFP64:$dst, (fsub (loadf64 addr:$src2), RFP64:$src1))]>; // ST(0) = [mem64] - ST(0) -def FpDIV32m : FpI<(ops RFP:$dst, RFP:$src1, f32mem:$src2), OneArgFPRW, - [(set RFP:$dst, (fdiv RFP:$src1, - (extloadf64f32 addr:$src2)))]>; +def FpDIV32m : FpI<(ops RFP32:$dst, RFP32:$src1, f32mem:$src2), OneArgFPRW, + [(set RFP32:$dst, (fdiv RFP32:$src1, (loadf32 addr:$src2)))]>; // ST(0) = ST(0) / [mem32] -def FpDIV64m : FpI<(ops RFP:$dst, RFP:$src1, f64mem:$src2), OneArgFPRW, - [(set RFP:$dst, (fdiv RFP:$src1, (loadf64 addr:$src2)))]>; +def FpDIV64m : FpI<(ops RFP64:$dst, RFP64:$src1, f64mem:$src2), OneArgFPRW, + [(set RFP64:$dst, (fdiv RFP64:$src1, (loadf64 addr:$src2)))]>; // ST(0) = ST(0) / [mem64] -def FpDIVR32m : FpI<(ops RFP:$dst, RFP:$src1, f32mem:$src2), OneArgFPRW, - [(set RFP:$dst, (fdiv (extloadf64f32 addr:$src2), - RFP:$src1))]>; +def FpDIVR32m : FpI<(ops RFP32:$dst, RFP32:$src1, f32mem:$src2), OneArgFPRW, + [(set RFP32:$dst, (fdiv (loadf32 addr:$src2), RFP32:$src1))]>; // ST(0) = [mem32] / ST(0) -def FpDIVR64m : FpI<(ops RFP:$dst, RFP:$src1, f64mem:$src2), OneArgFPRW, - [(set RFP:$dst, (fdiv (loadf64 addr:$src2), RFP:$src1))]>; +def FpDIVR64m : FpI<(ops RFP64:$dst, RFP64:$src1, f64mem:$src2), OneArgFPRW, + [(set RFP64:$dst, (fdiv (loadf64 addr:$src2), RFP64:$src1))]>; // ST(0) = [mem64] / ST(0) @@ -201,53 +224,102 @@ def FDIV64m : FPI<0xDC, MRM6m, (ops f64mem:$src), "fdiv{l} $src">; def FDIVR32m : FPI<0xD8, MRM7m, (ops f32mem:$src), "fdivr{s} $src">; def FDIVR64m : FPI<0xDC, MRM7m, (ops f64mem:$src), "fdivr{l} $src">; -def FpIADD16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW, - 
[(set RFP:$dst, (fadd RFP:$src1, +def FpIADD16m32 : FpI<(ops RFP32:$dst, RFP32:$src1, i16mem:$src2), OneArgFPRW, + [(set RFP32:$dst, (fadd RFP32:$src1, (X86fild addr:$src2, i16)))]>; // ST(0) = ST(0) + [mem16int] -def FpIADD32m : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW, - [(set RFP:$dst, (fadd RFP:$src1, +def FpIADD32m32 : FpI<(ops RFP32:$dst, RFP32:$src1, i32mem:$src2), OneArgFPRW, + [(set RFP32:$dst, (fadd RFP32:$src1, (X86fild addr:$src2, i32)))]>; // ST(0) = ST(0) + [mem32int] -def FpIMUL16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW, - [(set RFP:$dst, (fmul RFP:$src1, +def FpIMUL16m32 : FpI<(ops RFP32:$dst, RFP32:$src1, i16mem:$src2), OneArgFPRW, + [(set RFP32:$dst, (fmul RFP32:$src1, (X86fild addr:$src2, i16)))]>; // ST(0) = ST(0) * [mem16int] -def FpIMUL32m : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW, - [(set RFP:$dst, (fmul RFP:$src1, +def FpIMUL32m32 : FpI<(ops RFP32:$dst, RFP32:$src1, i32mem:$src2), OneArgFPRW, + [(set RFP32:$dst, (fmul RFP32:$src1, (X86fild addr:$src2, i32)))]>; // ST(0) = ST(0) * [mem32int] -def FpISUB16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW, - [(set RFP:$dst, (fsub RFP:$src1, +def FpISUB16m32 : FpI<(ops RFP32:$dst, RFP32:$src1, i16mem:$src2), OneArgFPRW, + [(set RFP32:$dst, (fsub RFP32:$src1, (X86fild addr:$src2, i16)))]>; // ST(0) = ST(0) - [mem16int] -def FpISUB32m : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW, - [(set RFP:$dst, (fsub RFP:$src1, +def FpISUB32m32 : FpI<(ops RFP32:$dst, RFP32:$src1, i32mem:$src2), OneArgFPRW, + [(set RFP32:$dst, (fsub RFP32:$src1, (X86fild addr:$src2, i32)))]>; // ST(0) = ST(0) - [mem32int] -def FpISUBR16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW, - [(set RFP:$dst, (fsub (X86fild addr:$src2, i16), - RFP:$src1))]>; +def FpISUBR16m32 : FpI<(ops RFP32:$dst, RFP32:$src1, i16mem:$src2), OneArgFPRW, + [(set RFP32:$dst, (fsub (X86fild addr:$src2, i16), + RFP32:$src1))]>; // ST(0) = [mem16int] - ST(0) -def FpISUBR32m 
: FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW, - [(set RFP:$dst, (fsub (X86fild addr:$src2, i32), - RFP:$src1))]>; +def FpISUBR32m32 : FpI<(ops RFP32:$dst, RFP32:$src1, i32mem:$src2), OneArgFPRW, + [(set RFP32:$dst, (fsub (X86fild addr:$src2, i32), + RFP32:$src1))]>; // ST(0) = [mem32int] - ST(0) -def FpIDIV16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW, - [(set RFP:$dst, (fdiv RFP:$src1, +def FpIDIV16m32 : FpI<(ops RFP32:$dst, RFP32:$src1, i16mem:$src2), OneArgFPRW, + [(set RFP32:$dst, (fdiv RFP32:$src1, (X86fild addr:$src2, i16)))]>; // ST(0) = ST(0) / [mem16int] -def FpIDIV32m : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW, - [(set RFP:$dst, (fdiv RFP:$src1, +def FpIDIV32m32 : FpI<(ops RFP32:$dst, RFP32:$src1, i32mem:$src2), OneArgFPRW, + [(set RFP32:$dst, (fdiv RFP32:$src1, (X86fild addr:$src2, i32)))]>; // ST(0) = ST(0) / [mem32int] -def FpIDIVR16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW, - [(set RFP:$dst, (fdiv (X86fild addr:$src2, i16), - RFP:$src1))]>; +def FpIDIVR16m32 : FpI<(ops RFP32:$dst, RFP32:$src1, i16mem:$src2), OneArgFPRW, + [(set RFP32:$dst, (fdiv (X86fild addr:$src2, i16), + RFP32:$src1))]>; // ST(0) = [mem16int] / ST(0) -def FpIDIVR32m : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW, - [(set RFP:$dst, (fdiv (X86fild addr:$src2, i32), - RFP:$src1))]>; +def FpIDIVR32m32 : FpI<(ops RFP32:$dst, RFP32:$src1, i32mem:$src2), OneArgFPRW, + [(set RFP32:$dst, (fdiv (X86fild addr:$src2, i32), + RFP32:$src1))]>; + // ST(0) = [mem32int] / ST(0) + +def FpIADD16m64 : FpI<(ops RFP64:$dst, RFP64:$src1, i16mem:$src2), OneArgFPRW, + [(set RFP64:$dst, (fadd RFP64:$src1, + (X86fild addr:$src2, i16)))]>; + // ST(0) = ST(0) + [mem16int] +def FpIADD32m64 : FpI<(ops RFP64:$dst, RFP64:$src1, i32mem:$src2), OneArgFPRW, + [(set RFP64:$dst, (fadd RFP64:$src1, + (X86fild addr:$src2, i32)))]>; + // ST(0) = ST(0) + [mem32int] +def FpIMUL16m64 : FpI<(ops RFP64:$dst, RFP64:$src1, i16mem:$src2), OneArgFPRW, + [(set 
RFP64:$dst, (fmul RFP64:$src1, + (X86fild addr:$src2, i16)))]>; + // ST(0) = ST(0) * [mem16int] +def FpIMUL32m64 : FpI<(ops RFP64:$dst, RFP64:$src1, i32mem:$src2), OneArgFPRW, + [(set RFP64:$dst, (fmul RFP64:$src1, + (X86fild addr:$src2, i32)))]>; + // ST(0) = ST(0) * [mem32int] +def FpISUB16m64 : FpI<(ops RFP64:$dst, RFP64:$src1, i16mem:$src2), OneArgFPRW, + [(set RFP64:$dst, (fsub RFP64:$src1, + (X86fild addr:$src2, i16)))]>; + // ST(0) = ST(0) - [mem16int] +def FpISUB32m64 : FpI<(ops RFP64:$dst, RFP64:$src1, i32mem:$src2), OneArgFPRW, + [(set RFP64:$dst, (fsub RFP64:$src1, + (X86fild addr:$src2, i32)))]>; + // ST(0) = ST(0) - [mem32int] +def FpISUBR16m64 : FpI<(ops RFP64:$dst, RFP64:$src1, i16mem:$src2), OneArgFPRW, + [(set RFP64:$dst, (fsub (X86fild addr:$src2, i16), + RFP64:$src1))]>; + // ST(0) = [mem16int] - ST(0) +def FpISUBR32m64 : FpI<(ops RFP64:$dst, RFP64:$src1, i32mem:$src2), OneArgFPRW, + [(set RFP64:$dst, (fsub (X86fild addr:$src2, i32), + RFP64:$src1))]>; + // ST(0) = [mem32int] - ST(0) +def FpIDIV16m64 : FpI<(ops RFP64:$dst, RFP64:$src1, i16mem:$src2), OneArgFPRW, + [(set RFP64:$dst, (fdiv RFP64:$src1, + (X86fild addr:$src2, i16)))]>; + // ST(0) = ST(0) / [mem16int] +def FpIDIV32m64 : FpI<(ops RFP64:$dst, RFP64:$src1, i32mem:$src2), OneArgFPRW, + [(set RFP64:$dst, (fdiv RFP64:$src1, + (X86fild addr:$src2, i32)))]>; + // ST(0) = ST(0) / [mem32int] +def FpIDIVR16m64 : FpI<(ops RFP64:$dst, RFP64:$src1, i16mem:$src2), OneArgFPRW, + [(set RFP64:$dst, (fdiv (X86fild addr:$src2, i16), + RFP64:$src1))]>; + // ST(0) = [mem16int] / ST(0) +def FpIDIVR32m64 : FpI<(ops RFP64:$dst, RFP64:$src1, i32mem:$src2), OneArgFPRW, + [(set RFP64:$dst, (fdiv (X86fild addr:$src2, i32), + RFP64:$src1))]>; // ST(0) = [mem32int] / ST(0) def FIADD16m : FPI<0xDE, MRM0m, (ops i16mem:$src), "fiadd{s} $src">; @@ -285,19 +357,31 @@ def FDIVST0r : FPST0rInst <0xF0, "fdiv $op">; def FDIVRrST0 : FPrST0Inst <0xF0, "fdiv{|r} {%st(0), $op|$op, %ST(0)}">; def FDIVRPrST0 : FPrST0PInst<0xF0, 
"fdiv{|r}p $op">; - // Unary operations. -def FpCHS : FpI<(ops RFP:$dst, RFP:$src), OneArgFPRW, - [(set RFP:$dst, (fneg RFP:$src))]>; -def FpABS : FpI<(ops RFP:$dst, RFP:$src), OneArgFPRW, - [(set RFP:$dst, (fabs RFP:$src))]>; -def FpSQRT : FpI<(ops RFP:$dst, RFP:$src), OneArgFPRW, - [(set RFP:$dst, (fsqrt RFP:$src))]>; -def FpSIN : FpI<(ops RFP:$dst, RFP:$src), OneArgFPRW, - [(set RFP:$dst, (fsin RFP:$src))]>; -def FpCOS : FpI<(ops RFP:$dst, RFP:$src), OneArgFPRW, - [(set RFP:$dst, (fcos RFP:$src))]>; -def FpTST : FpI<(ops RFP:$src), OneArgFP, +def FpCHS32 : FpI<(ops RFP32:$dst, RFP32:$src), OneArgFPRW, + [(set RFP32:$dst, (fneg RFP32:$src))]>; +def FpABS32 : FpI<(ops RFP32:$dst, RFP32:$src), OneArgFPRW, + [(set RFP32:$dst, (fabs RFP32:$src))]>; +def FpSQRT32 : FpI<(ops RFP32:$dst, RFP32:$src), OneArgFPRW, + [(set RFP32:$dst, (fsqrt RFP32:$src))]>; +def FpSIN32 : FpI<(ops RFP32:$dst, RFP32:$src), OneArgFPRW, + [(set RFP32:$dst, (fsin RFP32:$src))]>; +def FpCOS32 : FpI<(ops RFP32:$dst, RFP32:$src), OneArgFPRW, + [(set RFP32:$dst, (fcos RFP32:$src))]>; +def FpTST32 : FpI<(ops RFP32:$src), OneArgFP, + []>; + +def FpCHS64 : FpI<(ops RFP64:$dst, RFP64:$src), OneArgFPRW, + [(set RFP64:$dst, (fneg RFP64:$src))]>; +def FpABS64 : FpI<(ops RFP64:$dst, RFP64:$src), OneArgFPRW, + [(set RFP64:$dst, (fabs RFP64:$src))]>; +def FpSQRT64 : FpI<(ops RFP64:$dst, RFP64:$src), OneArgFPRW, + [(set RFP64:$dst, (fsqrt RFP64:$src))]>; +def FpSIN64 : FpI<(ops RFP64:$dst, RFP64:$src), OneArgFPRW, + [(set RFP64:$dst, (fsin RFP64:$src))]>; +def FpCOS64 : FpI<(ops RFP64:$dst, RFP64:$src), OneArgFPRW, + [(set RFP64:$dst, (fcos RFP64:$src))]>; +def FpTST64 : FpI<(ops RFP64:$src), OneArgFP, []>; def FCHS : FPI<0xE0, RawFrm, (ops), "fchs">, D9; @@ -310,29 +394,54 @@ def FTST : FPI<0xE4, RawFrm, (ops), "ftst">, D9; // Floating point cmovs. 
let isTwoAddress = 1 in { - def FpCMOVB : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP, - [(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2, + def FpCMOVB32 : FpI<(ops RFP32:$dst, RFP32:$src1, RFP32:$src2), CondMovFP, + [(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2, X86_COND_B))]>; - def FpCMOVBE : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP, - [(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2, + def FpCMOVBE32 : FpI<(ops RFP32:$dst, RFP32:$src1, RFP32:$src2), CondMovFP, + [(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2, X86_COND_BE))]>; - def FpCMOVE : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP, - [(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2, + def FpCMOVE32 : FpI<(ops RFP32:$dst, RFP32:$src1, RFP32:$src2), CondMovFP, + [(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2, X86_COND_E))]>; - def FpCMOVP : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP, - [(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2, + def FpCMOVP32 : FpI<(ops RFP32:$dst, RFP32:$src1, RFP32:$src2), CondMovFP, + [(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2, X86_COND_P))]>; - def FpCMOVNB : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP, - [(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2, + def FpCMOVNB32 : FpI<(ops RFP32:$dst, RFP32:$src1, RFP32:$src2), CondMovFP, + [(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2, X86_COND_AE))]>; - def FpCMOVNBE: FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP, - [(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2, + def FpCMOVNBE32: FpI<(ops RFP32:$dst, RFP32:$src1, RFP32:$src2), CondMovFP, + [(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2, X86_COND_A))]>; - def FpCMOVNE : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP, - [(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2, + def FpCMOVNE32 : FpI<(ops RFP32:$dst, RFP32:$src1, RFP32:$src2), CondMovFP, + [(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2, X86_COND_NE))]>; - def FpCMOVNP : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP, - [(set RFP:$dst, (X86cmov 
RFP:$src1, RFP:$src2, + def FpCMOVNP32 : FpI<(ops RFP32:$dst, RFP32:$src1, RFP32:$src2), CondMovFP, + [(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2, + X86_COND_NP))]>; + + def FpCMOVB64 : FpI<(ops RFP64:$dst, RFP64:$src1, RFP64:$src2), CondMovFP, + [(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2, + X86_COND_B))]>; + def FpCMOVBE64 : FpI<(ops RFP64:$dst, RFP64:$src1, RFP64:$src2), CondMovFP, + [(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2, + X86_COND_BE))]>; + def FpCMOVE64 : FpI<(ops RFP64:$dst, RFP64:$src1, RFP64:$src2), CondMovFP, + [(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2, + X86_COND_E))]>; + def FpCMOVP64 : FpI<(ops RFP64:$dst, RFP64:$src1, RFP64:$src2), CondMovFP, + [(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2, + X86_COND_P))]>; + def FpCMOVNB64 : FpI<(ops RFP64:$dst, RFP64:$src1, RFP64:$src2), CondMovFP, + [(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2, + X86_COND_AE))]>; + def FpCMOVNBE64: FpI<(ops RFP64:$dst, RFP64:$src1, RFP64:$src2), CondMovFP, + [(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2, + X86_COND_A))]>; + def FpCMOVNE64 : FpI<(ops RFP64:$dst, RFP64:$src1, RFP64:$src2), CondMovFP, + [(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2, + X86_COND_NE))]>; + def FpCMOVNP64 : FpI<(ops RFP64:$dst, RFP64:$src1, RFP64:$src2), CondMovFP, + [(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2, X86_COND_NP))]>; } @@ -354,27 +463,39 @@ def FCMOVNP : FPI<0xD8, AddRegFrm, (ops RST:$op), "fcmovnu {$op, %st(0)|%ST(0), $op}">, DB; // Floating point loads & stores. 
-def FpLD32m : FpI<(ops RFP:$dst, f32mem:$src), ZeroArgFP, - [(set RFP:$dst, (extloadf64f32 addr:$src))]>; -def FpLD64m : FpI<(ops RFP:$dst, f64mem:$src), ZeroArgFP, - [(set RFP:$dst, (loadf64 addr:$src))]>; -def FpILD16m : FpI<(ops RFP:$dst, i16mem:$src), ZeroArgFP, - [(set RFP:$dst, (X86fild addr:$src, i16))]>; -def FpILD32m : FpI<(ops RFP:$dst, i32mem:$src), ZeroArgFP, - [(set RFP:$dst, (X86fild addr:$src, i32))]>; -def FpILD64m : FpI<(ops RFP:$dst, i64mem:$src), ZeroArgFP, - [(set RFP:$dst, (X86fild addr:$src, i64))]>; +def FpLD32m : FpI<(ops RFP32:$dst, f32mem:$src), ZeroArgFP, + [(set RFP32:$dst, (loadf32 addr:$src))]>; +def FpLD64m : FpI<(ops RFP64:$dst, f64mem:$src), ZeroArgFP, + [(set RFP64:$dst, (loadf64 addr:$src))]>; +def FpILD16m32 : FpI<(ops RFP32:$dst, i16mem:$src), ZeroArgFP, + [(set RFP32:$dst, (X86fild addr:$src, i16))]>; +def FpILD32m32 : FpI<(ops RFP32:$dst, i32mem:$src), ZeroArgFP, + [(set RFP32:$dst, (X86fild addr:$src, i32))]>; +def FpILD64m32 : FpI<(ops RFP32:$dst, i64mem:$src), ZeroArgFP, + [(set RFP32:$dst, (X86fild addr:$src, i64))]>; +def FpILD16m64 : FpI<(ops RFP64:$dst, i16mem:$src), ZeroArgFP, + [(set RFP64:$dst, (X86fild addr:$src, i16))]>; +def FpILD32m64 : FpI<(ops RFP64:$dst, i32mem:$src), ZeroArgFP, + [(set RFP64:$dst, (X86fild addr:$src, i32))]>; +def FpILD64m64 : FpI<(ops RFP64:$dst, i64mem:$src), ZeroArgFP, + [(set RFP64:$dst, (X86fild addr:$src, i64))]>; -def FpST32m : FpI<(ops f32mem:$op, RFP:$src), OneArgFP, - [(truncstoref32 RFP:$src, addr:$op)]>; -def FpST64m : FpI<(ops f64mem:$op, RFP:$src), OneArgFP, - [(store RFP:$src, addr:$op)]>; +def FpST32m : FpI<(ops f32mem:$op, RFP32:$src), OneArgFP, + [(store RFP32:$src, addr:$op)]>; +def FpST64m32 : FpI<(ops f32mem:$op, RFP64:$src), OneArgFP, + [(truncstoref32 RFP64:$src, addr:$op)]>; +def FpST64m : FpI<(ops f64mem:$op, RFP64:$src), OneArgFP, + [(store RFP64:$src, addr:$op)]>; -def FpSTP32m : FpI<(ops f32mem:$op, RFP:$src), OneArgFP, []>; -def FpSTP64m : FpI<(ops f64mem:$op, 
RFP:$src), OneArgFP, []>; -def FpIST16m : FpI<(ops i16mem:$op, RFP:$src), OneArgFP, []>; -def FpIST32m : FpI<(ops i32mem:$op, RFP:$src), OneArgFP, []>; -def FpIST64m : FpI<(ops i64mem:$op, RFP:$src), OneArgFP, []>; +def FpSTP32m : FpI<(ops f32mem:$op, RFP32:$src), OneArgFP, []>; +def FpSTP64m32 : FpI<(ops f32mem:$op, RFP64:$src), OneArgFP, []>; +def FpSTP64m : FpI<(ops f64mem:$op, RFP64:$src), OneArgFP, []>; +def FpIST16m32 : FpI<(ops i16mem:$op, RFP32:$src), OneArgFP, []>; +def FpIST32m32 : FpI<(ops i32mem:$op, RFP32:$src), OneArgFP, []>; +def FpIST64m32 : FpI<(ops i64mem:$op, RFP32:$src), OneArgFP, []>; +def FpIST16m64 : FpI<(ops i16mem:$op, RFP64:$src), OneArgFP, []>; +def FpIST32m64 : FpI<(ops i32mem:$op, RFP64:$src), OneArgFP, []>; +def FpIST64m64 : FpI<(ops i64mem:$op, RFP64:$src), OneArgFP, []>; def FLD32m : FPI<0xD9, MRM0m, (ops f32mem:$src), "fld{s} $src">; def FLD64m : FPI<0xDD, MRM0m, (ops f64mem:$src), "fld{l} $src">; @@ -392,14 +513,23 @@ def FISTP32m : FPI<0xDB, MRM3m, (ops i32mem:$dst), "fistp{l} $dst">; def FISTP64m : FPI<0xDF, MRM7m, (ops i64mem:$dst), "fistp{ll} $dst">; // FISTTP requires SSE3 even though it's a FPStack op. 
-def FpISTT16m : FpI_<(ops i16mem:$op, RFP:$src), OneArgFP, - [(X86fp_to_i16mem RFP:$src, addr:$op)]>, +def FpISTT16m32 : FpI_<(ops i16mem:$op, RFP32:$src), OneArgFP, + [(X86fp_to_i16mem RFP32:$src, addr:$op)]>, Requires<[HasSSE3]>; -def FpISTT32m : FpI_<(ops i32mem:$op, RFP:$src), OneArgFP, - [(X86fp_to_i32mem RFP:$src, addr:$op)]>, +def FpISTT32m32 : FpI_<(ops i32mem:$op, RFP32:$src), OneArgFP, + [(X86fp_to_i32mem RFP32:$src, addr:$op)]>, Requires<[HasSSE3]>; -def FpISTT64m : FpI_<(ops i64mem:$op, RFP:$src), OneArgFP, - [(X86fp_to_i64mem RFP:$src, addr:$op)]>, +def FpISTT64m32 : FpI_<(ops i64mem:$op, RFP32:$src), OneArgFP, + [(X86fp_to_i64mem RFP32:$src, addr:$op)]>, + Requires<[HasSSE3]>; +def FpISTT16m64 : FpI_<(ops i16mem:$op, RFP64:$src), OneArgFP, + [(X86fp_to_i16mem RFP64:$src, addr:$op)]>, + Requires<[HasSSE3]>; +def FpISTT32m64 : FpI_<(ops i32mem:$op, RFP64:$src), OneArgFP, + [(X86fp_to_i32mem RFP64:$src, addr:$op)]>, + Requires<[HasSSE3]>; +def FpISTT64m64 : FpI_<(ops i64mem:$op, RFP64:$src), OneArgFP, + [(X86fp_to_i64mem RFP64:$src, addr:$op)]>, Requires<[HasSSE3]>; def FISTTP16m : FPI<0xDF, MRM1m, (ops i16mem:$dst), "fisttp{s} $dst">; @@ -414,10 +544,14 @@ def FXCH : FPI<0xC8, AddRegFrm, (ops RST:$op), "fxch $op">, D9; // Floating point constant loads. let isReMaterializable = 1 in { -def FpLD0 : FpI<(ops RFP:$dst), ZeroArgFP, - [(set RFP:$dst, fp64imm0)]>; -def FpLD1 : FpI<(ops RFP:$dst), ZeroArgFP, - [(set RFP:$dst, fp64imm1)]>; +def FpLD032 : FpI<(ops RFP32:$dst), ZeroArgFP, + [(set RFP32:$dst, fpimm0)]>; +def FpLD132 : FpI<(ops RFP32:$dst), ZeroArgFP, + [(set RFP32:$dst, fpimm1)]>; +def FpLD064 : FpI<(ops RFP64:$dst), ZeroArgFP, + [(set RFP64:$dst, fpimm0)]>; +def FpLD164 : FpI<(ops RFP64:$dst), ZeroArgFP, + [(set RFP64:$dst, fpimm1)]>; } def FLD0 : FPI<0xEE, RawFrm, (ops), "fldz">, D9; @@ -425,10 +559,14 @@ def FLD1 : FPI<0xE8, RawFrm, (ops), "fld1">, D9; // Floating point compares. 
-def FpUCOMr : FpI<(ops RFP:$lhs, RFP:$rhs), CompareFP, +def FpUCOMr32 : FpI<(ops RFP32:$lhs, RFP32:$rhs), CompareFP, []>; // FPSW = cmp ST(0) with ST(i) -def FpUCOMIr : FpI<(ops RFP:$lhs, RFP:$rhs), CompareFP, - [(X86cmp RFP:$lhs, RFP:$rhs)]>; // CC = cmp ST(0) with ST(i) +def FpUCOMIr32 : FpI<(ops RFP32:$lhs, RFP32:$rhs), CompareFP, + [(X86cmp RFP32:$lhs, RFP32:$rhs)]>; // CC = cmp ST(0) with ST(i) +def FpUCOMr64 : FpI<(ops RFP64:$lhs, RFP64:$rhs), CompareFP, + []>; // FPSW = cmp ST(0) with ST(i) +def FpUCOMIr64 : FpI<(ops RFP64:$lhs, RFP64:$rhs), CompareFP, + [(X86cmp RFP64:$lhs, RFP64:$rhs)]>; // CC = cmp ST(0) with ST(i) def FUCOMr : FPI<0xE0, AddRegFrm, // FPSW = cmp ST(0) with ST(i) (ops RST:$reg), @@ -447,7 +585,6 @@ def FUCOMIPr : FPI<0xE8, AddRegFrm, // CC = cmp ST(0) with ST(i), pop (ops RST:$reg), "fucomip {$reg, %st(0)|%ST(0), $reg}">, DF, Imp<[ST0],[]>; - // Floating point flag ops. def FNSTSW8r : I<0xE0, RawFrm, // AX = fp flags (ops), "fnstsw", []>, DF, Imp<[],[AX]>; @@ -466,12 +603,18 @@ def : Pat<(X86fld addr:$src, f32), (FpLD32m addr:$src)>; def : Pat<(X86fld addr:$src, f64), (FpLD64m addr:$src)>; // Required for CALL which return f32 / f64 values. 
-def : Pat<(X86fst RFP:$src, addr:$op, f32), (FpST32m addr:$op, RFP:$src)>; -def : Pat<(X86fst RFP:$src, addr:$op, f64), (FpST64m addr:$op, RFP:$src)>; +def : Pat<(X86fst RFP32:$src, addr:$op, f32), (FpST32m addr:$op, RFP32:$src)>; +def : Pat<(X86fst RFP64:$src, addr:$op, f32), (FpST64m32 addr:$op, RFP64:$src)>; +def : Pat<(X86fst RFP64:$src, addr:$op, f64), (FpST64m addr:$op, RFP64:$src)>; // Floating point constant -0.0 and -1.0 -def : Pat<(f64 fp64immneg0), (FpCHS (FpLD0))>, Requires<[FPStack]>; -def : Pat<(f64 fp64immneg1), (FpCHS (FpLD1))>, Requires<[FPStack]>; +def : Pat<(f32 fpimmneg0), (FpCHS32 (FpLD032))>, Requires<[FPStack]>; +def : Pat<(f32 fpimmneg1), (FpCHS32 (FpLD132))>, Requires<[FPStack]>; +def : Pat<(f64 fpimmneg0), (FpCHS64 (FpLD064))>, Requires<[FPStack]>; +def : Pat<(f64 fpimmneg1), (FpCHS64 (FpLD164))>, Requires<[FPStack]>; // Used to conv. i64 to f64 since there isn't a SSE version. -def : Pat<(X86fildflag addr:$src, i64), (FpILD64m addr:$src)>; +def : Pat<(X86fildflag addr:$src, i64), (FpILD64m64 addr:$src)>; + +def : Pat<(extloadf32 addr:$src), (FpMOV3264 (FpLD32m addr:$src))>, Requires<[FPStack]>; +def : Pat<(fextend RFP32:$src), (FpMOV3264 RFP32:$src)>, Requires<[FPStack]>; diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 99594906a68..89ff6f188b9 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -33,7 +33,8 @@ bool X86InstrInfo::isMoveInstr(const MachineInstr& MI, if (oc == X86::MOV8rr || oc == X86::MOV16rr || oc == X86::MOV32rr || oc == X86::MOV64rr || oc == X86::MOV16to16_ || oc == X86::MOV32to32_ || - oc == X86::FpMOV || oc == X86::MOVSSrr || oc == X86::MOVSDrr || + oc == X86::FpMOV3232 || oc == X86::MOVSSrr || oc == X86::MOVSDrr || + oc == X86::FpMOV3264 || oc == X86::FpMOV6432 || oc == X86::FpMOV6464 || oc == X86::FsMOVAPSrr || oc == X86::FsMOVAPDrr || oc == X86::MOVAPSrr || oc == X86::MOVAPDrr || oc == X86::MOVSS2PSrr || oc == X86::MOVSD2PDrr || diff --git 
a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index d604091aefe..2cbd31e5ea4 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -964,7 +964,7 @@ def Int_COMISDrm: PDI<0x2F, MRMSrcMem, (ops VR128:$src1, f128mem:$src2), // Alias instructions that map fld0 to pxor for sse. def FsFLD0SD : I<0xEF, MRMInitReg, (ops FR64:$dst), - "pxor $dst, $dst", [(set FR64:$dst, fp64imm0)]>, + "pxor $dst, $dst", [(set FR64:$dst, fpimm0)]>, Requires<[HasSSE2]>, TB, OpSize; // Alias instruction to do FR64 reg-to-reg copy using movapd. Upper bits are diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 30d029f6ca1..23970ed59cc 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -82,8 +82,10 @@ void X86RegisterInfo::storeRegToStackSlot(MachineBasicBlock &MBB, Opc = X86::MOV32_mr; } else if (RC == &X86::GR16_RegClass) { Opc = X86::MOV16_mr; - } else if (RC == &X86::RFPRegClass || RC == &X86::RSTRegClass) { + } else if (RC == &X86::RFP64RegClass || RC == &X86::RSTRegClass) { Opc = X86::FpST64m; + } else if (RC == &X86::RFP32RegClass) { + Opc = X86::FpST32m; } else if (RC == &X86::FR32RegClass) { Opc = X86::MOVSSmr; } else if (RC == &X86::FR64RegClass) { @@ -117,8 +119,10 @@ void X86RegisterInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, Opc = X86::MOV32_rm; } else if (RC == &X86::GR16_RegClass) { Opc = X86::MOV16_rm; - } else if (RC == &X86::RFPRegClass || RC == &X86::RSTRegClass) { + } else if (RC == &X86::RFP64RegClass || RC == &X86::RSTRegClass) { Opc = X86::FpLD64m; + } else if (RC == &X86::RFP32RegClass) { + Opc = X86::FpLD32m; } else if (RC == &X86::FR32RegClass) { Opc = X86::MOVSSrm; } else if (RC == &X86::FR64RegClass) { @@ -151,8 +155,10 @@ void X86RegisterInfo::copyRegToReg(MachineBasicBlock &MBB, Opc = X86::MOV32_rr; } else if (RC == &X86::GR16_RegClass) { Opc = X86::MOV16_rr; - } else if (RC == &X86::RFPRegClass || RC == &X86::RSTRegClass) { - Opc = 
X86::FpMOV; + } else if (RC == &X86::RFP32RegClass) { + Opc = X86::FpMOV3232; + } else if (RC == &X86::RFP64RegClass || RC == &X86::RSTRegClass) { + Opc = X86::FpMOV6464; } else if (RC == &X86::FR32RegClass) { Opc = X86::FsMOVAPSrr; } else if (RC == &X86::FR64RegClass) { diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td index 089078a3ae8..a1e7bb984df 100644 --- a/lib/Target/X86/X86RegisterInfo.td +++ b/lib/Target/X86/X86RegisterInfo.td @@ -425,7 +425,8 @@ def FR64 : RegisterClass<"X86", [f64], 64, // faster on common hardware. In reality, this should be controlled by a // command line option or something. -def RFP : RegisterClass<"X86", [f64], 32, [FP0, FP1, FP2, FP3, FP4, FP5, FP6]>; +def RFP32 : RegisterClass<"X86", [f32], 32, [FP0, FP1, FP2, FP3, FP4, FP5, FP6]>; +def RFP64 : RegisterClass<"X86", [f64], 32, [FP0, FP1, FP2, FP3, FP4, FP5, FP6]>; // Floating point stack registers (these are not allocatable by the // register allocator - the floating point stackifier is responsible