diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index c82ae7eceee..35c4b0d3c81 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -58,8 +58,6 @@ static const ARM_MLxEntry ARM_MLxTable[] = { { ARM::VMLSS, ARM::VMULS, ARM::VSUBS, false, false }, { ARM::VMLAD, ARM::VMULD, ARM::VADDD, false, false }, { ARM::VMLSD, ARM::VMULD, ARM::VSUBD, false, false }, - { ARM::VMLAfd_sfp, ARM::VMULfd_sfp, ARM::VADDfd_sfp, false, false }, - { ARM::VMLSfd_sfp, ARM::VMULfd_sfp, ARM::VSUBfd_sfp, false, false }, { ARM::VNMLAS, ARM::VNMULS, ARM::VSUBS, true, false }, { ARM::VNMLSS, ARM::VMULS, ARM::VSUBS, true, false }, { ARM::VNMLAD, ARM::VNMULD, ARM::VSUBD, true, false }, diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index c4b590c2ac2..b95e02fbf9f 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -1667,7 +1667,7 @@ def SubReg_i32_lane : SDNodeXForm op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> @@ -1736,13 +1736,7 @@ class N2VQShuffle op19_18, bits<5> op11_7, (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd, $src2 = $Vm", []>; -// Basic 3-register operations: single-, double- and quad-register. -class N3VS op21_20, bits<4> op11_8, bit op4, - string OpcodeStr, string Dt> - : N3V; - +// Basic 3-register operations: double- and quad-register. class N3VD op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable> @@ -1912,13 +1906,7 @@ class N3VQIntSh op21_20, bits<4> op11_8, bit op4, let isCommutable = 0; } -// Multiply-Add/Sub operations: single-, double- and quad-register. -class N3VSMulOp op21_20, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt> - : N3V; - +// Multiply-Add/Sub operations: double- and quad-register. class N3VDMulOp op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator MulOp, SDPatternOperator OpNode> @@ -4678,83 +4666,47 @@ def VTBX4Pseudo class N2VSPat : NEONFPPat<(f32 (OpNode SPR:$a)), (EXTRACT_SUBREG - (v2f32 (COPY_TO_REGCLASS - (Inst (INSERT_SUBREG + (v2f32 (COPY_TO_REGCLASS (Inst + (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)), SPR:$a, ssub_0)), DPR_VFP2)), ssub_0)>; class N3VSPat : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)), - (EXTRACT_SUBREG (v2f32 - (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), - SPR:$a, ssub_0), - (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), - SPR:$b, ssub_0))), - ssub_0)>; + (EXTRACT_SUBREG + (v2f32 (COPY_TO_REGCLASS (Inst + (INSERT_SUBREG + (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)), + SPR:$a, ssub_0), + (INSERT_SUBREG + (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)), + SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>; class N3VSMulOpPat : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))), - (EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), - SPR:$acc, ssub_0), - (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), - SPR:$a, ssub_0), - (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), - SPR:$b, ssub_0)), - ssub_0)>; + (EXTRACT_SUBREG + (v2f32 (COPY_TO_REGCLASS (Inst + (INSERT_SUBREG + (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)), + SPR:$acc, ssub_0), + (INSERT_SUBREG + (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)), + SPR:$a, ssub_0), + (INSERT_SUBREG + (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)), + SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>; -// These need separate instructions because they must use DPR_VFP2 register -// class which have SPR sub-registers. - -// Vector Add Operations used for single-precision FP -let neverHasSideEffects = 1 in -def VADDfd_sfp : N3VS<0,0,0b00,0b1101,0, "vadd", "f32">; -def : N3VSPat; - -// Vector Sub Operations used for single-precision FP -let neverHasSideEffects = 1 in -def VSUBfd_sfp : N3VS<0,0,0b10,0b1101,0, "vsub", "f32">; -def : N3VSPat; - -// Vector Multiply Operations used for single-precision FP -let neverHasSideEffects = 1 in -def VMULfd_sfp : N3VS<1,0,0b00,0b1101,1, "vmul", "f32">; -def : N3VSPat; - -// Vector Multiply-Accumulate/Subtract used for single-precision FP -// vml[as].f32 can cause 4-8 cycle stalls in following ASIMD instructions, so -// we want to avoid them for now. e.g., alternating vmla/vadd instructions. - -let neverHasSideEffects = 1 in -def VMLAfd_sfp : N3VSMulOp<0,0,0b00,0b1101,1, IIC_VMACD, "vmla", "f32">; -def : N3VSMulOpPat, +def : N3VSPat; +def : N3VSPat; +def : N3VSPat; +def : N3VSMulOpPat, Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>; - -let neverHasSideEffects = 1 in -def VMLSfd_sfp : N3VSMulOp<0,0,0b10,0b1101,1, IIC_VMACD, "vmls", "f32">; -def : N3VSMulOpPat, +def : N3VSMulOpPat, Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>; - -// Vector Absolute used for single-precision FP def : N2VSPat; - -// Vector Negate used for single-precision FP def : N2VSPat; - -// Vector Maximum used for single-precision FP -let neverHasSideEffects = 1 in -def VMAXfd_sfp : N3V<0, 0, 0b00, 0b1111, 0, 0, (outs DPR_VFP2:$Vd), - (ins DPR_VFP2:$Vn, DPR_VFP2:$Vm), N3RegFrm, IIC_VBIND, - "vmax", "f32", "$Vd, $Vn, $Vm", "", []>; -def : N3VSPat; - -// Vector Minimum used for single-precision FP -let neverHasSideEffects = 1 in -def VMINfd_sfp : N3V<0, 0, 0b10, 0b1111, 0, 0, (outs DPR_VFP2:$Vd), - (ins DPR_VFP2:$Vn, DPR_VFP2:$Vm), N3RegFrm, IIC_VBIND, - "vmin", "f32", "$Vd, $Vn, $Vm", "", []>; -def : N3VSPat; - -// Vector Convert between single-precision FP and integer +def : N3VSPat; +def : N3VSPat; def : N2VSPat; def : N2VSPat; def : N2VSPat; diff --git a/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll b/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll index c169fb334a3..b8c8cb122a1 100644 --- a/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll +++ b/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll @@ -15,9 +15,6 @@ bb.nph: ; preds = %bb5 ; Loop preheader ; CHECK: vmov.f32 -; CHECK: vsub.f32 -; CHECK: vadd.f32 -; CHECK: vmul.f32 bb7: ; preds = %bb9, %bb.nph %s1.02 = phi float [ undef, %bb.nph ], [ %35, %bb9 ] ; [#uses=3] %tmp79 = add i32 undef, undef ; [#uses=1] @@ -73,8 +70,6 @@ bb8: ; preds = %bb8, %bb7 br i1 %34, label %bb8, label %bb9 bb9: ; preds = %bb8 -; CHECK: %bb9 -; CHECK: vmov.f32 %35 = fadd float 0.000000e+00, undef ; [#uses=1] br label %bb7 } diff --git a/utils/TableGen/ARMDecoderEmitter.cpp b/utils/TableGen/ARMDecoderEmitter.cpp index c4e78f358ec..10e507f603b 100644 --- a/utils/TableGen/ARMDecoderEmitter.cpp +++ b/utils/TableGen/ARMDecoderEmitter.cpp @@ -1613,11 +1613,6 @@ ARMDEBackend::populateInstruction(const CodeGenInstruction &CGI, Name == "VNEGScc") return false; - // Ignore the *_sfp instructions when decoding. They are used by the - // compiler to implement scalar floating point operations using vector - // operations in order to work around some performance issues. - if (Name.find("_sfp") != std::string::npos) return false; - // LDMIA_RET is a special case of LDM (Load Multiple) where the registers // loaded include the PC, causing a branch to a loaded address. Ignore // the LDMIA_RET instruction when decoding.