diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 064243f3181..6d7f242127c 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -566,35 +566,34 @@ def SubReg_i32_lane : SDNodeXForm op24_23, bits<2> op21_20, bits<2> op19_18, + bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, + string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> + : N2V; class N2VD op24_23, bits<2> op21_20, bits<2> op19_18, - bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,string Dt, - ValueType ResTy, ValueType OpTy, SDNode OpNode> + bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, + string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> : N2V; class N2VQ op24_23, bits<2> op21_20, bits<2> op19_18, - bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,string Dt, - ValueType ResTy, ValueType OpTy, SDNode OpNode> + bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, + string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> : N2V; -// Basic 2-register operations, scalar single-precision. -class N2VDs op24_23, bits<2> op21_20, bits<2> op19_18, - bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, - string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> +// Basic 2-register intrinsics: single-, double- and quad-register. +class N2VSInt op24_23, bits<2> op21_20, bits<2> op19_18, + bits<2> op17_16, bits<5> op11_7, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N2V; - -class N2VDsPat - : NEONFPPat<(ResTy (OpNode SPR:$a)), - (EXTRACT_SUBREG (Inst (INSERT_SUBREG (OpTy (IMPLICIT_DEF)), - SPR:$a, arm_ssubreg_0)), - arm_ssubreg_0)>; - -// Basic 2-register intrinsics, both double- and quad-register. + (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), itin, + OpcodeStr, Dt, "$dst, $src", "", []>; class N2VDInt op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, @@ -610,21 +609,6 @@ class N2VQInt op24_23, bits<2> op21_20, bits<2> op19_18, (ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "", [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>; -// Basic 2-register intrinsics, scalar single-precision -class N2VDInts op24_23, bits<2> op21_20, bits<2> op19_18, - bits<2> op17_16, bits<5> op11_7, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp> - : N2V; - -class N2VDIntsPat - : NEONFPPat<(f32 (OpNode SPR:$a)), - (EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), - SPR:$a, arm_ssubreg_0)), - arm_ssubreg_0)>; - // Narrow 2-register intrinsics. class N2VNInt op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, @@ -655,7 +639,16 @@ class N2VQShuffle op19_18, bits<5> op11_7, (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$dst1, $dst2", "$src1 = $dst1, $src2 = $dst2", []>; -// Basic 3-register operations, both double- and quad-register. +// Basic 3-register operations: single-, double- and quad-register. +class N3VS op21_20, bits<4> op11_8, bit op4, + string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, + SDNode OpNode, bit Commutable> + : N3V { + let isCommutable = Commutable; +} + class N3VD op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable> @@ -740,23 +733,6 @@ class N3VQSL16 op21_20, bits<4> op11_8, string OpcodeStr, string Dt, let isCommutable = 0; } -// Basic 3-register operations, scalar single-precision -class N3VDs op21_20, bits<4> op11_8, bit op4, - string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, - SDNode OpNode, bit Commutable> - : N3V { - let isCommutable = Commutable; -} -class N3VDsPat - : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)), - (EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), - SPR:$a, arm_ssubreg_0), - (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), - SPR:$b, arm_ssubreg_0)), - arm_ssubreg_0)>; - // Basic 3-register intrinsics, both double- and quad-register. class N3VDInt op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, @@ -824,7 +800,15 @@ class N3VQIntSL16 op21_20, bits<4> op11_8, InstrItinClass itin, let isCommutable = 0; } -// Multiply-Add/Sub operations, both double- and quad-register. +// Multiply-Add/Sub operations: single-, double- and quad-register. +class N3VSMulOp op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType Ty, SDNode MulOp, SDNode OpNode> + : N3V; + class N3VDMulOp op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, SDNode MulOp, SDNode OpNode> @@ -892,25 +876,6 @@ class N3VQMulOpSL16 op21_20, bits<4> op11_8, InstrItinClass itin, (ResTy (NEONvduplane (OpTy DPR_8:$src3), imm:$lane)))))))]>; -// Multiply-Add/Sub operations, scalar single-precision -class N3VDMulOps op21_20, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, - ValueType Ty, SDNode MulOp, SDNode OpNode> - : N3V; - -class N3VDMulOpsPat - : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))), - (EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), - SPR:$acc, arm_ssubreg_0), - (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), - SPR:$a, arm_ssubreg_0), - (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), - SPR:$b, arm_ssubreg_0)), - arm_ssubreg_0)>; - // Neon 3-argument intrinsics, both double- and quad-register. // The destination register is also used as the first source operand register. class N3VDInt3 op21_20, bits<4> op11_8, bit op4, @@ -2409,7 +2374,7 @@ def VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>; def VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>; // VNEG : Vector Negate (floating-point) -def VNEGf32d : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0, +def VNEGfd : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0, (outs DPR:$dst), (ins DPR:$src), IIC_VUNAD, "vneg", "f32", "$dst, $src", "", [(set DPR:$dst, (v2f32 (fneg DPR:$src)))]>; @@ -2995,71 +2960,95 @@ def VTBX4 // NEON instructions for single-precision FP math //===----------------------------------------------------------------------===// +class N2VSPat + : NEONFPPat<(ResTy (OpNode SPR:$a)), + (EXTRACT_SUBREG (Inst (INSERT_SUBREG (OpTy (IMPLICIT_DEF)), + SPR:$a, arm_ssubreg_0)), + arm_ssubreg_0)>; + +class N3VSPat + : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)), + (EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), + SPR:$a, arm_ssubreg_0), + (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), + SPR:$b, arm_ssubreg_0)), + arm_ssubreg_0)>; + +class N3VSMulOpPat + : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))), + (EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), + SPR:$acc, arm_ssubreg_0), + (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), + SPR:$a, arm_ssubreg_0), + (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), + SPR:$b, arm_ssubreg_0)), + arm_ssubreg_0)>; + // These need separate instructions because they must use DPR_VFP2 register // class which have SPR sub-registers. // Vector Add Operations used for single-precision FP let neverHasSideEffects = 1 in -def VADDfd_sfp : N3VDs<0,0,0b00,0b1101,0, "vadd", "f32", v2f32, v2f32, fadd, 1>; -def : N3VDsPat; +def VADDfd_sfp : N3VS<0,0,0b00,0b1101,0, "vadd", "f32", v2f32, v2f32, fadd, 1>; +def : N3VSPat; // Vector Sub Operations used for single-precision FP let neverHasSideEffects = 1 in -def VSUBfd_sfp : N3VDs<0,0,0b10,0b1101,0, "vsub", "f32", v2f32, v2f32, fsub, 0>; -def : N3VDsPat; +def VSUBfd_sfp : N3VS<0,0,0b10,0b1101,0, "vsub", "f32", v2f32, v2f32, fsub, 0>; +def : N3VSPat; // Vector Multiply Operations used for single-precision FP let neverHasSideEffects = 1 in -def VMULfd_sfp : N3VDs<1,0,0b00,0b1101,1, "vmul", "f32", v2f32, v2f32, fmul, 1>; -def : N3VDsPat; +def VMULfd_sfp : N3VS<1,0,0b00,0b1101,1, "vmul", "f32", v2f32, v2f32, fmul, 1>; +def : N3VSPat; // Vector Multiply-Accumulate/Subtract used for single-precision FP // vml[as].f32 can cause 4-8 cycle stalls in following ASIMD instructions, so // we want to avoid them for now. e.g., alternating vmla/vadd instructions. //let neverHasSideEffects = 1 in -//def VMLAfd_sfp : N3VDMulOps<0,0,0b00,0b1101,1, IIC_VMACD, "vmla", "f32", +//def VMLAfd_sfp : N3VSMulOp<0,0,0b00,0b1101,1, IIC_VMACD, "vmla", "f32", // v2f32, fmul, fadd>; -//def : N3VDMulOpsPat; +//def : N3VSMulOpPat; //let neverHasSideEffects = 1 in -//def VMLSfd_sfp : N3VDMulOps<0,0,0b10,0b1101,1, IIC_VMACD, "vmls", "f32", +//def VMLSfd_sfp : N3VSMulOp<0,0,0b10,0b1101,1, IIC_VMACD, "vmls", "f32", // v2f32, fmul, fsub>; -//def : N3VDMulOpsPat; +//def : N3VSMulOpPat; // Vector Absolute used for single-precision FP let neverHasSideEffects = 1 in -def VABSfd_sfp : N2VDInts<0b11, 0b11, 0b10, 0b01, 0b01110, 0, IIC_VUNAD, +def VABSfd_sfp : N2VSInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0, IIC_VUNAD, "vabs", "f32", v2f32, v2f32, int_arm_neon_vabs>; -def : N2VDIntsPat; +def : N2VSPat; // Vector Negate used for single-precision FP let neverHasSideEffects = 1 in -def VNEGf32d_sfp : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0, - (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), IIC_VUNAD, - "vneg", "f32", "$dst, $src", "", []>; -def : N2VDIntsPat; +def VNEGfd_sfp : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0, + (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), IIC_VUNAD, + "vneg", "f32", "$dst, $src", "", []>; +def : N2VSPat; // Vector Convert between single-precision FP and integer let neverHasSideEffects = 1 in -def VCVTf2sd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32", - v2i32, v2f32, fp_to_sint>; -def : N2VDsPat; +def VCVTf2sd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32", + v2i32, v2f32, fp_to_sint>; +def : N2VSPat; let neverHasSideEffects = 1 in -def VCVTf2ud_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32", - v2i32, v2f32, fp_to_uint>; -def : N2VDsPat; +def VCVTf2ud_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32", + v2i32, v2f32, fp_to_uint>; +def : N2VSPat; let neverHasSideEffects = 1 in -def VCVTs2fd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32", - v2f32, v2i32, sint_to_fp>; -def : N2VDsPat; +def VCVTs2fd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32", + v2f32, v2i32, sint_to_fp>; +def : N2VSPat; let neverHasSideEffects = 1 in -def VCVTu2fd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32", - v2f32, v2i32, uint_to_fp>; -def : N2VDsPat; +def VCVTu2fd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32", + v2f32, v2i32, uint_to_fp>; +def : N2VSPat; //===----------------------------------------------------------------------===// // Non-Instruction Patterns