diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index f22a6b29cd2..105963fe769 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -16,6 +16,8 @@ class OpndItins { InstrItinClass rr = arg_rr; InstrItinClass rm = arg_rm; + // InstrSchedModel info. + X86FoldableSchedWrite Sched = WriteFAdd; } class SizeItins { @@ -45,6 +47,7 @@ def SSE_ALU_ITINS_S : SizeItins< SSE_ALU_F32S, SSE_ALU_F64S >; +let Sched = WriteFMul in { def SSE_MUL_F32S : OpndItins< IIC_SSE_MUL_F32S_RR, IIC_SSE_MUL_F64S_RM >; @@ -52,11 +55,13 @@ def SSE_MUL_F32S : OpndItins< def SSE_MUL_F64S : OpndItins< IIC_SSE_MUL_F64S_RR, IIC_SSE_MUL_F64S_RM >; +} def SSE_MUL_ITINS_S : SizeItins< SSE_MUL_F32S, SSE_MUL_F64S >; +let Sched = WriteFDiv in { def SSE_DIV_F32S : OpndItins< IIC_SSE_DIV_F32S_RR, IIC_SSE_DIV_F64S_RM >; @@ -64,6 +69,7 @@ def SSE_DIV_F32S : OpndItins< def SSE_DIV_F64S : OpndItins< IIC_SSE_DIV_F64S_RR, IIC_SSE_DIV_F64S_RM >; +} def SSE_DIV_ITINS_S : SizeItins< SSE_DIV_F32S, SSE_DIV_F64S @@ -82,6 +88,7 @@ def SSE_ALU_ITINS_P : SizeItins< SSE_ALU_F32P, SSE_ALU_F64P >; +let Sched = WriteFMul in { def SSE_MUL_F32P : OpndItins< IIC_SSE_MUL_F32P_RR, IIC_SSE_MUL_F64P_RM >; @@ -89,11 +96,13 @@ def SSE_MUL_F32P : OpndItins< def SSE_MUL_F64P : OpndItins< IIC_SSE_MUL_F64P_RR, IIC_SSE_MUL_F64P_RM >; +} def SSE_MUL_ITINS_P : SizeItins< SSE_MUL_F32P, SSE_MUL_F64P >; +let Sched = WriteFDiv in { def SSE_DIV_F32P : OpndItins< IIC_SSE_DIV_F32P_RR, IIC_SSE_DIV_F64P_RM >; @@ -101,6 +110,7 @@ def SSE_DIV_F32P : OpndItins< def SSE_DIV_F64P : OpndItins< IIC_SSE_DIV_F64P_RR, IIC_SSE_DIV_F64P_RM >; +} def SSE_DIV_ITINS_P : SizeItins< SSE_DIV_F32P, SSE_DIV_F64P @@ -110,6 +120,7 @@ def SSE_BIT_ITINS_P : OpndItins< IIC_SSE_BIT_P_RR, IIC_SSE_BIT_P_RM >; +let Sched = WriteVecALU in { def SSE_INTALU_ITINS_P : OpndItins< IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM >; @@ -117,7 +128,9 @@ def SSE_INTALU_ITINS_P : OpndItins< def SSE_INTALUQ_ITINS_P : OpndItins< IIC_SSE_INTALUQ_P_RR, IIC_SSE_INTALUQ_P_RM >; +} +let Sched = WriteVecIMul in def SSE_INTMUL_ITINS_P : OpndItins< IIC_SSE_INTMUL_P_RR, IIC_SSE_INTMUL_P_RM >; @@ -148,13 +161,15 @@ multiclass sse12_fp_scalar opc, string OpcodeStr, SDNode OpNode, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (OpNode RC:$src1, RC:$src2))], itins.rr>; + [(set RC:$dst, (OpNode RC:$src1, RC:$src2))], itins.rr>, + Sched<[itins.Sched]>; } def rm : SI; + [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } /// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class @@ -189,14 +204,16 @@ multiclass sse12_fp_packed opc, string OpcodeStr, SDNode OpNode, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>; + [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>, + Sched<[itins.Sched]>; let mayLoad = 1 in def rm : PI; + itins.rm, d>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } /// sse12_fp_packed_logical_rm - SSE 1 & 2 packed instructions class @@ -209,12 +226,14 @@ multiclass sse12_fp_packed_logical_rm opc, RegisterClass RC, Domain d, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - pat_rr, IIC_DEFAULT, d>; + pat_rr, IIC_DEFAULT, d>, + Sched<[WriteVecLogic]>; def rm : PI; + pat_rm, IIC_DEFAULT, d>, + Sched<[WriteVecLogicLd, ReadAfterLd]>; } //===----------------------------------------------------------------------===// @@ -444,7 +463,7 @@ multiclass sse12_move_rr; + IIC_SSE_MOV_S_RR>, Sched<[WriteMove]>; // For the disassembler let isCodeGenOnly = 1, hasSideEffects = 0 in @@ -464,7 +483,7 @@ multiclass sse12_move, - VEX, VEX_LIG; + VEX, VEX_LIG, Sched<[WriteStore]>; // SSE1 & 2 let Constraints = "$src1 = $dst" in { defm NAME : sse12_move_rr; + [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>, + Sched<[WriteStore]>; } // Loading from memory automatically zeroing upper bits. @@ -482,11 +502,11 @@ multiclass sse12_move_rm, VEX, VEX_LIG; + IIC_SSE_MOV_S_RM>, VEX, VEX_LIG, Sched<[WriteLoad]>; def NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set RC:$dst, (mem_pat addr:$src))], - IIC_SSE_MOV_S_RM>; + IIC_SSE_MOV_S_RM>, Sched<[WriteLoad]>; } defm MOVSS : sse12_move, XS; @@ -745,11 +765,13 @@ multiclass sse12_mov_packed opc, RegisterClass RC, bit IsReMaterializable = 1> { let neverHasSideEffects = 1 in def rr : PI; + !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], itins.rr, d>, + Sched<[WriteMove]>; let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in def rm : PI; + [(set RC:$dst, (ld_frag addr:$src))], itins.rm, d>, + Sched<[WriteLoad]>; } defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, @@ -790,6 +812,7 @@ defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, "movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>, TB, OpSize; +let SchedRW = [WriteStore] in { def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movaps\t{$src, $dst|$dst, $src}", [(alignedstore (v4f32 VR128:$src), addr:$dst)], @@ -822,6 +845,7 @@ def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), "movupd\t{$src, $dst|$dst, $src}", [(store (v4f64 VR256:$src), addr:$dst)], IIC_SSE_MOVU_P_MR>, VEX, VEX_L; +} // SchedRW // For disassembler let isCodeGenOnly = 1, hasSideEffects = 0 in { @@ -880,6 +904,7 @@ def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src), def : Pat<(int_x86_avx_storeu_pd_256 addr:$dst, VR256:$src), (VMOVUPDYmr addr:$dst, VR256:$src)>; +let SchedRW = [WriteStore] in { def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movaps\t{$src, $dst|$dst, $src}", [(alignedstore (v4f32 VR128:$src), addr:$dst)], @@ -896,6 +921,7 @@ def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movupd\t{$src, $dst|$dst, $src}", [(store (v2f64 VR128:$src), addr:$dst)], IIC_SSE_MOVU_P_MR>; +} // SchedRW // For disassembler let isCodeGenOnly = 1, hasSideEffects = 0 in { @@ -1095,14 +1121,16 @@ multiclass sse12_mov_hilo_packed_baseopc, SDNode psnode, SDNode pdnode, [(set VR128:$dst, (psnode VR128:$src1, (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))], - itin, SSEPackedSingle>, TB; + itin, SSEPackedSingle>, TB, + Sched<[WriteShuffleLd, ReadAfterLd]>; def PDrm : PI, TB, OpSize; + itin, SSEPackedDouble>, TB, OpSize, + Sched<[WriteShuffleLd, ReadAfterLd]>; } @@ -1123,6 +1151,7 @@ let AddedComplexity = 20 in { IIC_SSE_MOV_LH>; } +let SchedRW = [WriteStore] in { def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movlps\t{$src, $dst|$dst, $src}", [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)), @@ -1143,6 +1172,7 @@ def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), [(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>; +} // SchedRW let Predicates = [HasAVX] in { // Shuffle with VMOVLPS @@ -1222,6 +1252,7 @@ let AddedComplexity = 20 in { IIC_SSE_MOV_LH>; } +let SchedRW = [WriteStore] in { // v2f64 extract element 1 is always custom lowered to unpack high to low // and extract element 0 so the non-store version isn't too horrible. def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), @@ -1246,6 +1277,7 @@ def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), [(store (f64 (vector_extract (v2f64 (X86Unpckh VR128:$src, VR128:$src)), (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>; +} // SchedRW let Predicates = [HasAVX] in { // VMOVHPS patterns @@ -1296,14 +1328,14 @@ let AddedComplexity = 20 in { [(set VR128:$dst, (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))], IIC_SSE_MOV_LH>, - VEX_4V; + VEX_4V, Sched<[WriteShuffle]>; def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))], IIC_SSE_MOV_LH>, - VEX_4V; + VEX_4V, Sched<[WriteShuffle]>; } let Constraints = "$src1 = $dst", AddedComplexity = 20 in { def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst), @@ -1311,13 +1343,13 @@ let Constraints = "$src1 = $dst", AddedComplexity = 20 in { "movlhps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))], - IIC_SSE_MOV_LH>; + IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>; def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "movhlps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))], - IIC_SSE_MOV_LH>; + IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>; } let Predicates = [HasAVX] in { @@ -1352,22 +1384,27 @@ def SSE_CVT_PD : OpndItins< IIC_SSE_CVT_PD_RR, IIC_SSE_CVT_PD_RM >; +let Sched = WriteCvtI2F in def SSE_CVT_PS : OpndItins< IIC_SSE_CVT_PS_RR, IIC_SSE_CVT_PS_RM >; +let Sched = WriteCvtI2F in def SSE_CVT_Scalar : OpndItins< IIC_SSE_CVT_Scalar_RR, IIC_SSE_CVT_Scalar_RM >; +let Sched = WriteCvtF2I in def SSE_CVT_SS2SI_32 : OpndItins< IIC_SSE_CVT_SS2SI32_RR, IIC_SSE_CVT_SS2SI32_RM >; +let Sched = WriteCvtF2I in def SSE_CVT_SS2SI_64 : OpndItins< IIC_SSE_CVT_SS2SI64_RR, IIC_SSE_CVT_SS2SI64_RM >; +let Sched = WriteCvtF2I in def SSE_CVT_SD2SI : OpndItins< IIC_SSE_CVT_SD2SI_RR, IIC_SSE_CVT_SD2SI_RM >; @@ -1377,10 +1414,10 @@ multiclass sse12_cvt_s opc, RegisterClass SrcRC, RegisterClass DstRC, string asm, OpndItins itins> { def rr : SI; + itins.rr>, Sched<[itins.Sched]>; def rm : SI; + itins.rm>, Sched<[itins.Sched.Folded]>; } multiclass sse12_cvt_p opc, RegisterClass SrcRC, RegisterClass DstRC, @@ -1388,10 +1425,10 @@ multiclass sse12_cvt_p opc, RegisterClass SrcRC, RegisterClass DstRC, OpndItins itins> { let neverHasSideEffects = 1 in { def rr : I; + [], itins.rr, d>, Sched<[itins.Sched]>; let mayLoad = 1 in def rm : I; + [], itins.rm, d>, Sched<[itins.Sched.Folded]>; } } @@ -1534,10 +1571,12 @@ multiclass sse12_cvt_sint opc, RegisterClass SrcRC, RegisterClass DstRC, string asm, OpndItins itins> { def rr : SI; + [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr>, + Sched<[itins.Sched]>; def rm : SI; + [(set DstRC:$dst, (Int mem_cpat:$src))], itins.rm>, + Sched<[itins.Sched.Folded]>; } multiclass sse12_cvt_sint_3addr opc, RegisterClass SrcRC, @@ -1549,14 +1588,14 @@ multiclass sse12_cvt_sint_3addr opc, RegisterClass SrcRC, !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))], - itins.rr>; + itins.rr>, Sched<[itins.Sched]>; def rm : SI; + itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, @@ -2193,12 +2232,13 @@ multiclass sse12_cmp_scalar; + itins.rr>, Sched<[itins.Sched]>; def rm : SIi8<0xC2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm, [(set RC:$dst, (OpNode (VT RC:$src1), (ld_frag addr:$src2), imm:$cc))], - itins.rm>; + itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; // Accept explicit immediate argument form instead of comparison code. let neverHasSideEffects = 1 in { @@ -2241,12 +2281,14 @@ multiclass sse12_cmp_scalar_int; + itins.rr>, + Sched<[itins.Sched]>; def rm : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, x86memop:$src, CC:$cc), asm, [(set VR128:$dst, (Int VR128:$src1, (load addr:$src), imm:$cc))], - itins.rm>; + itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } // Aliases to match intrinsics which expect XMM operand(s). @@ -2276,12 +2318,14 @@ multiclass sse12_ord_cmp opc, RegisterClass RC, SDNode OpNode, def rr: PI; + IIC_SSE_COMIS_RR, d>, + Sched<[WriteFAdd]>; def rm: PI; + IIC_SSE_COMIS_RM, d>, + Sched<[WriteFAddLd, ReadAfterLd]>; } let Defs = [EFLAGS] in { @@ -2338,11 +2382,13 @@ multiclass sse12_cmp_packed; + IIC_SSE_CMPP_RR, d>, + Sched<[WriteFAdd]>; def rmi : PIi8<0xC2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm, [(set RC:$dst, (Int RC:$src1, (memop addr:$src2), imm:$cc))], - IIC_SSE_CMPP_RM, d>; + IIC_SSE_CMPP_RM, d>, + Sched<[WriteFAddLd, ReadAfterLd]>; // Accept explicit immediate argument form instead of comparison code. let neverHasSideEffects = 1 in { @@ -2427,12 +2473,14 @@ multiclass sse12_shuffle; + (i8 imm:$src3))))], IIC_SSE_SHUFP, d>, + Sched<[WriteShuffleLd, ReadAfterLd]>; let isConvertibleToThreeAddress = IsConvertibleToThreeAddress in def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$src3), asm, [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2, - (i8 imm:$src3))))], IIC_SSE_SHUFP, d>; + (i8 imm:$src3))))], IIC_SSE_SHUFP, d>, + Sched<[WriteShuffle]>; } defm VSHUFPS : sse12_shuffle opc, SDNode OpNode, ValueType vt, (outs RC:$dst), (ins RC:$src1, RC:$src2), asm, [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], - IIC_SSE_UNPCK, d>; + IIC_SSE_UNPCK, d>, Sched<[WriteShuffle]>; def rm : PI; + IIC_SSE_UNPCK, d>, + Sched<[WriteShuffleLd, ReadAfterLd]>; } defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memopv4f32, @@ -2613,10 +2662,11 @@ multiclass sse12_extr_sign_mask { def rr32 : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins RC:$src), !strconcat(asm, "\t{$src, $dst|$dst, $src}"), - [(set GR32:$dst, (Int RC:$src))], IIC_SSE_MOVMSK, d>; + [(set GR32:$dst, (Int RC:$src))], IIC_SSE_MOVMSK, d>, + Sched<[WriteVecLogic]>; def rr64 : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins RC:$src), !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], - IIC_SSE_MOVMSK, d>, REX_W; + IIC_SSE_MOVMSK, d>, REX_W, Sched<[WriteVecLogic]>; } let Predicates = [HasAVX] in { @@ -2693,7 +2743,8 @@ multiclass PDI_binop_rm opc, string OpcodeStr, SDNode OpNode, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>; + [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>, + Sched<[itins.Sched]>; def rm : PDI opc, string OpcodeStr, SDNode OpNode, !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (OpVT (OpNode RC:$src1, (bitconvert (memop_frag addr:$src2)))))], - itins.rm>; + itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } // ExeDomain = SSEPackedInt @@ -2967,6 +3019,7 @@ let isCodeGenOnly = 1 in { /// /// And, we have a special variant form for a full-vector intrinsic form. +let Sched = WriteFSqrt in { def SSE_SQRTP : OpndItins< IIC_SSE_SQRTP_RR, IIC_SSE_SQRTP_RM >; @@ -2974,7 +3027,9 @@ def SSE_SQRTP : OpndItins< def SSE_SQRTS : OpndItins< IIC_SSE_SQRTS_RR, IIC_SSE_SQRTS_RM >; +} +let Sched = WriteFRcp in { def SSE_RCPP : OpndItins< IIC_SSE_RCPP_RR, IIC_SSE_RCPP_RM >; @@ -2982,6 +3037,7 @@ def SSE_RCPP : OpndItins< def SSE_RCPS : OpndItins< IIC_SSE_RCPS_RR, IIC_SSE_RCPS_RM >; +} /// sse1_fp_unop_s - SSE1 unops in scalar form. multiclass sse1_fp_unop_s opc, string OpcodeStr, @@ -2991,24 +3047,26 @@ let Predicates = [HasAVX], hasSideEffects = 0 in { (ins FR32:$src1, FR32:$src2), !strconcat("v", OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>; let mayLoad = 1 in { def V#NAME#SSm : SSI, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, + Sched<[itins.Sched.Folded, ReadAfterLd]>; def V#NAME#SSm_Int : SSI, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } def SSr : SSI; + [(set FR32:$dst, (OpNode FR32:$src))]>, Sched<[itins.Sched]>; // For scalar unary operations, fold a load into the operation // only in OptForSize mode. It eliminates an instruction, but it also // eliminates a whole-register clobber (the load), so it introduces a @@ -3016,13 +3074,15 @@ let Predicates = [HasAVX], hasSideEffects = 0 in { def SSm : I, XS, - Requires<[UseSSE1, OptForSize]>; + Requires<[UseSSE1, OptForSize]>, Sched<[itins.Sched.Folded]>; def SSr_Int : SSI; + [(set VR128:$dst, (F32Int VR128:$src))], itins.rr>, + Sched<[itins.Sched]>; def SSm_Int : SSI; + [(set VR128:$dst, (F32Int sse_load_f32:$src))], itins.rm>, + Sched<[itins.Sched.Folded]>; } /// sse1_fp_unop_s_rw - SSE1 unops where vector form has a read-write operand. @@ -3033,24 +3093,26 @@ let Predicates = [HasAVX], hasSideEffects = 0 in { (ins FR32:$src1, FR32:$src2), !strconcat("v", OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>; let mayLoad = 1 in { def V#NAME#SSm : SSI, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, + Sched<[itins.Sched.Folded, ReadAfterLd]>; def V#NAME#SSm_Int : SSI, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } def SSr : SSI; + [(set FR32:$dst, (OpNode FR32:$src))]>, Sched<[itins.Sched]>; // For scalar unary operations, fold a load into the operation // only in OptForSize mode. It eliminates an instruction, but it also // eliminates a whole-register clobber (the load), so it introduces a @@ -3058,17 +3120,17 @@ let Predicates = [HasAVX], hasSideEffects = 0 in { def SSm : I, XS, - Requires<[UseSSE1, OptForSize]>; + Requires<[UseSSE1, OptForSize]>, Sched<[itins.Sched.Folded]>; let Constraints = "$src1 = $dst" in { def SSr_Int : SSI; + [], itins.rr>, Sched<[itins.Sched]>; let mayLoad = 1, hasSideEffects = 0 in def SSm_Int : SSI; + [], itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } } @@ -3080,30 +3142,32 @@ let Predicates = [HasAVX] in { !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))], - itins.rr>, VEX; + itins.rr>, VEX, Sched<[itins.Sched]>; def V#NAME#PSm : PSI, VEX; + itins.rm>, VEX, Sched<[itins.Sched.Folded]>; def V#NAME#PSYr : PSI, VEX, VEX_L; + itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>; def V#NAME#PSYm : PSI, VEX, VEX_L; + itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>; } def PSr : PSI; + [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))], itins.rr>, + Sched<[itins.Sched]>; def PSm : PSI; + [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))], itins.rm>, + Sched<[itins.Sched.Folded]>; } /// sse1_fp_unop_p_int - SSE1 intrinsics unops in packed forms. @@ -3115,33 +3179,33 @@ let Predicates = [HasAVX] in { !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (V4F32Int VR128:$src))], - itins.rr>, VEX; + itins.rr>, VEX, Sched<[itins.Sched]>; def V#NAME#PSm_Int : PSI, VEX; + itins.rm>, VEX, Sched<[itins.Sched.Folded]>; def V#NAME#PSYr_Int : PSI, VEX, VEX_L; + itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>; def V#NAME#PSYm_Int : PSI, VEX, VEX_L; + itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>; } def PSr_Int : PSI; + itins.rr>, Sched<[itins.Sched]>; def PSm_Int : PSI; + itins.rm>, Sched<[itins.Sched.Folded]>; } /// sse2_fp_unop_s - SSE2 unops in scalar form. @@ -3152,35 +3216,40 @@ let Predicates = [HasAVX], hasSideEffects = 0 in { (ins FR64:$src1, FR64:$src2), !strconcat("v", OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>; let mayLoad = 1 in { def V#NAME#SDm : SDI, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, + Sched<[itins.Sched.Folded, ReadAfterLd]>; def V#NAME#SDm_Int : SDI, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } def SDr : SDI; + [(set FR64:$dst, (OpNode FR64:$src))], itins.rr>, + Sched<[itins.Sched]>; // See the comments in sse1_fp_unop_s for why this is OptForSize. def SDm : I, XD, - Requires<[UseSSE2, OptForSize]>; + Requires<[UseSSE2, OptForSize]>, Sched<[itins.Sched.Folded]>; def SDr_Int : SDI; + [(set VR128:$dst, (F64Int VR128:$src))], itins.rr>, + Sched<[itins.Sched]>; def SDm_Int : SDI; + [(set VR128:$dst, (F64Int sse_load_f64:$src))], itins.rm>, + Sched<[itins.Sched.Folded]>; } /// sse2_fp_unop_p - SSE2 unops in vector forms. @@ -3191,30 +3260,32 @@ let Predicates = [HasAVX] in { !strconcat("v", OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))], - itins.rr>, VEX; + itins.rr>, VEX, Sched<[itins.Sched]>; def V#NAME#PDm : PDI, VEX; + itins.rm>, VEX, Sched<[itins.Sched.Folded]>; def V#NAME#PDYr : PDI, VEX, VEX_L; + itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>; def V#NAME#PDYm : PDI, VEX, VEX_L; + itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>; } def PDr : PDI; + [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))], itins.rr>, + Sched<[itins.Sched]>; def PDm : PDI; + [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))], itins.rm>, + Sched<[itins.Sched.Folded]>; } // Square root. @@ -3305,52 +3376,48 @@ let Predicates = [UseSSE1] in { //===----------------------------------------------------------------------===// let AddedComplexity = 400 in { // Prefer non-temporal versions - def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs), - (ins f128mem:$dst, VR128:$src), - "movntps\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v4f32 VR128:$src), - addr:$dst)], - IIC_SSE_MOVNT>, VEX; - def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs), - (ins f128mem:$dst, VR128:$src), - "movntpd\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v2f64 VR128:$src), - addr:$dst)], - IIC_SSE_MOVNT>, VEX; +let SchedRW = [WriteStore] in { +def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs), + (ins f128mem:$dst, VR128:$src), + "movntps\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v4f32 VR128:$src), + addr:$dst)], + IIC_SSE_MOVNT>, VEX; +def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs), + (ins f128mem:$dst, VR128:$src), + "movntpd\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v2f64 VR128:$src), + addr:$dst)], + IIC_SSE_MOVNT>, VEX; - let ExeDomain = SSEPackedInt in - def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs), - (ins f128mem:$dst, VR128:$src), - "movntdq\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v2i64 VR128:$src), - addr:$dst)], - IIC_SSE_MOVNT>, VEX; +let ExeDomain = SSEPackedInt in +def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs), + (ins f128mem:$dst, VR128:$src), + "movntdq\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v2i64 VR128:$src), + addr:$dst)], + IIC_SSE_MOVNT>, VEX; - def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst), - (VMOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasAVX]>; +def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs), + (ins f256mem:$dst, VR256:$src), + "movntps\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v8f32 VR256:$src), + addr:$dst)], + IIC_SSE_MOVNT>, VEX, VEX_L; +def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs), + (ins f256mem:$dst, VR256:$src), + "movntpd\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v4f64 VR256:$src), + addr:$dst)], + IIC_SSE_MOVNT>, VEX, VEX_L; +let ExeDomain = SSEPackedInt in +def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs), + (ins f256mem:$dst, VR256:$src), + "movntdq\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v4i64 VR256:$src), + addr:$dst)], + IIC_SSE_MOVNT>, VEX, VEX_L; - def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs), - (ins f256mem:$dst, VR256:$src), - "movntps\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v8f32 VR256:$src), - addr:$dst)], - IIC_SSE_MOVNT>, VEX, VEX_L; - def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs), - (ins f256mem:$dst, VR256:$src), - "movntpd\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v4f64 VR256:$src), - addr:$dst)], - IIC_SSE_MOVNT>, VEX, VEX_L; - let ExeDomain = SSEPackedInt in - def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs), - (ins f256mem:$dst, VR256:$src), - "movntdq\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v4i64 VR256:$src), - addr:$dst)], - IIC_SSE_MOVNT>, VEX, VEX_L; -} - -let AddedComplexity = 400 in { // Prefer non-temporal versions def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movntps\t{$src, $dst|$dst, $src}", [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)], @@ -3366,9 +3433,6 @@ def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), [(alignednontemporalstore (v2i64 VR128:$src), addr:$dst)], IIC_SSE_MOVNT>; -def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst), - (MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[UseSSE2]>; - // There is no AVX form for instructions below this point def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), "movnti{l}\t{$src, $dst|$dst, $src}", @@ -3380,7 +3444,14 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), [(nontemporalstore (i64 GR64:$src), addr:$dst)], IIC_SSE_MOVNT>, TB, Requires<[HasSSE2]>; -} +} // SchedRW = [WriteStore] + +def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst), + (VMOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasAVX]>; + +def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst), + (MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[UseSSE2]>; +} // AddedComplexity //===----------------------------------------------------------------------===// // SSE 1 & 2 - Prefetch and memory fence @@ -3450,7 +3521,7 @@ def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst), let ExeDomain = SSEPackedInt in { // SSE integer instructions -let neverHasSideEffects = 1 in { +let neverHasSideEffects = 1, SchedRW = [WriteMove] in { def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>, VEX; @@ -3466,7 +3537,7 @@ def VMOVDQUYrr : VSSI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), } // For Disassembler -let isCodeGenOnly = 1, hasSideEffects = 0 in { +let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in { def VMOVDQArr_REV : VPDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>, @@ -3484,7 +3555,7 @@ def VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), } let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1, - neverHasSideEffects = 1 in { + neverHasSideEffects = 1, SchedRW = [WriteLoad] in { def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RM>, VEX; @@ -3501,7 +3572,7 @@ let Predicates = [HasAVX] in { } } -let mayStore = 1, neverHasSideEffects = 1 in { +let mayStore = 1, neverHasSideEffects = 1, SchedRW = [WriteStore] in { def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_MR>, @@ -3520,6 +3591,7 @@ def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src), } } +let SchedRW = [WriteMove] in { let neverHasSideEffects = 1 in def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>; @@ -3538,9 +3610,10 @@ def MOVDQUrr_REV : I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RR>, XS, Requires<[UseSSE2]>; } +} // SchedRW let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1, - neverHasSideEffects = 1 in { + neverHasSideEffects = 1, SchedRW = [WriteLoad] in { def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movdqa\t{$src, $dst|$dst, $src}", [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/], @@ -3552,7 +3625,7 @@ def MOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), XS, Requires<[UseSSE2]>; } -let mayStore = 1 in { +let mayStore = 1, SchedRW = [WriteStore] in { def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/], @@ -3580,6 +3653,7 @@ def : Pat<(int_x86_sse2_storeu_dq addr:$dst, VR128:$src), // SSE2 - Packed Integer Arithmetic Instructions //===---------------------------------------------------------------------===// +let Sched = WriteVecIMul in def SSE_PMADD : OpndItins< IIC_SSE_PMADD, IIC_SSE_PMADD >; @@ -3598,14 +3672,15 @@ multiclass PDI_binop_rm_int opc, string OpcodeStr, Intrinsic IntId, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (IntId RC:$src1, RC:$src2))], itins.rr>; + [(set RC:$dst, (IntId RC:$src1, RC:$src2))], itins.rr>, + Sched<[itins.Sched]>; def rm : PDI; + itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } multiclass PDI_binop_all_int opc, string OpcodeStr, Intrinsic IntId128, @@ -3639,20 +3714,22 @@ multiclass PDI_binop_rmi opc, bits<8> opc2, Format ImmForm, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (DstVT (OpNode RC:$src1, (SrcVT VR128:$src2))))], - itins.rr>; + itins.rr>, Sched<[WriteVecShift]>; def rm : PDI; + (bc_frag (memopv2i64 addr:$src2)))))], itins.rm>, + Sched<[WriteVecShiftLd, ReadAfterLd]>; def ri : PDIi8; + [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i32 imm:$src2))))], itins.ri>, + Sched<[WriteVecShift]>; } /// PDI_binop_rm2 - Simple SSE2 binary operator with different src and dst types @@ -3667,14 +3744,16 @@ multiclass PDI_binop_rm2 opc, string OpcodeStr, SDNode OpNode, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>; + [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>, + Sched<[itins.Sched]>; def rm : PDI; + (bitconvert (memop_frag addr:$src2)))))]>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } // ExeDomain = SSEPackedInt @@ -3779,7 +3858,7 @@ defm VPSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai, VR128, v4i32, v4i32, bc_v4i32, SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; -let ExeDomain = SSEPackedInt in { +let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in { // 128-bit logical shifts. def VPSLLDQri : PDIi8<0x73, MRM7r, (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), @@ -3825,7 +3904,7 @@ defm VPSRADY : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai, VR256, v8i32, v4i32, bc_v4i32, SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; -let ExeDomain = SSEPackedInt in { +let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in { // 256-bit logical shifts. def VPSLLDQYri : PDIi8<0x73, MRM7r, (outs VR256:$dst), (ins VR256:$src1, i32i8imm:$src2), @@ -3871,7 +3950,7 @@ defm PSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "psrad", X86vsra, X86vsrai, VR128, v4i32, v4i32, bc_v4i32, SSE_INTSHIFT_ITINS_P>; -let ExeDomain = SSEPackedInt in { +let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in { // 128-bit logical shifts. def PSLLDQri : PDIi8<0x73, MRM7r, (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), @@ -3966,14 +4045,15 @@ let Predicates = [HasAVX] in { "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))], - IIC_SSE_PSHUF>, VEX; + IIC_SSE_PSHUF>, VEX, Sched<[WriteShuffle]>; def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), !strconcat("v", OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)), - (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX; + (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX, + Sched<[WriteShuffleLd]>; } let Predicates = [HasAVX2] in { @@ -3983,14 +4063,15 @@ let Predicates = [HasAVX2] in { "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (vt256 (OpNode VR256:$src1, (i8 imm:$src2))))], - IIC_SSE_PSHUF>, VEX, VEX_L; + IIC_SSE_PSHUF>, VEX, VEX_L, Sched<[WriteShuffle]>; def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src1, i8imm:$src2), !strconcat("v", OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (vt256 (OpNode (bitconvert (memopv4i64 addr:$src1)), - (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX, VEX_L; + (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX, VEX_L, + Sched<[WriteShuffleLd]>; } let Predicates = [UseSSE2] in { @@ -4000,14 +4081,15 @@ let Predicates = [UseSSE2] in { "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))], - IIC_SSE_PSHUF>; + IIC_SSE_PSHUF>, Sched<[WriteShuffle]>; def mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)), - (i8 imm:$src2))))], IIC_SSE_PSHUF>; + (i8 imm:$src2))))], IIC_SSE_PSHUF>, + Sched<[WriteShuffleLd]>; } } } // ExeDomain = SSEPackedInt @@ -4043,7 +4125,7 @@ multiclass sse2_unpack opc, string OpcodeStr, ValueType vt, !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set VR128:$dst, (vt (OpNode VR128:$src1, VR128:$src2)))], - IIC_SSE_UNPCK>; + IIC_SSE_UNPCK>, Sched<[WriteShuffle]>; def rm : PDI opc, string OpcodeStr, ValueType vt, [(set VR128:$dst, (OpNode VR128:$src1, (bc_frag (memopv2i64 addr:$src2))))], - IIC_SSE_UNPCK>; + IIC_SSE_UNPCK>, + Sched<[WriteShuffleLd, ReadAfterLd]>; } multiclass sse2_unpack_y opc, string OpcodeStr, ValueType vt, @@ -4060,12 +4143,14 @@ multiclass sse2_unpack_y opc, string OpcodeStr, ValueType vt, def Yrr : PDI; + [(set VR256:$dst, (vt (OpNode VR256:$src1, VR256:$src2)))]>, + Sched<[WriteShuffle]>; def Yrm : PDI; + (bc_frag (memopv4i64 addr:$src2))))]>, + Sched<[WriteShuffleLd, ReadAfterLd]>; } let Predicates = [HasAVX] in { @@ -4142,7 +4227,8 @@ multiclass sse2_pinsrw { "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR128:$dst, - (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))], IIC_SSE_PINSRW>; + (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))], IIC_SSE_PINSRW>, + Sched<[WriteShuffle]>; def rmi : Ii8<0xC4, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i16mem:$src2, i32i8imm:$src3), @@ -4151,7 +4237,8 @@ multiclass sse2_pinsrw { "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR128:$dst, (X86pinsrw VR128:$src1, (extloadi16 addr:$src2), - imm:$src3))], IIC_SSE_PINSRW>; + imm:$src3))], IIC_SSE_PINSRW>, + Sched<[WriteShuffleLd, ReadAfterLd]>; } // Extract @@ -4160,12 +4247,14 @@ def VPEXTRWri : Ii8<0xC5, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2), "vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1), - imm:$src2))]>, TB, OpSize, VEX; + imm:$src2))]>, TB, OpSize, VEX, + Sched<[WriteShuffle]>; def PEXTRWri : PDIi8<0xC5, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2), "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1), - imm:$src2))], IIC_SSE_PEXTRW>; + imm:$src2))], IIC_SSE_PEXTRW>, + Sched<[WriteShuffleLd, ReadAfterLd]>; // Insert let Predicates = [HasAVX] in { @@ -4173,7 +4262,7 @@ let Predicates = [HasAVX] in { def VPINSRWrr64i : Ii8<0xC4, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, GR64:$src2, i32i8imm:$src3), "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - []>, TB, OpSize, VEX_4V; + []>, TB, OpSize, VEX_4V, Sched<[WriteShuffle]>; } let Constraints = "$src1 = $dst" in @@ -4185,7 +4274,7 @@ let Constraints = "$src1 = $dst" in // SSE2 - Packed Mask Creation //===---------------------------------------------------------------------===// -let ExeDomain = SSEPackedInt in { +let ExeDomain = SSEPackedInt, SchedRW = [WriteVecLogic] in { def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), "pmovmskb\t{$src, $dst|$dst, $src}", @@ -4213,7 +4302,7 @@ def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), // SSE2 - Conditional Store //===---------------------------------------------------------------------===// -let ExeDomain = SSEPackedInt in { +let ExeDomain = SSEPackedInt, SchedRW = [WriteStore] in { let Uses = [EDI] in def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs), @@ -4252,41 +4341,42 @@ def VMOVDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>, - VEX; + VEX, Sched<[WriteMove]>; def VMOVDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (scalar_to_vector (loadi32 addr:$src))))], IIC_SSE_MOVDQ>, - VEX; + VEX, Sched<[WriteLoad]>; def VMOV64toPQIrr : VRPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2i64 (scalar_to_vector GR64:$src)))], - IIC_SSE_MOVDQ>, VEX; + IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>; def VMOV64toSDrr : VRPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert GR64:$src))], - IIC_SSE_MOVDQ>, VEX; + IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>; def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>; + (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>, + Sched<[WriteMove]>; def MOVDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (scalar_to_vector (loadi32 addr:$src))))], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteLoad]>; def MOV64toPQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2i64 (scalar_to_vector GR64:$src)))], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteMove]>; def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert GR64:$src))], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteMove]>; //===---------------------------------------------------------------------===// // Move Int Doubleword to Single Scalar @@ -4294,22 +4384,22 @@ def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), def VMOVDI2SSrr : VPDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (bitconvert GR32:$src))], - IIC_SSE_MOVDQ>, VEX; + IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>; def VMOVDI2SSrm : VPDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))], IIC_SSE_MOVDQ>, - VEX; + VEX, Sched<[WriteLoad]>; def MOVDI2SSrr : PDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (bitconvert GR32:$src))], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteMove]>; def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteLoad]>; //===---------------------------------------------------------------------===// // Move Packed Doubleword Int to Packed Double Int @@ -4317,26 +4407,29 @@ def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src), def VMOVPDI2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (vector_extract (v4i32 VR128:$src), - (iPTR 0)))], IIC_SSE_MOVD_ToGP>, VEX; + (iPTR 0)))], IIC_SSE_MOVD_ToGP>, VEX, + Sched<[WriteMove]>; def VMOVPDI2DImr : VPDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src), "movd\t{$src, $dst|$dst, $src}", [(store (i32 (vector_extract (v4i32 VR128:$src), (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>, - VEX; + VEX, Sched<[WriteLoad]>; def MOVPDI2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (vector_extract (v4i32 VR128:$src), - (iPTR 0)))], IIC_SSE_MOVD_ToGP>; + (iPTR 0)))], IIC_SSE_MOVD_ToGP>, + Sched<[WriteMove]>; def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src), "movd\t{$src, $dst|$dst, $src}", [(store (i32 (vector_extract (v4i32 VR128:$src), (iPTR 0))), addr:$dst)], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteLoad]>; //===---------------------------------------------------------------------===// // Move Packed Doubleword Int first element to Doubleword Int // +let SchedRW = [WriteMove] in { def VMOVPQIto64rr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), "vmov{d|q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (vector_extract (v2i64 VR128:$src), @@ -4349,6 +4442,7 @@ def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), [(set GR64:$dst, (vector_extract (v2i64 VR128:$src), (iPTR 0)))], IIC_SSE_MOVD_ToGP>; +} //SchedRW //===---------------------------------------------------------------------===// // Bitcast FR64 <-> GR64 @@ -4357,28 +4451,28 @@ let Predicates = [HasAVX] in def VMOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>, - VEX; + VEX, Sched<[WriteLoad]>; def VMOVSDto64rr : VRPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (bitconvert FR64:$src))], - IIC_SSE_MOVDQ>, VEX; + IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>; def VMOVSDto64mr : VRPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), "movq\t{$src, $dst|$dst, $src}", [(store (i64 (bitconvert FR64:$src)), addr:$dst)], - IIC_SSE_MOVDQ>, VEX; + IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>; def MOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), "movq\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteLoad]>; def MOVSDto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (bitconvert FR64:$src))], - IIC_SSE_MOVD_ToGP>; + IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>; def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), "movq\t{$src, $dst|$dst, $src}", [(store (i64 (bitconvert FR64:$src)), addr:$dst)], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteStore]>; //===---------------------------------------------------------------------===// // Move Scalar Single to Double Int