diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h index 5b1cf5a1a58..404a7e80e6c 100644 --- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -355,6 +355,16 @@ namespace X86II { // XOPA - Prefix to encode 0xA in VEX.MMMM of XOP instructions. XOPA = 22 << Op0Shift, + // PD - Prefix code for packed double precision vector floating point + // operations performed in the SSE registers. + PD = 23 << Op0Shift, + + // T8PD - Prefix before and after 0x0F. Combination of T8 and PD. + T8PD = 24 << Op0Shift, + + // TAPD - Prefix before and after 0x0F. Combination of TA and PD. + TAPD = 25 << Op0Shift, + //===------------------------------------------------------------------===// // REX_W - REX prefixes are instruction prefixes used in 64-bit mode. // They are used to specify GPRs and SSE registers, 64-bit operand size, diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 48bd6f19351..bd3c00626d6 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -717,6 +717,10 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, case X86II::TA: // 0F 3A VEX_5M = 0x3; break; + case X86II::T8PD: // 66 0F 38 + VEX_PP = 0x1; + VEX_5M = 0x2; + break; case X86II::T8XS: // F3 0F 38 VEX_PP = 0x2; VEX_5M = 0x2; @@ -725,10 +729,17 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, VEX_PP = 0x3; VEX_5M = 0x2; break; + case X86II::TAPD: // 66 0F 3A + VEX_PP = 0x1; + VEX_5M = 0x3; + break; case X86II::TAXD: // F2 0F 3A VEX_PP = 0x3; VEX_5M = 0x3; break; + case X86II::PD: // 66 0F + VEX_PP = 0x1; + break; case X86II::XS: // F3 0F VEX_PP = 0x2; break; @@ -1215,6 +1226,12 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, case X86II::A7: // 0F A7 Need0FPrefix = true; break; + case X86II::PD: // 
66 0F + case X86II::T8PD: // 66 0F 38 + case X86II::TAPD: // 66 0F 3A + EmitByte(0x66, CurByte, OS); + Need0FPrefix = true; + break; case X86II::XS: // F3 0F case X86II::T8XS: // F3 0F 38 EmitByte(0xF3, CurByte, OS); @@ -1252,11 +1269,13 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, // FIXME: Pull this up into previous switch if REX can be moved earlier. switch (TSFlags & X86II::Op0Mask) { + case X86II::T8PD: // 66 0F 38 case X86II::T8XS: // F3 0F 38 case X86II::T8XD: // F2 0F 38 case X86II::T8: // 0F 38 EmitByte(0x38, CurByte, OS); break; + case X86II::TAPD: // 66 0F 3A case X86II::TAXD: // F2 0F 3A case X86II::TA: // 0F 3A EmitByte(0x3A, CurByte, OS); diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index 072996679bc..5dba4ecbfdb 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -696,6 +696,12 @@ void Emitter::emitOpcodePrefix(uint64_t TSFlags, Need0FPrefix = true; break; case X86II::REP: break; // already handled. 
+ case X86II::PD: // 66 0F + case X86II::T8PD: // 66 0F 38 + case X86II::TAPD: // 66 0F 3A + MCE.emitByte(0x66); + Need0FPrefix = true; + break; case X86II::T8XS: // F3 0F 38 case X86II::XS: // F3 0F MCE.emitByte(0xF3); @@ -728,11 +734,13 @@ void Emitter::emitOpcodePrefix(uint64_t TSFlags, MCE.emitByte(0x0F); switch (Desc->TSFlags & X86II::Op0Mask) { + case X86II::T8PD: // 66 0F 38 case X86II::T8XD: // F2 0F 38 case X86II::T8XS: // F3 0F 38 case X86II::T8: // 0F 38 MCE.emitByte(0x38); break; + case X86II::TAPD: // 66 0F 3A case X86II::TAXD: // F2 0F 38 case X86II::TA: // 0F 3A MCE.emitByte(0x3A); break; @@ -882,6 +890,10 @@ void Emitter::emitVEXOpcodePrefix(uint64_t TSFlags, case X86II::TA: // 0F 3A VEX_5M = 0x3; break; + case X86II::T8PD: // 66 0F 38 + VEX_PP = 0x1; + VEX_5M = 0x2; + break; case X86II::T8XS: // F3 0F 38 VEX_PP = 0x2; VEX_5M = 0x2; break; @@ -890,10 +902,17 @@ void Emitter::emitVEXOpcodePrefix(uint64_t TSFlags, VEX_PP = 0x3; VEX_5M = 0x2; break; + case X86II::TAPD: // 66 0F 3A + VEX_PP = 0x1; + VEX_5M = 0x3; + break; case X86II::TAXD: // F2 0F 3A VEX_PP = 0x3; VEX_5M = 0x3; break; + case X86II::PD: // 66 0F + VEX_PP = 0x1; + break; case X86II::XS: // F3 0F VEX_PP = 0x2; break; diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index c1c3c3cff2d..a5d7ed0b6be 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -744,12 +744,12 @@ multiclass avx512_icmp_packed opc, string OpcodeStr, RegisterClass KRC, defm VPCMPEQDZ : avx512_icmp_packed<0x76, "vpcmpeqd", VK16, VR512, i512mem, memopv16i32, X86pcmpeqm, v16i32>, EVEX_V512; defm VPCMPEQQZ : avx512_icmp_packed<0x29, "vpcmpeqq", VK8, VR512, i512mem, - memopv8i64, X86pcmpeqm, v8i64>, T8, EVEX_V512, VEX_W; + memopv8i64, X86pcmpeqm, v8i64>, T8PD, EVEX_V512, VEX_W; defm VPCMPGTDZ : avx512_icmp_packed<0x66, "vpcmpgtd", VK16, VR512, i512mem, memopv16i32, X86pcmpgtm, v16i32>, EVEX_V512; defm VPCMPGTQZ : avx512_icmp_packed<0x37, "vpcmpgtq", VK8, VR512, i512mem, -
memopv8i64, X86pcmpgtm, v8i64>, T8, EVEX_V512, VEX_W; + memopv8i64, X86pcmpgtm, v8i64>, T8PD, EVEX_V512, VEX_W; def : Pat<(v8i1 (X86pcmpgtm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), (COPY_TO_REGCLASS (VPCMPGTDZrr @@ -843,7 +843,7 @@ multiclass avx512_cmp_packed, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VF>; defm VCMPPDZ : avx512_cmp_packed, OpSize, EVEX_4V, VEX_W, EVEX_V512, + "pd", SSEPackedDouble>, PD, EVEX_4V, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>; def : Pat<(v8i1 (X86cmpm (v8f32 VR256X:$src1), (v8f32 VR256X:$src2), imm:$cc)), @@ -1103,7 +1103,7 @@ multiclass avx512_mask_unpck opc, string OpcodeStr, multiclass avx512_mask_unpck_bw opc, string OpcodeStr> { defm BW : avx512_mask_unpck, - VEX_4V, VEX_L, OpSize, TB; + VEX_4V, VEX_L, PD; } defm KUNPCK : avx512_mask_unpck_bw<0x4b, "kunpck">; @@ -1155,7 +1155,7 @@ multiclass avx512_mask_shiftop opc, string OpcodeStr, RegisterClass KRC, multiclass avx512_mask_shiftop_w opc1, bits<8> opc2, string OpcodeStr, SDNode OpNode> { defm W : avx512_mask_shiftop, - VEX, OpSize, TA, VEX_W; + VEX, TAPD, VEX_W; } defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86vshli>; @@ -1228,14 +1228,14 @@ defm VMOVAPSZ : avx512_mov_packed<0x28, VR512, VK16WM, f512mem, alignedloadv16f3 EVEX_V512, EVEX_CD8<32, CD8VF>; defm VMOVAPDZ : avx512_mov_packed<0x28, VR512, VK8WM, f512mem, alignedloadv8f64, "vmovapd", SSEPackedDouble>, - OpSize, EVEX_V512, VEX_W, + PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; defm VMOVUPSZ : avx512_mov_packed<0x10, VR512, VK16WM, f512mem, loadv16f32, "vmovups", SSEPackedSingle>, EVEX_V512, EVEX_CD8<32, CD8VF>; defm VMOVUPDZ : avx512_mov_packed<0x10, VR512, VK8WM, f512mem, loadv8f64, "vmovupd", SSEPackedDouble>, - OpSize, EVEX_V512, VEX_W, + PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; def VMOVAPSZmr : AVX512PI<0x29, MRMDestMem, (outs), (ins f512mem:$dst, VR512:$src), "vmovaps\t{$src, $dst|$dst, $src}", @@ -1245,7 +1245,7 @@ def VMOVAPDZmr : AVX512PI<0x29, MRMDestMem, (outs), (ins f512mem:$dst, VR512:$sr 
"vmovapd\t{$src, $dst|$dst, $src}", [(alignedstore512 (v8f64 VR512:$src), addr:$dst)], SSEPackedDouble>, EVEX, EVEX_V512, - OpSize, VEX_W, EVEX_CD8<64, CD8VF>; + PD, VEX_W, EVEX_CD8<64, CD8VF>; def VMOVUPSZmr : AVX512PI<0x11, MRMDestMem, (outs), (ins f512mem:$dst, VR512:$src), "vmovups\t{$src, $dst|$dst, $src}", [(store (v16f32 VR512:$src), addr:$dst)], @@ -1254,7 +1254,7 @@ def VMOVUPDZmr : AVX512PI<0x11, MRMDestMem, (outs), (ins f512mem:$dst, VR512:$sr "vmovupd\t{$src, $dst|$dst, $src}", [(store (v8f64 VR512:$src), addr:$dst)], SSEPackedDouble>, EVEX, EVEX_V512, - OpSize, VEX_W, EVEX_CD8<64, CD8VF>; + PD, VEX_W, EVEX_CD8<64, CD8VF>; let hasSideEffects = 0 in { def VMOVDQA32rr : AVX512BI<0x6F, MRMSrcReg, (outs VR512:$dst), @@ -1421,7 +1421,7 @@ def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (extractelt (v2i64 VR128X:$src), (iPTR 0)))], - IIC_SSE_MOVD_ToGP>, TB, OpSize, EVEX, VEX_LIG, VEX_W, + IIC_SSE_MOVD_ToGP>, PD, EVEX, VEX_LIG, VEX_W, Requires<[HasAVX512, In64BitMode]>; def VMOVPQIto64Zmr : I<0xD6, MRMDestMem, (outs), @@ -1429,7 +1429,7 @@ def VMOVPQIto64Zmr : I<0xD6, MRMDestMem, (outs), "vmovq\t{$src, $dst|$dst, $src}", [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)), addr:$dst)], IIC_SSE_MOVDQ>, - EVEX, OpSize, VEX_LIG, VEX_W, TB, EVEX_CD8<64, CD8VT1>, + EVEX, PD, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteStore]>, Requires<[HasAVX512, In64BitMode]>; // Move Scalar Single to Double Int @@ -1770,7 +1770,7 @@ defm VPSUBDZ : avx512_binop_rm<0xFA, "vpsubd", sub, v16i32, VR512, memopv16i32, defm VPMULLDZ : avx512_binop_rm<0x40, "vpmulld", mul, v16i32, VR512, memopv16i32, i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>, - T8, EVEX_V512, EVEX_CD8<32, CD8VF>; + T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>; defm VPADDQZ : avx512_binop_rm<0xD4, "vpaddq", add, v8i64, VR512, memopv8i64, i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 1>, @@ -1781,7 +1781,7 @@ defm 
VPSUBQZ : avx512_binop_rm<0xFB, "vpsubq", sub, v8i64, VR512, memopv8i64, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; defm VPMULDQZ : avx512_binop_rm2<0x28, "vpmuldq", v8i64, v16i32, - VR512, memopv8i64, i512mem, SSE_INTALU_ITINS_P, 1>, T8, + VR512, memopv8i64, i512mem, SSE_INTALU_ITINS_P, 1>, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>; defm VPMULUDQZ : avx512_binop_rm2<0xF4, "vpmuludq", v8i64, v16i32, @@ -1800,31 +1800,31 @@ def : Pat<(v8i64 (int_x86_avx512_mask_pmul_dq_512 (v16i32 VR512:$src1), defm VPMAXUDZ : avx512_binop_rm<0x3F, "vpmaxud", X86umax, v16i32, VR512, memopv16i32, i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>, - T8, EVEX_V512, EVEX_CD8<32, CD8VF>; + T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>; defm VPMAXUQZ : avx512_binop_rm<0x3F, "vpmaxuq", X86umax, v8i64, VR512, memopv8i64, i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 0>, - T8, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; defm VPMAXSDZ : avx512_binop_rm<0x3D, "vpmaxsd", X86smax, v16i32, VR512, memopv16i32, i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>, - T8, EVEX_V512, EVEX_CD8<32, CD8VF>; + T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>; defm VPMAXSQZ : avx512_binop_rm<0x3D, "vpmaxsq", X86smax, v8i64, VR512, memopv8i64, i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 0>, - T8, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; defm VPMINUDZ : avx512_binop_rm<0x3B, "vpminud", X86umin, v16i32, VR512, memopv16i32, i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>, - T8, EVEX_V512, EVEX_CD8<32, CD8VF>; + T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>; defm VPMINUQZ : avx512_binop_rm<0x3B, "vpminuq", X86umin, v8i64, VR512, memopv8i64, i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 0>, - T8, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; defm VPMINSDZ : avx512_binop_rm<0x39, "vpminsd", X86smin, v16i32, VR512, memopv16i32, i512mem, loadi32, i32mem, "{1to16}", 
SSE_INTALU_ITINS_P, 1>, - T8, EVEX_V512, EVEX_CD8<32, CD8VF>; + T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>; defm VPMINSQZ : avx512_binop_rm<0x39, "vpminsq", X86smin, v8i64, VR512, memopv8i64, i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 0>, - T8, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; def : Pat <(v16i32 (int_x86_avx512_mask_pmaxs_d_512 (v16i32 VR512:$src1), (v16i32 VR512:$src2), (v16i32 immAllZerosV), (i16 -1))), @@ -1876,13 +1876,13 @@ defm VUNPCKHPSZ: avx512_unpack_fp<0x15, X86Unpckh, v16f32, memopv8f64, SSEPackedSingle>, EVEX_V512, EVEX_CD8<32, CD8VF>; defm VUNPCKHPDZ: avx512_unpack_fp<0x15, X86Unpckh, v8f64, memopv8f64, VR512, f512mem, "vunpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedDouble>, OpSize, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + SSEPackedDouble>, PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; defm VUNPCKLPSZ: avx512_unpack_fp<0x14, X86Unpckl, v16f32, memopv8f64, VR512, f512mem, "vunpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", SSEPackedSingle>, EVEX_V512, EVEX_CD8<32, CD8VF>; defm VUNPCKLPDZ: avx512_unpack_fp<0x14, X86Unpckl, v8f64, memopv8f64, VR512, f512mem, "vunpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedDouble>, OpSize, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + SSEPackedDouble>, PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; multiclass avx512_unpack_int opc, string OpcodeStr, SDNode OpNode, ValueType OpVT, RegisterClass RC, PatFrag memop_frag, @@ -1935,15 +1935,15 @@ multiclass avx512_pshuf_imm opc, string OpcodeStr, RegisterClass RC, } defm VPSHUFDZ : avx512_pshuf_imm<0x70, "vpshufd", VR512, X86PShufd, memopv16i32, - i512mem, v16i32>, OpSize, EVEX_V512, EVEX_CD8<32, CD8VF>; + i512mem, v16i32>, PD, EVEX_V512, EVEX_CD8<32, CD8VF>; let ExeDomain = SSEPackedSingle in defm VPERMILPSZ : avx512_pshuf_imm<0x04, "vpermilps", VR512, X86VPermilp, - memopv16f32, i512mem, v16f32>, OpSize, TA, EVEX_V512, + memopv16f32, i512mem, v16f32>, TAPD, EVEX_V512, EVEX_CD8<32, CD8VF>; let 
ExeDomain = SSEPackedDouble in defm VPERMILPDZ : avx512_pshuf_imm<0x05, "vpermilpd", VR512, X86VPermilp, - memopv8f64, i512mem, v8f64>, OpSize, TA, EVEX_V512, + memopv8f64, i512mem, v8f64>, TAPD, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VF>; def : Pat<(v16i32 (X86VPermilp VR512:$src1, (i8 imm:$imm))), @@ -2038,7 +2038,7 @@ defm VADDPSZ : avx512_fp_packed<0x58, "addps", fadd, VR512, v16f32, f512mem, defm VADDPDZ : avx512_fp_packed<0x58, "addpd", fadd, VR512, v8f64, f512mem, memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble, SSE_ALU_ITINS_P.d, 1>, - EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>; + EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>; defm VMULPSZ : avx512_fp_packed<0x59, "mulps", fmul, VR512, v16f32, f512mem, memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle, @@ -2046,7 +2046,7 @@ defm VMULPSZ : avx512_fp_packed<0x59, "mulps", fmul, VR512, v16f32, f512mem, defm VMULPDZ : avx512_fp_packed<0x59, "mulpd", fmul, VR512, v8f64, f512mem, memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble, SSE_ALU_ITINS_P.d, 1>, - EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>; + EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>; defm VMINPSZ : avx512_fp_packed<0x5D, "minps", X86fmin, VR512, v16f32, f512mem, memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle, @@ -2060,11 +2060,11 @@ defm VMAXPSZ : avx512_fp_packed<0x5F, "maxps", X86fmax, VR512, v16f32, f512mem, defm VMINPDZ : avx512_fp_packed<0x5D, "minpd", X86fmin, VR512, v8f64, f512mem, memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble, SSE_ALU_ITINS_P.d, 1>, - EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>; + EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>; defm VMAXPDZ : avx512_fp_packed<0x5F, "maxpd", X86fmax, VR512, v8f64, f512mem, memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble, SSE_ALU_ITINS_P.d, 1>, - EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>; + EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>; defm VSUBPSZ : avx512_fp_packed<0x5C, "subps", fsub, VR512, v16f32, f512mem, memopv16f32, f32mem, loadf32, "{1to16}", 
SSEPackedSingle, @@ -2076,11 +2076,11 @@ defm VDIVPSZ : avx512_fp_packed<0x5E, "divps", fdiv, VR512, v16f32, f512mem, defm VSUBPDZ : avx512_fp_packed<0x5C, "subpd", fsub, VR512, v8f64, f512mem, memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble, SSE_ALU_ITINS_P.d, 0>, - EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>; + EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>; defm VDIVPDZ : avx512_fp_packed<0x5E, "divpd", fdiv, VR512, v8f64, f512mem, memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble, SSE_ALU_ITINS_P.d, 0>, - EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>; + EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>; def : Pat<(v16f32 (int_x86_avx512_mask_max_ps_512 (v16f32 VR512:$src1), (v16f32 VR512:$src2), (bc_v16f32 (v16i32 immAllZerosV)), @@ -2840,7 +2840,7 @@ let hasSideEffects = 0 in { defm VCVTPD2PSZ : avx512_vcvt_fp_with_rc<0x5A, "vcvtpd2ps", VR512, VR256X, fround, memopv8f64, f512mem, v8f32, v8f64, - SSEPackedSingle>, EVEX_V512, VEX_W, OpSize, + SSEPackedSingle>, EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>; defm VCVTPS2PDZ : avx512_vcvt_fp<0x5A, "vcvtps2pd", VR256X, VR512, fextend, @@ -2877,7 +2877,7 @@ defm VCVTTPS2DQZ : avx512_vcvt_fp<0x5B, "vcvttps2dq", VR512, VR512, fp_to_sint, defm VCVTTPD2DQZ : avx512_vcvt_fp<0xE6, "vcvttpd2dq", VR512, VR256X, fp_to_sint, memopv8f64, f512mem, v8i32, v8f64, - SSEPackedDouble>, EVEX_V512, OpSize, VEX_W, + SSEPackedDouble>, EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>; defm VCVTTPS2UDQZ : avx512_vcvt_fp<0x78, "vcvttps2udq", VR512, VR512, fp_to_uint, @@ -2946,7 +2946,7 @@ let hasSideEffects = 0 in { } defm VCVTPS2DQZ : avx512_vcvt_fp2int<0x5B, "vcvtps2dq", VR512, VR512, - memopv16f32, f512mem, SSEPackedSingle>, OpSize, + memopv16f32, f512mem, SSEPackedSingle>, PD, EVEX_V512, EVEX_CD8<32, CD8VF>; defm VCVTPD2DQZ : avx512_vcvt_fp2int<0xE6, "vcvtpd2dq", VR512, VR256X, memopv8f64, f512mem, SSEPackedDouble>, XD, VEX_W, @@ -3019,14 +3019,14 @@ let Defs = [EFLAGS], Predicates = [HasAVX512] in { "ucomiss">, TB, EVEX, VEX_LIG, EVEX_CD8<32, 
CD8VT1>; defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64, - "ucomisd">, TB, OpSize, EVEX, + "ucomisd">, PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; let Pattern = [] in { defm VCOMISSZ : sse12_ord_cmp<0x2F, VR128X, undef, v4f32, f128mem, load, "comiss">, TB, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VCOMISDZ : sse12_ord_cmp<0x2F, VR128X, undef, v2f64, f128mem, load, - "comisd">, TB, OpSize, EVEX, + "comisd">, PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; } let isCodeGenOnly = 1 in { @@ -3034,14 +3034,14 @@ let Defs = [EFLAGS], Predicates = [HasAVX512] in { load, "ucomiss">, TB, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm Int_VUCOMISDZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v2f64, f128mem, - load, "ucomisd">, TB, OpSize, EVEX, + load, "ucomisd">, PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; defm Int_VCOMISSZ : sse12_ord_cmp<0x2F, VR128X, X86comi, v4f32, f128mem, load, "comiss">, TB, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm Int_VCOMISDZ : sse12_ord_cmp<0x2F, VR128X, X86comi, v2f64, f128mem, - load, "comisd">, TB, OpSize, EVEX, + load, "comisd">, PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; } } @@ -3796,7 +3796,7 @@ multiclass avx512_shufp, EVEX_V512, EVEX_CD8<32, CD8VF>; defm VSHUFPDZ : avx512_shufp, OpSize, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>; + SSEPackedDouble>, PD, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>; def : Pat<(v16i32 (X86Shufp VR512:$src1, VR512:$src2, (i8 imm:$imm))), (VSHUFPSZrri VR512:$src1, VR512:$src2, imm:$imm)>; diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td index ad2b00e14ff..afa69507a74 100644 --- a/lib/Target/X86/X86InstrArithmetic.td +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -1363,21 +1363,21 @@ let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in { let SchedRW = [WriteALU] in { def ADCX32rr : I<0xF6, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "adcx{l}\t{$src, $dst|$dst, $src}", - [], IIC_BIN_NONMEM>, T8, OpSize; + [], IIC_BIN_NONMEM>, T8PD; def 
ADCX64rr : I<0xF6, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "adcx{q}\t{$src, $dst|$dst, $src}", - [], IIC_BIN_NONMEM>, T8, OpSize, REX_W, Requires<[In64BitMode]>; + [], IIC_BIN_NONMEM>, T8PD, REX_W, Requires<[In64BitMode]>; } // SchedRW let mayLoad = 1, SchedRW = [WriteALULd] in { def ADCX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "adcx{l}\t{$src, $dst|$dst, $src}", - [], IIC_BIN_MEM>, T8, OpSize; + [], IIC_BIN_MEM>, T8PD; def ADCX64rm : I<0xF6, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "adcx{q}\t{$src, $dst|$dst, $src}", - [], IIC_BIN_MEM>, T8, OpSize, REX_W, Requires<[In64BitMode]>; + [], IIC_BIN_MEM>, T8PD, REX_W, Requires<[In64BitMode]>; } } diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index c17815a4e6d..2cc5339ef8a 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -138,6 +138,9 @@ class TAXD { bits<5> Prefix = 19; } class XOP8 { bits<5> Prefix = 20; } class XOP9 { bits<5> Prefix = 21; } class XOPA { bits<5> Prefix = 22; } +class PD { bits<5> Prefix = 23; } +class T8PD { bits<5> Prefix = 24; } +class TAPD { bits<5> Prefix = 25; } class VEX { bit hasVEXPrefix = 1; } class VEX_W { bit hasVEX_WPrefix = 1; } class VEX_4V : VEX { bit hasVEX_4VPrefix = 1; } @@ -340,6 +343,7 @@ class Iseg32 o, Format f, dag outs, dag ins, string asm, def __xs : XS; def __xd : XD; +def __pd : PD; // SI - SSE 1 & 2 scalar instructions class SI o, Format F, dag outs, dag ins, string asm, @@ -349,7 +353,7 @@ class SI o, Format F, dag outs, dag ins, string asm, !if(hasVEXPrefix /* VEX */, [UseAVX], !if(!eq(Prefix, __xs.Prefix), [UseSSE1], !if(!eq(Prefix, __xd.Prefix), [UseSSE2], - !if(hasOpSizePrefix, [UseSSE2], [UseSSE1]))))); + !if(!eq(Prefix, __pd.Prefix), [UseSSE2], [UseSSE1]))))); // AVX instructions have a 'v' prefix in the mnemonic let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm); @@ -373,7 +377,7 @@ class PI o, Format F, dag outs, dag ins, string asm, list 
pattern, : I { let Predicates = !if(hasEVEXPrefix /* EVEX */, [HasAVX512], !if(hasVEXPrefix /* VEX */, [HasAVX], - !if(hasOpSizePrefix /* OpSize */, [UseSSE2], [UseSSE1]))); + !if(!eq(Prefix, __pd.Prefix), [UseSSE2], [UseSSE1]))); // AVX instructions have a 'v' prefix in the mnemonic let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm); @@ -383,7 +387,7 @@ class PI o, Format F, dag outs, dag ins, string asm, list pattern, class MMXPI o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin, Domain d> : I { - let Predicates = !if(hasOpSizePrefix /* OpSize */, [HasSSE2], [HasSSE1]); + let Predicates = !if(!eq(Prefix, __pd.Prefix), [HasSSE2], [HasSSE1]); } // PIi8 - SSE 1 & 2 packed instructions with immediate @@ -392,7 +396,7 @@ class PIi8 o, Format F, dag outs, dag ins, string asm, : Ii8 { let Predicates = !if(hasEVEXPrefix /* EVEX */, [HasAVX512], !if(hasVEXPrefix /* VEX */, [HasAVX], - !if(hasOpSizePrefix /* OpSize */, [UseSSE2], [UseSSE1]))); + !if(!eq(Prefix, __pd.Prefix), [UseSSE2], [UseSSE1]))); // AVX instructions have a 'v' prefix in the mnemonic let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm); @@ -435,13 +439,13 @@ class VPSI o, Format F, dag outs, dag ins, string asm, // SDIi8 - SSE2 instructions with ImmT == Imm8 and XD prefix. // S2SI - SSE2 instructions with XS prefix. // SSDIi8 - SSE2 instructions with ImmT == Imm8 and XS prefix. -// PDI - SSE2 instructions with TB and OpSize prefixes, packed double domain. -// PDIi8 - SSE2 instructions with ImmT == Imm8 and TB and OpSize prefixes. +// PDI - SSE2 instructions with PD prefix, packed double domain. +// PDIi8 - SSE2 instructions with ImmT == Imm8 and PD prefix. // VSDI - SSE2 scalar instructions with XD prefix in AVX form. -// VPDI - SSE2 vector instructions with TB and OpSize prefixes in AVX form, +// VPDI - SSE2 vector instructions with PD prefix in AVX form, // packed double domain. -// VS2I - SSE2 scalar instructions with TB and OpSize prefixes in AVX form. 
-// S2I - SSE2 scalar instructions with TB and OpSize prefixes. +// VS2I - SSE2 scalar instructions with PD prefix in AVX form. +// S2I - SSE2 scalar instructions with PD prefix. // MMXSDIi8 - SSE2 instructions with ImmT == Imm8 and XD prefix as well as // MMX operands. // MMXSSDIi8 - SSE2 instructions with ImmT == Imm8 and XS prefix as well as @@ -461,11 +465,11 @@ class S2SIi8 o, Format F, dag outs, dag ins, string asm, : Ii8, XS, Requires<[UseSSE2]>; class PDI o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> - : I, TB, OpSize, + : I, PD, Requires<[UseSSE2]>; class PDIi8 o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> - : Ii8, TB, OpSize, + : Ii8, PD, Requires<[UseSSE2]>; class VSDI o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> @@ -477,16 +481,15 @@ class VS2SI o, Format F, dag outs, dag ins, string asm, Requires<[HasAVX]>; class VPDI o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> - : I, TB, - OpSize, Requires<[HasAVX]>; + : I, + PD, Requires<[HasAVX]>; class VS2I o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> - : I, TB, - OpSize, Requires<[UseAVX]>; + : I, PD, + Requires<[UseAVX]>; class S2I o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> - : I, TB, - OpSize, Requires<[UseSSE2]>; + : I, PD, Requires<[UseSSE2]>; class MMXSDIi8 o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> : Ii8, XD, Requires<[HasSSE2]>; @@ -496,7 +499,7 @@ class MMXS2SIi8 o, Format F, dag outs, dag ins, string asm, // SSE3 Instruction Templates: // -// S3I - SSE3 instructions with TB and OpSize prefixes. +// S3I - SSE3 instructions with PD prefixes. // S3SI - SSE3 instructions with XS prefix. // S3DI - SSE3 instructions with XD prefix. 
@@ -510,7 +513,7 @@ class S3DI o, Format F, dag outs, dag ins, string asm, Requires<[UseSSE3]>; class S3I o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> - : I, TB, OpSize, + : I, PD, Requires<[UseSSE3]>; @@ -527,11 +530,11 @@ class S3I o, Format F, dag outs, dag ins, string asm, class SS38I o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> - : I, T8, + : I, T8PD, Requires<[UseSSSE3]>; class SS3AI o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> - : Ii8, TA, + : Ii8, TAPD, Requires<[UseSSSE3]>; class MMXSS38I o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> @@ -549,11 +552,11 @@ class MMXSS3AI o, Format F, dag outs, dag ins, string asm, // class SS48I o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> - : I, T8, + : I, T8PD, Requires<[UseSSE41]>; class SS4AIi8 o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> - : Ii8, TA, + : Ii8, TAPD, Requires<[UseSSE41]>; // SSE4.2 Instruction Templates: @@ -561,7 +564,7 @@ class SS4AIi8 o, Format F, dag outs, dag ins, string asm, // SS428I - SSE 4.2 instructions with T8 prefix. class SS428I o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> - : I, T8, + : I, T8PD, Requires<[UseSSE42]>; // SS42FI - SSE 4.2 instructions with T8XD prefix. @@ -573,53 +576,53 @@ class SS42FI o, Format F, dag outs, dag ins, string asm, // SS42AI = SSE 4.2 instructions with TA prefix class SS42AI o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> - : Ii8, TA, + : Ii8, TAPD, Requires<[UseSSE42]>; // AVX Instruction Templates: // Instructions introduced in AVX (no SSE equivalent forms) // -// AVX8I - AVX instructions with T8 and OpSize prefix. -// AVXAIi8 - AVX instructions with TA, OpSize prefix and ImmT = Imm8. 
+// AVX8I - AVX instructions with T8PD prefix. +// AVXAIi8 - AVX instructions with TAPD prefix and ImmT = Imm8. class AVX8I o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> - : I, T8, OpSize, + : I, T8PD, Requires<[HasAVX]>; class AVXAIi8 o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> - : Ii8, TA, OpSize, + : Ii8, TAPD, Requires<[HasAVX]>; // AVX2 Instruction Templates: // Instructions introduced in AVX2 (no SSE equivalent forms) // -// AVX28I - AVX2 instructions with T8 and OpSize prefix. -// AVX2AIi8 - AVX2 instructions with TA, OpSize prefix and ImmT = Imm8. +// AVX28I - AVX2 instructions with T8PD prefix. +// AVX2AIi8 - AVX2 instructions with TAPD prefix and ImmT = Imm8. class AVX28I o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> - : I, T8, OpSize, + : I, T8PD, Requires<[HasAVX2]>; class AVX2AIi8 o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> - : Ii8, TA, OpSize, + : Ii8, TAPD, Requires<[HasAVX2]>; // AVX-512 Instruction Templates: // Instructions introduced in AVX-512 (no SSE equivalent forms) // -// AVX5128I - AVX-512 instructions with T8 and OpSize prefix. -// AVX512AIi8 - AVX-512 instructions with TA, OpSize prefix and ImmT = Imm8. -// AVX512PDI - AVX-512 instructions with TB, OpSize, double packed. +// AVX5128I - AVX-512 instructions with T8PD prefix. +// AVX512AIi8 - AVX-512 instructions with TAPD prefix and ImmT = Imm8. +// AVX512PDI - AVX-512 instructions with PD, double packed. // AVX512PSI - AVX-512 instructions with TB, single packed. // AVX512XS8I - AVX-512 instructions with T8 and XS prefixes. // AVX512XSI - AVX-512 instructions with XS prefix, generic domain. -// AVX512BI - AVX-512 instructions with TB, OpSize, int packed domain. -// AVX512SI - AVX-512 scalar instructions with TB and OpSize prefixes. +// AVX512BI - AVX-512 instructions with PD, int packed domain. 
+// AVX512SI - AVX-512 scalar instructions with PD prefix. class AVX5128I o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> - : I, T8, OpSize, + : I, T8PD, Requires<[HasAVX512]>; class AVX512XS8I o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> @@ -635,28 +638,28 @@ class AVX512XDI o, Format F, dag outs, dag ins, string asm, Requires<[HasAVX512]>; class AVX512BI o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> - : I, TB, OpSize, + : I, PD, Requires<[HasAVX512]>; class AVX512BIi8 o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> - : Ii8, TB, OpSize, + : Ii8, PD, Requires<[HasAVX512]>; class AVX512SI o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> - : I, TB, OpSize, + : I, PD, Requires<[HasAVX512]>; class AVX512AIi8 o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> - : Ii8, TA, OpSize, + : Ii8, TAPD, Requires<[HasAVX512]>; class AVX512Ii8 o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> : Ii8, TB, - Requires<[HasAVX512]>; + Requires<[HasAVX512]>; class AVX512PDI o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> - : I, TB, - OpSize, Requires<[HasAVX512]>; + : I, PD, + Requires<[HasAVX512]>; class AVX512PSI o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> : I, TB, @@ -669,8 +672,8 @@ class AVX512PI o, Format F, dag outs, dag ins, string asm, : I, TB, Requires<[HasAVX512]>; class AVX512FMA3 o, Format F, dag outs, dag ins, string asm, listpattern, InstrItinClass itin = NoItinerary> - : I, T8, - OpSize, EVEX_4V, Requires<[HasAVX512]>; + : I, T8PD, + EVEX_4V, Requires<[HasAVX512]>; // AES Instruction Templates: // @@ -678,36 +681,36 @@ class AVX512FMA3 o, Format F, dag outs, dag ins, 
string asm, // These use the same encoding as the SSE4.2 T8 and TA encodings. class AES8I o, Format F, dag outs, dag ins, string asm, listpattern, InstrItinClass itin = IIC_AES> - : I, T8, + : I, T8PD, Requires<[HasAES]>; class AESAI o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> - : Ii8, TA, + : Ii8, TAPD, Requires<[HasAES]>; // PCLMUL Instruction Templates class PCLMULIi8 o, Format F, dag outs, dag ins, string asm, listpattern, InstrItinClass itin = NoItinerary> - : Ii8, TA, - OpSize, Requires<[HasPCLMUL]>; + : Ii8, TAPD, + Requires<[HasPCLMUL]>; class AVXPCLMULIi8 o, Format F, dag outs, dag ins, string asm, listpattern, InstrItinClass itin = NoItinerary> - : Ii8, TA, - OpSize, VEX_4V, Requires<[HasAVX, HasPCLMUL]>; + : Ii8, TAPD, + VEX_4V, Requires<[HasAVX, HasPCLMUL]>; // FMA3 Instruction Templates class FMA3 o, Format F, dag outs, dag ins, string asm, listpattern, InstrItinClass itin = NoItinerary> - : I, T8, - OpSize, VEX_4V, FMASC, Requires<[HasFMA]>; + : I, T8PD, + VEX_4V, FMASC, Requires<[HasFMA]>; // FMA4 Instruction Templates class FMA4 o, Format F, dag outs, dag ins, string asm, listpattern, InstrItinClass itin = NoItinerary> - : Ii8, TA, - OpSize, VEX_4V, VEX_I8IMM, FMASC, Requires<[HasFMA4]>; + : Ii8, TAPD, + VEX_4V, VEX_I8IMM, FMASC, Requires<[HasFMA4]>; // XOP 2, 3 and 4 Operand Instruction Template class IXOP o, Format F, dag outs, dag ins, string asm, @@ -724,8 +727,8 @@ class IXOPi8 o, Format F, dag outs, dag ins, string asm, // XOP 5 operand instruction (VEX encoding!) class IXOP5 o, Format F, dag outs, dag ins, string asm, listpattern, InstrItinClass itin = NoItinerary> - : Ii8, TA, - OpSize, VEX_4V, VEX_I8IMM, Requires<[HasXOP]>; + : Ii8, TAPD, + VEX_4V, VEX_I8IMM, Requires<[HasXOP]>; // X86-64 Instruction templates... // @@ -782,7 +785,7 @@ class VRS2I o, Format F, dag outs, dag ins, string asm, // MMXI - MMX instructions with TB prefix. 
// MMXI32 - MMX instructions with TB prefix valid only in 32 bit mode. // MMXI64 - MMX instructions with TB prefix valid only in 64 bit mode. -// MMX2I - MMX / SSE2 instructions with TB and OpSize prefixes. +// MMX2I - MMX / SSE2 instructions with PD prefix. // MMXIi8 - MMX instructions with ImmT == Imm8 and TB prefix. // MMXIi8 - MMX instructions with ImmT == Imm8 and TB prefix. // MMXID - MMX instructions with XD prefix. @@ -801,7 +804,7 @@ class MMXRI o, Format F, dag outs, dag ins, string asm, : I, TB, REX_W, Requires<[HasMMX]>; class MMX2I o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> - : I, TB, OpSize, Requires<[HasMMX]>; + : I, PD, Requires<[HasMMX]>; class MMXIi8 o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> : Ii8, TB, Requires<[HasMMX]>; diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index ba58143e89e..5126313de6d 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -527,16 +527,16 @@ defm MMX_CVTPS2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtps2pi, MMX_CVT_PS_ITINS, SSEPackedSingle>, TB; defm MMX_CVTPD2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtpd2pi, f128mem, memop, "cvtpd2pi\t{$src, $dst|$dst, $src}", - MMX_CVT_PD_ITINS, SSEPackedDouble>, TB, OpSize; + MMX_CVT_PD_ITINS, SSEPackedDouble>, PD; defm MMX_CVTTPS2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttps2pi, f64mem, load, "cvttps2pi\t{$src, $dst|$dst, $src}", MMX_CVT_PS_ITINS, SSEPackedSingle>, TB; defm MMX_CVTTPD2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttpd2pi, f128mem, memop, "cvttpd2pi\t{$src, $dst|$dst, $src}", - MMX_CVT_PD_ITINS, SSEPackedDouble>, TB, OpSize; + MMX_CVT_PD_ITINS, SSEPackedDouble>, PD; defm MMX_CVTPI2PD : sse12_cvt_pint<0x2A, VR64, VR128, int_x86_sse_cvtpi2pd, i64mem, load, "cvtpi2pd\t{$src, $dst|$dst, $src}", - MMX_CVT_PD_ITINS, SSEPackedDouble>, TB, OpSize; + MMX_CVT_PD_ITINS, 
SSEPackedDouble>, PD; let Constraints = "$src1 = $dst" in { defm MMX_CVTPI2PS : sse12_cvt_pint_3addr<0x2A, VR64, VR128, int_x86_sse_cvtpi2ps, diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 5bc8f3330bc..8cf08ad75f2 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -815,38 +815,38 @@ defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, TB, VEX; defm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, "movapd", SSEPackedDouble, SSE_MOVA_ITINS>, - TB, OpSize, VEX; + PD, VEX; defm VMOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32, "movups", SSEPackedSingle, SSE_MOVU_ITINS>, TB, VEX; defm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, "movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>, - TB, OpSize, VEX; + PD, VEX; defm VMOVAPSY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv8f32, "movaps", SSEPackedSingle, SSE_MOVA_ITINS>, TB, VEX, VEX_L; defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64, "movapd", SSEPackedDouble, SSE_MOVA_ITINS>, - TB, OpSize, VEX, VEX_L; + PD, VEX, VEX_L; defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32, "movups", SSEPackedSingle, SSE_MOVU_ITINS>, TB, VEX, VEX_L; defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64, "movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>, - TB, OpSize, VEX, VEX_L; + PD, VEX, VEX_L; defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, "movaps", SSEPackedSingle, SSE_MOVA_ITINS>, TB; defm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, "movapd", SSEPackedDouble, SSE_MOVA_ITINS>, - TB, OpSize; + PD; defm MOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32, "movups", SSEPackedSingle, SSE_MOVU_ITINS>, TB; defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, "movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>, - TB, OpSize; + PD; let SchedRW = [WriteStore] in { def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins 
f128mem:$dst, VR128:$src), @@ -1150,7 +1150,7 @@ multiclass sse12_mov_hilo_packed_baseopc, SDNode psnode, SDNode pdnode, !strconcat(base_opc, "d", asm_opr), [(set VR128:$dst, (v2f64 (pdnode VR128:$src1, (scalar_to_vector (loadf64 addr:$src2)))))], - itin, SSEPackedDouble>, TB, OpSize, + itin, SSEPackedDouble>, PD, Sched<[WriteShuffleLd, ReadAfterLd]>; } @@ -2393,47 +2393,47 @@ let Defs = [EFLAGS] in { defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32, "ucomiss">, TB, VEX, VEX_LIG; defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64, - "ucomisd">, TB, OpSize, VEX, VEX_LIG; + "ucomisd">, PD, VEX, VEX_LIG; let Pattern = [] in { defm VCOMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load, "comiss">, TB, VEX, VEX_LIG; defm VCOMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load, - "comisd">, TB, OpSize, VEX, VEX_LIG; + "comisd">, PD, VEX, VEX_LIG; } let isCodeGenOnly = 1 in { defm Int_VUCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem, load, "ucomiss">, TB, VEX; defm Int_VUCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem, - load, "ucomisd">, TB, OpSize, VEX; + load, "ucomisd">, PD, VEX; defm Int_VCOMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem, load, "comiss">, TB, VEX; defm Int_VCOMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem, - load, "comisd">, TB, OpSize, VEX; + load, "comisd">, PD, VEX; } defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32, "ucomiss">, TB; defm UCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64, - "ucomisd">, TB, OpSize; + "ucomisd">, PD; let Pattern = [] in { defm COMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load, "comiss">, TB; defm COMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load, - "comisd">, TB, OpSize; + "comisd">, PD; } let isCodeGenOnly = 1 in { defm Int_UCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem, load, "ucomiss">, TB; defm Int_UCOMISD : sse12_ord_cmp<0x2E, 
VR128, X86ucomi, v2f64, f128mem, - load, "ucomisd">, TB, OpSize; + load, "ucomisd">, PD; defm Int_COMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem, load, "comiss">, TB; defm Int_COMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem, load, - "comisd">, TB, OpSize; + "comisd">, PD; } } // Defs = [EFLAGS] @@ -2472,7 +2472,7 @@ defm VCMPPS : sse12_cmp_packed, TB, OpSize, VEX_4V; + SSEPackedDouble>, PD, VEX_4V; defm VCMPPSY : sse12_cmp_packed, TB, OpSize, VEX_4V, VEX_L; + SSEPackedDouble>, PD, VEX_4V, VEX_L; let Constraints = "$src1 = $dst" in { defm CMPPS : sse12_cmp_packed, TB, OpSize; + SSEPackedDouble, SSE_ALU_F64P>, PD; } let Predicates = [HasAVX] in { @@ -2555,20 +2555,18 @@ defm VSHUFPSY : sse12_shuffle, TB, VEX_4V, VEX_L; defm VSHUFPD : sse12_shuffle, TB, OpSize, VEX_4V; + loadv2f64, SSEPackedDouble>, PD, VEX_4V; defm VSHUFPDY : sse12_shuffle, TB, OpSize, VEX_4V, VEX_L; + loadv4f64, SSEPackedDouble>, PD, VEX_4V, VEX_L; let Constraints = "$src1 = $dst" in { defm SHUFPS : sse12_shuffle, - TB; + memopv4f32, SSEPackedSingle, 1 /* cvt to pshufd */>, TB; defm SHUFPD : sse12_shuffle, - TB, OpSize; + memopv2f64, SSEPackedDouble, 1 /* cvt to pshufd */>, PD; } let Predicates = [HasAVX] in { @@ -2643,26 +2641,26 @@ defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, loadv4f32, SSEPackedSingle>, TB, VEX_4V; defm VUNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, loadv2f64, VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedDouble>, TB, OpSize, VEX_4V; + SSEPackedDouble>, PD, VEX_4V; defm VUNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, loadv4f32, VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", SSEPackedSingle>, TB, VEX_4V; defm VUNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, loadv2f64, VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedDouble>, TB, OpSize, VEX_4V; + SSEPackedDouble>, PD, VEX_4V; defm VUNPCKHPSY: 
sse12_unpack_interleave<0x15, X86Unpckh, v8f32, loadv8f32, VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", SSEPackedSingle>, TB, VEX_4V, VEX_L; defm VUNPCKHPDY: sse12_unpack_interleave<0x15, X86Unpckh, v4f64, loadv4f64, VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedDouble>, TB, OpSize, VEX_4V, VEX_L; + SSEPackedDouble>, PD, VEX_4V, VEX_L; defm VUNPCKLPSY: sse12_unpack_interleave<0x14, X86Unpckl, v8f32, loadv8f32, VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", SSEPackedSingle>, TB, VEX_4V, VEX_L; defm VUNPCKLPDY: sse12_unpack_interleave<0x14, X86Unpckl, v4f64, loadv4f64, VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedDouble>, TB, OpSize, VEX_4V, VEX_L; + SSEPackedDouble>, PD, VEX_4V, VEX_L; let Constraints = "$src1 = $dst" in { defm UNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memopv4f32, @@ -2670,13 +2668,13 @@ let Constraints = "$src1 = $dst" in { SSEPackedSingle>, TB; defm UNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, memopv2f64, VR128, f128mem, "unpckhpd\t{$src2, $dst|$dst, $src2}", - SSEPackedDouble>, TB, OpSize; + SSEPackedDouble>, PD; defm UNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, memopv4f32, VR128, f128mem, "unpcklps\t{$src2, $dst|$dst, $src2}", SSEPackedSingle>, TB; defm UNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, memopv2f64, VR128, f128mem, "unpcklpd\t{$src2, $dst|$dst, $src2}", - SSEPackedDouble>, TB, OpSize; + SSEPackedDouble>, PD; } // Constraints = "$src1 = $dst" let Predicates = [HasAVX1Only] in { @@ -2734,14 +2732,13 @@ let Predicates = [HasAVX] in { defm VMOVMSKPS : sse12_extr_sign_mask, TB, VEX; defm VMOVMSKPD : sse12_extr_sign_mask, TB, - OpSize, VEX; + "movmskpd", SSEPackedDouble>, PD, VEX; defm VMOVMSKPSY : sse12_extr_sign_mask, TB, VEX, VEX_L; defm VMOVMSKPDY : sse12_extr_sign_mask, TB, - OpSize, VEX, VEX_L; + "movmskpd", SSEPackedDouble>, PD, + VEX, VEX_L; def : Pat<(i32 
(X86fgetsign FR32:$src)), (VMOVMSKPSrr (COPY_TO_REGCLASS FR32:$src, VR128))>; @@ -2758,7 +2755,7 @@ let Predicates = [HasAVX] in { defm MOVMSKPS : sse12_extr_sign_mask, TB; defm MOVMSKPD : sse12_extr_sign_mask, TB, OpSize; + SSEPackedDouble>, PD; def : Pat<(i32 (X86fgetsign FR32:$src)), (MOVMSKPSrr (COPY_TO_REGCLASS FR32:$src, VR128))>, @@ -2845,7 +2842,7 @@ multiclass sse12_fp_alias_pack_logical opc, string OpcodeStr, defm V#NAME#PD : sse12_fp_packed, - TB, OpSize, VEX_4V; + PD, VEX_4V; let Constraints = "$src1 = $dst" in { defm PS : sse12_fp_packed opc, string OpcodeStr, defm PD : sse12_fp_packed, - TB, OpSize; + PD; } } @@ -2888,7 +2885,7 @@ multiclass sse12_fp_packed_logical opc, string OpcodeStr, (bc_v4i64 (v4f64 VR256:$src2))))], [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)), (loadv4i64 addr:$src2)))], 0>, - TB, OpSize, VEX_4V, VEX_L; + PD, VEX_4V, VEX_L; // In AVX no need to add a pattern for 128-bit logical rr ps, because they // are all promoted to v2i64, and the patterns are covered by the int @@ -2905,7 +2902,7 @@ multiclass sse12_fp_packed_logical opc, string OpcodeStr, (bc_v2i64 (v2f64 VR128:$src2))))], [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), (loadv2i64 addr:$src2)))], 0>, - TB, OpSize, VEX_4V; + PD, VEX_4V; let Constraints = "$src1 = $dst" in { defm PS : sse12_fp_packed_logical_rm opc, string OpcodeStr, [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), (bc_v2i64 (v2f64 VR128:$src2))))], [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), - (memopv2i64 addr:$src2)))]>, TB, OpSize; + (memopv2i64 addr:$src2)))]>, PD; } } @@ -2953,14 +2950,14 @@ multiclass basic_sse12_fp_binop_p opc, string OpcodeStr, SSEPackedSingle, itins.s, 0>, TB, VEX_4V; defm V#NAME#PD : sse12_fp_packed, TB, OpSize, VEX_4V; + SSEPackedDouble, itins.d, 0>, PD, VEX_4V; defm V#NAME#PSY : sse12_fp_packed, TB, VEX_4V, VEX_L; defm V#NAME#PDY : sse12_fp_packed, TB, OpSize, VEX_4V, VEX_L; + SSEPackedDouble, itins.d, 0>, PD, VEX_4V, VEX_L; let 
Constraints = "$src1 = $dst" in { defm PS : sse12_fp_packed opc, string OpcodeStr, itins.s>, TB; defm PD : sse12_fp_packed, TB, OpSize; + itins.d>, PD; } } @@ -4354,7 +4351,7 @@ let Predicates = [UseSSE2] in { } } // ExeDomain = SSEPackedInt -defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, v8i32, X86PShufd>, TB, OpSize; +defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, v8i32, X86PShufd>, PD; defm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, v16i16, X86PShufhw>, XS; defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, v16i16, X86PShuflw>, XD; @@ -4507,7 +4504,7 @@ def VPEXTRWri : Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst), (ins VR128:$src1, i32i8imm:$src2), "vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32orGR64:$dst, (X86pextrw (v8i16 VR128:$src1), - imm:$src2))]>, TB, OpSize, VEX, + imm:$src2))]>, PD, VEX, Sched<[WriteShuffle]>; def PEXTRWri : PDIi8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst), (ins VR128:$src1, i32i8imm:$src2), @@ -4518,10 +4515,10 @@ def PEXTRWri : PDIi8<0xC5, MRMSrcReg, // Insert let Predicates = [HasAVX] in -defm VPINSRW : sse2_pinsrw<0>, TB, OpSize, VEX_4V; +defm VPINSRW : sse2_pinsrw<0>, PD, VEX_4V; let Predicates = [UseSSE2], Constraints = "$src1 = $dst" in -defm PINSRW : sse2_pinsrw, TB, OpSize; +defm PINSRW : sse2_pinsrw, PD; } // ExeDomain = SSEPackedInt @@ -5160,24 +5157,24 @@ multiclass sse3_addsub, TB, XD, VEX_4V; + f128mem, SSE_ALU_F32P, 0>, XD, VEX_4V; defm VADDSUBPSY : sse3_addsub, TB, XD, VEX_4V, VEX_L; + f256mem, SSE_ALU_F32P, 0>, XD, VEX_4V, VEX_L; } let ExeDomain = SSEPackedDouble in { defm VADDSUBPD : sse3_addsub, TB, OpSize, VEX_4V; + f128mem, SSE_ALU_F64P, 0>, PD, VEX_4V; defm VADDSUBPDY : sse3_addsub, TB, OpSize, VEX_4V, VEX_L; + f256mem, SSE_ALU_F64P, 0>, PD, VEX_4V, VEX_L; } } let Constraints = "$src1 = $dst", Predicates = [UseSSE3] in { let ExeDomain = SSEPackedSingle in defm ADDSUBPS : sse3_addsub, TB, XD; + f128mem, SSE_ALU_F32P>, XD; let ExeDomain = SSEPackedDouble in defm ADDSUBPD : sse3_addsub, TB, OpSize; + f128mem, 
SSE_ALU_F64P>, PD; } //===---------------------------------------------------------------------===// @@ -5264,7 +5261,7 @@ multiclass SS3I_unop_rm_int opc, string OpcodeStr, (ins VR128:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (IntId128 VR128:$src))], IIC_SSE_PABS_RR>, - OpSize, Sched<[WriteVecALU]>; + Sched<[WriteVecALU]>; def rm128 : SS38I opc, string OpcodeStr, [(set VR128:$dst, (IntId128 (bitconvert (memopv2i64 addr:$src))))], IIC_SSE_PABS_RM>, - OpSize, Sched<[WriteVecALULd]>; + Sched<[WriteVecALULd]>; } /// SS3I_unop_rm_int_y - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}. @@ -5282,14 +5279,14 @@ multiclass SS3I_unop_rm_int_y opc, string OpcodeStr, (ins VR256:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (IntId256 VR256:$src))]>, - OpSize, Sched<[WriteVecALU]>; + Sched<[WriteVecALU]>; def rm256 : SS38I, OpSize, + (bitconvert (memopv4i64 addr:$src))))]>, Sched<[WriteVecALULd]>; } @@ -5409,7 +5406,7 @@ multiclass SS3I_binop_rm opc, string OpcodeStr, SDNode OpNode, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>, - OpSize, Sched<[itins.Sched]>; + Sched<[itins.Sched]>; def rm : SS38I opc, string OpcodeStr, SDNode OpNode, !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (OpVT (OpNode RC:$src1, - (bitconvert (memop_frag addr:$src2)))))], itins.rm>, OpSize, + (bitconvert (memop_frag addr:$src2)))))], itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } @@ -5432,7 +5429,7 @@ multiclass SS3I_binop_rm_int opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, - OpSize, Sched<[itins.Sched]>; + Sched<[itins.Sched]>; def rm128 : SS38I opc, string OpcodeStr, !strconcat(OpcodeStr, 
"\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set VR128:$dst, (IntId128 VR128:$src1, - (bitconvert (memopv2i64 addr:$src2))))]>, OpSize, + (bitconvert (memopv2i64 addr:$src2))))]>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } @@ -5450,14 +5447,12 @@ multiclass SS3I_binop_rm_int_y opc, string OpcodeStr, def rr256 : SS38I, - OpSize; + [(set VR256:$dst, (IntId256 VR256:$src1, VR256:$src2))]>; def rm256 : SS38I, OpSize; + (IntId256 VR256:$src1, (bitconvert (loadv4i64 addr:$src2))))]>; } let ImmT = NoImm, Predicates = [HasAVX] in { @@ -5583,7 +5578,7 @@ multiclass ssse3_palignr { !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), - [], IIC_SSE_PALIGNRR>, OpSize, Sched<[WriteShuffle]>; + [], IIC_SSE_PALIGNRR>, Sched<[WriteShuffle]>; let mayLoad = 1 in def R128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2, i8imm:$src3), @@ -5591,7 +5586,7 @@ multiclass ssse3_palignr { !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), - [], IIC_SSE_PALIGNRM>, OpSize, Sched<[WriteShuffleLd, ReadAfterLd]>; + [], IIC_SSE_PALIGNRM>, Sched<[WriteShuffleLd, ReadAfterLd]>; } } @@ -5601,13 +5596,13 @@ multiclass ssse3_palignr_y { (ins VR256:$src1, VR256:$src2, i8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, OpSize, Sched<[WriteShuffle]>; + []>, Sched<[WriteShuffle]>; let mayLoad = 1 in def R256rm : SS3AI<0x0F, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2, i8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, OpSize, Sched<[WriteShuffleLd, ReadAfterLd]>; + []>, Sched<[WriteShuffleLd, ReadAfterLd]>; } } @@ -5687,25 +5682,24 @@ multiclass SS41I_binop_rm_int8 opc, string OpcodeStr, Intrinsic IntId, OpndItins itins = DEFAULT_ITINS> { def rr : SS48I, OpSize; + [(set VR128:$dst, (IntId 
VR128:$src))], itins.rr>; def rm : SS48I, OpSize; + itins.rm>; } multiclass SS41I_binop_rm_int16_y opc, string OpcodeStr, Intrinsic IntId> { def Yrr : SS48I, OpSize; + [(set VR256:$dst, (IntId VR128:$src))]>; def Yrm : SS48I, - OpSize; + [(set VR256:$dst, (IntId (load addr:$src)))]>; } let Predicates = [HasAVX] in { @@ -5867,27 +5861,25 @@ multiclass SS41I_binop_rm_int4 opc, string OpcodeStr, Intrinsic IntId, OpndItins itins = DEFAULT_ITINS> { def rr : SS48I, OpSize; + [(set VR128:$dst, (IntId VR128:$src))], itins.rr>; def rm : SS48I, - OpSize; + itins.rm>; } multiclass SS41I_binop_rm_int8_y opc, string OpcodeStr, Intrinsic IntId> { def Yrr : SS48I, OpSize; + [(set VR256:$dst, (IntId VR128:$src))]>; def Yrm : SS48I, - OpSize; + (IntId (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))]>; } let Predicates = [HasAVX] in { @@ -5951,28 +5943,26 @@ multiclass SS41I_binop_rm_int2 opc, string OpcodeStr, Intrinsic IntId, OpndItins itins = DEFAULT_ITINS> { def rr : SS48I, OpSize; + [(set VR128:$dst, (IntId VR128:$src))]>; // Expecting a i16 load any extended to i32 value. def rm : SS48I, - OpSize; + (v4i32 (scalar_to_vector (loadi16_anyext addr:$src))))))]>; } multiclass SS41I_binop_rm_int4_y opc, string OpcodeStr, Intrinsic IntId> { def Yrr : SS48I, OpSize; + [(set VR256:$dst, (IntId VR128:$src))]>; // Expecting a i16 load any extended to i32 value. 
def Yrm : SS48I, - OpSize; + (v4i32 (scalar_to_vector (loadi32 addr:$src))))))]>; } let Predicates = [HasAVX] in { @@ -6247,14 +6237,13 @@ multiclass SS41I_extract8 opc, string OpcodeStr> { !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set GR32orGR64:$dst, (X86pextrb (v16i8 VR128:$src1), - imm:$src2))]>, - OpSize; + imm:$src2))]>; let neverHasSideEffects = 1, mayStore = 1 in def mr : SS4AIi8, OpSize; + []>; // FIXME: // There's an AssertZext in the way of writing the store pattern // (store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), imm:$src2))), addr:$dst) @@ -6273,14 +6262,14 @@ multiclass SS41I_extract16 opc, string OpcodeStr> { (ins VR128:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, OpSize; + []>; let neverHasSideEffects = 1, mayStore = 1 in def mr : SS4AIi8, OpSize; + []>; // FIXME: // There's an AssertZext in the way of writing the store pattern // (store (i16 (trunc (X86pextrw (v16i8 VR128:$src1), imm:$src2))), addr:$dst) @@ -6299,13 +6288,13 @@ multiclass SS41I_extract32 opc, string OpcodeStr> { !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set GR32:$dst, - (extractelt (v4i32 VR128:$src1), imm:$src2))]>, OpSize; + (extractelt (v4i32 VR128:$src1), imm:$src2))]>; def mr : SS4AIi8, OpSize; + addr:$dst)]>; } let Predicates = [HasAVX] in @@ -6320,13 +6309,13 @@ multiclass SS41I_extract64 opc, string OpcodeStr> { !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set GR64:$dst, - (extractelt (v2i64 VR128:$src1), imm:$src2))]>, OpSize, REX_W; + (extractelt (v2i64 VR128:$src1), imm:$src2))]>, REX_W; def mr : SS4AIi8, OpSize, REX_W; + addr:$dst)]>, REX_W; } let Predicates = [HasAVX] in @@ -6344,14 +6333,13 @@ multiclass SS41I_extractf32 opc, string OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set GR32orGR64:$dst, (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))], - itins.rr>, - OpSize; + itins.rr>; def mr : SS4AIi8, OpSize; + 
addr:$dst)], itins.rm>; } let ExeDomain = SSEPackedSingle in { @@ -6384,7 +6372,7 @@ multiclass SS41I_insert8 opc, string asm, bit Is2Addr = 1> { !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), [(set VR128:$dst, - (X86pinsrb VR128:$src1, GR32orGR64:$src2, imm:$src3))]>, OpSize; + (X86pinsrb VR128:$src1, GR32orGR64:$src2, imm:$src3))]>; def rm : SS4AIi8 opc, string asm, bit Is2Addr = 1> { "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), [(set VR128:$dst, (X86pinsrb VR128:$src1, (extloadi8 addr:$src2), - imm:$src3))]>, OpSize; + imm:$src3))]>; } let Predicates = [HasAVX] in @@ -6409,8 +6397,7 @@ multiclass SS41I_insert32 opc, string asm, bit Is2Addr = 1> { !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), [(set VR128:$dst, - (v4i32 (insertelt VR128:$src1, GR32:$src2, imm:$src3)))]>, - OpSize; + (v4i32 (insertelt VR128:$src1, GR32:$src2, imm:$src3)))]>; def rm : SS4AIi8 opc, string asm, bit Is2Addr = 1> { "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), [(set VR128:$dst, (v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2), - imm:$src3)))]>, OpSize; + imm:$src3)))]>; } let Predicates = [HasAVX] in @@ -6435,8 +6422,7 @@ multiclass SS41I_insert64 opc, string asm, bit Is2Addr = 1> { !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), [(set VR128:$dst, - (v2i64 (insertelt VR128:$src1, GR64:$src2, imm:$src3)))]>, - OpSize; + (v2i64 (insertelt VR128:$src1, GR64:$src2, imm:$src3)))]>; def rm : SS4AIi8 opc, string asm, bit Is2Addr = 1> { "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), [(set VR128:$dst, (v2i64 (insertelt VR128:$src1, (loadi64 addr:$src2), - imm:$src3)))]>, OpSize; + imm:$src3)))]>; } let Predicates = [HasAVX] in @@ -6466,8 +6452,7 @@ multiclass SS41I_insertf32 opc, string asm, bit Is2Addr = 1, !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), [(set VR128:$dst, - (X86insrtps VR128:$src1, VR128:$src2, imm:$src3))], 
itins.rr>, - OpSize; + (X86insrtps VR128:$src1, VR128:$src2, imm:$src3))], itins.rr>; def rm : SS4AIi8 opc, string asm, bit Is2Addr = 1, [(set VR128:$dst, (X86insrtps VR128:$src1, (v4f32 (scalar_to_vector (loadf32 addr:$src2))), - imm:$src3))], itins.rm>, OpSize; + imm:$src3))], itins.rm>; } let ExeDomain = SSEPackedSingle in { @@ -6503,8 +6488,7 @@ let ExeDomain = SSEPackedSingle in { !strconcat(OpcodeStr, "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, (V4F32Int RC:$src1, imm:$src2))], - IIC_SSE_ROUNDPS_REG>, - OpSize; + IIC_SSE_ROUNDPS_REG>; // Vector intrinsic operation, mem def PSm : SS4AIi8, - OpSize; + IIC_SSE_ROUNDPS_MEM>; } // ExeDomain = SSEPackedSingle let ExeDomain = SSEPackedDouble in { @@ -6524,8 +6507,7 @@ let ExeDomain = SSEPackedDouble in { !strconcat(OpcodeStr, "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, (V2F64Int RC:$src1, imm:$src2))], - IIC_SSE_ROUNDPS_REG>, - OpSize; + IIC_SSE_ROUNDPS_REG>; // Vector intrinsic operation, mem def PDm : SS4AIi8, - OpSize; + IIC_SSE_ROUNDPS_REG>; } // ExeDomain = SSEPackedDouble } @@ -6553,7 +6534,7 @@ let ExeDomain = GenericDomain in { "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(OpcodeStr, "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), - []>, OpSize; + []>; // Intrinsic operation, reg. let isCodeGenOnly = 1 in @@ -6564,8 +6545,7 @@ let ExeDomain = GenericDomain in { "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(OpcodeStr, "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), - [(set VR128:$dst, (F32Int VR128:$src1, VR128:$src2, imm:$src3))]>, - OpSize; + [(set VR128:$dst, (F32Int VR128:$src1, VR128:$src2, imm:$src3))]>; // Intrinsic operation, mem. def SSm : SS4AIi8, - OpSize; + (F32Int VR128:$src1, sse_load_f32:$src2, imm:$src3))]>; // Operation, reg. 
let hasSideEffects = 0 in @@ -6588,7 +6567,7 @@ let ExeDomain = GenericDomain in { "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(OpcodeStr, "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), - []>, OpSize; + []>; // Intrinsic operation, reg. let isCodeGenOnly = 1 in @@ -6599,8 +6578,7 @@ let ExeDomain = GenericDomain in { "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(OpcodeStr, "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), - [(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2, imm:$src3))]>, - OpSize; + [(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2, imm:$src3))]>; // Intrinsic operation, mem. def SDm : SS4AIi8, - OpSize; + (F64Int VR128:$src1, sse_load_f64:$src2, imm:$src3))]>; } // ExeDomain = GenericDomain } @@ -6759,31 +6736,29 @@ let Defs = [EFLAGS], Predicates = [HasAVX] in { def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), "vptest\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>, - OpSize, VEX; + VEX; def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), "vptest\t{$src2, $src1|$src1, $src2}", [(set EFLAGS,(X86ptest VR128:$src1, (loadv2i64 addr:$src2)))]>, - OpSize, VEX; + VEX; def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2), "vptest\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86ptest VR256:$src1, (v4i64 VR256:$src2)))]>, - OpSize, VEX, VEX_L; + VEX, VEX_L; def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2), "vptest\t{$src2, $src1|$src1, $src2}", [(set EFLAGS,(X86ptest VR256:$src1, (loadv4i64 addr:$src2)))]>, - OpSize, VEX, VEX_L; + VEX, VEX_L; } let Defs = [EFLAGS] in { def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), "ptest\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>, - OpSize; + [(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>; def PTESTrm : SS48I<0x17, 
MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), "ptest\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86ptest VR128:$src1, (memopv2i64 addr:$src2)))]>, - OpSize; + [(set EFLAGS, (X86ptest VR128:$src1, (memopv2i64 addr:$src2)))]>; } // The bit test instructions below are AVX only @@ -6791,11 +6766,10 @@ multiclass avx_bittest opc, string OpcodeStr, RegisterClass RC, X86MemOperand x86memop, PatFrag mem_frag, ValueType vt> { def rr : SS48I, OpSize, VEX; + [(set EFLAGS, (X86testp RC:$src1, (vt RC:$src2)))]>, VEX; def rm : SS48I, - OpSize, VEX; + [(set EFLAGS, (X86testp RC:$src1, (mem_frag addr:$src2)))]>, VEX; } let Defs = [EFLAGS], Predicates = [HasAVX] in { @@ -6855,13 +6829,12 @@ multiclass SS41I_unop_rm_int_v16 opc, string OpcodeStr, def rr128 : SS48I, OpSize; + [(set VR128:$dst, (IntId128 VR128:$src))]>; def rm128 : SS48I, OpSize; + (IntId128 (bitconvert (memopv2i64 addr:$src))))]>; } let Predicates = [HasAVX] in @@ -6881,16 +6854,15 @@ multiclass SS41I_binop_rm_int opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))], - itins.rr>, OpSize; + itins.rr>; def rm : SS48I, OpSize; + (IntId128 VR128:$src1, (bitconvert (memopv2i64 addr:$src2))))], + itins.rm>; } /// SS41I_binop_rm_int_y - Simple SSE 4.1 binary operator @@ -6900,13 +6872,12 @@ multiclass SS41I_binop_rm_int_y opc, string OpcodeStr, def Yrr : SS48I, OpSize; + [(set VR256:$dst, (IntId256 VR256:$src1, VR256:$src2))]>; def Yrm : SS48I, OpSize; + (IntId256 VR256:$src1, (bitconvert (loadv4i64 addr:$src2))))]>; } @@ -6921,15 +6892,14 @@ multiclass SS48I_binop_rm opc, string OpcodeStr, SDNode OpNode, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>, OpSize; + [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>; def rm : 
SS48I, OpSize; + (OpVT (OpNode RC:$src1, (bitconvert (memop_frag addr:$src2)))))]>; } let Predicates = [HasAVX] in { @@ -7036,8 +7006,7 @@ multiclass SS41I_binop_rmi_int opc, string OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), - [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))], itins.rr>, - OpSize; + [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))], itins.rr>; def rmi : SS4AIi8 opc, string OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), [(set RC:$dst, (IntId RC:$src1, - (bitconvert (memop_frag addr:$src2)), imm:$src3))], itins.rm>, - OpSize; + (bitconvert (memop_frag addr:$src2)), imm:$src3))], itins.rm>; } let Predicates = [HasAVX] in { @@ -7128,7 +7096,7 @@ multiclass SS41I_quaternary_int_avx opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set RC:$dst, (IntId RC:$src1, RC:$src2, RC:$src3))], - NoItinerary, SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM; + NoItinerary, SSEPackedInt>, TAPD, VEX_4V, VEX_I8IMM; def rm : Ii8 opc, string OpcodeStr, [(set RC:$dst, (IntId RC:$src1, (bitconvert (mem_frag addr:$src2)), RC:$src3))], - NoItinerary, SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM; + NoItinerary, SSEPackedInt>, TAPD, VEX_4V, VEX_I8IMM; } let Predicates = [HasAVX] in { @@ -7228,7 +7196,7 @@ let Uses = [XMM0], Constraints = "$src1 = $dst" in { !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0))], - itins.rr>, OpSize; + itins.rr>; def rm0 : SS48I, OpSize; + itins.rm>; } } @@ -7297,16 +7265,15 @@ let Predicates = [HasAVX] in def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>, - OpSize, VEX; + VEX; let Predicates = [HasAVX2] in def VMOVNTDQAYrm : SS48I<0x2A, MRMSrcMem, (outs VR256:$dst), (ins 
i256mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (int_x86_avx2_movntdqa addr:$src))]>, - OpSize, VEX, VEX_L; + VEX, VEX_L; def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movntdqa\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>, - OpSize; + [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>; //===----------------------------------------------------------------------===// // SSE4.2 - Compare Instructions @@ -7321,15 +7288,14 @@ multiclass SS42I_binop_rm opc, string OpcodeStr, SDNode OpNode, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>, - OpSize; + [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>; def rm : SS428I, OpSize; + (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>; } let Predicates = [HasAVX] in @@ -7369,12 +7335,12 @@ multiclass pcmpistrm_SS42AI { def rr : SS42AI<0x62, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2, i8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), - []>, OpSize; + []>; let mayLoad = 1 in def rm :SS42AI<0x62, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2, i8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), - []>, OpSize; + []>; } let Defs = [XMM0, EFLAGS], neverHasSideEffects = 1 in { @@ -7404,12 +7370,12 @@ multiclass SS42AI_pcmpestrm { def rr : SS42AI<0x60, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src3, i8imm:$src5), !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), - []>, OpSize; + []>; let mayLoad = 1 in def rm : SS42AI<0x60, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src3, i8imm:$src5), !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), - []>, OpSize; + []>; } let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in { @@ -7439,12 +7405,12 @@ multiclass SS42AI_pcmpistri { def rr : 
SS42AI<0x63, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2, i8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), - []>, OpSize; + []>; let mayLoad = 1 in def rm : SS42AI<0x63, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2, i8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), - []>, OpSize; + []>; } let Defs = [ECX, EFLAGS], neverHasSideEffects = 1 in { @@ -7475,12 +7441,12 @@ multiclass SS42AI_pcmpestri { def rr : SS42AI<0x61, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src3, i8imm:$src5), !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), - []>, OpSize; + []>; let mayLoad = 1 in def rm : SS42AI<0x61, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src3, i8imm:$src5), !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), - []>, OpSize; + []>; } let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in { @@ -7603,15 +7569,14 @@ multiclass AESI_binop_rm_int opc, string OpcodeStr, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, - OpSize; + [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>; def rm : AES8I, OpSize; + (IntId128 VR128:$src1, (memopv2i64 addr:$src2)))]>; } // Perform One Round of an AES Encryption/Decryption Flow @@ -7644,24 +7609,22 @@ let Predicates = [HasAVX, HasAES] in { "vaesimc\t{$src1, $dst|$dst, $src1}", [(set VR128:$dst, (int_x86_aesni_aesimc VR128:$src1))]>, - OpSize, VEX; + VEX; def VAESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1), "vaesimc\t{$src1, $dst|$dst, $src1}", [(set VR128:$dst, (int_x86_aesni_aesimc (loadv2i64 addr:$src1)))]>, - OpSize, VEX; + VEX; } def AESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1), "aesimc\t{$src1, $dst|$dst, $src1}", [(set VR128:$dst, - (int_x86_aesni_aesimc VR128:$src1))]>, - OpSize; + (int_x86_aesni_aesimc VR128:$src1))]>; 
def AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1), "aesimc\t{$src1, $dst|$dst, $src1}", - [(set VR128:$dst, (int_x86_aesni_aesimc (memopv2i64 addr:$src1)))]>, - OpSize; + [(set VR128:$dst, (int_x86_aesni_aesimc (memopv2i64 addr:$src1)))]>; // AES Round Key Generation Assist let Predicates = [HasAVX, HasAES] in { @@ -7670,26 +7633,24 @@ let Predicates = [HasAVX, HasAES] in { "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>, - OpSize, VEX; + VEX; def VAESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_aesni_aeskeygenassist (loadv2i64 addr:$src1), imm:$src2))]>, - OpSize, VEX; + VEX; } def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2), "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, - (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>, - OpSize; + (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>; def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, - (int_x86_aesni_aeskeygenassist (memopv2i64 addr:$src1), imm:$src2))]>, - OpSize; + (int_x86_aesni_aeskeygenassist (memopv2i64 addr:$src1), imm:$src2))]>; //===----------------------------------------------------------------------===// // PCLMUL Instructions @@ -7757,12 +7718,12 @@ def EXTRQI : Ii8<0x78, MRM0r, (outs VR128:$dst), (ins VR128:$src, i8imm:$len, i8imm:$idx), "extrq\t{$idx, $len, $src|$src, $len, $idx}", [(set VR128:$dst, (int_x86_sse4a_extrqi VR128:$src, imm:$len, - imm:$idx))]>, TB, OpSize; + imm:$idx))]>, PD; def EXTRQ : I<0x79, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src, VR128:$mask), "extrq\t{$mask, $src|$src, $mask}", [(set VR128:$dst, 
(int_x86_sse4a_extrq VR128:$src, - VR128:$mask))]>, TB, OpSize; + VR128:$mask))]>, PD; def INSERTQI : Ii8<0x78, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src, VR128:$src2, i8imm:$len, i8imm:$idx), @@ -8149,10 +8110,10 @@ multiclass f16c_ph2ps { def rr : I<0x13, MRMSrcReg, (outs RC:$dst), (ins VR128:$src), "vcvtph2ps\t{$src, $dst|$dst, $src}", [(set RC:$dst, (Int VR128:$src))]>, - T8, OpSize, VEX; + T8PD, VEX; let neverHasSideEffects = 1, mayLoad = 1 in def rm : I<0x13, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), - "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX; + "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8PD, VEX; } multiclass f16c_ps2ph { @@ -8160,12 +8121,12 @@ multiclass f16c_ps2ph { (ins RC:$src1, i32i8imm:$src2), "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (Int RC:$src1, imm:$src2))]>, - TA, OpSize, VEX; + TAPD, VEX; let neverHasSideEffects = 1, mayStore = 1 in def mr : Ii8<0x1D, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src1, i32i8imm:$src2), "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, - TA, OpSize, VEX; + TAPD, VEX; } let Predicates = [HasF16C] in { diff --git a/lib/Target/X86/X86InstrShiftRotate.td b/lib/Target/X86/X86InstrShiftRotate.td index 3be1129eeca..ebabd493228 100644 --- a/lib/Target/X86/X86InstrShiftRotate.td +++ b/lib/Target/X86/X86InstrShiftRotate.td @@ -908,8 +908,8 @@ let Predicates = [HasBMI2] in { defm SARX64 : bmi_shift<"sarx{q}", GR64, i64mem>, T8XS, VEX_W; defm SHRX32 : bmi_shift<"shrx{l}", GR32, i32mem>, T8XD; defm SHRX64 : bmi_shift<"shrx{q}", GR64, i64mem>, T8XD, VEX_W; - defm SHLX32 : bmi_shift<"shlx{l}", GR32, i32mem>, T8, OpSize; - defm SHLX64 : bmi_shift<"shlx{q}", GR64, i64mem>, T8, OpSize, VEX_W; + defm SHLX32 : bmi_shift<"shlx{l}", GR32, i32mem>, T8PD; + defm SHLX64 : bmi_shift<"shlx{q}", GR64, i64mem>, T8PD, VEX_W; // Prefer RORX which is non-destructive and doesn't update EFLAGS. 
let AddedComplexity = 10 in { diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td index a559909a723..8c557e72337 100644 --- a/lib/Target/X86/X86InstrSystem.td +++ b/lib/Target/X86/X86InstrSystem.td @@ -542,10 +542,10 @@ let Predicates = [HasFSGSBase, In64BitMode] in { //===----------------------------------------------------------------------===// // INVPCID Instruction def INVPCID32 : I<0x82, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2), - "invpcid\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8, + "invpcid\t{$src2, $src1|$src1, $src2}", []>, T8PD, Requires<[Not64BitMode]>; def INVPCID64 : I<0x82, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2), - "invpcid\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8, + "invpcid\t{$src2, $src1|$src1, $src2}", []>, T8PD, Requires<[In64BitMode]>; //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86InstrVMX.td b/lib/Target/X86/X86InstrVMX.td index 5bf46d14651..25be8a5a9e2 100644 --- a/lib/Target/X86/X86InstrVMX.td +++ b/lib/Target/X86/X86InstrVMX.td @@ -17,22 +17,22 @@ // 66 0F 38 80 def INVEPT32 : I<0x80, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2), - "invept\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8, + "invept\t{$src2, $src1|$src1, $src2}", []>, T8PD, Requires<[Not64BitMode]>; def INVEPT64 : I<0x80, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2), - "invept\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8, + "invept\t{$src2, $src1|$src1, $src2}", []>, T8PD, Requires<[In64BitMode]>; // 66 0F 38 81 def INVVPID32 : I<0x81, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2), - "invvpid\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8, + "invvpid\t{$src2, $src1|$src1, $src2}", []>, T8PD, Requires<[Not64BitMode]>; def INVVPID64 : I<0x81, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2), - "invvpid\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8, + "invvpid\t{$src2, $src1|$src1, $src2}", []>, T8PD, Requires<[In64BitMode]>; // 0F 
01 C1 def VMCALL : I<0x01, MRM_C1, (outs), (ins), "vmcall", []>, TB; def VMCLEARm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs), - "vmclear\t$vmcs", []>, OpSize, TB; + "vmclear\t$vmcs", []>, PD; // OF 01 D4 def VMFUNC : I<0x01, MRM_D4, (outs), (ins), "vmfunc", []>, TB; // 0F 01 C2 diff --git a/utils/TableGen/X86RecognizableInstr.cpp b/utils/TableGen/X86RecognizableInstr.cpp index ab97a5f5433..b88cc529ab8 100644 --- a/utils/TableGen/X86RecognizableInstr.cpp +++ b/utils/TableGen/X86RecognizableInstr.cpp @@ -80,7 +80,7 @@ namespace X86Local { XD = 11, XS = 12, T8 = 13, P_TA = 14, A6 = 15, A7 = 16, T8XD = 17, T8XS = 18, TAXD = 19, - XOP8 = 20, XOP9 = 21, XOPA = 22 + XOP8 = 20, XOP9 = 21, XOPA = 22, PD = 23, T8PD = 24, TAPD = 25, }; } @@ -254,7 +254,9 @@ RecognizableInstr::RecognizableInstr(DisassemblerTables &tables, Operands = &insn.Operands.OperandList; - IsSSE = (HasOpSizePrefix && (Name.find("16") == Name.npos)) || + IsSSE = ((HasOpSizePrefix || Prefix == X86Local::PD || + Prefix == X86Local::T8PD || Prefix == X86Local::TAPD) && + (Name.find("16") == Name.npos)) || (Name.find("CRC32") != Name.npos); HasVEX_LPrefix = Rec->getValueAsBit("hasVEX_L"); @@ -309,7 +311,7 @@ InstructionContext RecognizableInstr::insnContext() const { } // VEX_L & VEX_W if (HasVEX_LPrefix && HasVEX_WPrefix) { - if (HasOpSizePrefix) + if (HasOpSizePrefix || Prefix == X86Local::PD) insnContext = EVEX_KB(IC_EVEX_L_W_OPSIZE); else if (Prefix == X86Local::XS || Prefix == X86Local::T8XS) insnContext = EVEX_KB(IC_EVEX_L_W_XS); @@ -320,7 +322,8 @@ InstructionContext RecognizableInstr::insnContext() const { insnContext = EVEX_KB(IC_EVEX_L_W); } else if (HasVEX_LPrefix) { // VEX_L - if (HasOpSizePrefix) + if (HasOpSizePrefix || Prefix == X86Local::PD || + Prefix == X86Local::T8PD || Prefix == X86Local::TAPD) insnContext = EVEX_KB(IC_EVEX_L_OPSIZE); else if (Prefix == X86Local::XS || Prefix == X86Local::T8XS) insnContext = EVEX_KB(IC_EVEX_L_XS); @@ -332,7 +335,8 @@ InstructionContext 
RecognizableInstr::insnContext() const { } else if (HasEVEX_L2Prefix && HasVEX_WPrefix) { // EVEX_L2 & VEX_W - if (HasOpSizePrefix) + if (HasOpSizePrefix || Prefix == X86Local::PD || + Prefix == X86Local::T8PD || Prefix == X86Local::TAPD) insnContext = EVEX_KB(IC_EVEX_L2_W_OPSIZE); else if (Prefix == X86Local::XS || Prefix == X86Local::T8XS) insnContext = EVEX_KB(IC_EVEX_L2_W_XS); @@ -343,10 +347,11 @@ InstructionContext RecognizableInstr::insnContext() const { insnContext = EVEX_KB(IC_EVEX_L2_W); } else if (HasEVEX_L2Prefix) { // EVEX_L2 - if (HasOpSizePrefix) + if (HasOpSizePrefix || Prefix == X86Local::PD || + Prefix == X86Local::T8PD || Prefix == X86Local::TAPD) insnContext = EVEX_KB(IC_EVEX_L2_OPSIZE); else if (Prefix == X86Local::XD || Prefix == X86Local::T8XD || - Prefix == X86Local::TAXD) + Prefix == X86Local::TAXD) insnContext = EVEX_KB(IC_EVEX_L2_XD); else if (Prefix == X86Local::XS || Prefix == X86Local::T8XS) insnContext = EVEX_KB(IC_EVEX_L2_XS); @@ -355,7 +360,8 @@ InstructionContext RecognizableInstr::insnContext() const { } else if (HasVEX_WPrefix) { // VEX_W - if (HasOpSizePrefix) + if (HasOpSizePrefix || Prefix == X86Local::PD || + Prefix == X86Local::T8PD || Prefix == X86Local::TAPD) insnContext = EVEX_KB(IC_EVEX_W_OPSIZE); else if (Prefix == X86Local::XS || Prefix == X86Local::T8XS) insnContext = EVEX_KB(IC_EVEX_W_XS); @@ -366,7 +372,8 @@ InstructionContext RecognizableInstr::insnContext() const { insnContext = EVEX_KB(IC_EVEX_W); } // No L, no W - else if (HasOpSizePrefix) + else if (HasOpSizePrefix || Prefix == X86Local::PD || + Prefix == X86Local::T8PD || Prefix == X86Local::TAPD) insnContext = EVEX_KB(IC_EVEX_OPSIZE); else if (Prefix == X86Local::XD || Prefix == X86Local::T8XD || Prefix == X86Local::TAXD) @@ -378,7 +385,8 @@ InstructionContext RecognizableInstr::insnContext() const { /// eof EVEX } else if (HasVEX_4VPrefix || HasVEX_4VOp3Prefix|| HasVEXPrefix) { if (HasVEX_LPrefix && HasVEX_WPrefix) { - if (HasOpSizePrefix) + if 
(HasOpSizePrefix || Prefix == X86Local::PD || + Prefix == X86Local::T8PD || Prefix == X86Local::TAPD) insnContext = IC_VEX_L_W_OPSIZE; else if (Prefix == X86Local::XS || Prefix == X86Local::T8XS) insnContext = IC_VEX_L_W_XS; @@ -387,11 +395,16 @@ InstructionContext RecognizableInstr::insnContext() const { insnContext = IC_VEX_L_W_XD; else insnContext = IC_VEX_L_W; - } else if (HasOpSizePrefix && HasVEX_LPrefix) + } else if ((HasOpSizePrefix || Prefix == X86Local::PD || + Prefix == X86Local::T8PD || Prefix == X86Local::TAPD) && + HasVEX_LPrefix) insnContext = IC_VEX_L_OPSIZE; - else if (HasOpSizePrefix && HasVEX_WPrefix) + else if ((HasOpSizePrefix || Prefix == X86Local::PD || + Prefix == X86Local::T8PD || Prefix == X86Local::TAPD) && + HasVEX_WPrefix) insnContext = IC_VEX_W_OPSIZE; - else if (HasOpSizePrefix) + else if (HasOpSizePrefix || Prefix == X86Local::PD || + Prefix == X86Local::T8PD || Prefix == X86Local::TAPD) insnContext = IC_VEX_OPSIZE; else if (HasVEX_LPrefix && (Prefix == X86Local::XS || Prefix == X86Local::T8XS)) @@ -419,7 +432,8 @@ InstructionContext RecognizableInstr::insnContext() const { else insnContext = IC_VEX; } else if (Is64Bit || HasREX_WPrefix) { - if (HasREX_WPrefix && HasOpSizePrefix) + if (HasREX_WPrefix && (HasOpSizePrefix || Prefix == X86Local::PD || + Prefix == X86Local::T8PD || Prefix == X86Local::TAPD)) insnContext = IC_64BIT_REXW_OPSIZE; else if (HasOpSizePrefix && (Prefix == X86Local::XD || Prefix == X86Local::T8XD || @@ -428,7 +442,8 @@ InstructionContext RecognizableInstr::insnContext() const { else if (HasOpSizePrefix && (Prefix == X86Local::XS || Prefix == X86Local::T8XS)) insnContext = IC_64BIT_XS_OPSIZE; - else if (HasOpSizePrefix) + else if (HasOpSizePrefix || Prefix == X86Local::PD || + Prefix == X86Local::T8PD || Prefix == X86Local::TAPD) insnContext = IC_64BIT_OPSIZE; else if (HasAdSizePrefix) insnContext = IC_64BIT_ADSIZE; @@ -458,7 +473,8 @@ InstructionContext RecognizableInstr::insnContext() const { insnContext = 
IC_XS_OPSIZE; else if (HasOpSizePrefix && HasAdSizePrefix) insnContext = IC_OPSIZE_ADSIZE; - else if (HasOpSizePrefix) + else if (HasOpSizePrefix || Prefix == X86Local::PD || + Prefix == X86Local::T8PD || Prefix == X86Local::TAPD) insnContext = IC_OPSIZE; else if (HasAdSizePrefix) insnContext = IC_ADSIZE; @@ -851,7 +867,8 @@ void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const { switch (Prefix) { default: llvm_unreachable("Invalid prefix!"); - // Extended two-byte opcodes can start with f2 0f, f3 0f, or 0f + // Extended two-byte opcodes can start with 66 0f, f2 0f, f3 0f, or 0f + case X86Local::PD: case X86Local::XD: case X86Local::XS: case X86Local::TB: @@ -897,6 +914,7 @@ void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const { opcodeToSet = Opcode; break; case X86Local::T8: + case X86Local::T8PD: case X86Local::T8XD: case X86Local::T8XS: opcodeType = THREEBYTE_38; @@ -940,6 +958,7 @@ void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const { opcodeToSet = Opcode; break; case X86Local::P_TA: + case X86Local::TAPD: case X86Local::TAXD: opcodeType = THREEBYTE_3A; if (needsModRMForDecode(Form))