diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td index d50f98c0c59..183317b23fa 100644 --- a/include/llvm/IR/IntrinsicsX86.td +++ b/include/llvm/IR/IntrinsicsX86.td @@ -1248,6 +1248,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx_ptestnzc_256 : GCCBuiltin<"__builtin_ia32_ptestnzc256">, Intrinsic<[llvm_i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; + def int_x86_avx512_mask_ptestm_d_512 : GCCBuiltin<"__builtin_ia32_ptestmd512">, + Intrinsic<[llvm_i16_ty], [llvm_v16i32_ty, llvm_v16i32_ty, + llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_ptestm_q_512 : GCCBuiltin<"__builtin_ia32_ptestmq512">, + Intrinsic<[llvm_i8_ty], [llvm_v8i64_ty, llvm_v8i64_ty, + llvm_i8_ty], [IntrNoMem]>; } // Vector extract sign mask @@ -1696,6 +1702,18 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx2_pbroadcastq_256 : GCCBuiltin<"__builtin_ia32_pbroadcastq256">, Intrinsic<[llvm_v4i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pbroadcast_d_gpr_512 : + GCCBuiltin<"__builtin_ia32_pbroadcastd512_gpr_mask">, + Intrinsic<[llvm_v16i32_ty], [llvm_i32_ty, llvm_v16i32_ty, + llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pbroadcast_q_gpr_512 : + GCCBuiltin<"__builtin_ia32_pbroadcastq512_gpr_mask">, + Intrinsic<[llvm_v8i64_ty], [llvm_i64_ty, llvm_v8i64_ty, + llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pbroadcast_q_mem_512 : + GCCBuiltin<"__builtin_ia32_pbroadcastq512_mem_mask">, + Intrinsic<[llvm_v8i64_ty], [llvm_i64_ty, llvm_v8i64_ty, + llvm_i8_ty], [IntrNoMem]>; } // Vector permutation @@ -2876,54 +2894,62 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx512_sqrt_ps_512 : GCCBuiltin<"__builtin_ia32_sqrtps512">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty], [IntrNoMem]>; - def int_x86_avx512_rcp14_ps_512 : GCCBuiltin<"__builtin_ia32_rcp14ps512">, - Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty], - [IntrNoMem]>; - def int_x86_avx512_rcp14_pd_512 : GCCBuiltin<"__builtin_ia32_rcp14pd512">, - Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty], - [IntrNoMem]>; - def int_x86_avx512_rcp14_ss : GCCBuiltin<"__builtin_ia32_rcp14ss">, - Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], - [IntrNoMem]>; - def int_x86_avx512_rcp14_sd : GCCBuiltin<"__builtin_ia32_rcp14sd">, - Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], - [IntrNoMem]>; - def int_x86_avx512_rsqrt14_ps_512 : GCCBuiltin<"__builtin_ia32_rsqrt14ps512">, - Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty], - [IntrNoMem]>; - def int_x86_avx512_rsqrt14_pd_512 : GCCBuiltin<"__builtin_ia32_rsqrt14pd512">, - Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty], - [IntrNoMem]>; - def int_x86_avx512_rsqrt14_ss : GCCBuiltin<"__builtin_ia32_rsqrt14ss">, - Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], - [IntrNoMem]>; - def int_x86_avx512_rsqrt14_sd : GCCBuiltin<"__builtin_ia32_rsqrt14sd">, - Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], - [IntrNoMem]>; + def int_x86_avx512_rsqrt14_ss : GCCBuiltin<"__builtin_ia32_rsqrt14ss_mask">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, + llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_rsqrt14_sd : GCCBuiltin<"__builtin_ia32_rsqrt14sd_mask">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, + llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_rcp28_ps_512 : GCCBuiltin<"__builtin_ia32_rcp28ps512">, - Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty], + def int_x86_avx512_rsqrt14_pd_512 : GCCBuiltin<"__builtin_ia32_rsqrt14pd512_mask">, + Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, + llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_rsqrt14_ps_512 : GCCBuiltin<"__builtin_ia32_rsqrt14ps512_mask">, + Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, + llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_rcp14_ss : GCCBuiltin<"__builtin_ia32_rcp14ss_mask">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, + llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_rcp14_sd : GCCBuiltin<"__builtin_ia32_rcp14sd_mask">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, + llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_rcp14_pd_512 : GCCBuiltin<"__builtin_ia32_rcp14pd512_mask">, + Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, + llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_rcp14_ps_512 : GCCBuiltin<"__builtin_ia32_rcp14ps512_mask">, + Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, + llvm_i16_ty], [IntrNoMem]>; + + def int_x86_avx512_rcp28_ps : GCCBuiltin<"__builtin_ia32_rcp28ps_mask">, + Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, + llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_rcp28_pd : GCCBuiltin<"__builtin_ia32_rcp28pd_mask">, + Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, + llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_rcp28_ss : GCCBuiltin<"__builtin_ia32_rcp28ss_mask">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, + llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_rcp28_pd_512 : GCCBuiltin<"__builtin_ia32_rcp28pd512">, - Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty], + def int_x86_avx512_rcp28_sd : GCCBuiltin<"__builtin_ia32_rcp28sd_mask">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, + llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_rcp28_ss : GCCBuiltin<"__builtin_ia32_rcp28ss">, - Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], + def int_x86_avx512_rsqrt28_ps : GCCBuiltin<"__builtin_ia32_rsqrt28ps_mask">, + Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, + llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_rcp28_sd : GCCBuiltin<"__builtin_ia32_rcp28sd">, - Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], + def int_x86_avx512_rsqrt28_pd : GCCBuiltin<"__builtin_ia32_rsqrt28pd_mask">, + Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, + llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_rsqrt28_ps_512 : GCCBuiltin<"__builtin_ia32_rsqrt28ps512">, - Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty], + def int_x86_avx512_rsqrt28_ss : GCCBuiltin<"__builtin_ia32_rsqrt28ss_mask">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, + llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_rsqrt28_pd_512 : GCCBuiltin<"__builtin_ia32_rsqrt28pd512">, - Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty], - [IntrNoMem]>; - def int_x86_avx512_rsqrt28_ss : GCCBuiltin<"__builtin_ia32_rsqrt28ss">, - Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], - [IntrNoMem]>; - def int_x86_avx512_rsqrt28_sd : GCCBuiltin<"__builtin_ia32_rsqrt28sd">, - Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], + def int_x86_avx512_rsqrt28_sd : GCCBuiltin<"__builtin_ia32_rsqrt28sd_mask">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, + llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; } diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp index e214e9b5879..8c2bb0f9e9f 100644 --- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp @@ -125,14 +125,12 @@ void X86ATTInstPrinter::printAVXCC(const MCInst *MI, unsigned Op, void X86ATTInstPrinter::printRoundingControl(const MCInst *MI, unsigned Op, raw_ostream &O) { - int64_t Imm = MI->getOperand(Op).getImm() & 0x1f; + int64_t Imm = MI->getOperand(Op).getImm() & 0x3; switch (Imm) { case 0: O << "{rn-sae}"; break; case 1: O << "{rd-sae}"; break; case 2: O << "{ru-sae}"; break; case 3: O << "{rz-sae}"; break; - - default: llvm_unreachable("Invalid AVX-512 rounding control argument!"); } } /// printPCRelImm - This is used to print an immediate value that ends up diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp index 320ac5addbb..7255d560cca 100644 --- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp @@ -115,14 +115,12 @@ void X86IntelInstPrinter::printAVXCC(const MCInst *MI, unsigned Op, void X86IntelInstPrinter::printRoundingControl(const MCInst *MI, unsigned Op, raw_ostream &O) { - int64_t Imm = MI->getOperand(Op).getImm() & 0x1f; + int64_t Imm = MI->getOperand(Op).getImm() & 0x3; switch (Imm) { case 0: O << "{rn-sae}"; break; case 1: O << "{rd-sae}"; break; case 2: O << "{ru-sae}"; break; case 3: O << "{rz-sae}"; break; - - default: llvm_unreachable("Invalid AVX-512 rounding control argument!"); } } diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h index d6e037224ea..5b1cf5a1a58 100644 --- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -503,8 +503,10 @@ namespace X86II { MemOp4 = 1U << 18, /// XOP - Opcode prefix used by XOP instructions. - XOP = 1U << 19 + XOP = 1U << 19, + /// Explicitly specified rounding control + EVEX_RC = 1U << 20 }; // getBaseOpcodeFor - This function returns the "base" X86 opcode for the diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 576b6e0eb64..48bd6f19351 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -604,7 +604,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V; bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3; bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4; - bool HasEVEX_RC = false; + bool HasEVEX_RC = (TSFlags >> X86II::VEXShift) & X86II::EVEX_RC; // VEX_R: opcode externsion equivalent to REX.R in // 1's complement (inverted) form @@ -686,6 +686,8 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, // EVEX_aaa unsigned char EVEX_aaa = 0; + bool EncodeRC = false; + // Encode the operand size opcode prefix as needed. if (TSFlags & X86II::OpSize) VEX_PP = 0x01; @@ -749,7 +751,6 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, // Classify VEX_B, VEX_4V, VEX_R, VEX_X unsigned NumOps = Desc.getNumOperands(); - unsigned RcOperand = NumOps-1; unsigned CurOp = 0; if (NumOps > 1 && Desc.getOperandConstraint(1, MCOI::TIED_TO) == 0) ++CurOp; @@ -910,10 +911,13 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, if (HasVEX_4VOp3) VEX_4V = getVEXRegisterEncoding(MI, CurOp++); if (EVEX_b) { - assert(RcOperand >= CurOp); - EVEX_rc = MI.getOperand(RcOperand).getImm() & 0x3; - HasEVEX_RC = true; - } + if (HasEVEX_RC) { + unsigned RcOperand = NumOps-1; + assert(RcOperand >= CurOp); + EVEX_rc = MI.getOperand(RcOperand).getImm() & 0x3; + } + EncodeRC = true; + } break; case X86II::MRMDestReg: // MRMDestReg instructions forms: @@ -940,6 +944,8 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, VEX_R = 0x0; if (HasEVEX && X86II::is32ExtendedReg(MI.getOperand(CurOp).getReg())) EVEX_R2 = 0x0; + if (EVEX_b) + EncodeRC = true; break; case X86II::MRM0r: case X86II::MRM1r: case X86II::MRM2r: case X86II::MRM3r: @@ -1013,7 +1019,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, (VEX_4V << 3) | (EVEX_U << 2) | VEX_PP, CurByte, OS); - if (HasEVEX_RC) + if (EncodeRC) EmitByte((EVEX_z << 7) | (EVEX_rc << 5) | (EVEX_b << 4) | @@ -1293,7 +1299,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, // It uses the EVEX.aaa field? bool HasEVEX = (TSFlags >> X86II::VEXShift) & X86II::EVEX; bool HasEVEX_K = HasEVEX && ((TSFlags >> X86II::VEXShift) & X86II::EVEX_K); - bool HasEVEX_B = HasEVEX && ((TSFlags >> X86II::VEXShift) & X86II::EVEX_B); + bool HasEVEX_RC = HasEVEX && ((TSFlags >> X86II::VEXShift) & X86II::EVEX_RC); // Determine where the memory operand starts, if present. int MemoryOperand = X86II::getMemoryOperandNo(TSFlags, Opcode); @@ -1391,7 +1397,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, if (HasVEX_4VOp3) ++CurOp; // do not count the rounding control operand - if (HasEVEX_B) + if (HasEVEX_RC) NumOps--; break; diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 5d91e37f5ef..c1c3c3cff2d 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -432,6 +432,13 @@ def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_i32_512 (i32 GR32:$src))), def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_i64_512 (i64 GR64:$src))), (VPBROADCASTQrZrr GR64:$src)>; +def : Pat<(v16i32 (int_x86_avx512_mask_pbroadcast_d_gpr_512 (i32 GR32:$src), + (v16i32 immAllZerosV), (i16 GR16:$mask))), + (VPBROADCASTDrZkrr (COPY_TO_REGCLASS GR16:$mask, VK16WM), GR32:$src)>; +def : Pat<(v8i64 (int_x86_avx512_mask_pbroadcast_q_gpr_512 (i64 GR64:$src), + (bc_v8i64 (v16i32 immAllZerosV)), (i8 GR8:$mask))), + (VPBROADCASTQrZkrr (COPY_TO_REGCLASS GR8:$mask, VK8WM), GR64:$src)>; + multiclass avx512_int_broadcast_rm opc, string OpcodeStr, X86MemOperand x86memop, PatFrag ld_frag, RegisterClass DstRC, ValueType OpVT, ValueType SrcVT, @@ -639,12 +646,12 @@ defm VBLENDMPDZ : avx512_blendmask<0x65, "vblendmpd", def : Pat<(v16f32 (int_x86_avx512_mask_blend_ps_512 (v16f32 VR512:$src1), (v16f32 VR512:$src2), (i16 GR16:$mask))), - (VBLENDMPSZrr (COPY_TO_REGCLASS GR16:$mask, VK16), + (VBLENDMPSZrr (COPY_TO_REGCLASS GR16:$mask, VK16WM), VR512:$src1, VR512:$src2)>; def : Pat<(v8f64 (int_x86_avx512_mask_blend_pd_512 (v8f64 VR512:$src1), (v8f64 VR512:$src2), (i8 GR8:$mask))), - (VBLENDMPDZrr (COPY_TO_REGCLASS GR8:$mask, VK8), + (VBLENDMPDZrr (COPY_TO_REGCLASS GR8:$mask, VK8WM), VR512:$src1, VR512:$src2)>; defm VPBLENDMDZ : avx512_blendmask<0x64, "vpblendmd", @@ -809,7 +816,7 @@ multiclass avx512_cmp_packed; def rrib: AVX512PIi8<0xC2, MRMSrcReg, - (outs KRC:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc, i32imm:$sae), + (outs KRC:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc), !strconcat("vcmp${cc}", suffix, "\t{{sae}, $src2, $src1, $dst|$dst, $src1, $src2, {sae}}"), [], d>, EVEX_B; @@ -859,13 +866,13 @@ def : Pat<(i16 (int_x86_avx512_mask_cmp_ps_512 (v16f32 VR512:$src1), (v16f32 VR512:$src2), imm:$cc, (i16 -1), FROUND_NO_EXC)), (COPY_TO_REGCLASS (VCMPPSZrrib VR512:$src1, VR512:$src2, - (I8Imm imm:$cc), (i32 0)), GR16)>; + (I8Imm imm:$cc)), GR16)>; def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512 (v8f64 VR512:$src1), (v8f64 VR512:$src2), imm:$cc, (i8 -1), FROUND_NO_EXC)), (COPY_TO_REGCLASS (VCMPPDZrrib VR512:$src1, VR512:$src2, - (I8Imm imm:$cc), (i32 0)), GR8)>; + (I8Imm imm:$cc)), GR8)>; def : Pat<(i16 (int_x86_avx512_mask_cmp_ps_512 (v16f32 VR512:$src1), (v16f32 VR512:$src2), imm:$cc, (i16 -1), @@ -949,6 +956,10 @@ let Predicates = [HasAVX512] in { def : Pat<(i8 (zext VK1:$src)), (EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), sub_8bit)>; + def : Pat<(i64 (zext VK1:$src)), + (SUBREG_TO_REG (i64 0), + (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), sub_32bit)>; + } // With AVX-512 only, 8-bit mask is promoted to 16-bit mask. let Predicates = [HasAVX512] in { @@ -1170,6 +1181,9 @@ defm KSET1 : avx512_mask_setop_w; let Predicates = [HasAVX512] in { def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>; def : Pat<(v8i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK8)>; + def : Pat<(i1 0), (COPY_TO_REGCLASS (KSET0W), VK1)>; + def : Pat<(i1 1), (COPY_TO_REGCLASS (KSET1W), VK1)>; + def : Pat<(i1 -1), (COPY_TO_REGCLASS (KSET1W), VK1)>; } def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 0))), (v8i1 (COPY_TO_REGCLASS VK16:$src, VK8))>; @@ -2112,6 +2126,13 @@ defm VPTESTMQZ : avx512_vptest<0x27, "vptestmq", VK8, VR512, f512mem, memopv8i64, X86testm, v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +def : Pat <(i16 (int_x86_avx512_mask_ptestm_d_512 (v16i32 VR512:$src1), + (v16i32 VR512:$src2), (i16 -1))), + (COPY_TO_REGCLASS (VPTESTMDZrr VR512:$src1, VR512:$src2), GR16)>; + +def : Pat <(i8 (int_x86_avx512_mask_ptestm_q_512 (v8i64 VR512:$src1), + (v8i64 VR512:$src2), (i8 -1))), + (COPY_TO_REGCLASS (VPTESTMDZrr VR512:$src1, VR512:$src2), GR8)>; //===----------------------------------------------------------------------===// // AVX-512 Shift instructions //===----------------------------------------------------------------------===// @@ -2791,7 +2812,7 @@ let hasSideEffects = 0 in { (OpVT (OpNode (InVT SrcRC:$src))))], d>, EVEX; def rrb : AVX512PI, EVEX, EVEX_B; + [], d>, EVEX, EVEX_B, EVEX_RC; let mayLoad = 1 in def rm : AVX512PI, EVEX; def rrb : AVX512PI, EVEX, EVEX_B; + [], d>, EVEX, EVEX_B, EVEX_RC; let mayLoad = 1 in def rm : AVX512PI opc, string OpcodeStr, SDNode OpNode> { - def PSZr : AVX5128I, - EVEX, EVEX_V512; - def PSZm : AVX5128I, - EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>; - def PDZr : AVX5128I, - EVEX, EVEX_V512, VEX_W; - def PDZm : AVX5128I, - EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; -} - -/// avx512_fp_unop_p_int - AVX-512 intrinsics unops in packed forms. -multiclass avx512_fp_unop_p_int opc, string OpcodeStr, - Intrinsic V16F32Int, Intrinsic V8F64Int> { -let isCodeGenOnly = 1 in { - def PSZr_Int : AVX5128I, - EVEX, EVEX_V512; - def PSZm_Int : AVX5128I, EVEX, - EVEX_V512, EVEX_CD8<32, CD8VF>; - def PDZr_Int : AVX5128I, - EVEX, EVEX_V512, VEX_W; - def PDZm_Int : AVX5128I, - EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; -} // isCodeGenOnly = 1 -} - -/// avx512_fp_unop_s - AVX-512 unops in scalar form. -multiclass avx512_fp_unop_s opc, string OpcodeStr> { +/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd +multiclass avx512_fp14_s opc, string OpcodeStr, RegisterClass RC, + X86MemOperand x86memop> { let hasSideEffects = 0 in { - def SSZr : AVX5128I, EVEX_4V; + "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, EVEX_4V; let mayLoad = 1 in { - def SSZm : AVX5128I, EVEX_4V, EVEX_CD8<32, CD8VT1>; - let isCodeGenOnly = 1 in - def SSZm_Int : AVX5128I, EVEX_4V, EVEX_CD8<32, CD8VT1>; - } - def SDZr : AVX5128I, - EVEX_4V, VEX_W; - let mayLoad = 1 in { - def SDZm : AVX5128I, - EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>; - let isCodeGenOnly = 1 in - def SDZm_Int : AVX5128I, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>; + "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, EVEX_4V; } } } -defm VRCP14 : avx512_fp_unop_s<0x4D, "vrcp14">, - avx512_fp_unop_p<0x4C, "vrcp14", X86frcp>, - avx512_fp_unop_p_int<0x4C, "vrcp14", - int_x86_avx512_rcp14_ps_512, int_x86_avx512_rcp14_pd_512>; +defm VRCP14SS : avx512_fp14_s<0x4D, "vrcp14ss", FR32X, f32mem>, + EVEX_CD8<32, CD8VT1>; +defm VRCP14SD : avx512_fp14_s<0x4D, "vrcp14sd", FR64X, f64mem>, + VEX_W, EVEX_CD8<64, CD8VT1>; +defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", FR32X, f32mem>, + EVEX_CD8<32, CD8VT1>; +defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", FR64X, f64mem>, + VEX_W, EVEX_CD8<64, CD8VT1>; -defm VRSQRT14 : avx512_fp_unop_s<0x4F, "vrsqrt14">, - avx512_fp_unop_p<0x4E, "vrsqrt14", X86frsqrt>, - avx512_fp_unop_p_int<0x4E, "vrsqrt14", - int_x86_avx512_rsqrt14_ps_512, int_x86_avx512_rsqrt14_pd_512>; +def : Pat <(v4f32 (int_x86_avx512_rcp14_ss (v4f32 VR128X:$src1), + (v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1))), + (COPY_TO_REGCLASS (VRCP14SSrr (COPY_TO_REGCLASS VR128X:$src1, FR32X), + (COPY_TO_REGCLASS VR128X:$src2, FR32X)), VR128X)>; -def : Pat<(int_x86_avx512_rsqrt14_ss VR128X:$src), - (COPY_TO_REGCLASS (VRSQRT14SSZr (f32 (IMPLICIT_DEF)), - (COPY_TO_REGCLASS VR128X:$src, FR32)), - VR128X)>; -def : Pat<(int_x86_avx512_rsqrt14_ss sse_load_f32:$src), - (VRSQRT14SSZm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>; +def : Pat <(v2f64 (int_x86_avx512_rcp14_sd (v2f64 VR128X:$src1), + (v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1))), + (COPY_TO_REGCLASS (VRCP14SDrr (COPY_TO_REGCLASS VR128X:$src1, FR64X), + (COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>; -def : Pat<(int_x86_avx512_rcp14_ss VR128X:$src), - (COPY_TO_REGCLASS (VRCP14SSZr (f32 (IMPLICIT_DEF)), - (COPY_TO_REGCLASS VR128X:$src, FR32)), - VR128X)>; -def : Pat<(int_x86_avx512_rcp14_ss sse_load_f32:$src), - (VRCP14SSZm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>; +def : Pat <(v4f32 (int_x86_avx512_rsqrt14_ss (v4f32 VR128X:$src1), + (v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1))), + (COPY_TO_REGCLASS (VRSQRT14SSrr (COPY_TO_REGCLASS VR128X:$src1, FR32X), + (COPY_TO_REGCLASS VR128X:$src2, FR32X)), VR128X)>; -let AddedComplexity = 20, Predicates = [HasERI] in { -defm VRCP28 : avx512_fp_unop_s<0xCB, "vrcp28">, - avx512_fp_unop_p<0xCA, "vrcp28", X86frcp>, - avx512_fp_unop_p_int<0xCA, "vrcp28", - int_x86_avx512_rcp28_ps_512, int_x86_avx512_rcp28_pd_512>; +def : Pat <(v2f64 (int_x86_avx512_rsqrt14_sd (v2f64 VR128X:$src1), + (v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1))), + (COPY_TO_REGCLASS (VRSQRT14SDrr (COPY_TO_REGCLASS VR128X:$src1, FR64X), + (COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>; -defm VRSQRT28 : avx512_fp_unop_s<0xCD, "vrsqrt28">, - avx512_fp_unop_p<0xCC, "vrsqrt28", X86frsqrt>, - avx512_fp_unop_p_int<0xCC, "vrsqrt28", - int_x86_avx512_rsqrt28_ps_512, int_x86_avx512_rsqrt28_pd_512>; +/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd +multiclass avx512_fp14_p opc, string OpcodeStr, SDNode OpNode, + RegisterClass RC, X86MemOperand x86memop, + PatFrag mem_frag, ValueType OpVt> { + def r : AVX5128I, + EVEX; + def m : AVX5128I, + EVEX; +} +defm VRSQRT14PSZ : avx512_fp14_p<0x4E, "vrsqrt14ps", X86frsqrt, VR512, f512mem, + memopv16f32, v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VRSQRT14PDZ : avx512_fp14_p<0x4E, "vrsqrt14pd", X86frsqrt, VR512, f512mem, + memopv8f64, v8f64>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>; +defm VRCP14PSZ : avx512_fp14_p<0x4C, "vrcp14ps", X86frcp, VR512, f512mem, + memopv16f32, v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VRCP14PDZ : avx512_fp14_p<0x4C, "vrcp14pd", X86frcp, VR512, f512mem, + memopv8f64, v8f64>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>; + +def : Pat <(v16f32 (int_x86_avx512_rsqrt14_ps_512 (v16f32 VR512:$src), + (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1))), + (VRSQRT14PSZr VR512:$src)>; +def : Pat <(v8f64 (int_x86_avx512_rsqrt14_pd_512 (v8f64 VR512:$src), + (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))), + (VRSQRT14PDZr VR512:$src)>; + +def : Pat <(v16f32 (int_x86_avx512_rcp14_ps_512 (v16f32 VR512:$src), + (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1))), + (VRCP14PSZr VR512:$src)>; +def : Pat <(v8f64 (int_x86_avx512_rcp14_pd_512 (v8f64 VR512:$src), + (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))), + (VRCP14PDZr VR512:$src)>; + +/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd +multiclass avx512_fp28_s opc, string OpcodeStr, RegisterClass RC, + X86MemOperand x86memop> { + let hasSideEffects = 0, Predicates = [HasERI] in { + def rr : AVX5128I, EVEX_4V; + def rrb : AVX5128I, EVEX_4V, EVEX_B; + let mayLoad = 1 in { + def rm : AVX5128I, EVEX_4V; + } +} } -let Predicates = [HasERI] in { - def : Pat<(int_x86_avx512_rsqrt28_ss VR128X:$src), - (COPY_TO_REGCLASS (VRSQRT28SSZr (f32 (IMPLICIT_DEF)), - (COPY_TO_REGCLASS VR128X:$src, FR32)), - VR128X)>; - def : Pat<(int_x86_avx512_rsqrt28_ss sse_load_f32:$src), - (VRSQRT28SSZm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>; +defm VRCP28SS : avx512_fp28_s<0xCB, "vrcp28ss", FR32X, f32mem>, + EVEX_CD8<32, CD8VT1>; +defm VRCP28SD : avx512_fp28_s<0xCB, "vrcp28sd", FR64X, f64mem>, + VEX_W, EVEX_CD8<64, CD8VT1>; +defm VRSQRT28SS : avx512_fp28_s<0xCD, "vrsqrt28ss", FR32X, f32mem>, + EVEX_CD8<32, CD8VT1>; +defm VRSQRT28SD : avx512_fp28_s<0xCD, "vrsqrt28sd", FR64X, f64mem>, + VEX_W, EVEX_CD8<64, CD8VT1>; - def : Pat<(int_x86_avx512_rcp28_ss VR128X:$src), - (COPY_TO_REGCLASS (VRCP28SSZr (f32 (IMPLICIT_DEF)), - (COPY_TO_REGCLASS VR128X:$src, FR32)), - VR128X)>; - def : Pat<(int_x86_avx512_rcp28_ss sse_load_f32:$src), - (VRCP28SSZm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>; +def : Pat <(v4f32 (int_x86_avx512_rcp28_ss (v4f32 VR128X:$src1), + (v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1), + FROUND_NO_EXC)), + (COPY_TO_REGCLASS (VRCP28SSrrb (COPY_TO_REGCLASS VR128X:$src1, FR32X), + (COPY_TO_REGCLASS VR128X:$src2, FR32X)), VR128X)>; + +def : Pat <(v2f64 (int_x86_avx512_rcp28_sd (v2f64 VR128X:$src1), + (v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1), + FROUND_NO_EXC)), + (COPY_TO_REGCLASS (VRCP28SDrrb (COPY_TO_REGCLASS VR128X:$src1, FR64X), + (COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>; + +def : Pat <(v4f32 (int_x86_avx512_rsqrt28_ss (v4f32 VR128X:$src1), + (v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1), + FROUND_NO_EXC)), + (COPY_TO_REGCLASS (VRSQRT28SSrrb (COPY_TO_REGCLASS VR128X:$src1, FR32X), + (COPY_TO_REGCLASS VR128X:$src2, FR32X)), VR128X)>; + +def : Pat <(v2f64 (int_x86_avx512_rsqrt28_sd (v2f64 VR128X:$src1), + (v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1), + FROUND_NO_EXC)), + (COPY_TO_REGCLASS (VRSQRT28SDrrb (COPY_TO_REGCLASS VR128X:$src1, FR64X), + (COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>; + +/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd +multiclass avx512_fp28_p opc, string OpcodeStr, + RegisterClass RC, X86MemOperand x86memop> { + let hasSideEffects = 0, Predicates = [HasERI] in { + def r : AVX5128I, EVEX; + def rb : AVX5128I, EVEX, EVEX_B; + def m : AVX5128I, EVEX; + } } +defm VRSQRT28PSZ : avx512_fp28_p<0xCC, "vrsqrt28ps", VR512, f512mem>, + EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VRSQRT28PDZ : avx512_fp28_p<0xCC, "vrsqrt28pd", VR512, f512mem>, + VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>; +defm VRCP28PSZ : avx512_fp28_p<0xCA, "vrcp28ps", VR512, f512mem>, + EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VRCP28PDZ : avx512_fp28_p<0xCA, "vrcp28pd", VR512, f512mem>, + VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>; + +def : Pat <(v16f32 (int_x86_avx512_rsqrt28_ps (v16f32 VR512:$src), + (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), FROUND_NO_EXC)), + (VRSQRT28PSZrb VR512:$src)>; +def : Pat <(v8f64 (int_x86_avx512_rsqrt28_pd (v8f64 VR512:$src), + (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_NO_EXC)), + (VRSQRT28PDZrb VR512:$src)>; + +def : Pat <(v16f32 (int_x86_avx512_rcp28_ps (v16f32 VR512:$src), + (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), FROUND_NO_EXC)), + (VRCP28PSZrb VR512:$src)>; +def : Pat <(v8f64 (int_x86_avx512_rcp28_pd (v8f64 VR512:$src), + (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_NO_EXC)), + (VRCP28PDZrb VR512:$src)>; + multiclass avx512_sqrt_packed opc, string OpcodeStr, SDNode OpNode, Intrinsic V16F32Int, Intrinsic V8F64Int, OpndItins itins_s, OpndItins itins_d> { @@ -3303,15 +3356,15 @@ let Predicates = [HasAVX512] in { Requires<[OptForSize]>; def : Pat<(f32 (X86frsqrt FR32X:$src)), - (VRSQRT14SSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>; + (VRSQRT14SSrr (f32 (IMPLICIT_DEF)), FR32X:$src)>; def : Pat<(f32 (X86frsqrt (load addr:$src))), - (VRSQRT14SSZm (f32 (IMPLICIT_DEF)), addr:$src)>, + (VRSQRT14SSrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[OptForSize]>; def : Pat<(f32 (X86frcp FR32X:$src)), - (VRCP14SSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>; + (VRCP14SSrr (f32 (IMPLICIT_DEF)), FR32X:$src)>; def : Pat<(f32 (X86frcp (load addr:$src))), - (VRCP14SSZm (f32 (IMPLICIT_DEF)), addr:$src)>, + (VRCP14SSrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[OptForSize]>; def : Pat<(int_x86_sse_sqrt_ss VR128X:$src), diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index 105fa0c9a98..9da2acf331c 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -150,6 +150,7 @@ class EVEX_4V : VEX_4V { bit hasEVEXPrefix = 1; } class EVEX_K { bit hasEVEX_K = 1; } class EVEX_KZ : EVEX_K { bit hasEVEX_Z = 1; } class EVEX_B { bit hasEVEX_B = 1; } +class EVEX_RC { bit hasEVEX_RC = 1; } class EVEX_V512 { bit hasEVEX_L2 = 1; bit hasVEX_L = 0; } class EVEX_CD8 { bits<2> EVEX_CD8E = !if(!eq(esize, 8), 0b00, @@ -217,6 +218,7 @@ class X86Inst opcod, Format f, ImmType i, dag outs, dag ins, bit has3DNow0F0FOpcode =0;// Wacky 3dNow! encoding? bit hasMemOp4Prefix = 0; // Same bit as VEX_W, but used for swapping operands bit hasXOP_Prefix = 0; // Does this inst require an XOP prefix? + bit hasEVEX_RC = 0; // Explicitly specified rounding control in FP instruction. // TSFlags layout should be kept in sync with X86InstrInfo.h. let TSFlags{5-0} = FormBits; @@ -247,6 +249,7 @@ class X86Inst opcod, Format f, ImmType i, dag outs, dag ins, let TSFlags{49} = has3DNow0F0FOpcode; let TSFlags{50} = hasMemOp4Prefix; let TSFlags{51} = hasXOP_Prefix; + let TSFlags{52} = hasEVEX_RC; } class PseudoI pattern> diff --git a/test/CodeGen/X86/avx512-cmp.ll b/test/CodeGen/X86/avx512-cmp.ll index 93aa8b09087..f2e7608c91a 100644 --- a/test/CodeGen/X86/avx512-cmp.ll +++ b/test/CodeGen/X86/avx512-cmp.ll @@ -56,4 +56,21 @@ define i16 @test4(i16 %a, i16 %b) { %b1 = add i16 %a, %b ret i16 %b1 +} + +; CHECK-LABEL: test5 +; CHECK: ret +define float @test5(float %p) #0 { +entry: + %cmp = fcmp oeq float %p, 0.000000e+00 + br i1 %cmp, label %return, label %if.end + +if.end: ; preds = %entry + %cmp1 = fcmp ogt float %p, 0.000000e+00 + %cond = select i1 %cmp1, float 1.000000e+00, float -1.000000e+00 + br label %return + +return: ; preds = %if.end, %entry + %retval.0 = phi float [ %cond, %if.end ], [ %p, %entry ] + ret float %retval.0 } \ No newline at end of file diff --git a/test/CodeGen/X86/avx512-intrinsics.ll b/test/CodeGen/X86/avx512-intrinsics.ll index 6c17346a1e5..8d569d66c39 100644 --- a/test/CodeGen/X86/avx512-intrinsics.ll +++ b/test/CodeGen/X86/avx512-intrinsics.ll @@ -47,32 +47,32 @@ define i16 @unpckbw_test(i16 %a0, i16 %a1) { } define <16 x float> @test_rcp_ps_512(<16 x float> %a0) { - ; CHECK: vrcp14ps - %res = call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> %a0) ; <<16 x float>> [#uses=1] + ; CHECK: vrcp14ps {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x4c,0xc0] + %res = call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1] ret <16 x float> %res } -declare <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float>) nounwind readnone +declare <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone define <8 x double> @test_rcp_pd_512(<8 x double> %a0) { - ; CHECK: vrcp14pd - %res = call <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double> %a0) ; <<8 x double>> [#uses=1] + ; CHECK: vrcp14pd {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x4c,0xc0] + %res = call <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1) ; <<8 x double>> [#uses=1] ret <8 x double> %res } -declare <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double>) nounwind readnone +declare <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double>, <8 x double>, i8) nounwind readnone define <16 x float> @test_rcp28_ps_512(<16 x float> %a0) { - ; CHECK: vrcp28ps - %res = call <16 x float> @llvm.x86.avx512.rcp28.ps.512(<16 x float> %a0) ; <<16 x float>> [#uses=1] + ; CHECK: vrcp28ps {sae}, {{.*}}encoding: [0x62,0xf2,0x7d,0x18,0xca,0xc0] + %res = call <16 x float> @llvm.x86.avx512.rcp28.ps(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8) ; <<16 x float>> [#uses=1] ret <16 x float> %res } -declare <16 x float> @llvm.x86.avx512.rcp28.ps.512(<16 x float>) nounwind readnone +declare <16 x float> @llvm.x86.avx512.rcp28.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone define <8 x double> @test_rcp28_pd_512(<8 x double> %a0) { - ; CHECK: vrcp28pd - %res = call <8 x double> @llvm.x86.avx512.rcp28.pd.512(<8 x double> %a0) ; <<8 x double>> [#uses=1] + ; CHECK: vrcp28pd {sae}, {{.*}}encoding: [0x62,0xf2,0xfd,0x18,0xca,0xc0] + %res = call <8 x double> @llvm.x86.avx512.rcp28.pd(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 8) ; <<8 x double>> [#uses=1] ret <8 x double> %res } -declare <8 x double> @llvm.x86.avx512.rcp28.pd.512(<8 x double>) nounwind readnone +declare <8 x double> @llvm.x86.avx512.rcp28.pd(<8 x double>, <8 x double>, i8, i32) nounwind readnone declare <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double>, i32, <8 x double>, i8, i32) @@ -91,46 +91,46 @@ define <16 x float> @test8(<16 x float> %a) { } define <16 x float> @test_rsqrt_ps_512(<16 x float> %a0) { - ; CHECK: vrsqrt14ps - %res = call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> %a0) ; <<16 x float>> [#uses=1] + ; CHECK: vrsqrt14ps {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x4e,0xc0] + %res = call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1] ret <16 x float> %res } -declare <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float>) nounwind readnone +declare <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone define <16 x float> @test_rsqrt28_ps_512(<16 x float> %a0) { - ; CHECK: vrsqrt28ps - %res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps.512(<16 x float> %a0) ; <<16 x float>> [#uses=1] + ; CHECK: vrsqrt28ps {sae}, {{.*}}encoding: [0x62,0xf2,0x7d,0x18,0xcc,0xc0] + %res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8) ; <<16 x float>> [#uses=1] ret <16 x float> %res } -declare <16 x float> @llvm.x86.avx512.rsqrt28.ps.512(<16 x float>) nounwind readnone +declare <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone define <4 x float> @test_rsqrt14_ss(<4 x float> %a0) { - ; CHECK: vrsqrt14ss - %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1] + ; CHECK: vrsqrt14ss {{.*}}encoding: [0x62,0xf2,0x7d,0x08,0x4f,0xc0] + %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1] ret <4 x float> %res } -declare <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float>) nounwind readnone +declare <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone define <4 x float> @test_rsqrt28_ss(<4 x float> %a0) { - ; CHECK: vrsqrt28ss - %res = call <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1] + ; CHECK: vrsqrt28ss {sae}, {{.*}}encoding: [0x62,0xf2,0x7d,0x18,0xcd,0xc0] + %res = call <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1, i32 8) ; <<4 x float>> [#uses=1] ret <4 x float> %res } -declare <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float>) nounwind readnone +declare <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone define <4 x float> @test_rcp14_ss(<4 x float> %a0) { - ; CHECK: vrcp14ss - %res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1] + ; CHECK: vrcp14ss {{.*}}encoding: [0x62,0xf2,0x7d,0x08,0x4d,0xc0] + %res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1] ret <4 x float> %res } -declare <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float>) nounwind readnone +declare <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone define <4 x float> @test_rcp28_ss(<4 x float> %a0) { - ; CHECK: vrcp28ss - %res = call <4 x float> @llvm.x86.avx512.rcp28.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1] + ; CHECK: vrcp28ss {sae}, {{.*}}encoding: [0x62,0xf2,0x7d,0x18,0xcb,0xc0] + %res = call <4 x float> @llvm.x86.avx512.rcp28.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1, i32 8) ; <<4 x float>> [#uses=1] ret <4 x float> %res } -declare <4 x float> @llvm.x86.avx512.rcp28.ss(<4 x float>) nounwind readnone +declare <4 x float> @llvm.x86.avx512.rcp28.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) { ; CHECK: vsqrtpd diff --git a/utils/TableGen/X86DisassemblerTables.cpp b/utils/TableGen/X86DisassemblerTables.cpp index 4c7066432a8..0ccfac7855e 100644 --- a/utils/TableGen/X86DisassemblerTables.cpp +++ b/utils/TableGen/X86DisassemblerTables.cpp @@ -210,12 +210,13 @@ static inline bool inheritsFrom(InstructionContext child, return inheritsFrom(child, IC_EVEX_W_XD_K) || inheritsFrom(child, IC_EVEX_L_W_XD_K); case IC_EVEX_OPSIZE_K: - return inheritsFrom(child, IC_EVEX_W_OPSIZE_K) || - inheritsFrom(child, IC_EVEX_W_OPSIZE_K); + case IC_EVEX_OPSIZE_B: + return false; case IC_EVEX_W_K: case IC_EVEX_W_XS_K: case IC_EVEX_W_XD_K: case IC_EVEX_W_OPSIZE_K: + case IC_EVEX_W_OPSIZE_B: return false; case IC_EVEX_L_K: case IC_EVEX_L_XS_K: