diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td index 052fa84f521..537089d4e34 100644 --- a/include/llvm/IR/IntrinsicsX86.td +++ b/include/llvm/IR/IntrinsicsX86.td @@ -2618,6 +2618,11 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_vcvtps2ph_256 : GCCBuiltin<"__builtin_ia32_vcvtps2ph256">, Intrinsic<[llvm_v8i16_ty], [llvm_v8f32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_vcvtph2ps_512 : GCCBuiltin<"__builtin_ia32_vcvtph2ps512">, + Intrinsic<[llvm_v16f32_ty], [llvm_v16i16_ty], [IntrNoMem]>; + def int_x86_avx512_vcvtps2ph_512 : GCCBuiltin<"__builtin_ia32_vcvtps2ph512">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16f32_ty, llvm_i32_ty], + [IntrNoMem]>; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 8cf5bb43717..1be6660e258 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -417,6 +417,8 @@ def : Pat<(v16i32 (X86VBroadcast (i32 GR32:$src))), (VPBROADCASTDrZrr GR32:$src)>; def : Pat<(v8i64 (X86VBroadcast (i64 GR64:$src))), (VPBROADCASTQrZrr GR64:$src)>; +def : Pat<(v8i64 (X86VBroadcastm VK8WM:$mask, (i64 GR64:$src))), + (VPBROADCASTQrZkrr VK8WM:$mask, GR64:$src)>; multiclass avx512_int_broadcast_rm opc, string OpcodeStr, X86MemOperand x86memop, PatFrag ld_frag, @@ -433,6 +435,7 @@ multiclass avx512_int_broadcast_rm opc, string OpcodeStr, [(set DstRC:$dst, (OpVT (X86VBroadcastm KRC:$mask, (SrcVT VR128X:$src))))]>, EVEX, EVEX_KZ; + let mayLoad = 1 in { def rm : AVX5128I opc, string OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"), [(set DstRC:$dst, (OpVT (X86VBroadcastm KRC:$mask, (ld_frag addr:$src))))]>, EVEX, EVEX_KZ; + } } defm VPBROADCASTDZ : avx512_int_broadcast_rm<0x58, "vpbroadcastd", i32mem, @@ -2572,6 +2576,38 @@ let Predicates = [HasAVX512] in { (VCVTPS2PDZrm addr:$src)>; } 
+//===----------------------------------------------------------------------===//
+// Half precision conversion instructions (vcvtph2ps / vcvtps2ph, 512-bit)
+//===----------------------------------------------------------------------===//
+multiclass avx512_f16c_ph2ps<RegisterClass destRC, RegisterClass srcRC,
+                             X86MemOperand x86memop, Intrinsic Int> {
+  def rr : AVX5128I<0x13, MRMSrcReg, (outs destRC:$dst), (ins srcRC:$src),
+             "vcvtph2ps\t{$src, $dst|$dst, $src}",
+             [(set destRC:$dst, (Int srcRC:$src))]>, EVEX;
+  let neverHasSideEffects = 1, mayLoad = 1 in // rm has no pattern ([])
+  def rm : AVX5128I<0x13, MRMSrcMem, (outs destRC:$dst), (ins x86memop:$src),
+             "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, EVEX;
+}
+
+multiclass avx512_f16c_ps2ph<RegisterClass destRC, RegisterClass srcRC,
+                             X86MemOperand x86memop, Intrinsic Int> {
+  def rr : AVX512AIi8<0x1D, MRMDestReg, (outs destRC:$dst),
+               (ins srcRC:$src1, i32i8imm:$src2),
+               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+               [(set destRC:$dst, (Int srcRC:$src1, imm:$src2))]>, EVEX;
+  let neverHasSideEffects = 1, mayStore = 1 in // mr has no pattern ([])
+  def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
+               (ins x86memop:$dst, srcRC:$src1, i32i8imm:$src2),
+               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, EVEX;
+}
+
+defm VCVTPH2PSZ : avx512_f16c_ph2ps<VR512, VR256X, f256mem,
+                                    int_x86_avx512_vcvtph2ps_512>, EVEX_V512,
+                                    EVEX_CD8<32, CD8VH>;
+defm VCVTPS2PHZ : avx512_f16c_ps2ph<VR256X, VR512, f256mem,
+                                    int_x86_avx512_vcvtps2ph_512>, EVEX_V512,
+                                    EVEX_CD8<32, CD8VH>;
+
 let Defs = [EFLAGS], Predicates = [HasAVX512] in {
   defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
                                  "ucomiss{z}">, TB, EVEX, VEX_LIG,
diff --git a/test/CodeGen/X86/avx512-intrinsics.ll b/test/CodeGen/X86/avx512-intrinsics.ll
index ebf8c2440a8..5aeb070c9a6 100644
--- a/test/CodeGen/X86/avx512-intrinsics.ll
+++ b/test/CodeGen/X86/avx512-intrinsics.ll
@@ -190,3 +190,20 @@ define i64 @test_x86_avx512_cvtsd2usi64(<2 x double> %a0) {
   ret i64 %res
 }
 declare i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double>) nounwind readnone
+
+; vcvtph2ps: <16 x i16> -> <16 x float>. The intrinsic is IntrNoMem => readnone.
+define <16 x float> @test_x86_vcvtph2ps_512(<16 x i16> %a0) {
+  ; CHECK: vcvtph2ps
+  %res = call <16 x float> @llvm.x86.avx512.vcvtph2ps.512(<16 x i16> %a0)
+  ret <16 x float> %res
+}
+declare <16 x float> @llvm.x86.avx512.vcvtph2ps.512(<16 x i16>) nounwind readnone
+
+
+; vcvtps2ph: <16 x float> -> <16 x i16>; the i32 operand is matched as imm.
+define <16 x i16> @test_x86_vcvtps2ph_512(<16 x float> %a0) {
+  ; CHECK: vcvtps2ph
+  %res = call <16 x i16> @llvm.x86.avx512.vcvtps2ph.512(<16 x float> %a0, i32 0)
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx512.vcvtps2ph.512(<16 x float>, i32) nounwind readnone