mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-01 17:28:21 +00:00
[AVX512] Intrinsics for vextract*x4
This adds the Pat<>'s for the intrinsics. These are necessary because we don't lower these intrinsics to SDNodes but match them directly. See the rationale in the previous commit. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@219362 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
e868005a27
commit
fbd0e464dd
@ -1791,6 +1791,23 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
def int_x86_avx2_vinserti128 : GCCBuiltin<"__builtin_ia32_insert128i256">,
|
||||
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
|
||||
llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
|
||||
// Masked vextractf32x4 intrinsic: result is v4f32; operands are
// (v16f32, i8, v4f32, i8); declared IntrNoMem.
// NOTE(review): operand roles look like (source, immediate index,
// pass-through, mask) judging by how the tests call it -- confirm.
def int_x86_avx512_mask_vextractf32x4_512 :
|
||||
GCCBuiltin<"__builtin_ia32_extractf32x4_mask">,
|
||||
Intrinsic<[llvm_v4f32_ty], [llvm_v16f32_ty, llvm_i8_ty,
|
||||
llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
// Masked vextracti32x4 intrinsic: result is v4i32; operands are
// (v16i32, i8, v4i32, i8); declared IntrNoMem.
// NOTE(review): operand roles look like (source, immediate index,
// pass-through, mask) judging by how the tests call it -- confirm.
def int_x86_avx512_mask_vextracti32x4_512 :
|
||||
GCCBuiltin<"__builtin_ia32_extracti32x4_mask">,
|
||||
Intrinsic<[llvm_v4i32_ty], [llvm_v16i32_ty, llvm_i8_ty,
|
||||
llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
// Masked vextractf64x4 intrinsic: result is v4f64; operands are
// (v8f64, i8, v4f64, i8); declared IntrNoMem.
// NOTE(review): operand roles look like (source, immediate index,
// pass-through, mask) judging by how the tests call it -- confirm.
def int_x86_avx512_mask_vextractf64x4_512 :
|
||||
GCCBuiltin<"__builtin_ia32_extractf64x4_mask">,
|
||||
Intrinsic<[llvm_v4f64_ty], [llvm_v8f64_ty, llvm_i8_ty,
|
||||
llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
// Masked vextracti64x4 intrinsic: result is v4i64; operands are
// (v8i64, i8, v4i64, i8); declared IntrNoMem.
// NOTE(review): operand roles look like (source, immediate index,
// pass-through, mask) judging by how the tests call it -- confirm.
def int_x86_avx512_mask_vextracti64x4_512 :
|
||||
GCCBuiltin<"__builtin_ia32_extracti64x4_mask">,
|
||||
Intrinsic<[llvm_v4i64_ty], [llvm_v8i64_ty, llvm_i8_ty,
|
||||
llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
}
|
||||
|
||||
// Conditional load ops
|
||||
|
@ -449,6 +449,29 @@ multiclass vextract_for_size<int Opcode,
|
||||
def : Pat<(AltTo.VT (extract_subvector (AltFrom.VT VR512:$src), (iPTR 0))),
|
||||
(AltTo.VT
|
||||
(EXTRACT_SUBREG (AltFrom.VT VR512:$src), AltTo.SubRegIdx))>;
|
||||
|
||||
// Intrinsic call with masking.
|
||||
def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName #
|
||||
"x4_512")
|
||||
VR512:$src1, (iPTR imm:$idx), To.RC:$src0, GR8:$mask),
|
||||
(!cast<Instruction>(NAME # To.EltSize # "x4rrk") To.RC:$src0,
|
||||
(v4i1 (COPY_TO_REGCLASS GR8:$mask, VK4WM)),
|
||||
VR512:$src1, imm:$idx)>;
|
||||
|
||||
// Intrinsic call with zero-masking.
|
||||
def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName #
|
||||
"x4_512")
|
||||
VR512:$src1, (iPTR imm:$idx), To.ImmAllZerosV, GR8:$mask),
|
||||
(!cast<Instruction>(NAME # To.EltSize # "x4rrkz")
|
||||
(v4i1 (COPY_TO_REGCLASS GR8:$mask, VK4WM)),
|
||||
VR512:$src1, imm:$idx)>;
|
||||
|
||||
// Intrinsic call without masking.
|
||||
def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName #
|
||||
"x4_512")
|
||||
VR512:$src1, (iPTR imm:$idx), To.ImmAllZerosV, (i8 -1)),
|
||||
(!cast<Instruction>(NAME # To.EltSize # "x4rr")
|
||||
VR512:$src1, imm:$idx)>;
|
||||
}
|
||||
|
||||
multiclass vextract_for_type<ValueType EltVT32, int Opcode32,
|
||||
|
@ -949,3 +949,39 @@ define <8 x i8> @test_mask_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
|
||||
}
|
||||
|
||||
declare i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone
|
||||
|
||||
; Verifies the merge-masked form: %b is passed as the third intrinsic operand
; and %mask as the fourth, and the expected assembly carries a {%k1} write-mask
; with index 2 as the immediate.
define <4 x float> @test_mask_vextractf32x4(<4 x float> %b, <16 x float> %a, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_vextractf32x4:
|
||||
; CHECK: vextractf32x4 $2, %zmm1, %xmm0 {%k1}
|
||||
%res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float> %a, i8 2, <4 x float> %b, i8 %mask)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
declare <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float>, i8, <4 x float>, i8)
|
||||
|
||||
; Verifies the merge-masked 64x4 integer form: %b is passed as the third
; intrinsic operand and %mask as the fourth, and the expected assembly carries
; a {%k1} write-mask.
; NOTE(review): the index operand is 2, but a 256-bit extract from a 512-bit
; source presumably has only two valid positions (0 and 1) -- confirm that an
; index of 2 is intended here.
define <4 x i64> @test_mask_vextracti64x4(<4 x i64> %b, <8 x i64> %a, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_vextracti64x4:
|
||||
; CHECK: vextracti64x4 $2, %zmm1, %ymm0 {%k1}
|
||||
%res = call <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64> %a, i8 2, <4 x i64> %b, i8 %mask)
|
||||
ret <4 x i64> %res
|
||||
}
|
||||
|
||||
declare <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64>, i8, <4 x i64>, i8)
|
||||
|
||||
; Verifies the zero-masked form: the third intrinsic operand is zeroinitializer
; and the mask is the variable %mask, so the expected assembly carries both
; {%k1} and {z}.
define <4 x i32> @test_maskz_vextracti32x4(<16 x i32> %a, i8 %mask) {
|
||||
; CHECK-LABEL: test_maskz_vextracti32x4:
|
||||
; CHECK: vextracti32x4 $2, %zmm0, %xmm0 {%k1} {z}
|
||||
%res = call <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32> %a, i8 2, <4 x i32> zeroinitializer, i8 %mask)
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
|
||||
declare <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32>, i8, <4 x i32>, i8)
|
||||
|
||||
; Verifies the unmasked form: the mask operand is the constant i8 -1 and the
; third operand is zeroinitializer, so the expected assembly has no {%k}
; annotation (the trailing "##" anchors the match just before the asm comment).
; NOTE(review): the index operand is 2, but a 256-bit extract from a 512-bit
; source presumably has only two valid positions (0 and 1) -- confirm that an
; index of 2 is intended here.
define <4 x double> @test_vextractf64x4(<8 x double> %a) {
|
||||
; CHECK-LABEL: test_vextractf64x4:
|
||||
; CHECK: vextractf64x4 $2, %zmm0, %ymm0 ##
|
||||
%res = call <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double> %a, i8 2, <4 x double> zeroinitializer, i8 -1)
|
||||
ret <4 x double> %res
|
||||
}
|
||||
|
||||
declare <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double>, i8, <4 x double>, i8)
|
||||
|
Loading…
Reference in New Issue
Block a user