[AVX512] Remove masked logic op intrinsics and autoupgrade them to native IR.
llvm-svn: 275155
commit d120449666 (parent 62ac9546b0)
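The masked logic-op intrinsics bundle an AND/OR/XOR/ANDN with their own masking, so the optimizer cannot see through them. After this change the IR auto-upgrader rewrites old calls into plain vector logic plus a select on the mask. A minimal sketch of the upgrade for the 512-bit dword AND case (value names here are illustrative; the exact builder calls are in the UpgradeIntrinsicCall hunk below):

    ; Before: one opaque, target-specific intrinsic call.
    %res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passthru, i16 %mask)

    ; After autoupgrade: native IR that every optimization pass understands.
    %and = and <16 x i32> %a, %b
    %m   = bitcast i16 %mask to <16 x i1>
    %res = select <16 x i1> %m, <16 x i32> %and, <16 x i32> %passthru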
@@ -5229,81 +5229,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
                  llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
}

//Bitwise Ops
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  def int_x86_avx512_mask_pand_d_128 : GCCBuiltin<"__builtin_ia32_pandd128_mask">,
          Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
                     llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_pand_d_256 : GCCBuiltin<"__builtin_ia32_pandd256_mask">,
          Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
                     llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_pand_d_512 : GCCBuiltin<"__builtin_ia32_pandd512_mask">,
          Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
                     llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_pand_q_128 : GCCBuiltin<"__builtin_ia32_pandq128_mask">,
          Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
                     llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_pand_q_256 : GCCBuiltin<"__builtin_ia32_pandq256_mask">,
          Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
                     llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_pand_q_512 : GCCBuiltin<"__builtin_ia32_pandq512_mask">,
          Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
                     llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_pandn_d_128 : GCCBuiltin<"__builtin_ia32_pandnd128_mask">,
          Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
                     llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_pandn_d_256 : GCCBuiltin<"__builtin_ia32_pandnd256_mask">,
          Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
                     llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_pandn_d_512 : GCCBuiltin<"__builtin_ia32_pandnd512_mask">,
          Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
                     llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_pandn_q_128 : GCCBuiltin<"__builtin_ia32_pandnq128_mask">,
          Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
                     llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_pandn_q_256 : GCCBuiltin<"__builtin_ia32_pandnq256_mask">,
          Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
                     llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_pandn_q_512 : GCCBuiltin<"__builtin_ia32_pandnq512_mask">,
          Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
                     llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_por_d_128 : GCCBuiltin<"__builtin_ia32_pord128_mask">,
          Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
                     llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_por_d_256 : GCCBuiltin<"__builtin_ia32_pord256_mask">,
          Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
                     llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_por_d_512 : GCCBuiltin<"__builtin_ia32_pord512_mask">,
          Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
                     llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_por_q_128 : GCCBuiltin<"__builtin_ia32_porq128_mask">,
          Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
                     llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_por_q_256 : GCCBuiltin<"__builtin_ia32_porq256_mask">,
          Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
                     llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_por_q_512 : GCCBuiltin<"__builtin_ia32_porq512_mask">,
          Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
                     llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_pxor_d_128 : GCCBuiltin<"__builtin_ia32_pxord128_mask">,
          Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
                     llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_pxor_d_256 : GCCBuiltin<"__builtin_ia32_pxord256_mask">,
          Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
                     llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_pxor_d_512 : GCCBuiltin<"__builtin_ia32_pxord512_mask">,
          Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
                     llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_pxor_q_128 : GCCBuiltin<"__builtin_ia32_pxorq128_mask">,
          Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
                     llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_pxor_q_256 : GCCBuiltin<"__builtin_ia32_pxorq256_mask">,
          Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
                     llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_pxor_q_512 : GCCBuiltin<"__builtin_ia32_pxorq512_mask">,
          Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
                     llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
}

// Arithmetic ops
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
@@ -239,6 +239,10 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
      Name.startswith("avx512.mask.punpckh") ||
      Name.startswith("avx512.mask.unpckl.") ||
      Name.startswith("avx512.mask.unpckh.") ||
      Name.startswith("avx512.mask.pand.") ||
      Name.startswith("avx512.mask.pandn.") ||
      Name.startswith("avx512.mask.por.") ||
      Name.startswith("avx512.mask.pxor.") ||
      Name.startswith("sse41.pmovsx") ||
      Name.startswith("sse41.pmovzx") ||
      Name.startswith("avx2.pmovsx") ||
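UpgradeIntrinsicFunction1 matches against the intrinsic name with the leading "llvm.x86." already stripped, which is why the new entries are spelled "avx512.mask.pand." and friends; each prefix covers every element type and vector width at once. For example, both of the following declarations (signatures taken from the TableGen definitions deleted above) are now routed to the upgrade path:

    declare <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
    declare <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)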
@@ -1179,6 +1183,23 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {

    Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);

    Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                        CI->getArgOperand(2));
  } else if (IsX86 && Name.startswith("avx512.mask.pand.")) {
    Rep = Builder.CreateAnd(CI->getArgOperand(0), CI->getArgOperand(1));
    Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                        CI->getArgOperand(2));
  } else if (IsX86 && Name.startswith("avx512.mask.pandn.")) {
    Rep = Builder.CreateAnd(Builder.CreateNot(CI->getArgOperand(0)),
                            CI->getArgOperand(1));
    Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                        CI->getArgOperand(2));
  } else if (IsX86 && Name.startswith("avx512.mask.por.")) {
    Rep = Builder.CreateOr(CI->getArgOperand(0), CI->getArgOperand(1));
    Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                        CI->getArgOperand(2));
  } else if (IsX86 && Name.startswith("avx512.mask.pxor.")) {
    Rep = Builder.CreateXor(CI->getArgOperand(0), CI->getArgOperand(1));
    Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                        CI->getArgOperand(2));
  } else {
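The pandn case is the one subtle rewrite: VPANDN complements its first source, so the upgrade wraps operand 0 in a NOT (CreateNot emits an xor with all-ones) before the AND. A sketch of the IR this produces for the 128-bit dword variant, assuming EmitX86Select bitcasts the i8 mask to <8 x i1> and shuffles out the low four bits (value names illustrative):

    ; was: %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(%a, %b, %passthru, %mask)
    %not  = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
    %andn = and <4 x i32> %not, %b
    %m8   = bitcast i8 %mask to <8 x i1>
    %m4   = shufflevector <8 x i1> %m8, <8 x i1> %m8, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    %res  = select <4 x i1> %m4, <4 x i32> %andn, <4 x i32> %passthru

When the mask argument is a constant all-ones value (the i16 -1 and i8 -1 in the unmasked tests below), EmitX86Select returns the logic op unchanged and no select is emitted.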
@@ -875,18 +875,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
  X86_INTRINSIC_DATA(avx512_mask_paddus_w_128, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
  X86_INTRINSIC_DATA(avx512_mask_paddus_w_256, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
  X86_INTRINSIC_DATA(avx512_mask_paddus_w_512, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
  X86_INTRINSIC_DATA(avx512_mask_pand_d_128, INTR_TYPE_2OP_MASK, ISD::AND, 0),
  X86_INTRINSIC_DATA(avx512_mask_pand_d_256, INTR_TYPE_2OP_MASK, ISD::AND, 0),
  X86_INTRINSIC_DATA(avx512_mask_pand_d_512, INTR_TYPE_2OP_MASK, ISD::AND, 0),
  X86_INTRINSIC_DATA(avx512_mask_pand_q_128, INTR_TYPE_2OP_MASK, ISD::AND, 0),
  X86_INTRINSIC_DATA(avx512_mask_pand_q_256, INTR_TYPE_2OP_MASK, ISD::AND, 0),
  X86_INTRINSIC_DATA(avx512_mask_pand_q_512, INTR_TYPE_2OP_MASK, ISD::AND, 0),
  X86_INTRINSIC_DATA(avx512_mask_pandn_d_128, INTR_TYPE_2OP_MASK, X86ISD::ANDNP, 0),
  X86_INTRINSIC_DATA(avx512_mask_pandn_d_256, INTR_TYPE_2OP_MASK, X86ISD::ANDNP, 0),
  X86_INTRINSIC_DATA(avx512_mask_pandn_d_512, INTR_TYPE_2OP_MASK, X86ISD::ANDNP, 0),
  X86_INTRINSIC_DATA(avx512_mask_pandn_q_128, INTR_TYPE_2OP_MASK, X86ISD::ANDNP, 0),
  X86_INTRINSIC_DATA(avx512_mask_pandn_q_256, INTR_TYPE_2OP_MASK, X86ISD::ANDNP, 0),
  X86_INTRINSIC_DATA(avx512_mask_pandn_q_512, INTR_TYPE_2OP_MASK, X86ISD::ANDNP, 0),
  X86_INTRINSIC_DATA(avx512_mask_pavg_b_128, INTR_TYPE_2OP_MASK, X86ISD::AVG, 0),
  X86_INTRINSIC_DATA(avx512_mask_pavg_b_256, INTR_TYPE_2OP_MASK, X86ISD::AVG, 0),
  X86_INTRINSIC_DATA(avx512_mask_pavg_b_512, INTR_TYPE_2OP_MASK, X86ISD::AVG, 0),
@@ -1221,12 +1209,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
                     X86ISD::PMULUDQ, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmulu_dq_512, INTR_TYPE_2OP_MASK,
                     X86ISD::PMULUDQ, 0),
  X86_INTRINSIC_DATA(avx512_mask_por_d_128, INTR_TYPE_2OP_MASK, ISD::OR, 0),
  X86_INTRINSIC_DATA(avx512_mask_por_d_256, INTR_TYPE_2OP_MASK, ISD::OR, 0),
  X86_INTRINSIC_DATA(avx512_mask_por_d_512, INTR_TYPE_2OP_MASK, ISD::OR, 0),
  X86_INTRINSIC_DATA(avx512_mask_por_q_128, INTR_TYPE_2OP_MASK, ISD::OR, 0),
  X86_INTRINSIC_DATA(avx512_mask_por_q_256, INTR_TYPE_2OP_MASK, ISD::OR, 0),
  X86_INTRINSIC_DATA(avx512_mask_por_q_512, INTR_TYPE_2OP_MASK, ISD::OR, 0),
  X86_INTRINSIC_DATA(avx512_mask_prol_d_128, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VROTLI, 0),
  X86_INTRINSIC_DATA(avx512_mask_prol_d_256, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VROTLI, 0),
  X86_INTRINSIC_DATA(avx512_mask_prol_d_512, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VROTLI, 0),
@@ -1374,12 +1356,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
                     X86ISD::VPTERNLOG, 0),
  X86_INTRINSIC_DATA(avx512_mask_pternlog_q_512, TERLOG_OP_MASK,
                     X86ISD::VPTERNLOG, 0),
  X86_INTRINSIC_DATA(avx512_mask_pxor_d_128, INTR_TYPE_2OP_MASK, ISD::XOR, 0),
  X86_INTRINSIC_DATA(avx512_mask_pxor_d_256, INTR_TYPE_2OP_MASK, ISD::XOR, 0),
  X86_INTRINSIC_DATA(avx512_mask_pxor_d_512, INTR_TYPE_2OP_MASK, ISD::XOR, 0),
  X86_INTRINSIC_DATA(avx512_mask_pxor_q_128, INTR_TYPE_2OP_MASK, ISD::XOR, 0),
  X86_INTRINSIC_DATA(avx512_mask_pxor_q_256, INTR_TYPE_2OP_MASK, ISD::XOR, 0),
  X86_INTRINSIC_DATA(avx512_mask_pxor_q_512, INTR_TYPE_2OP_MASK, ISD::XOR, 0),
  X86_INTRINSIC_DATA(avx512_mask_range_pd_128, INTR_TYPE_3OP_MASK_RM, X86ISD::VRANGE, 0),
  X86_INTRINSIC_DATA(avx512_mask_range_pd_256, INTR_TYPE_3OP_MASK_RM, X86ISD::VRANGE, 0),
  X86_INTRINSIC_DATA(avx512_mask_range_pd_512, INTR_TYPE_3OP_MASK_RM, X86ISD::VRANGE, 0),
@@ -956,3 +956,134 @@ define void @test_storent_ps_512(<16 x float> %data, i8* %ptr) {
  ret void
}

define <16 x i32> @test_xor_epi32(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_xor_epi32:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
  ret < 16 x i32> %res
}

define <16 x i32> @test_mask_xor_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_xor_epi32:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpxord %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
  ret < 16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <16 x i32> @test_or_epi32(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_or_epi32:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpord %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
  ret < 16 x i32> %res
}

define <16 x i32> @test_mask_or_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_or_epi32:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpord %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
  ret < 16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <16 x i32> @test_and_epi32(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_and_epi32:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpandd %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
  ret < 16 x i32> %res
}

define <16 x i32> @test_mask_and_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_and_epi32:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpandd %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
  ret < 16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <8 x i64> @test_xor_epi64(<8 x i64> %a, <8 x i64> %b) {
; CHECK-LABEL: test_xor_epi64:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxorq %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
  ret < 8 x i64> %res
}

define <8 x i64> @test_mask_xor_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_xor_epi64:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpxorq %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
  ret < 8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

define <8 x i64> @test_or_epi64(<8 x i64> %a, <8 x i64> %b) {
; CHECK-LABEL: test_or_epi64:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vporq %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
  ret < 8 x i64> %res
}

define <8 x i64> @test_mask_or_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_or_epi64:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vporq %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
  ret < 8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

define <8 x i64> @test_and_epi64(<8 x i64> %a, <8 x i64> %b) {
; CHECK-LABEL: test_and_epi64:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
  ret < 8 x i64> %res
}

define <8 x i64> @test_mask_and_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_and_epi64:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpandq %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
  ret < 8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
@@ -1946,139 +1946,6 @@ define <8 x double> @test_vmulpd_mask_rz(<8 x double> %a0, <8 x double> %a1, i8
  ret <8 x double> %res
}

define <16 x i32> @test_xor_epi32(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_xor_epi32:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
  ret < 16 x i32> %res
}

define <16 x i32> @test_mask_xor_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_xor_epi32:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpxord %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
  ret < 16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <16 x i32> @test_or_epi32(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_or_epi32:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpord %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
  ret < 16 x i32> %res
}

define <16 x i32> @test_mask_or_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_or_epi32:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpord %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
  ret < 16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <16 x i32> @test_and_epi32(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_and_epi32:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpandd %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
  ret < 16 x i32> %res
}

define <16 x i32> @test_mask_and_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_and_epi32:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpandd %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
  ret < 16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <8 x i64> @test_xor_epi64(<8 x i64> %a, <8 x i64> %b) {
; CHECK-LABEL: test_xor_epi64:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxorq %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
  ret < 8 x i64> %res
}

define <8 x i64> @test_mask_xor_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_xor_epi64:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpxorq %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
  ret < 8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

define <8 x i64> @test_or_epi64(<8 x i64> %a, <8 x i64> %b) {
; CHECK-LABEL: test_or_epi64:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vporq %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
  ret < 8 x i64> %res
}

define <8 x i64> @test_mask_or_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_or_epi64:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vporq %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
  ret < 8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

define <8 x i64> @test_and_epi64(<8 x i64> %a, <8 x i64> %b) {
; CHECK-LABEL: test_and_epi64:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
  ret < 8 x i64> %res
}

define <8 x i64> @test_mask_and_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_and_epi64:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpandq %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
  ret < 8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)


define <16 x i32> @test_mask_add_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_mask_add_epi32_rr:
; CHECK:       ## BB#0:
(Two further file diffs suppressed because they are too large.)