mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-04-03 16:21:41 +00:00
[AVX-512] Remove masked integer add/sub/mull intrinsics and upgrade to native IR.
llvm-svn: 280611
This commit is contained in:
parent
8e8eb5eaa8
commit
7bf68ae691
@ -5701,24 +5701,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
}
|
||||
// Integer arithmetic ops
|
||||
let TargetPrefix = "x86" in {
|
||||
def int_x86_avx512_mask_padd_b_128 : GCCBuiltin<"__builtin_ia32_paddb128_mask">,
|
||||
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
|
||||
llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_padd_b_256 : GCCBuiltin<"__builtin_ia32_paddb256_mask">,
|
||||
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
|
||||
llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_padd_b_512 : GCCBuiltin<"__builtin_ia32_paddb512_mask">,
|
||||
Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty,
|
||||
llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_padd_w_128 : GCCBuiltin<"__builtin_ia32_paddw128_mask">,
|
||||
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
|
||||
llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_padd_w_256 : GCCBuiltin<"__builtin_ia32_paddw256_mask">,
|
||||
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
|
||||
llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_padd_w_512 : GCCBuiltin<"__builtin_ia32_paddw512_mask">,
|
||||
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
|
||||
llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_padds_b_128 : GCCBuiltin<"__builtin_ia32_paddsb128_mask">,
|
||||
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
|
||||
llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;
|
||||
@ -5755,42 +5737,6 @@ let TargetPrefix = "x86" in {
|
||||
def int_x86_avx512_mask_paddus_w_512 : GCCBuiltin<"__builtin_ia32_paddusw512_mask">,
|
||||
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
|
||||
llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_padd_d_128 : GCCBuiltin<"__builtin_ia32_paddd128_mask">,
|
||||
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
|
||||
llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_padd_d_256 : GCCBuiltin<"__builtin_ia32_paddd256_mask">,
|
||||
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
|
||||
llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_padd_d_512 : GCCBuiltin<"__builtin_ia32_paddd512_mask">,
|
||||
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
|
||||
llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_padd_q_128 : GCCBuiltin<"__builtin_ia32_paddq128_mask">,
|
||||
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
|
||||
llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_padd_q_256 : GCCBuiltin<"__builtin_ia32_paddq256_mask">,
|
||||
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
|
||||
llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_padd_q_512 : GCCBuiltin<"__builtin_ia32_paddq512_mask">,
|
||||
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
|
||||
llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psub_b_128 : GCCBuiltin<"__builtin_ia32_psubb128_mask">,
|
||||
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
|
||||
llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psub_b_256 : GCCBuiltin<"__builtin_ia32_psubb256_mask">,
|
||||
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
|
||||
llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psub_b_512 : GCCBuiltin<"__builtin_ia32_psubb512_mask">,
|
||||
Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty,
|
||||
llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psub_w_128 : GCCBuiltin<"__builtin_ia32_psubw128_mask">,
|
||||
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
|
||||
llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psub_w_256 : GCCBuiltin<"__builtin_ia32_psubw256_mask">,
|
||||
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
|
||||
llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psub_w_512 : GCCBuiltin<"__builtin_ia32_psubw512_mask">,
|
||||
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
|
||||
llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psubs_b_128 : GCCBuiltin<"__builtin_ia32_psubsb128_mask">,
|
||||
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
|
||||
llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;
|
||||
@ -5827,24 +5773,6 @@ let TargetPrefix = "x86" in {
|
||||
def int_x86_avx512_mask_psubus_w_512 : GCCBuiltin<"__builtin_ia32_psubusw512_mask">,
|
||||
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
|
||||
llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psub_d_128 : GCCBuiltin<"__builtin_ia32_psubd128_mask">,
|
||||
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
|
||||
llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psub_d_256 : GCCBuiltin<"__builtin_ia32_psubd256_mask">,
|
||||
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
|
||||
llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psub_d_512 : GCCBuiltin<"__builtin_ia32_psubd512_mask">,
|
||||
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
|
||||
llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psub_q_128 : GCCBuiltin<"__builtin_ia32_psubq128_mask">,
|
||||
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
|
||||
llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psub_q_256 : GCCBuiltin<"__builtin_ia32_psubq256_mask">,
|
||||
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
|
||||
llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psub_q_512 : GCCBuiltin<"__builtin_ia32_psubq512_mask">,
|
||||
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
|
||||
llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_pmulu_dq_128 : GCCBuiltin<"__builtin_ia32_pmuludq128_mask">,
|
||||
Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
|
||||
llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
@ -5863,33 +5791,6 @@ let TargetPrefix = "x86" in {
|
||||
def int_x86_avx512_mask_pmul_dq_512 : GCCBuiltin<"__builtin_ia32_pmuldq512_mask">,
|
||||
Intrinsic<[llvm_v8i64_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
|
||||
llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_pmull_w_128 : GCCBuiltin<"__builtin_ia32_pmullw128_mask">,
|
||||
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
|
||||
llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_pmull_w_256 : GCCBuiltin<"__builtin_ia32_pmullw256_mask">,
|
||||
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
|
||||
llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_pmull_w_512 : GCCBuiltin<"__builtin_ia32_pmullw512_mask">,
|
||||
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
|
||||
llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_pmull_d_128 : GCCBuiltin<"__builtin_ia32_pmulld128_mask">,
|
||||
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
|
||||
llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_pmull_d_256 : GCCBuiltin<"__builtin_ia32_pmulld256_mask">,
|
||||
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
|
||||
llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_pmull_d_512 : GCCBuiltin<"__builtin_ia32_pmulld512_mask">,
|
||||
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
|
||||
llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_pmull_q_128 : GCCBuiltin<"__builtin_ia32_pmullq128_mask">,
|
||||
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
|
||||
llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_pmull_q_256 : GCCBuiltin<"__builtin_ia32_pmullq256_mask">,
|
||||
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
|
||||
llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_pmull_q_512 : GCCBuiltin<"__builtin_ia32_pmullq512_mask">,
|
||||
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
|
||||
llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_pmulhu_w_512 : GCCBuiltin<"__builtin_ia32_pmulhuw512_mask">,
|
||||
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
|
||||
llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
|
@ -271,6 +271,9 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
|
||||
Name.startswith("avx512.mask.andn.") ||
|
||||
Name.startswith("avx512.mask.or.") ||
|
||||
Name.startswith("avx512.mask.xor.") ||
|
||||
Name.startswith("avx512.mask.padd.") ||
|
||||
Name.startswith("avx512.mask.psub.") ||
|
||||
Name.startswith("avx512.mask.pmull.") ||
|
||||
Name.startswith("sse41.pmovsx") ||
|
||||
Name.startswith("sse41.pmovzx") ||
|
||||
Name.startswith("avx2.pmovsx") ||
|
||||
@ -1234,6 +1237,18 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
||||
Rep = Builder.CreateBitCast(Rep, FTy);
|
||||
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
|
||||
CI->getArgOperand(2));
|
||||
} else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
|
||||
Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
|
||||
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
|
||||
CI->getArgOperand(2));
|
||||
} else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
|
||||
Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
|
||||
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
|
||||
CI->getArgOperand(2));
|
||||
} else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
|
||||
Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
|
||||
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
|
||||
CI->getArgOperand(2));
|
||||
} else {
|
||||
llvm_unreachable("Unknown function for CallInst upgrade.");
|
||||
}
|
||||
|
@ -834,18 +834,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
||||
X86_INTRINSIC_DATA(avx512_mask_packuswb_128, INTR_TYPE_2OP_MASK, X86ISD::PACKUS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_packuswb_256, INTR_TYPE_2OP_MASK, X86ISD::PACKUS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_packuswb_512, INTR_TYPE_2OP_MASK, X86ISD::PACKUS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_padd_b_128, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_padd_b_256, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_padd_b_512, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_padd_d_128, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_padd_d_256, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_padd_d_512, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_padd_q_128, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_padd_q_256, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_padd_q_512, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_padd_w_128, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_padd_w_256, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_padd_w_512, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_padds_b_128, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_padds_b_256, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_padds_b_512, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
|
||||
@ -1171,15 +1159,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
||||
X86_INTRINSIC_DATA(avx512_mask_pmulhu_w_128, INTR_TYPE_2OP_MASK, ISD::MULHU, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pmulhu_w_256, INTR_TYPE_2OP_MASK, ISD::MULHU, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pmulhu_w_512, INTR_TYPE_2OP_MASK, ISD::MULHU, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pmull_d_128, INTR_TYPE_2OP_MASK, ISD::MUL, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pmull_d_256, INTR_TYPE_2OP_MASK, ISD::MUL, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pmull_d_512, INTR_TYPE_2OP_MASK, ISD::MUL, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pmull_q_128, INTR_TYPE_2OP_MASK, ISD::MUL, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pmull_q_256, INTR_TYPE_2OP_MASK, ISD::MUL, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pmull_q_512, INTR_TYPE_2OP_MASK, ISD::MUL, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pmull_w_128, INTR_TYPE_2OP_MASK, ISD::MUL, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pmull_w_256, INTR_TYPE_2OP_MASK, ISD::MUL, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pmull_w_512, INTR_TYPE_2OP_MASK, ISD::MUL, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pmultishift_qb_128, INTR_TYPE_2OP_MASK,
|
||||
X86ISD::MULTISHIFT, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pmultishift_qb_256, INTR_TYPE_2OP_MASK,
|
||||
@ -1303,18 +1282,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
||||
X86_INTRINSIC_DATA(avx512_mask_psrlv4_si, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psrlv8_hi, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psrlv8_si, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psub_b_128, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psub_b_256, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psub_b_512, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psub_d_128, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psub_d_256, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psub_d_512, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psub_q_128, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psub_q_256, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psub_q_512, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psub_w_128, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psub_w_256, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psub_w_512, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psubs_b_128, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psubs_b_256, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psubs_b_512, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),
|
||||
|
@ -1079,3 +1079,524 @@ define <8 x i64> @test_mask_and_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %pass
|
||||
}
|
||||
|
||||
declare <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
|
||||
|
||||
define <16 x i32> @test_mask_add_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
|
||||
; CHECK-LABEL: test_mask_add_epi32_rr:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_add_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi32_rrk:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm2 {%k1}
|
||||
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_add_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi32_rrkz:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_add_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_add_epi32_rm:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%b = load <16 x i32>, <16 x i32>* %ptr_b
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_add_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi32_rmk:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm1 {%k1}
|
||||
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%b = load <16 x i32>, <16 x i32>* %ptr_b
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_add_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi32_rmkz:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%b = load <16 x i32>, <16 x i32>* %ptr_b
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_add_epi32_rmb(<16 x i32> %a, i32* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_add_epi32_rmb:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpaddd (%rdi){1to16}, %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%q = load i32, i32* %ptr_b
|
||||
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
|
||||
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_add_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi32_rmbk:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vpaddd (%rdi){1to16}, %zmm0, %zmm1 {%k1}
|
||||
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%q = load i32, i32* %ptr_b
|
||||
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
|
||||
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_add_epi32_rmbkz(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi32_rmbkz:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vpaddd (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%q = load i32, i32* %ptr_b
|
||||
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
|
||||
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
declare <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
|
||||
|
||||
define <16 x i32> @test_mask_sub_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
|
||||
; CHECK-LABEL: test_mask_sub_epi32_rr:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_sub_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi32_rrk:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm2 {%k1}
|
||||
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_sub_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi32_rrkz:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_sub_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_sub_epi32_rm:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpsubd (%rdi), %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%b = load <16 x i32>, <16 x i32>* %ptr_b
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_sub_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi32_rmk:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vpsubd (%rdi), %zmm0, %zmm1 {%k1}
|
||||
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%b = load <16 x i32>, <16 x i32>* %ptr_b
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_sub_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi32_rmkz:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vpsubd (%rdi), %zmm0, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%b = load <16 x i32>, <16 x i32>* %ptr_b
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_sub_epi32_rmb(<16 x i32> %a, i32* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_sub_epi32_rmb:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpsubd (%rdi){1to16}, %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%q = load i32, i32* %ptr_b
|
||||
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
|
||||
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_sub_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi32_rmbk:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vpsubd (%rdi){1to16}, %zmm0, %zmm1 {%k1}
|
||||
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%q = load i32, i32* %ptr_b
|
||||
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
|
||||
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_sub_epi32_rmbkz(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi32_rmbkz:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vpsubd (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%q = load i32, i32* %ptr_b
|
||||
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
|
||||
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
declare <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
|
||||
|
||||
define <8 x i64> @test_mask_add_epi64_rr(<8 x i64> %a, <8 x i64> %b) {
|
||||
; CHECK-LABEL: test_mask_add_epi64_rr:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_add_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi64_rrk:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm2 {%k1}
|
||||
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_add_epi64_rrkz(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi64_rrkz:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_add_epi64_rm(<8 x i64> %a, <8 x i64>* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_add_epi64_rm:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpaddq (%rdi), %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%b = load <8 x i64>, <8 x i64>* %ptr_b
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_add_epi64_rmk(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi64_rmk:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vpaddq (%rdi), %zmm0, %zmm1 {%k1}
|
||||
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%b = load <8 x i64>, <8 x i64>* %ptr_b
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_add_epi64_rmkz(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi64_rmkz:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vpaddq (%rdi), %zmm0, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%b = load <8 x i64>, <8 x i64>* %ptr_b
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_add_epi64_rmb(<8 x i64> %a, i64* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_add_epi64_rmb:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%q = load i64, i64* %ptr_b
|
||||
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
|
||||
%b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_add_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi64_rmbk:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm1 {%k1}
|
||||
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%q = load i64, i64* %ptr_b
|
||||
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
|
||||
%b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_add_epi64_rmbkz(<8 x i64> %a, i64* %ptr_b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi64_rmbkz:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%q = load i64, i64* %ptr_b
|
||||
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
|
||||
%b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
declare <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
|
||||
|
||||
define <8 x i64> @test_mask_sub_epi64_rr(<8 x i64> %a, <8 x i64> %b) {
|
||||
; CHECK-LABEL: test_mask_sub_epi64_rr:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_sub_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi64_rrk:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm2 {%k1}
|
||||
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_sub_epi64_rrkz(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi64_rrkz:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_sub_epi64_rm(<8 x i64> %a, <8 x i64>* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_sub_epi64_rm:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpsubq (%rdi), %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%b = load <8 x i64>, <8 x i64>* %ptr_b
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_sub_epi64_rmk(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi64_rmk:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vpsubq (%rdi), %zmm0, %zmm1 {%k1}
|
||||
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%b = load <8 x i64>, <8 x i64>* %ptr_b
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_sub_epi64_rmkz(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi64_rmkz:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vpsubq (%rdi), %zmm0, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%b = load <8 x i64>, <8 x i64>* %ptr_b
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_sub_epi64_rmb(<8 x i64> %a, i64* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_sub_epi64_rmb:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpsubq (%rdi){1to8}, %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%q = load i64, i64* %ptr_b
|
||||
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
|
||||
%b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_sub_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi64_rmbk:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vpsubq (%rdi){1to8}, %zmm0, %zmm1 {%k1}
|
||||
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%q = load i64, i64* %ptr_b
|
||||
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
|
||||
%b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_sub_epi64_rmbkz(<8 x i64> %a, i64* %ptr_b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi64_rmbkz:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vpsubq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%q = load i64, i64* %ptr_b
|
||||
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
|
||||
%b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
declare <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
|
||||
|
||||
define <16 x i32> @test_mask_mullo_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi32_rr_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpmulld %zmm1, %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
|
||||
ret <16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_mullo_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi32_rrk_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpmulld %zmm1, %zmm0, %zmm2 {%k1}
|
||||
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_mullo_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi32_rrkz_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpmulld %zmm1, %zmm0, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_mullo_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi32_rm_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpmulld (%rdi), %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%b = load <16 x i32>, <16 x i32>* %ptr_b
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_mullo_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi32_rmk_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vpmulld (%rdi), %zmm0, %zmm1 {%k1}
|
||||
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%b = load <16 x i32>, <16 x i32>* %ptr_b
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_mullo_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi32_rmkz_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vpmulld (%rdi), %zmm0, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%b = load <16 x i32>, <16 x i32>* %ptr_b
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_mullo_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi32_rmb_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpmulld (%rdi){1to16}, %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%q = load i32, i32* %ptr_b
|
||||
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
|
||||
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_mullo_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi32_rmbk_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vpmulld (%rdi){1to16}, %zmm0, %zmm1 {%k1}
|
||||
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%q = load i32, i32* %ptr_b
|
||||
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
|
||||
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_mullo_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi32_rmbkz_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vpmulld (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%q = load i32, i32* %ptr_b
|
||||
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
|
||||
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
declare <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
|
||||
|
||||
|
@ -1944,422 +1944,6 @@ define <8 x double> @test_vmulpd_mask_rz(<8 x double> %a0, <8 x double> %a1, i8
|
||||
ret <8 x double> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_add_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
|
||||
; CHECK-LABEL: test_mask_add_epi32_rr:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_add_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi32_rrk:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm2 {%k1}
|
||||
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_add_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi32_rrkz:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_add_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_add_epi32_rm:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%b = load <16 x i32>, <16 x i32>* %ptr_b
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_add_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi32_rmk:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm1 {%k1}
|
||||
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%b = load <16 x i32>, <16 x i32>* %ptr_b
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_add_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi32_rmkz:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%b = load <16 x i32>, <16 x i32>* %ptr_b
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_add_epi32_rmb(<16 x i32> %a, i32* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_add_epi32_rmb:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpaddd (%rdi){1to16}, %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%q = load i32, i32* %ptr_b
|
||||
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
|
||||
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_add_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi32_rmbk:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vpaddd (%rdi){1to16}, %zmm0, %zmm1 {%k1}
|
||||
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%q = load i32, i32* %ptr_b
|
||||
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
|
||||
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_add_epi32_rmbkz(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi32_rmbkz:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vpaddd (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%q = load i32, i32* %ptr_b
|
||||
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
|
||||
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
declare <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
|
||||
|
||||
define <16 x i32> @test_mask_sub_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
|
||||
; CHECK-LABEL: test_mask_sub_epi32_rr:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_sub_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi32_rrk:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm2 {%k1}
|
||||
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_sub_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi32_rrkz:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_sub_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_sub_epi32_rm:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpsubd (%rdi), %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%b = load <16 x i32>, <16 x i32>* %ptr_b
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_sub_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi32_rmk:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vpsubd (%rdi), %zmm0, %zmm1 {%k1}
|
||||
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%b = load <16 x i32>, <16 x i32>* %ptr_b
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_sub_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi32_rmkz:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vpsubd (%rdi), %zmm0, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%b = load <16 x i32>, <16 x i32>* %ptr_b
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_sub_epi32_rmb(<16 x i32> %a, i32* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_sub_epi32_rmb:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpsubd (%rdi){1to16}, %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%q = load i32, i32* %ptr_b
|
||||
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
|
||||
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_sub_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi32_rmbk:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vpsubd (%rdi){1to16}, %zmm0, %zmm1 {%k1}
|
||||
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%q = load i32, i32* %ptr_b
|
||||
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
|
||||
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_sub_epi32_rmbkz(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi32_rmbkz:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vpsubd (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%q = load i32, i32* %ptr_b
|
||||
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
|
||||
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
declare <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
|
||||
|
||||
define <8 x i64> @test_mask_add_epi64_rr(<8 x i64> %a, <8 x i64> %b) {
|
||||
; CHECK-LABEL: test_mask_add_epi64_rr:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_add_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi64_rrk:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm2 {%k1}
|
||||
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_add_epi64_rrkz(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi64_rrkz:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_add_epi64_rm(<8 x i64> %a, <8 x i64>* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_add_epi64_rm:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpaddq (%rdi), %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%b = load <8 x i64>, <8 x i64>* %ptr_b
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_add_epi64_rmk(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi64_rmk:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vpaddq (%rdi), %zmm0, %zmm1 {%k1}
|
||||
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%b = load <8 x i64>, <8 x i64>* %ptr_b
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_add_epi64_rmkz(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi64_rmkz:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vpaddq (%rdi), %zmm0, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%b = load <8 x i64>, <8 x i64>* %ptr_b
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_add_epi64_rmb(<8 x i64> %a, i64* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_add_epi64_rmb:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%q = load i64, i64* %ptr_b
|
||||
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
|
||||
%b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_add_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi64_rmbk:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm1 {%k1}
|
||||
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%q = load i64, i64* %ptr_b
|
||||
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
|
||||
%b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_add_epi64_rmbkz(<8 x i64> %a, i64* %ptr_b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi64_rmbkz:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%q = load i64, i64* %ptr_b
|
||||
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
|
||||
%b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
declare <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
|
||||
|
||||
define <8 x i64> @test_mask_sub_epi64_rr(<8 x i64> %a, <8 x i64> %b) {
|
||||
; CHECK-LABEL: test_mask_sub_epi64_rr:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_sub_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi64_rrk:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm2 {%k1}
|
||||
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_sub_epi64_rrkz(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi64_rrkz:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_sub_epi64_rm(<8 x i64> %a, <8 x i64>* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_sub_epi64_rm:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpsubq (%rdi), %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%b = load <8 x i64>, <8 x i64>* %ptr_b
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_sub_epi64_rmk(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi64_rmk:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vpsubq (%rdi), %zmm0, %zmm1 {%k1}
|
||||
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%b = load <8 x i64>, <8 x i64>* %ptr_b
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_sub_epi64_rmkz(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi64_rmkz:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vpsubq (%rdi), %zmm0, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%b = load <8 x i64>, <8 x i64>* %ptr_b
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_sub_epi64_rmb(<8 x i64> %a, i64* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_sub_epi64_rmb:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpsubq (%rdi){1to8}, %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%q = load i64, i64* %ptr_b
|
||||
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
|
||||
%b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_sub_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi64_rmbk:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vpsubq (%rdi){1to8}, %zmm0, %zmm1 {%k1}
|
||||
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%q = load i64, i64* %ptr_b
|
||||
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
|
||||
%b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_sub_epi64_rmbkz(<8 x i64> %a, i64* %ptr_b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi64_rmbkz:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vpsubq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%q = load i64, i64* %ptr_b
|
||||
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
|
||||
%b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
|
||||
ret < 8 x i64> %res
|
||||
}
|
||||
|
||||
declare <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
|
||||
|
||||
define <8 x i64> @test_mask_mul_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
|
||||
; CHECK-LABEL: test_mask_mul_epi32_rr:
|
||||
; CHECK: ## BB#0:
|
||||
@ -2574,110 +2158,6 @@ define <8 x i64> @test_mask_mul_epu32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask
|
||||
|
||||
declare <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8)
|
||||
|
||||
define <16 x i32> @test_mask_mullo_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi32_rr_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpmulld %zmm1, %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
|
||||
ret <16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_mullo_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi32_rrk_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpmulld %zmm1, %zmm0, %zmm2 {%k1}
|
||||
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_mullo_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi32_rrkz_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpmulld %zmm1, %zmm0, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_mullo_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi32_rm_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpmulld (%rdi), %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%b = load <16 x i32>, <16 x i32>* %ptr_b
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_mullo_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi32_rmk_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vpmulld (%rdi), %zmm0, %zmm1 {%k1}
|
||||
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%b = load <16 x i32>, <16 x i32>* %ptr_b
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_mullo_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi32_rmkz_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vpmulld (%rdi), %zmm0, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%b = load <16 x i32>, <16 x i32>* %ptr_b
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_mullo_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi32_rmb_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpmulld (%rdi){1to16}, %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%q = load i32, i32* %ptr_b
|
||||
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
|
||||
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_mullo_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi32_rmbk_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vpmulld (%rdi){1to16}, %zmm0, %zmm1 {%k1}
|
||||
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%q = load i32, i32* %ptr_b
|
||||
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
|
||||
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_mask_mullo_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi32_rmbkz_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vpmulld (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%q = load i32, i32* %ptr_b
|
||||
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
|
||||
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
|
||||
ret < 16 x i32> %res
|
||||
}
|
||||
|
||||
declare <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
|
||||
|
||||
define <16 x float> @test_mm512_maskz_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
; CHECK-LABEL: test_mm512_maskz_add_round_ps_rn_sae:
|
||||
; CHECK: ## BB#0:
|
||||
|
@ -623,3 +623,588 @@ define <16 x i16>@test_int_x86_avx512_mask_punpckhw_d_256(<16 x i16> %x0, <16 x
|
||||
ret <16 x i16> %res2
|
||||
}
|
||||
|
||||
define <8 x i16> @test_mask_add_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
|
||||
; CHECK-LABEL: test_mask_add_epi16_rr_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfd,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
define <8 x i16> @test_mask_add_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi16_rrk_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfd,0xd1]
|
||||
; CHECK-NEXT: vmovdqa64 %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc2]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
define <8 x i16> @test_mask_add_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi16_rrkz_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfd,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
define <8 x i16> @test_mask_add_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_add_epi16_rm_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpaddw (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfd,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <8 x i16>, <8 x i16>* %ptr_b
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
define <8 x i16> @test_mask_add_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi16_rmk_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; CHECK-NEXT: vpaddw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfd,0x0f]
|
||||
; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <8 x i16>, <8 x i16>* %ptr_b
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
define <8 x i16> @test_mask_add_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi16_rmkz_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; CHECK-NEXT: vpaddw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfd,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <8 x i16>, <8 x i16>* %ptr_b
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
declare <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
|
||||
|
||||
define <16 x i16> @test_mask_add_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
|
||||
; CHECK-LABEL: test_mask_add_epi16_rr_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfd,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
define <16 x i16> @test_mask_add_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi16_rrk_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfd,0xd1]
|
||||
; CHECK-NEXT: vmovdqa64 %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc2]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
define <16 x i16> @test_mask_add_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi16_rrkz_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfd,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
define <16 x i16> @test_mask_add_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_add_epi16_rm_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpaddw (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfd,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <16 x i16>, <16 x i16>* %ptr_b
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
define <16 x i16> @test_mask_add_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi16_rmk_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; CHECK-NEXT: vpaddw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfd,0x0f]
|
||||
; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <16 x i16>, <16 x i16>* %ptr_b
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
define <16 x i16> @test_mask_add_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi16_rmkz_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; CHECK-NEXT: vpaddw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfd,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <16 x i16>, <16 x i16>* %ptr_b
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
declare <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
|
||||
|
||||
define <8 x i16> @test_mask_sub_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
|
||||
; CHECK-LABEL: test_mask_sub_epi16_rr_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpsubw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xf9,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
define <8 x i16> @test_mask_sub_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi16_rrk_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpsubw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xf9,0xd1]
|
||||
; CHECK-NEXT: vmovdqa64 %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc2]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
define <8 x i16> @test_mask_sub_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi16_rrkz_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpsubw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xf9,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
define <8 x i16> @test_mask_sub_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_sub_epi16_rm_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpsubw (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xf9,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <8 x i16>, <8 x i16>* %ptr_b
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
define <8 x i16> @test_mask_sub_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi16_rmk_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; CHECK-NEXT: vpsubw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xf9,0x0f]
|
||||
; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <8 x i16>, <8 x i16>* %ptr_b
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
define <8 x i16> @test_mask_sub_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi16_rmkz_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; CHECK-NEXT: vpsubw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xf9,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <8 x i16>, <8 x i16>* %ptr_b
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
declare <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
|
||||
|
||||
define <16 x i16> @test_mask_sub_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
|
||||
; CHECK-LABEL: test_mask_sub_epi16_rr_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpsubw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xf9,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
define <16 x i16> @test_mask_sub_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi16_rrk_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpsubw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xf9,0xd1]
|
||||
; CHECK-NEXT: vmovdqa64 %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc2]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
define <16 x i16> @test_mask_sub_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi16_rrkz_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpsubw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xf9,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
define <16 x i16> @test_mask_sub_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_sub_epi16_rm_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpsubw (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xf9,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <16 x i16>, <16 x i16>* %ptr_b
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
define <16 x i16> @test_mask_sub_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi16_rmk_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; CHECK-NEXT: vpsubw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xf9,0x0f]
|
||||
; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <16 x i16>, <16 x i16>* %ptr_b
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
define <16 x i16> @test_mask_sub_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi16_rmkz_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; CHECK-NEXT: vpsubw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xf9,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <16 x i16>, <16 x i16>* %ptr_b
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
declare <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
|
||||
|
||||
define <32 x i16> @test_mask_add_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
|
||||
; CHECK-LABEL: test_mask_add_epi16_rr_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpaddw %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
define <32 x i16> @test_mask_add_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi16_rrk_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vpaddw %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfd,0xd1]
|
||||
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
define <32 x i16> @test_mask_add_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi16_rrkz_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vpaddw %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfd,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
define <32 x i16> @test_mask_add_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_add_epi16_rm_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpaddw (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfd,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <32 x i16>, <32 x i16>* %ptr_b
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
define <32 x i16> @test_mask_add_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi16_rmk_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
|
||||
; CHECK-NEXT: vpaddw (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfd,0x0f]
|
||||
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <32 x i16>, <32 x i16>* %ptr_b
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
define <32 x i16> @test_mask_add_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi16_rmkz_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
|
||||
; CHECK-NEXT: vpaddw (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfd,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <32 x i16>, <32 x i16>* %ptr_b
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
declare <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
|
||||
|
||||
define <32 x i16> @test_mask_sub_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
|
||||
; CHECK-LABEL: test_mask_sub_epi16_rr_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpsubw %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xf9,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
define <32 x i16> @test_mask_sub_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi16_rrk_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vpsubw %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xf9,0xd1]
|
||||
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
define <32 x i16> @test_mask_sub_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi16_rrkz_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vpsubw %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xf9,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
define <32 x i16> @test_mask_sub_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_sub_epi16_rm_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpsubw (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xf9,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <32 x i16>, <32 x i16>* %ptr_b
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
define <32 x i16> @test_mask_sub_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi16_rmk_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
|
||||
; CHECK-NEXT: vpsubw (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xf9,0x0f]
|
||||
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <32 x i16>, <32 x i16>* %ptr_b
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
define <32 x i16> @test_mask_sub_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi16_rmkz_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
|
||||
; CHECK-NEXT: vpsubw (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xf9,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <32 x i16>, <32 x i16>* %ptr_b
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
declare <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
|
||||
|
||||
define <32 x i16> @test_mask_mullo_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi16_rr_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpmullw %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xd5,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
define <32 x i16> @test_mask_mullo_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi16_rrk_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vpmullw %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xd5,0xd1]
|
||||
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
define <32 x i16> @test_mask_mullo_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi16_rrkz_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vpmullw %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xd5,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
define <32 x i16> @test_mask_mullo_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi16_rm_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpmullw (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xd5,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <32 x i16>, <32 x i16>* %ptr_b
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
define <32 x i16> @test_mask_mullo_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi16_rmk_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
|
||||
; CHECK-NEXT: vpmullw (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xd5,0x0f]
|
||||
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <32 x i16>, <32 x i16>* %ptr_b
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
define <32 x i16> @test_mask_mullo_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi16_rmkz_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
|
||||
; CHECK-NEXT: vpmullw (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xd5,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <32 x i16>, <32 x i16>* %ptr_b
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
declare <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
|
||||
|
||||
define <8 x i16> @test_mask_mullo_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi16_rr_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpmullw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xd5,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
define <8 x i16> @test_mask_mullo_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi16_rrk_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpmullw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd5,0xd1]
|
||||
; CHECK-NEXT: vmovdqa64 %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc2]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
define <8 x i16> @test_mask_mullo_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi16_rrkz_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpmullw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd5,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
define <8 x i16> @test_mask_mullo_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi16_rm_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpmullw (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xd5,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <8 x i16>, <8 x i16>* %ptr_b
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
define <8 x i16> @test_mask_mullo_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi16_rmk_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; CHECK-NEXT: vpmullw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd5,0x0f]
|
||||
; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <8 x i16>, <8 x i16>* %ptr_b
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
define <8 x i16> @test_mask_mullo_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi16_rmkz_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; CHECK-NEXT: vpmullw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd5,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <8 x i16>, <8 x i16>* %ptr_b
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
declare <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
|
||||
|
||||
define <16 x i16> @test_mask_mullo_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi16_rr_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpmullw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xd5,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
define <16 x i16> @test_mask_mullo_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi16_rrk_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpmullw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd5,0xd1]
|
||||
; CHECK-NEXT: vmovdqa64 %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc2]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
define <16 x i16> @test_mask_mullo_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi16_rrkz_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpmullw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd5,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
define <16 x i16> @test_mask_mullo_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi16_rm_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpmullw (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xd5,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <16 x i16>, <16 x i16>* %ptr_b
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
define <16 x i16> @test_mask_mullo_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi16_rmk_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; CHECK-NEXT: vpmullw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd5,0x0f]
|
||||
; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <16 x i16>, <16 x i16>* %ptr_b
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
define <16 x i16> @test_mask_mullo_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi16_rmkz_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; CHECK-NEXT: vpmullw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd5,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <16 x i16>, <16 x i16>* %ptr_b
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
declare <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
|
||||
|
||||
|
@ -1987,591 +1987,6 @@ define <4 x double> @test_mask_vfmadd256_pd_rmkz(<4 x double> %a0, <4 x double>
|
||||
%res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind
|
||||
ret <4 x double> %res
|
||||
}
|
||||
; Register-register case: all-ones mask (-1) with a zeroinitializer third operand.
; The expected assembly is a plain vpaddw on xmm registers with no mask register.
define <8 x i16> @test_mask_add_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
|
||||
; CHECK-LABEL: test_mask_add_epi16_rr_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfd,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
; Register-register case with %passThru and a variable %mask.
; The expected assembly moves the mask into k1, does a {%k1}-masked vpaddw into xmm2, then copies xmm2 to xmm0.
define <8 x i16> @test_mask_add_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi16_rrk_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfd,0xd1]
|
||||
; CHECK-NEXT: vmovdqa64 %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc2]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
; Register-register case with a variable %mask and a zeroinitializer third operand.
; The expected assembly is the zeroing form ({%k1} {z}) of vpaddw.
define <8 x i16> @test_mask_add_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi16_rrkz_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfd,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
; Memory-operand case: %b is loaded from %ptr_b and the mask is all-ones (-1).
; The expected assembly is an unmasked vpaddw that reads (%rdi) directly.
define <8 x i16> @test_mask_add_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_add_epi16_rm_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpaddw (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfd,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <8 x i16>, <8 x i16>* %ptr_b
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
; Memory-operand case with %passThru and a variable %mask.
; The expected assembly is a {%k1}-masked vpaddw reading (%rdi), then a vmovdqa64 of the result into xmm0.
define <8 x i16> @test_mask_add_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi16_rmk_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; CHECK-NEXT: vpaddw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfd,0x0f]
|
||||
; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <8 x i16>, <8 x i16>* %ptr_b
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
; Memory-operand case with a variable %mask and a zeroinitializer third operand.
; The expected assembly is the zeroing form ({%k1} {z}) of vpaddw reading (%rdi).
define <8 x i16> @test_mask_add_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi16_rmkz_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; CHECK-NEXT: vpaddw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfd,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <8 x i16>, <8 x i16>* %ptr_b
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
; Intrinsic under test: two <8 x i16> sources, a third <8 x i16> operand and an i8 mask.
declare <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
|
||||
|
||||
; Register-register case: all-ones mask (-1) with a zeroinitializer third operand.
; The expected assembly is a plain vpaddw on ymm registers with no mask register.
define <16 x i16> @test_mask_add_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
|
||||
; CHECK-LABEL: test_mask_add_epi16_rr_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfd,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
; Register-register case with %passThru and a variable %mask.
; The expected assembly moves the mask into k1, does a {%k1}-masked vpaddw into ymm2, then copies ymm2 to ymm0.
define <16 x i16> @test_mask_add_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi16_rrk_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfd,0xd1]
|
||||
; CHECK-NEXT: vmovdqa64 %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc2]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
; Register-register case with a variable %mask and a zeroinitializer third operand.
; The expected assembly is the zeroing form ({%k1} {z}) of vpaddw.
define <16 x i16> @test_mask_add_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi16_rrkz_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfd,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
; Memory-operand case: %b is loaded from %ptr_b and the mask is all-ones (-1).
; The expected assembly is an unmasked vpaddw that reads (%rdi) directly.
define <16 x i16> @test_mask_add_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_add_epi16_rm_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpaddw (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfd,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <16 x i16>, <16 x i16>* %ptr_b
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
; Memory-operand case with %passThru and a variable %mask.
; The expected assembly is a {%k1}-masked vpaddw reading (%rdi), then a vmovdqa64 of the result into ymm0.
define <16 x i16> @test_mask_add_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi16_rmk_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; CHECK-NEXT: vpaddw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfd,0x0f]
|
||||
; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <16 x i16>, <16 x i16>* %ptr_b
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
; Memory-operand case with a variable %mask and a zeroinitializer third operand.
; The expected assembly is the zeroing form ({%k1} {z}) of vpaddw reading (%rdi).
define <16 x i16> @test_mask_add_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi16_rmkz_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; CHECK-NEXT: vpaddw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfd,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <16 x i16>, <16 x i16>* %ptr_b
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
; Intrinsic under test: two <16 x i16> sources, a third <16 x i16> operand and an i16 mask.
declare <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
|
||||
|
||||
; Register-register case: all-ones mask (-1) with a zeroinitializer third operand.
; The expected assembly is a plain vpsubw on xmm registers with no mask register.
define <8 x i16> @test_mask_sub_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
|
||||
; CHECK-LABEL: test_mask_sub_epi16_rr_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpsubw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xf9,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
; Register-register case with %passThru and a variable %mask.
; The expected assembly moves the mask into k1, does a {%k1}-masked vpsubw into xmm2, then copies xmm2 to xmm0.
define <8 x i16> @test_mask_sub_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi16_rrk_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpsubw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xf9,0xd1]
|
||||
; CHECK-NEXT: vmovdqa64 %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc2]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
; Register-register case with a variable %mask and a zeroinitializer third operand.
; The expected assembly is the zeroing form ({%k1} {z}) of vpsubw.
define <8 x i16> @test_mask_sub_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi16_rrkz_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpsubw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xf9,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
; Memory-operand case: %b is loaded from %ptr_b and the mask is all-ones (-1).
; The expected assembly is an unmasked vpsubw that reads (%rdi) directly.
define <8 x i16> @test_mask_sub_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_sub_epi16_rm_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpsubw (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xf9,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <8 x i16>, <8 x i16>* %ptr_b
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
; Memory-operand case with %passThru and a variable %mask.
; The expected assembly is a {%k1}-masked vpsubw reading (%rdi), then a vmovdqa64 of the result into xmm0.
define <8 x i16> @test_mask_sub_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi16_rmk_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; CHECK-NEXT: vpsubw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xf9,0x0f]
|
||||
; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <8 x i16>, <8 x i16>* %ptr_b
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
; Memory-operand case with a variable %mask and a zeroinitializer third operand.
; The expected assembly is the zeroing form ({%k1} {z}) of vpsubw reading (%rdi).
define <8 x i16> @test_mask_sub_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi16_rmkz_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; CHECK-NEXT: vpsubw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xf9,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <8 x i16>, <8 x i16>* %ptr_b
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
; Intrinsic under test: two <8 x i16> sources, a third <8 x i16> operand and an i8 mask.
declare <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
|
||||
|
||||
; Register-register case: all-ones mask (-1) with a zeroinitializer third operand.
; The expected assembly is a plain vpsubw on ymm registers with no mask register.
define <16 x i16> @test_mask_sub_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
|
||||
; CHECK-LABEL: test_mask_sub_epi16_rr_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpsubw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xf9,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
; Register-register case with %passThru and a variable %mask.
; The expected assembly moves the mask into k1, does a {%k1}-masked vpsubw into ymm2, then copies ymm2 to ymm0.
define <16 x i16> @test_mask_sub_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi16_rrk_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpsubw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xf9,0xd1]
|
||||
; CHECK-NEXT: vmovdqa64 %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc2]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
; Register-register case with a variable %mask and a zeroinitializer third operand.
; The expected assembly is the zeroing form ({%k1} {z}) of vpsubw.
define <16 x i16> @test_mask_sub_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi16_rrkz_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpsubw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xf9,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
; Memory-operand case: %b is loaded from %ptr_b and the mask is all-ones (-1).
; The expected assembly is an unmasked vpsubw that reads (%rdi) directly.
define <16 x i16> @test_mask_sub_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_sub_epi16_rm_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpsubw (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xf9,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <16 x i16>, <16 x i16>* %ptr_b
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
; Memory-operand case with %passThru and a variable %mask.
; The expected assembly is a {%k1}-masked vpsubw reading (%rdi), then a vmovdqa64 of the result into ymm0.
define <16 x i16> @test_mask_sub_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi16_rmk_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; CHECK-NEXT: vpsubw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xf9,0x0f]
|
||||
; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <16 x i16>, <16 x i16>* %ptr_b
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
; Memory-operand case with a variable %mask and a zeroinitializer third operand.
; The expected assembly is the zeroing form ({%k1} {z}) of vpsubw reading (%rdi).
define <16 x i16> @test_mask_sub_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi16_rmkz_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; CHECK-NEXT: vpsubw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xf9,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <16 x i16>, <16 x i16>* %ptr_b
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
; Intrinsic under test: two <16 x i16> sources, a third <16 x i16> operand and an i16 mask.
declare <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
|
||||
|
||||
; Register-register case: all-ones mask (-1) with a zeroinitializer third operand.
; The expected assembly is a plain vpaddw on zmm registers with no mask register.
define <32 x i16> @test_mask_add_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
|
||||
; CHECK-LABEL: test_mask_add_epi16_rr_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpaddw %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
; Register-register case with %passThru and a variable i32 %mask.
; The expected assembly moves the mask into k1 with kmovd, does a {%k1}-masked vpaddw into zmm2, then copies zmm2 to zmm0.
define <32 x i16> @test_mask_add_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi16_rrk_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vpaddw %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfd,0xd1]
|
||||
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
; Register-register case with a variable i32 %mask and a zeroinitializer third operand.
; The expected assembly is the zeroing form ({%k1} {z}) of vpaddw.
define <32 x i16> @test_mask_add_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi16_rrkz_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vpaddw %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfd,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
; Memory-operand case: %b is loaded from %ptr_b and the mask is all-ones (-1).
; The expected assembly is an unmasked vpaddw that reads (%rdi) directly.
define <32 x i16> @test_mask_add_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_add_epi16_rm_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpaddw (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfd,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <32 x i16>, <32 x i16>* %ptr_b
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
; Memory-operand case with %passThru and a variable i32 %mask.
; The expected assembly is a {%k1}-masked vpaddw reading (%rdi), then a vmovdqa64 of the result into zmm0.
define <32 x i16> @test_mask_add_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi16_rmk_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
|
||||
; CHECK-NEXT: vpaddw (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfd,0x0f]
|
||||
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <32 x i16>, <32 x i16>* %ptr_b
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
; Memory-operand case with a variable i32 %mask and a zeroinitializer third operand.
; The expected assembly is the zeroing form ({%k1} {z}) of vpaddw reading (%rdi).
define <32 x i16> @test_mask_add_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi16_rmkz_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
|
||||
; CHECK-NEXT: vpaddw (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfd,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <32 x i16>, <32 x i16>* %ptr_b
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
; Intrinsic under test: two <32 x i16> sources, a third <32 x i16> operand and an i32 mask.
declare <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
|
||||
|
||||
; Register-register case: all-ones mask (-1) with a zeroinitializer third operand.
; The expected assembly is a plain vpsubw on zmm registers with no mask register.
define <32 x i16> @test_mask_sub_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
|
||||
; CHECK-LABEL: test_mask_sub_epi16_rr_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpsubw %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xf9,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
; Register-register case with %passThru and a variable i32 %mask.
; The expected assembly moves the mask into k1 with kmovd, does a {%k1}-masked vpsubw into zmm2, then copies zmm2 to zmm0.
define <32 x i16> @test_mask_sub_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi16_rrk_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vpsubw %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xf9,0xd1]
|
||||
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
; Register-register case with a variable i32 %mask and a zeroinitializer third operand.
; The expected assembly is the zeroing form ({%k1} {z}) of vpsubw.
define <32 x i16> @test_mask_sub_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi16_rrkz_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vpsubw %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xf9,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
; Memory-operand case: %b is loaded from %ptr_b and the mask is all-ones (-1).
; The expected assembly is an unmasked vpsubw that reads (%rdi) directly.
define <32 x i16> @test_mask_sub_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_sub_epi16_rm_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpsubw (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xf9,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <32 x i16>, <32 x i16>* %ptr_b
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
; Memory-operand case with %passThru and a variable i32 %mask.
; The expected assembly is a {%k1}-masked vpsubw reading (%rdi), then a vmovdqa64 of the result into zmm0.
define <32 x i16> @test_mask_sub_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi16_rmk_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
|
||||
; CHECK-NEXT: vpsubw (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xf9,0x0f]
|
||||
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <32 x i16>, <32 x i16>* %ptr_b
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
; Memory-operand case with a variable i32 %mask and a zeroinitializer third operand.
; The expected assembly is the zeroing form ({%k1} {z}) of vpsubw reading (%rdi).
define <32 x i16> @test_mask_sub_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi16_rmkz_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
|
||||
; CHECK-NEXT: vpsubw (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xf9,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <32 x i16>, <32 x i16>* %ptr_b
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
; Intrinsic under test: two <32 x i16> sources, a third <32 x i16> operand and an i32 mask.
declare <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
|
||||
|
||||
; Register-register case: all-ones mask (-1) with a zeroinitializer third operand.
; The expected assembly is a plain vpmullw on zmm registers with no mask register.
define <32 x i16> @test_mask_mullo_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi16_rr_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpmullw %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xd5,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
; Register-register case with %passThru and a variable i32 %mask.
; The expected assembly moves the mask into k1 with kmovd, does a {%k1}-masked vpmullw into zmm2, then copies zmm2 to zmm0.
define <32 x i16> @test_mask_mullo_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi16_rrk_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vpmullw %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xd5,0xd1]
|
||||
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
; Register-register case with a variable i32 %mask and a zeroinitializer third operand.
; The expected assembly is the zeroing form ({%k1} {z}) of vpmullw.
define <32 x i16> @test_mask_mullo_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi16_rrkz_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vpmullw %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xd5,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
; Memory-operand case: %b is loaded from %ptr_b and the mask is all-ones (-1).
; The expected assembly is an unmasked vpmullw that reads (%rdi) directly.
define <32 x i16> @test_mask_mullo_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi16_rm_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpmullw (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xd5,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <32 x i16>, <32 x i16>* %ptr_b
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
; Memory-operand case with %passThru and a variable i32 %mask.
; The expected assembly is a {%k1}-masked vpmullw reading (%rdi), then a vmovdqa64 of the result into zmm0.
define <32 x i16> @test_mask_mullo_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi16_rmk_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
|
||||
; CHECK-NEXT: vpmullw (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xd5,0x0f]
|
||||
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <32 x i16>, <32 x i16>* %ptr_b
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
; Memory-operand case with a variable i32 %mask and a zeroinitializer third operand.
; The expected assembly is the zeroing form ({%k1} {z}) of vpmullw reading (%rdi).
define <32 x i16> @test_mask_mullo_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi16_rmkz_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
|
||||
; CHECK-NEXT: vpmullw (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xd5,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <32 x i16>, <32 x i16>* %ptr_b
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
; Intrinsic under test: two <32 x i16> sources, a third <32 x i16> operand and an i32 mask.
declare <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
|
||||
|
||||
; Register-register case: all-ones mask (-1) with a zeroinitializer third operand.
; The expected assembly is a plain vpmullw on xmm registers with no mask register.
define <8 x i16> @test_mask_mullo_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi16_rr_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpmullw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xd5,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
; Register-register case with %passThru and a variable %mask.
; The expected assembly moves the mask into k1, does a {%k1}-masked vpmullw into xmm2, then copies xmm2 to xmm0.
define <8 x i16> @test_mask_mullo_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi16_rrk_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpmullw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd5,0xd1]
|
||||
; CHECK-NEXT: vmovdqa64 %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc2]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
; Register-register case with a variable %mask and a zeroinitializer third operand.
; The expected assembly is the zeroing form ({%k1} {z}) of vpmullw.
define <8 x i16> @test_mask_mullo_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi16_rrkz_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpmullw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd5,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
; Memory-operand case: %b is loaded from %ptr_b and the mask is all-ones (-1).
; The expected assembly is an unmasked vpmullw that reads (%rdi) directly.
define <8 x i16> @test_mask_mullo_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi16_rm_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpmullw (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xd5,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <8 x i16>, <8 x i16>* %ptr_b
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
; Memory-operand case with %passThru and a variable %mask.
; The expected assembly is a {%k1}-masked vpmullw reading (%rdi), then a vmovdqa64 of the result into xmm0.
define <8 x i16> @test_mask_mullo_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi16_rmk_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; CHECK-NEXT: vpmullw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd5,0x0f]
|
||||
; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <8 x i16>, <8 x i16>* %ptr_b
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
; Memory-operand case with a variable %mask and a zeroinitializer third operand.
; The expected assembly is the zeroing form ({%k1} {z}) of vpmullw reading (%rdi).
define <8 x i16> @test_mask_mullo_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi16_rmkz_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; CHECK-NEXT: vpmullw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd5,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <8 x i16>, <8 x i16>* %ptr_b
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
; Intrinsic under test: two <8 x i16> sources, a third <8 x i16> operand and an i8 mask.
declare <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
|
||||
|
||||
; Register-register case: all-ones mask (-1) with a zeroinitializer third operand.
; The expected assembly is a plain vpmullw on ymm registers with no mask register.
define <16 x i16> @test_mask_mullo_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi16_rr_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpmullw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xd5,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
; Register-register case with %passThru and a variable %mask.
; The expected assembly moves the mask into k1, does a {%k1}-masked vpmullw into ymm2, then copies ymm2 to ymm0.
define <16 x i16> @test_mask_mullo_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi16_rrk_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpmullw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd5,0xd1]
|
||||
; CHECK-NEXT: vmovdqa64 %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc2]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
; Register-register case with a variable %mask and a zeroinitializer third operand.
; The expected assembly is the zeroing form ({%k1} {z}) of vpmullw.
define <16 x i16> @test_mask_mullo_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi16_rrkz_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpmullw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd5,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
; Memory-operand case: %b is loaded from %ptr_b and the mask is all-ones (-1).
; The expected assembly is an unmasked vpmullw that reads (%rdi) directly.
define <16 x i16> @test_mask_mullo_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi16_rm_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpmullw (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xd5,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <16 x i16>, <16 x i16>* %ptr_b
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
; Memory-operand case with %passThru and a variable %mask.
; The expected assembly is a {%k1}-masked vpmullw reading (%rdi), then a vmovdqa64 of the result into ymm0.
define <16 x i16> @test_mask_mullo_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi16_rmk_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; CHECK-NEXT: vpmullw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd5,0x0f]
|
||||
; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <16 x i16>, <16 x i16>* %ptr_b
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
; Memory-operand case with a variable %mask and a zeroinitializer third operand.
; The expected assembly is the zeroing form ({%k1} {z}) of vpmullw reading (%rdi).
define <16 x i16> @test_mask_mullo_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi16_rmkz_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; CHECK-NEXT: vpmullw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd5,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <16 x i16>, <16 x i16>* %ptr_b
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
; Intrinsic under test: two <16 x i16> sources, a third <16 x i16> operand and an i16 mask.
declare <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
|
||||
|
||||
|
||||
define <8 x i16> @test_mask_packs_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
|
||||
; CHECK-LABEL: test_mask_packs_epi32_rr_128:
|
||||
|
@ -1249,3 +1249,314 @@ define <16 x float> @test_mask_xor_ps_rmbkz_512(<16 x float> %a, float* %ptr_b,
|
||||
|
||||
declare <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)
|
||||
|
||||
define <8 x i64> @test_mask_mullo_epi64_rr_512(<8 x i64> %a, <8 x i64> %b) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi64_rr_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpmullq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x40,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
|
||||
ret <8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_mullo_epi64_rrk_512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi64_rrk_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
|
||||
; CHECK-NEXT: vpmullq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x40,0xd1]
|
||||
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
|
||||
ret <8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_mullo_epi64_rrkz_512(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi64_rrkz_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
|
||||
; CHECK-NEXT: vpmullq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x40,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
|
||||
ret <8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_mullo_epi64_rm_512(<8 x i64> %a, <8 x i64>* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi64_rm_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpmullq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x40,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <8 x i64>, <8 x i64>* %ptr_b
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
|
||||
ret <8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_mullo_epi64_rmk_512(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi64_rmk_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
|
||||
; CHECK-NEXT: vpmullq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x40,0x0f]
|
||||
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <8 x i64>, <8 x i64>* %ptr_b
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
|
||||
ret <8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_mullo_epi64_rmkz_512(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi64_rmkz_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
|
||||
; CHECK-NEXT: vpmullq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x40,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <8 x i64>, <8 x i64>* %ptr_b
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
|
||||
ret <8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_mullo_epi64_rmb_512(<8 x i64> %a, i64* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi64_rmb_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpmullq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x58,0x40,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%q = load i64, i64* %ptr_b
|
||||
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
|
||||
%b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
|
||||
ret <8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_mullo_epi64_rmbk_512(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi64_rmbk_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
|
||||
; CHECK-NEXT: vpmullq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x59,0x40,0x0f]
|
||||
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%q = load i64, i64* %ptr_b
|
||||
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
|
||||
%b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
|
||||
ret <8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_mullo_epi64_rmbkz_512(<8 x i64> %a, i64* %ptr_b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi64_rmbkz_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
|
||||
; CHECK-NEXT: vpmullq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xd9,0x40,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%q = load i64, i64* %ptr_b
|
||||
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
|
||||
%b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
|
||||
ret <8 x i64> %res
|
||||
}
|
||||
declare <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
|
||||
|
||||
define <4 x i64> @test_mask_mullo_epi64_rr_256(<4 x i64> %a, <4 x i64> %b) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi64_rr_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpmullq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x40,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
|
||||
ret <4 x i64> %res
|
||||
}
|
||||
|
||||
define <4 x i64> @test_mask_mullo_epi64_rrk_256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi64_rrk_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
|
||||
; CHECK-NEXT: vpmullq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x40,0xd1]
|
||||
; CHECK-NEXT: vmovdqa64 %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc2]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
|
||||
ret <4 x i64> %res
|
||||
}
|
||||
|
||||
define <4 x i64> @test_mask_mullo_epi64_rrkz_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi64_rrkz_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
|
||||
; CHECK-NEXT: vpmullq %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x40,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
|
||||
ret <4 x i64> %res
|
||||
}
|
||||
|
||||
define <4 x i64> @test_mask_mullo_epi64_rm_256(<4 x i64> %a, <4 x i64>* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi64_rm_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpmullq (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x40,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <4 x i64>, <4 x i64>* %ptr_b
|
||||
%res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
|
||||
ret <4 x i64> %res
|
||||
}
|
||||
|
||||
define <4 x i64> @test_mask_mullo_epi64_rmk_256(<4 x i64> %a, <4 x i64>* %ptr_b, <4 x i64> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi64_rmk_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
|
||||
; CHECK-NEXT: vpmullq (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x40,0x0f]
|
||||
; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <4 x i64>, <4 x i64>* %ptr_b
|
||||
%res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
|
||||
ret <4 x i64> %res
|
||||
}
|
||||
|
||||
define <4 x i64> @test_mask_mullo_epi64_rmkz_256(<4 x i64> %a, <4 x i64>* %ptr_b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi64_rmkz_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
|
||||
; CHECK-NEXT: vpmullq (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x40,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <4 x i64>, <4 x i64>* %ptr_b
|
||||
%res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
|
||||
ret <4 x i64> %res
|
||||
}
|
||||
|
||||
define <4 x i64> @test_mask_mullo_epi64_rmb_256(<4 x i64> %a, i64* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi64_rmb_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpmullq (%rdi){1to4}, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x38,0x40,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%q = load i64, i64* %ptr_b
|
||||
%vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
|
||||
%b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
|
||||
%res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
|
||||
ret <4 x i64> %res
|
||||
}
|
||||
|
||||
define <4 x i64> @test_mask_mullo_epi64_rmbk_256(<4 x i64> %a, i64* %ptr_b, <4 x i64> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi64_rmbk_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
|
||||
; CHECK-NEXT: vpmullq (%rdi){1to4}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x39,0x40,0x0f]
|
||||
; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%q = load i64, i64* %ptr_b
|
||||
%vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
|
||||
%b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
|
||||
%res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
|
||||
ret <4 x i64> %res
|
||||
}
|
||||
|
||||
define <4 x i64> @test_mask_mullo_epi64_rmbkz_256(<4 x i64> %a, i64* %ptr_b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi64_rmbkz_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
|
||||
; CHECK-NEXT: vpmullq (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xb9,0x40,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%q = load i64, i64* %ptr_b
|
||||
%vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
|
||||
%b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
|
||||
%res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
|
||||
ret <4 x i64> %res
|
||||
}
|
||||
|
||||
declare <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
|
||||
|
||||
define <2 x i64> @test_mask_mullo_epi64_rr_128(<2 x i64> %a, <2 x i64> %b) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi64_rr_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpmullq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x40,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
|
||||
define <2 x i64> @test_mask_mullo_epi64_rrk_128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi64_rrk_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
|
||||
; CHECK-NEXT: vpmullq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x40,0xd1]
|
||||
; CHECK-NEXT: vmovdqa64 %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc2]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
|
||||
define <2 x i64> @test_mask_mullo_epi64_rrkz_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi64_rrkz_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
|
||||
; CHECK-NEXT: vpmullq %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x40,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
|
||||
define <2 x i64> @test_mask_mullo_epi64_rm_128(<2 x i64> %a, <2 x i64>* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi64_rm_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpmullq (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x40,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <2 x i64>, <2 x i64>* %ptr_b
|
||||
%res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
|
||||
define <2 x i64> @test_mask_mullo_epi64_rmk_128(<2 x i64> %a, <2 x i64>* %ptr_b, <2 x i64> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi64_rmk_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
|
||||
; CHECK-NEXT: vpmullq (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x40,0x0f]
|
||||
; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <2 x i64>, <2 x i64>* %ptr_b
|
||||
%res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
|
||||
define <2 x i64> @test_mask_mullo_epi64_rmkz_128(<2 x i64> %a, <2 x i64>* %ptr_b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi64_rmkz_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
|
||||
; CHECK-NEXT: vpmullq (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x40,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <2 x i64>, <2 x i64>* %ptr_b
|
||||
%res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
|
||||
define <2 x i64> @test_mask_mullo_epi64_rmb_128(<2 x i64> %a, i64* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi64_rmb_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpmullq (%rdi){1to2}, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x18,0x40,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%q = load i64, i64* %ptr_b
|
||||
%vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
|
||||
%b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
|
||||
%res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
|
||||
define <2 x i64> @test_mask_mullo_epi64_rmbk_128(<2 x i64> %a, i64* %ptr_b, <2 x i64> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi64_rmbk_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
|
||||
; CHECK-NEXT: vpmullq (%rdi){1to2}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x19,0x40,0x0f]
|
||||
; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%q = load i64, i64* %ptr_b
|
||||
%vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
|
||||
%b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
|
||||
%res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
|
||||
define <2 x i64> @test_mask_mullo_epi64_rmbkz_128(<2 x i64> %a, i64* %ptr_b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi64_rmbkz_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
|
||||
; CHECK-NEXT: vpmullq (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x99,0x40,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%q = load i64, i64* %ptr_b
|
||||
%vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
|
||||
%b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
|
||||
%res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
|
||||
declare <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
|
||||
|
||||
|
@ -1,317 +1,6 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512dq -mattr=+avx512vl --show-mc-encoding| FileCheck %s
|
||||
|
||||
define <8 x i64> @test_mask_mullo_epi64_rr_512(<8 x i64> %a, <8 x i64> %b) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi64_rr_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpmullq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x40,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
|
||||
ret <8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_mullo_epi64_rrk_512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi64_rrk_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
|
||||
; CHECK-NEXT: vpmullq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x40,0xd1]
|
||||
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
|
||||
ret <8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_mullo_epi64_rrkz_512(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi64_rrkz_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
|
||||
; CHECK-NEXT: vpmullq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x40,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
|
||||
ret <8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_mullo_epi64_rm_512(<8 x i64> %a, <8 x i64>* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi64_rm_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpmullq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x40,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <8 x i64>, <8 x i64>* %ptr_b
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
|
||||
ret <8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_mullo_epi64_rmk_512(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi64_rmk_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
|
||||
; CHECK-NEXT: vpmullq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x40,0x0f]
|
||||
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <8 x i64>, <8 x i64>* %ptr_b
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
|
||||
ret <8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_mullo_epi64_rmkz_512(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi64_rmkz_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
|
||||
; CHECK-NEXT: vpmullq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x40,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <8 x i64>, <8 x i64>* %ptr_b
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
|
||||
ret <8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_mullo_epi64_rmb_512(<8 x i64> %a, i64* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi64_rmb_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpmullq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x58,0x40,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%q = load i64, i64* %ptr_b
|
||||
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
|
||||
%b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
|
||||
ret <8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_mullo_epi64_rmbk_512(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi64_rmbk_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
|
||||
; CHECK-NEXT: vpmullq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x59,0x40,0x0f]
|
||||
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%q = load i64, i64* %ptr_b
|
||||
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
|
||||
%b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
|
||||
ret <8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mask_mullo_epi64_rmbkz_512(<8 x i64> %a, i64* %ptr_b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi64_rmbkz_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
|
||||
; CHECK-NEXT: vpmullq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xd9,0x40,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%q = load i64, i64* %ptr_b
|
||||
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
|
||||
%b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
|
||||
ret <8 x i64> %res
|
||||
}
|
||||
declare <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
|
||||
|
||||
define <4 x i64> @test_mask_mullo_epi64_rr_256(<4 x i64> %a, <4 x i64> %b) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi64_rr_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpmullq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x40,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
|
||||
ret <4 x i64> %res
|
||||
}
|
||||
|
||||
define <4 x i64> @test_mask_mullo_epi64_rrk_256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi64_rrk_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
|
||||
; CHECK-NEXT: vpmullq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x40,0xd1]
|
||||
; CHECK-NEXT: vmovdqa64 %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc2]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
|
||||
ret <4 x i64> %res
|
||||
}
|
||||
|
||||
define <4 x i64> @test_mask_mullo_epi64_rrkz_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi64_rrkz_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
|
||||
; CHECK-NEXT: vpmullq %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x40,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
|
||||
ret <4 x i64> %res
|
||||
}
|
||||
|
||||
define <4 x i64> @test_mask_mullo_epi64_rm_256(<4 x i64> %a, <4 x i64>* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi64_rm_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpmullq (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x40,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <4 x i64>, <4 x i64>* %ptr_b
|
||||
%res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
|
||||
ret <4 x i64> %res
|
||||
}
|
||||
|
||||
define <4 x i64> @test_mask_mullo_epi64_rmk_256(<4 x i64> %a, <4 x i64>* %ptr_b, <4 x i64> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi64_rmk_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
|
||||
; CHECK-NEXT: vpmullq (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x40,0x0f]
|
||||
; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <4 x i64>, <4 x i64>* %ptr_b
|
||||
%res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
|
||||
ret <4 x i64> %res
|
||||
}
|
||||
|
||||
define <4 x i64> @test_mask_mullo_epi64_rmkz_256(<4 x i64> %a, <4 x i64>* %ptr_b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi64_rmkz_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
|
||||
; CHECK-NEXT: vpmullq (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x40,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <4 x i64>, <4 x i64>* %ptr_b
|
||||
%res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
|
||||
ret <4 x i64> %res
|
||||
}
|
||||
|
||||
define <4 x i64> @test_mask_mullo_epi64_rmb_256(<4 x i64> %a, i64* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi64_rmb_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpmullq (%rdi){1to4}, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x38,0x40,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%q = load i64, i64* %ptr_b
|
||||
%vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
|
||||
%b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
|
||||
%res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
|
||||
ret <4 x i64> %res
|
||||
}
|
||||
|
||||
define <4 x i64> @test_mask_mullo_epi64_rmbk_256(<4 x i64> %a, i64* %ptr_b, <4 x i64> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi64_rmbk_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
|
||||
; CHECK-NEXT: vpmullq (%rdi){1to4}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x39,0x40,0x0f]
|
||||
; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%q = load i64, i64* %ptr_b
|
||||
%vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
|
||||
%b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
|
||||
%res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
|
||||
ret <4 x i64> %res
|
||||
}
|
||||
|
||||
define <4 x i64> @test_mask_mullo_epi64_rmbkz_256(<4 x i64> %a, i64* %ptr_b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi64_rmbkz_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
|
||||
; CHECK-NEXT: vpmullq (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xb9,0x40,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%q = load i64, i64* %ptr_b
|
||||
%vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
|
||||
%b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
|
||||
%res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
|
||||
ret <4 x i64> %res
|
||||
}
|
||||
|
||||
declare <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
|
||||
|
||||
define <2 x i64> @test_mask_mullo_epi64_rr_128(<2 x i64> %a, <2 x i64> %b) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi64_rr_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpmullq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x40,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
|
||||
define <2 x i64> @test_mask_mullo_epi64_rrk_128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi64_rrk_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
|
||||
; CHECK-NEXT: vpmullq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x40,0xd1]
|
||||
; CHECK-NEXT: vmovdqa64 %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc2]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
|
||||
define <2 x i64> @test_mask_mullo_epi64_rrkz_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi64_rrkz_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
|
||||
; CHECK-NEXT: vpmullq %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x40,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
|
||||
define <2 x i64> @test_mask_mullo_epi64_rm_128(<2 x i64> %a, <2 x i64>* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi64_rm_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpmullq (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x40,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <2 x i64>, <2 x i64>* %ptr_b
|
||||
%res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
|
||||
define <2 x i64> @test_mask_mullo_epi64_rmk_128(<2 x i64> %a, <2 x i64>* %ptr_b, <2 x i64> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi64_rmk_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
|
||||
; CHECK-NEXT: vpmullq (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x40,0x0f]
|
||||
; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <2 x i64>, <2 x i64>* %ptr_b
|
||||
%res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
|
||||
define <2 x i64> @test_mask_mullo_epi64_rmkz_128(<2 x i64> %a, <2 x i64>* %ptr_b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi64_rmkz_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
|
||||
; CHECK-NEXT: vpmullq (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x40,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <2 x i64>, <2 x i64>* %ptr_b
|
||||
%res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
|
||||
define <2 x i64> @test_mask_mullo_epi64_rmb_128(<2 x i64> %a, i64* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi64_rmb_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpmullq (%rdi){1to2}, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x18,0x40,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%q = load i64, i64* %ptr_b
|
||||
%vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
|
||||
%b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
|
||||
%res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
|
||||
define <2 x i64> @test_mask_mullo_epi64_rmbk_128(<2 x i64> %a, i64* %ptr_b, <2 x i64> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi64_rmbk_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
|
||||
; CHECK-NEXT: vpmullq (%rdi){1to2}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x19,0x40,0x0f]
|
||||
; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%q = load i64, i64* %ptr_b
|
||||
%vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
|
||||
%b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
|
||||
%res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
|
||||
define <2 x i64> @test_mask_mullo_epi64_rmbkz_128(<2 x i64> %a, i64* %ptr_b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_mullo_epi64_rmbkz_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
|
||||
; CHECK-NEXT: vpmullq (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x99,0x40,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%q = load i64, i64* %ptr_b
|
||||
%vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
|
||||
%b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
|
||||
%res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
|
||||
declare <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
|
||||
|
||||
declare <2 x i64> @llvm.x86.avx512.mask.cvtpd2qq.128(<2 x double>, <2 x i64>, i8)
|
||||
|
||||
define <2 x i64>@test_int_x86_avx512_mask_cvt_pd2qq_128(<2 x double> %x0, <2 x i64> %x1, i8 %x2) {
|
||||
|
@ -2518,3 +2518,420 @@ define <4 x i64> @test_mask_andnot_epi64_rmbkz_256(<4 x i64> %a, i64* %ptr_b, i8
|
||||
}
|
||||
|
||||
declare <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
|
||||
|
||||
define <4 x i32> @test_mask_add_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
|
||||
; CHECK-LABEL: test_mask_add_epi32_rr_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
|
||||
define <4 x i32> @test_mask_add_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi32_rrk_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfe,0xd1]
|
||||
; CHECK-NEXT: vmovdqa64 %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc2]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
|
||||
define <4 x i32> @test_mask_add_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi32_rrkz_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfe,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
|
||||
define <4 x i32> @test_mask_add_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_add_epi32_rm_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpaddd (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <4 x i32>, <4 x i32>* %ptr_b
|
||||
%res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
|
||||
define <4 x i32> @test_mask_add_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi32_rmk_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; CHECK-NEXT: vpaddd (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfe,0x0f]
|
||||
; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <4 x i32>, <4 x i32>* %ptr_b
|
||||
%res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
|
||||
define <4 x i32> @test_mask_add_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi32_rmkz_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; CHECK-NEXT: vpaddd (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfe,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <4 x i32>, <4 x i32>* %ptr_b
|
||||
%res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
|
||||
define <4 x i32> @test_mask_add_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_add_epi32_rmb_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpaddd (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xfe,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%q = load i32, i32* %ptr_b
|
||||
%vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
|
||||
%b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
|
||||
%res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
|
||||
define <4 x i32> @test_mask_add_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi32_rmbk_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; CHECK-NEXT: vpaddd (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xfe,0x0f]
|
||||
; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%q = load i32, i32* %ptr_b
|
||||
%vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
|
||||
%b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
|
||||
%res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
|
||||
define <4 x i32> @test_mask_add_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi32_rmbkz_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; CHECK-NEXT: vpaddd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xfe,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%q = load i32, i32* %ptr_b
|
||||
%vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
|
||||
%b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
|
||||
%res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
|
||||
declare <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
|
||||
|
||||
define <4 x i32> @test_mask_sub_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
|
||||
; CHECK-LABEL: test_mask_sub_epi32_rr_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpsubd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfa,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
|
||||
define <4 x i32> @test_mask_sub_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi32_rrk_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpsubd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfa,0xd1]
|
||||
; CHECK-NEXT: vmovdqa64 %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc2]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
|
||||
define <4 x i32> @test_mask_sub_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi32_rrkz_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpsubd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfa,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
|
||||
define <4 x i32> @test_mask_sub_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_sub_epi32_rm_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpsubd (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfa,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <4 x i32>, <4 x i32>* %ptr_b
|
||||
%res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
|
||||
define <4 x i32> @test_mask_sub_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi32_rmk_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; CHECK-NEXT: vpsubd (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfa,0x0f]
|
||||
; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <4 x i32>, <4 x i32>* %ptr_b
|
||||
%res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
|
||||
define <4 x i32> @test_mask_sub_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi32_rmkz_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; CHECK-NEXT: vpsubd (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfa,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <4 x i32>, <4 x i32>* %ptr_b
|
||||
%res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
|
||||
define <4 x i32> @test_mask_sub_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_sub_epi32_rmb_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpsubd (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xfa,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%q = load i32, i32* %ptr_b
|
||||
%vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
|
||||
%b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
|
||||
%res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
|
||||
define <4 x i32> @test_mask_sub_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi32_rmbk_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; CHECK-NEXT: vpsubd (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xfa,0x0f]
|
||||
; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%q = load i32, i32* %ptr_b
|
||||
%vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
|
||||
%b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
|
||||
%res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
|
||||
define <4 x i32> @test_mask_sub_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi32_rmbkz_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; CHECK-NEXT: vpsubd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xfa,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%q = load i32, i32* %ptr_b
|
||||
%vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
|
||||
%b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
|
||||
%res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
|
||||
declare <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
|
||||
|
||||
define <8 x i32> @test_mask_sub_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
|
||||
; CHECK-LABEL: test_mask_sub_epi32_rr_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpsubd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfa,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
|
||||
define <8 x i32> @test_mask_sub_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi32_rrk_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpsubd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfa,0xd1]
|
||||
; CHECK-NEXT: vmovdqa64 %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc2]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
|
||||
define <8 x i32> @test_mask_sub_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi32_rrkz_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpsubd %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfa,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
|
||||
define <8 x i32> @test_mask_sub_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_sub_epi32_rm_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpsubd (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfa,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <8 x i32>, <8 x i32>* %ptr_b
|
||||
%res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
|
||||
define <8 x i32> @test_mask_sub_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi32_rmk_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; CHECK-NEXT: vpsubd (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfa,0x0f]
|
||||
; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <8 x i32>, <8 x i32>* %ptr_b
|
||||
%res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
|
||||
define <8 x i32> @test_mask_sub_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi32_rmkz_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; CHECK-NEXT: vpsubd (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfa,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <8 x i32>, <8 x i32>* %ptr_b
|
||||
%res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
|
||||
define <8 x i32> @test_mask_sub_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_sub_epi32_rmb_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpsubd (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xfa,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%q = load i32, i32* %ptr_b
|
||||
%vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
|
||||
%b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
|
||||
%res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
|
||||
define <8 x i32> @test_mask_sub_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi32_rmbk_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; CHECK-NEXT: vpsubd (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xfa,0x0f]
|
||||
; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%q = load i32, i32* %ptr_b
|
||||
%vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
|
||||
%b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
|
||||
%res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
|
||||
define <8 x i32> @test_mask_sub_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi32_rmbkz_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; CHECK-NEXT: vpsubd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xfa,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%q = load i32, i32* %ptr_b
|
||||
%vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
|
||||
%b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
|
||||
%res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
|
||||
declare <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
|
||||
|
||||
define <8 x i32> @test_mask_add_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
|
||||
; CHECK-LABEL: test_mask_add_epi32_rr_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfe,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
|
||||
define <8 x i32> @test_mask_add_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi32_rrk_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfe,0xd1]
|
||||
; CHECK-NEXT: vmovdqa64 %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc2]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
|
||||
define <8 x i32> @test_mask_add_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi32_rrkz_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfe,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
|
||||
define <8 x i32> @test_mask_add_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_add_epi32_rm_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpaddd (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfe,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <8 x i32>, <8 x i32>* %ptr_b
|
||||
%res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
|
||||
define <8 x i32> @test_mask_add_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi32_rmk_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; CHECK-NEXT: vpaddd (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfe,0x0f]
|
||||
; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <8 x i32>, <8 x i32>* %ptr_b
|
||||
%res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
|
||||
define <8 x i32> @test_mask_add_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi32_rmkz_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; CHECK-NEXT: vpaddd (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfe,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <8 x i32>, <8 x i32>* %ptr_b
|
||||
%res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
|
||||
define <8 x i32> @test_mask_add_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_add_epi32_rmb_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpaddd (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xfe,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%q = load i32, i32* %ptr_b
|
||||
%vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
|
||||
%b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
|
||||
%res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
|
||||
define <8 x i32> @test_mask_add_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi32_rmbk_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; CHECK-NEXT: vpaddd (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xfe,0x0f]
|
||||
; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%q = load i32, i32* %ptr_b
|
||||
%vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
|
||||
%b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
|
||||
%res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
|
||||
define <8 x i32> @test_mask_add_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi32_rmbkz_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; CHECK-NEXT: vpaddd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xfe,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%q = load i32, i32* %ptr_b
|
||||
%vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
|
||||
%b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
|
||||
%res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
|
||||
declare <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
|
||||
|
||||
|
@ -1439,422 +1439,6 @@ define < 4 x i64> @test_mask_mul_epu32_rmbkz_256(< 8 x i32> %a, i64* %ptr_b, i8
|
||||
|
||||
declare < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32>, < 8 x i32>, < 4 x i64>, i8)
|
||||
|
||||
define <4 x i32> @test_mask_add_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
|
||||
; CHECK-LABEL: test_mask_add_epi32_rr_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
|
||||
define <4 x i32> @test_mask_add_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi32_rrk_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfe,0xd1]
|
||||
; CHECK-NEXT: vmovdqa64 %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc2]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
|
||||
define <4 x i32> @test_mask_add_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi32_rrkz_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfe,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
|
||||
define <4 x i32> @test_mask_add_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_add_epi32_rm_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpaddd (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <4 x i32>, <4 x i32>* %ptr_b
|
||||
%res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
|
||||
define <4 x i32> @test_mask_add_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi32_rmk_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; CHECK-NEXT: vpaddd (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfe,0x0f]
|
||||
; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <4 x i32>, <4 x i32>* %ptr_b
|
||||
%res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
|
||||
define <4 x i32> @test_mask_add_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi32_rmkz_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; CHECK-NEXT: vpaddd (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfe,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <4 x i32>, <4 x i32>* %ptr_b
|
||||
%res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
|
||||
define <4 x i32> @test_mask_add_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_add_epi32_rmb_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpaddd (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xfe,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%q = load i32, i32* %ptr_b
|
||||
%vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
|
||||
%b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
|
||||
%res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
|
||||
define <4 x i32> @test_mask_add_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi32_rmbk_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; CHECK-NEXT: vpaddd (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xfe,0x0f]
|
||||
; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%q = load i32, i32* %ptr_b
|
||||
%vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
|
||||
%b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
|
||||
%res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
|
||||
define <4 x i32> @test_mask_add_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi32_rmbkz_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; CHECK-NEXT: vpaddd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xfe,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%q = load i32, i32* %ptr_b
|
||||
%vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
|
||||
%b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
|
||||
%res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
|
||||
declare <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
|
||||
|
||||
define <4 x i32> @test_mask_sub_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
|
||||
; CHECK-LABEL: test_mask_sub_epi32_rr_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpsubd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfa,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
|
||||
define <4 x i32> @test_mask_sub_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi32_rrk_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpsubd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfa,0xd1]
|
||||
; CHECK-NEXT: vmovdqa64 %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc2]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
|
||||
define <4 x i32> @test_mask_sub_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi32_rrkz_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpsubd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfa,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
|
||||
define <4 x i32> @test_mask_sub_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_sub_epi32_rm_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpsubd (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfa,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <4 x i32>, <4 x i32>* %ptr_b
|
||||
%res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
|
||||
define <4 x i32> @test_mask_sub_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi32_rmk_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; CHECK-NEXT: vpsubd (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfa,0x0f]
|
||||
; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <4 x i32>, <4 x i32>* %ptr_b
|
||||
%res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
|
||||
define <4 x i32> @test_mask_sub_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi32_rmkz_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; CHECK-NEXT: vpsubd (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfa,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <4 x i32>, <4 x i32>* %ptr_b
|
||||
%res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
|
||||
define <4 x i32> @test_mask_sub_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_sub_epi32_rmb_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpsubd (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xfa,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%q = load i32, i32* %ptr_b
|
||||
%vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
|
||||
%b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
|
||||
%res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
|
||||
define <4 x i32> @test_mask_sub_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi32_rmbk_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; CHECK-NEXT: vpsubd (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xfa,0x0f]
|
||||
; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%q = load i32, i32* %ptr_b
|
||||
%vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
|
||||
%b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
|
||||
%res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
|
||||
define <4 x i32> @test_mask_sub_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi32_rmbkz_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; CHECK-NEXT: vpsubd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xfa,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%q = load i32, i32* %ptr_b
|
||||
%vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
|
||||
%b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
|
||||
%res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
|
||||
declare <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
|
||||
|
||||
define <8 x i32> @test_mask_sub_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
|
||||
; CHECK-LABEL: test_mask_sub_epi32_rr_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpsubd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfa,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
|
||||
define <8 x i32> @test_mask_sub_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi32_rrk_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpsubd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfa,0xd1]
|
||||
; CHECK-NEXT: vmovdqa64 %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc2]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
|
||||
define <8 x i32> @test_mask_sub_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi32_rrkz_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpsubd %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfa,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
|
||||
define <8 x i32> @test_mask_sub_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_sub_epi32_rm_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpsubd (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfa,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <8 x i32>, <8 x i32>* %ptr_b
|
||||
%res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
|
||||
define <8 x i32> @test_mask_sub_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi32_rmk_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; CHECK-NEXT: vpsubd (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfa,0x0f]
|
||||
; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <8 x i32>, <8 x i32>* %ptr_b
|
||||
%res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
|
||||
define <8 x i32> @test_mask_sub_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi32_rmkz_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; CHECK-NEXT: vpsubd (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfa,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <8 x i32>, <8 x i32>* %ptr_b
|
||||
%res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
|
||||
define <8 x i32> @test_mask_sub_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_sub_epi32_rmb_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpsubd (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xfa,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%q = load i32, i32* %ptr_b
|
||||
%vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
|
||||
%b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
|
||||
%res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
|
||||
define <8 x i32> @test_mask_sub_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi32_rmbk_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; CHECK-NEXT: vpsubd (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xfa,0x0f]
|
||||
; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%q = load i32, i32* %ptr_b
|
||||
%vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
|
||||
%b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
|
||||
%res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
|
||||
define <8 x i32> @test_mask_sub_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_sub_epi32_rmbkz_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; CHECK-NEXT: vpsubd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xfa,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%q = load i32, i32* %ptr_b
|
||||
%vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
|
||||
%b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
|
||||
%res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
|
||||
declare <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
|
||||
|
||||
define <8 x i32> @test_mask_add_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
|
||||
; CHECK-LABEL: test_mask_add_epi32_rr_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfe,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
|
||||
define <8 x i32> @test_mask_add_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi32_rrk_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfe,0xd1]
|
||||
; CHECK-NEXT: vmovdqa64 %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc2]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
|
||||
define <8 x i32> @test_mask_add_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi32_rrkz_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfe,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
|
||||
define <8 x i32> @test_mask_add_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_add_epi32_rm_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpaddd (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfe,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <8 x i32>, <8 x i32>* %ptr_b
|
||||
%res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
|
||||
define <8 x i32> @test_mask_add_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi32_rmk_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; CHECK-NEXT: vpaddd (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfe,0x0f]
|
||||
; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <8 x i32>, <8 x i32>* %ptr_b
|
||||
%res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
|
||||
define <8 x i32> @test_mask_add_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi32_rmkz_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; CHECK-NEXT: vpaddd (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfe,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%b = load <8 x i32>, <8 x i32>* %ptr_b
|
||||
%res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
|
||||
define <8 x i32> @test_mask_add_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
|
||||
; CHECK-LABEL: test_mask_add_epi32_rmb_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpaddd (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xfe,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%q = load i32, i32* %ptr_b
|
||||
%vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
|
||||
%b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
|
||||
%res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
|
||||
define <8 x i32> @test_mask_add_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi32_rmbk_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; CHECK-NEXT: vpaddd (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xfe,0x0f]
|
||||
; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%q = load i32, i32* %ptr_b
|
||||
%vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
|
||||
%b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
|
||||
%res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
|
||||
define <8 x i32> @test_mask_add_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_add_epi32_rmbkz_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; CHECK-NEXT: vpaddd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xfe,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%q = load i32, i32* %ptr_b
|
||||
%vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
|
||||
%b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
|
||||
%res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
|
||||
declare <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
|
||||
|
||||
define i8 @test_cmpps_256(<8 x float> %a, <8 x float> %b) {
|
||||
; CHECK-LABEL: test_cmpps_256:
|
||||
; CHECK: ## BB#0:
|
||||
@ -5948,9 +5532,9 @@ define <8 x i32>@test_int_x86_avx512_mask_psrav8_si_const() {
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vmovdqa32 {{.*#+}} ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]
|
||||
; CHECK-NEXT: ## encoding: [0x62,0xf1,0x7d,0x28,0x6f,0x05,A,A,A,A]
|
||||
; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI371_0-4, kind: reloc_riprel_4byte
|
||||
; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI335_0-4, kind: reloc_riprel_4byte
|
||||
; CHECK-NEXT: vpsravd {{.*}}(%rip), %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x46,0x05,A,A,A,A]
|
||||
; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI371_1-4, kind: reloc_riprel_4byte
|
||||
; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI335_1-4, kind: reloc_riprel_4byte
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32> <i32 2, i32 9, i32 -12, i32 23, i32 -26, i32 37, i32 -40, i32 51>, <8 x i32> <i32 1, i32 18, i32 35, i32 52, i32 69, i32 15, i32 32, i32 49>, <8 x i32> zeroinitializer, i8 -1)
|
||||
ret <8 x i32> %res
|
||||
@ -5981,9 +5565,9 @@ define <2 x i64>@test_int_x86_avx512_mask_psrav_q_128_const(i8 %x3) {
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vmovdqa64 {{.*#+}} xmm0 = [2,18446744073709551607]
|
||||
; CHECK-NEXT: ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0x05,A,A,A,A]
|
||||
; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI373_0-4, kind: reloc_riprel_4byte
|
||||
; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI337_0-4, kind: reloc_riprel_4byte
|
||||
; CHECK-NEXT: vpsravq {{.*}}(%rip), %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x46,0x05,A,A,A,A]
|
||||
; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI373_1-4, kind: reloc_riprel_4byte
|
||||
; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI337_1-4, kind: reloc_riprel_4byte
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64> <i64 2, i64 -9>, <2 x i64> <i64 1, i64 90>, <2 x i64> zeroinitializer, i8 -1)
|
||||
ret <2 x i64> %res
|
||||
|
Loading…
x
Reference in New Issue
Block a user