[x86][icelake]BITALG

2/3
vpshufbitqmb encoding
3/3
vpshufbitqmb intrinsics
Differential Revision: https://reviews.llvm.org/D40222

llvm-svn: 318904
This commit is contained in:
Coby Tayree 2017-11-23 11:15:50 +00:00
parent 87d48e130a
commit be83415853
9 changed files with 273 additions and 0 deletions

View File

@ -1527,6 +1527,25 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
[IntrArgMemOnly]>;
}
// BITALG bits shuffle (vpshufbitqmb).
// One masked intrinsic per vector width (128/256/512 bits). Each takes two
// byte vectors (v16i8 / v32i8 / v64i8) plus an integer carrying one bit per
// byte element (i16 / i32 / i64), and returns an integer of that same width.
// The third operand is the "mask" named in the intrinsic and builtin names.
// All three are marked IntrNoMem (declared as not accessing memory) and are
// exposed to front ends through the listed __builtin_ia32_* names.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// 128-bit form: 16 byte elements, 16-bit mask and result.
def int_x86_avx512_mask_vpshufbitqmb_128 :
GCCBuiltin<"__builtin_ia32_vpshufbitqmb128_mask">,
Intrinsic<[llvm_i16_ty],
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty],
[IntrNoMem]>;
// 256-bit form: 32 byte elements, 32-bit mask and result.
def int_x86_avx512_mask_vpshufbitqmb_256 :
GCCBuiltin<"__builtin_ia32_vpshufbitqmb256_mask">,
Intrinsic<[llvm_i32_ty],
[llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty],
[IntrNoMem]>;
// 512-bit form: 64 byte elements, 64-bit mask and result.
def int_x86_avx512_mask_vpshufbitqmb_512 :
GCCBuiltin<"__builtin_ia32_vpshufbitqmb512_mask">,
Intrinsic<[llvm_i64_ty],
[llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty],
[IntrNoMem]>;
}
//===----------------------------------------------------------------------===//
// AVX2

View File

@ -25267,6 +25267,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VPDPBUSDS: return "X86ISD::VPDPBUSDS";
case X86ISD::VPDPWSSD: return "X86ISD::VPDPWSSD";
case X86ISD::VPDPWSSDS: return "X86ISD::VPDPWSSDS";
case X86ISD::VPSHUFBITQMB: return "X86ISD::VPSHUFBITQMB";
}
return nullptr;
}

View File

@ -519,6 +519,9 @@ namespace llvm {
COMPRESS,
EXPAND,
// Bits shuffle
VPSHUFBITQMB,
// Convert Unsigned/Integer to Floating-Point Value with rounding mode.
SINT_TO_FP_RND, UINT_TO_FP_RND,
SCALAR_SINT_TO_FP_RND, SCALAR_UINT_TO_FP_RND,

View File

@ -10215,3 +10215,30 @@ defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop,
avx512vl_i16_info, HasBITALG>,
avx512_unary_lowering<ctpop, avx512vl_i16_info, HasBITALG>, VEX_W;
// Defines the register-register (rr) and register-memory (rm) forms of
// vpshufbitqmb for a single vector type VTI. Both forms use opcode 0x8F,
// write their result to VTI.KRC, and select the X86Vpshufbitqmb node; they
// are built with AVX512_maskable_cmp (defined elsewhere), which presumably
// also supplies the write-masked variants -- confirm against its definition.
multiclass VPSHUFBITQMB_rm<X86VectorVTInfo VTI> {
// rr: both sources are vector registers.
defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
(ins VTI.RC:$src1, VTI.RC:$src2),
"vpshufbitqmb",
"$src2, $src1", "$src1, $src2",
(X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
(VTI.VT VTI.RC:$src2))>, EVEX_4V, T8PD;
// rm: second source is a memory operand, loaded through VTI.LdFrag and
// bitconverted to VTI.VT. EVEX_CD8<8, CD8VF> is added only on this form.
defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
(ins VTI.RC:$src1, VTI.MemOp:$src2),
"vpshufbitqmb",
"$src2, $src1", "$src1, $src2",
(X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
(VTI.VT (bitconvert (VTI.LdFrag addr:$src2))))>,
EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD;
}
// Instantiates VPSHUFBITQMB_rm once per vector width of VTI:
//  - Z    (512-bit, EVEX_V512) requires HasBITALG only;
//  - Z256 and Z128 (EVEX_V256 / EVEX_V128) additionally require HasVLX.
multiclass VPSHUFBITQMB_common<AVX512VLVectorVTInfo VTI> {
let Predicates = [HasBITALG] in
defm Z : VPSHUFBITQMB_rm<VTI.info512>, EVEX_V512;
let Predicates = [HasBITALG, HasVLX] in {
defm Z256 : VPSHUFBITQMB_rm<VTI.info256>, EVEX_V256;
defm Z128 : VPSHUFBITQMB_rm<VTI.info128>, EVEX_V128;
}
}
// Instantiate the vpshufbitqmb instruction family for the i8 vector types
// (avx512vl_i8_info), producing the Z / Z256 / Z128 rr and rm records.
defm VPSHUFBITQMB : VPSHUFBITQMB_common<avx512vl_i8_info>;

View File

@ -575,6 +575,13 @@ def X86compress: SDNode<"X86ISD::COMPRESS", SDTypeProfile<1, 1,
// X86ISD::EXPAND: one result and one operand; the operand is a vector and
// the result has the same type as the operand. No node properties are set.
def X86expand : SDNode<"X86ISD::EXPAND", SDTypeProfile<1, 1,
[SDTCisSameAs<0, 1>, SDTCisVec<1>]>, []>;
// vpshufbitqmb
// X86ISD::VPSHUFBITQMB: one result, two operands. Both operands are vectors
// of the same type; the result is a vector of i1 with the same number of
// elements as the first operand (one result bit per source element).
def X86Vpshufbitqmb : SDNode<"X86ISD::VPSHUFBITQMB",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisSameAs<1,2>,
SDTCVecEltisVT<0,i1>,
SDTCisSameNumEltsAs<0,1>]>>;
// Type profile with one result and three operands: the result is a
// floating-point vector, operand 1 has the same type as the result,
// operand 2 is an integer type, and operand 3 is i32.
// NOTE(review): the name suggests operand 3 is a rounding-mode immediate --
// confirm against the nodes that use this profile.
def SDTintToFPRound: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisFP<0>,
SDTCisSameAs<0,1>, SDTCisInt<2>,
SDTCisVT<3, i32>]>;

View File

@ -1292,6 +1292,13 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_vpshrdv_w_256, FMA_OP_MASK, X86ISD::VSHRDV, 0),
X86_INTRINSIC_DATA(avx512_mask_vpshrdv_w_512, FMA_OP_MASK, X86ISD::VSHRDV, 0),
X86_INTRINSIC_DATA(avx512_mask_vpshufbitqmb_128, CMP_MASK,
X86ISD::VPSHUFBITQMB, 0),
X86_INTRINSIC_DATA(avx512_mask_vpshufbitqmb_256, CMP_MASK,
X86ISD::VPSHUFBITQMB, 0),
X86_INTRINSIC_DATA(avx512_mask_vpshufbitqmb_512, CMP_MASK,
X86ISD::VPSHUFBITQMB, 0),
X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_128, FMA_OP_MASK3, ISD::FMA, 0),
X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_256, FMA_OP_MASK3, ISD::FMA, 0),
X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_512, FMA_OP_MASK3, ISD::FMA,

View File

@ -0,0 +1,41 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512bitalg,+avx512vl | FileCheck %s
; 128-bit masked intrinsic. The expected assembly moves the i16 mask argument
; from %edi into %k1, applies it as {%k1} on an xmm vpshufbitqmb that writes
; %k0, and moves %k0 back into %eax for the i16 return value.
declare i16 @llvm.x86.avx512.mask.vpshufbitqmb.128(<16 x i8> %a, <16 x i8> %b, i16 %mask)
define i16 @test_vpshufbitqmb_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
; CHECK-LABEL: test_vpshufbitqmb_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpshufbitqmb %xmm1, %xmm0, %k0 {%k1}
; CHECK-NEXT: kmovd %k0, %eax
; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: retq
%res = call i16 @llvm.x86.avx512.mask.vpshufbitqmb.128(<16 x i8> %a, <16 x i8> %b, i16 %mask)
ret i16 %res
}
; 256-bit masked intrinsic. The expected assembly moves the i32 mask argument
; from %edi into %k1, applies it as {%k1} on a ymm vpshufbitqmb that writes
; %k0, moves %k0 into %eax, and issues vzeroupper before returning.
declare i32 @llvm.x86.avx512.mask.vpshufbitqmb.256(<32 x i8> %a, <32 x i8> %b, i32 %mask)
define i32 @test_vpshufbitqmb_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
; CHECK-LABEL: test_vpshufbitqmb_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpshufbitqmb %ymm1, %ymm0, %k0 {%k1}
; CHECK-NEXT: kmovd %k0, %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%res = call i32 @llvm.x86.avx512.mask.vpshufbitqmb.256(<32 x i8> %a, <32 x i8> %b, i32 %mask)
ret i32 %res
}
; 512-bit masked intrinsic. The expected assembly moves the i64 mask argument
; from %rdi into %k1 with kmovq, applies it as {%k1} on a zmm vpshufbitqmb
; that writes %k0, moves %k0 into %rax, and issues vzeroupper before returning.
declare i64 @llvm.x86.avx512.mask.vpshufbitqmb.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
define i64 @test_vpshufbitqmb_512(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
; CHECK-LABEL: test_vpshufbitqmb_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovq %rdi, %k1
; CHECK-NEXT: vpshufbitqmb %zmm1, %zmm0, %k0 {%k1}
; CHECK-NEXT: kmovq %k0, %rax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%res = call i64 @llvm.x86.avx512.mask.vpshufbitqmb.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
ret i64 %res
}

View File

@ -112,3 +112,59 @@
// CHECK: encoding: [0x62,0xa2,0xfd,0x4a,0x54,0xac,0xf1,0x02,0x00,0x00,0xe0]
vpopcntw -536870910(%rcx,%r14,8), %zmm21 {%k2}
// vpshufbitqmb, 512-bit (zmm) forms writing mask register %k1.
// Register source, first without and then with the {%k2} write mask; the
// two expected encodings differ only in the fourth byte (0x40 vs 0x42).
// CHECK: vpshufbitqmb %zmm2, %zmm23, %k1
// CHECK: encoding: [0x62,0xf2,0x45,0x40,0x8f,0xca]
vpshufbitqmb %zmm2, %zmm23, %k1
// CHECK: vpshufbitqmb %zmm2, %zmm23, %k1 {%k2}
// CHECK: encoding: [0x62,0xf2,0x45,0x42,0x8f,0xca]
vpshufbitqmb %zmm2, %zmm23, %k1 {%k2}
// Memory source, unmasked: base register only; base plus a displacement
// that fits the one-byte scaled form (-256/256 are emitted as the single
// bytes 0xfc/0x04, i.e. -4/+4 units of 64 bytes); and base + index*8 with
// a 32-bit displacement.
// CHECK: vpshufbitqmb (%rcx), %zmm23, %k1
// CHECK: encoding: [0x62,0xf2,0x45,0x40,0x8f,0x09]
vpshufbitqmb (%rcx), %zmm23, %k1
// CHECK: vpshufbitqmb -256(%rsp), %zmm23, %k1
// CHECK: encoding: [0x62,0xf2,0x45,0x40,0x8f,0x4c,0x24,0xfc]
vpshufbitqmb -256(%rsp), %zmm23, %k1
// CHECK: vpshufbitqmb 256(%rsp), %zmm23, %k1
// CHECK: encoding: [0x62,0xf2,0x45,0x40,0x8f,0x4c,0x24,0x04]
vpshufbitqmb 256(%rsp), %zmm23, %k1
// CHECK: vpshufbitqmb 268435456(%rcx,%r14,8), %zmm23, %k1
// CHECK: encoding: [0x62,0xb2,0x45,0x40,0x8f,0x8c,0xf1,0x00,0x00,0x00,0x10]
vpshufbitqmb 268435456(%rcx,%r14,8), %zmm23, %k1
// CHECK: vpshufbitqmb -536870912(%rcx,%r14,8), %zmm23, %k1
// CHECK: encoding: [0x62,0xb2,0x45,0x40,0x8f,0x8c,0xf1,0x00,0x00,0x00,0xe0]
vpshufbitqmb -536870912(%rcx,%r14,8), %zmm23, %k1
// CHECK: vpshufbitqmb -536870910(%rcx,%r14,8), %zmm23, %k1
// CHECK: encoding: [0x62,0xb2,0x45,0x40,0x8f,0x8c,0xf1,0x02,0x00,0x00,0xe0]
vpshufbitqmb -536870910(%rcx,%r14,8), %zmm23, %k1
// The same memory operands again, now with the {%k2} write mask.
// CHECK: vpshufbitqmb (%rcx), %zmm23, %k1 {%k2}
// CHECK: encoding: [0x62,0xf2,0x45,0x42,0x8f,0x09]
vpshufbitqmb (%rcx), %zmm23, %k1 {%k2}
// CHECK: vpshufbitqmb -256(%rsp), %zmm23, %k1 {%k2}
// CHECK: encoding: [0x62,0xf2,0x45,0x42,0x8f,0x4c,0x24,0xfc]
vpshufbitqmb -256(%rsp), %zmm23, %k1 {%k2}
// CHECK: vpshufbitqmb 256(%rsp), %zmm23, %k1 {%k2}
// CHECK: encoding: [0x62,0xf2,0x45,0x42,0x8f,0x4c,0x24,0x04]
vpshufbitqmb 256(%rsp), %zmm23, %k1 {%k2}
// CHECK: vpshufbitqmb 268435456(%rcx,%r14,8), %zmm23, %k1 {%k2}
// CHECK: encoding: [0x62,0xb2,0x45,0x42,0x8f,0x8c,0xf1,0x00,0x00,0x00,0x10]
vpshufbitqmb 268435456(%rcx,%r14,8), %zmm23, %k1 {%k2}
// CHECK: vpshufbitqmb -536870912(%rcx,%r14,8), %zmm23, %k1 {%k2}
// CHECK: encoding: [0x62,0xb2,0x45,0x42,0x8f,0x8c,0xf1,0x00,0x00,0x00,0xe0]
vpshufbitqmb -536870912(%rcx,%r14,8), %zmm23, %k1 {%k2}
// CHECK: vpshufbitqmb -536870910(%rcx,%r14,8), %zmm23, %k1 {%k2}
// CHECK: encoding: [0x62,0xb2,0x45,0x42,0x8f,0x8c,0xf1,0x02,0x00,0x00,0xe0]
vpshufbitqmb -536870910(%rcx,%r14,8), %zmm23, %k1 {%k2}

View File

@ -224,3 +224,115 @@
// CHECK: encoding: [0x62,0xa2,0xfd,0x2a,0x54,0xac,0xf1,0x02,0x00,0x00,0xe0]
vpopcntw -536870910(%rcx,%r14,8), %ymm21 {%k2}
// vpshufbitqmb, 128-bit (xmm) forms writing mask register %k1.
// Register source, first without and then with the {%k2} write mask; the
// two expected encodings differ only in the fourth byte (0x00 vs 0x02).
// CHECK: vpshufbitqmb %xmm2, %xmm23, %k1
// CHECK: encoding: [0x62,0xf2,0x45,0x00,0x8f,0xca]
vpshufbitqmb %xmm2, %xmm23, %k1
// CHECK: vpshufbitqmb %xmm2, %xmm23, %k1 {%k2}
// CHECK: encoding: [0x62,0xf2,0x45,0x02,0x8f,0xca]
vpshufbitqmb %xmm2, %xmm23, %k1 {%k2}
// Memory source, unmasked: base register only; base plus a displacement
// that fits the one-byte scaled form (-64/64 are emitted as the single
// bytes 0xfc/0x04, i.e. -4/+4 units of 16 bytes); and base + index*8 with
// a 32-bit displacement.
// CHECK: vpshufbitqmb (%rcx), %xmm23, %k1
// CHECK: encoding: [0x62,0xf2,0x45,0x00,0x8f,0x09]
vpshufbitqmb (%rcx), %xmm23, %k1
// CHECK: vpshufbitqmb -64(%rsp), %xmm23, %k1
// CHECK: encoding: [0x62,0xf2,0x45,0x00,0x8f,0x4c,0x24,0xfc]
vpshufbitqmb -64(%rsp), %xmm23, %k1
// CHECK: vpshufbitqmb 64(%rsp), %xmm23, %k1
// CHECK: encoding: [0x62,0xf2,0x45,0x00,0x8f,0x4c,0x24,0x04]
vpshufbitqmb 64(%rsp), %xmm23, %k1
// CHECK: vpshufbitqmb 268435456(%rcx,%r14,8), %xmm23, %k1
// CHECK: encoding: [0x62,0xb2,0x45,0x00,0x8f,0x8c,0xf1,0x00,0x00,0x00,0x10]
vpshufbitqmb 268435456(%rcx,%r14,8), %xmm23, %k1
// CHECK: vpshufbitqmb -536870912(%rcx,%r14,8), %xmm23, %k1
// CHECK: encoding: [0x62,0xb2,0x45,0x00,0x8f,0x8c,0xf1,0x00,0x00,0x00,0xe0]
vpshufbitqmb -536870912(%rcx,%r14,8), %xmm23, %k1
// CHECK: vpshufbitqmb -536870910(%rcx,%r14,8), %xmm23, %k1
// CHECK: encoding: [0x62,0xb2,0x45,0x00,0x8f,0x8c,0xf1,0x02,0x00,0x00,0xe0]
vpshufbitqmb -536870910(%rcx,%r14,8), %xmm23, %k1
// The same memory operands again, now with the {%k2} write mask.
// CHECK: vpshufbitqmb (%rcx), %xmm23, %k1 {%k2}
// CHECK: encoding: [0x62,0xf2,0x45,0x02,0x8f,0x09]
vpshufbitqmb (%rcx), %xmm23, %k1 {%k2}
// CHECK: vpshufbitqmb -64(%rsp), %xmm23, %k1 {%k2}
// CHECK: encoding: [0x62,0xf2,0x45,0x02,0x8f,0x4c,0x24,0xfc]
vpshufbitqmb -64(%rsp), %xmm23, %k1 {%k2}
// CHECK: vpshufbitqmb 64(%rsp), %xmm23, %k1 {%k2}
// CHECK: encoding: [0x62,0xf2,0x45,0x02,0x8f,0x4c,0x24,0x04]
vpshufbitqmb 64(%rsp), %xmm23, %k1 {%k2}
// CHECK: vpshufbitqmb 268435456(%rcx,%r14,8), %xmm23, %k1 {%k2}
// CHECK: encoding: [0x62,0xb2,0x45,0x02,0x8f,0x8c,0xf1,0x00,0x00,0x00,0x10]
vpshufbitqmb 268435456(%rcx,%r14,8), %xmm23, %k1 {%k2}
// CHECK: vpshufbitqmb -536870912(%rcx,%r14,8), %xmm23, %k1 {%k2}
// CHECK: encoding: [0x62,0xb2,0x45,0x02,0x8f,0x8c,0xf1,0x00,0x00,0x00,0xe0]
vpshufbitqmb -536870912(%rcx,%r14,8), %xmm23, %k1 {%k2}
// CHECK: vpshufbitqmb -536870910(%rcx,%r14,8), %xmm23, %k1 {%k2}
// CHECK: encoding: [0x62,0xb2,0x45,0x02,0x8f,0x8c,0xf1,0x02,0x00,0x00,0xe0]
vpshufbitqmb -536870910(%rcx,%r14,8), %xmm23, %k1 {%k2}
// vpshufbitqmb, 256-bit (ymm) forms writing mask register %k1.
// Register source, first without and then with the {%k2} write mask; the
// two expected encodings differ only in the fourth byte (0x20 vs 0x22).
// CHECK: vpshufbitqmb %ymm2, %ymm23, %k1
// CHECK: encoding: [0x62,0xf2,0x45,0x20,0x8f,0xca]
vpshufbitqmb %ymm2, %ymm23, %k1
// CHECK: vpshufbitqmb %ymm2, %ymm23, %k1 {%k2}
// CHECK: encoding: [0x62,0xf2,0x45,0x22,0x8f,0xca]
vpshufbitqmb %ymm2, %ymm23, %k1 {%k2}
// Memory source, unmasked: base register only; base plus a displacement
// that fits the one-byte scaled form (-128/128 are emitted as the single
// bytes 0xfc/0x04, i.e. -4/+4 units of 32 bytes); and base + index*8 with
// a 32-bit displacement.
// CHECK: vpshufbitqmb (%rcx), %ymm23, %k1
// CHECK: encoding: [0x62,0xf2,0x45,0x20,0x8f,0x09]
vpshufbitqmb (%rcx), %ymm23, %k1
// CHECK: vpshufbitqmb -128(%rsp), %ymm23, %k1
// CHECK: encoding: [0x62,0xf2,0x45,0x20,0x8f,0x4c,0x24,0xfc]
vpshufbitqmb -128(%rsp), %ymm23, %k1
// CHECK: vpshufbitqmb 128(%rsp), %ymm23, %k1
// CHECK: encoding: [0x62,0xf2,0x45,0x20,0x8f,0x4c,0x24,0x04]
vpshufbitqmb 128(%rsp), %ymm23, %k1
// CHECK: vpshufbitqmb 268435456(%rcx,%r14,8), %ymm23, %k1
// CHECK: encoding: [0x62,0xb2,0x45,0x20,0x8f,0x8c,0xf1,0x00,0x00,0x00,0x10]
vpshufbitqmb 268435456(%rcx,%r14,8), %ymm23, %k1
// CHECK: vpshufbitqmb -536870912(%rcx,%r14,8), %ymm23, %k1
// CHECK: encoding: [0x62,0xb2,0x45,0x20,0x8f,0x8c,0xf1,0x00,0x00,0x00,0xe0]
vpshufbitqmb -536870912(%rcx,%r14,8), %ymm23, %k1
// CHECK: vpshufbitqmb -536870910(%rcx,%r14,8), %ymm23, %k1
// CHECK: encoding: [0x62,0xb2,0x45,0x20,0x8f,0x8c,0xf1,0x02,0x00,0x00,0xe0]
vpshufbitqmb -536870910(%rcx,%r14,8), %ymm23, %k1
// The same memory operands again, now with the {%k2} write mask.
// CHECK: vpshufbitqmb (%rcx), %ymm23, %k1 {%k2}
// CHECK: encoding: [0x62,0xf2,0x45,0x22,0x8f,0x09]
vpshufbitqmb (%rcx), %ymm23, %k1 {%k2}
// CHECK: vpshufbitqmb -128(%rsp), %ymm23, %k1 {%k2}
// CHECK: encoding: [0x62,0xf2,0x45,0x22,0x8f,0x4c,0x24,0xfc]
vpshufbitqmb -128(%rsp), %ymm23, %k1 {%k2}
// CHECK: vpshufbitqmb 128(%rsp), %ymm23, %k1 {%k2}
// CHECK: encoding: [0x62,0xf2,0x45,0x22,0x8f,0x4c,0x24,0x04]
vpshufbitqmb 128(%rsp), %ymm23, %k1 {%k2}
// CHECK: vpshufbitqmb 268435456(%rcx,%r14,8), %ymm23, %k1 {%k2}
// CHECK: encoding: [0x62,0xb2,0x45,0x22,0x8f,0x8c,0xf1,0x00,0x00,0x00,0x10]
vpshufbitqmb 268435456(%rcx,%r14,8), %ymm23, %k1 {%k2}
// CHECK: vpshufbitqmb -536870912(%rcx,%r14,8), %ymm23, %k1 {%k2}
// CHECK: encoding: [0x62,0xb2,0x45,0x22,0x8f,0x8c,0xf1,0x00,0x00,0x00,0xe0]
vpshufbitqmb -536870912(%rcx,%r14,8), %ymm23, %k1 {%k2}
// CHECK: vpshufbitqmb -536870910(%rcx,%r14,8), %ymm23, %k1 {%k2}
// CHECK: encoding: [0x62,0xb2,0x45,0x22,0x8f,0x8c,0xf1,0x02,0x00,0x00,0xe0]
vpshufbitqmb -536870910(%rcx,%r14,8), %ymm23, %k1 {%k2}