[AVX512] Adding VPERMB instruction

Differential Revision: http://reviews.llvm.org/D16294



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258144 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Michael Zuckerman 2016-01-19 17:07:43 +00:00
parent 21495bccdc
commit b0025fa67f
3 changed files with 142 additions and 2 deletions

View File

@ -4172,8 +4172,24 @@ multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
VTInfo.info256>, EVEX_V256;
}
// Variable permute for byte/word elements, instantiated once per vector width.
//   opc       - opcode byte passed through to avx512_var_shift.
//   OpcodeStr - mnemonic passed through to avx512_var_shift.
//   prd       - feature predicate required by every width.
//   OpNode    - SDNode passed through to avx512_var_shift.
//   _         - per-width type info; info512 / info256 / info128 are picked
//               for the Z / Z256 / Z128 variants respectively.
// The 512-bit variant is guarded by prd alone; the 256-bit and 128-bit
// variants additionally require HasVLX.
multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
                           Predicate prd, SDNode OpNode,
                           AVX512VLVectorVTInfo _> {
  let Predicates = [prd] in {
    defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, _.info512>, EVEX_V512;
  }

  let Predicates = [HasVLX, prd] in {
    defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, _.info256>, EVEX_V256;
    defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, _.info128>, EVEX_V128;
  }
}
// NOTE(review): the next two defms both emit the "vpermw" mnemonic at opcode
// 0x8D. This looks like the before/after pair of a replaced line; confirm
// that only one of them is meant to remain.
defm VPERM : avx512_var_shift_w<0x8D, "vpermw", X86VPermv>;
// Word permute: guarded by HasBWI, uses the i16 type info, VEX_W applied.
defm VPERMW : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
avx512vl_i16_info>, VEX_W;
// Byte permute: same 0x8D opcode as the word form, guarded by HasVBMI, uses
// the i8 type info, and has no VEX_W.
defm VPERMB : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
avx512vl_i8_info>;
// Dword permute: opcode 0x36 via avx512_vperm_dq_sizes with the i32 type info.
defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
avx512vl_i32_info>;

View File

@ -795,7 +795,8 @@ def HasFSGSBase : Predicate<"Subtarget->hasFSGSBase()">;
// Feature predicates; each string is the Subtarget query the predicate wraps.
def HasLZCNT : Predicate<"Subtarget->hasLZCNT()">;
def HasBMI : Predicate<"Subtarget->hasBMI()">;
def HasBMI2 : Predicate<"Subtarget->hasBMI2()">;
// NOTE(review): HasVBMI is defined twice below. This looks like the
// before/after pair of a changed line; confirm only the second form remains.
def HasVBMI : Predicate<"Subtarget->hasVBMI()">;
// Same Subtarget query, plus an AssemblerPredicate tied to FeatureVBMI.
def HasVBMI : Predicate<"Subtarget->hasVBMI()">,
AssemblerPredicate<"FeatureVBMI", "AVX-512 VBMI ISA">;
def HasRTM : Predicate<"Subtarget->hasRTM()">;
def HasHLE : Predicate<"Subtarget->hasHLE()">;
// HasTSX holds when either RTM or HLE is reported by the subtarget.
def HasTSX : Predicate<"Subtarget->hasRTM() || Subtarget->hasHLE()">;

View File

@ -0,0 +1,123 @@
// RUN: llvm-mc -triple x86_64-unknown-unknown -mcpu=skx -mattr=+avx512vl -mattr=+avx512vbmi --show-encoding %s | FileCheck %s
// Each case below is one vpermb instruction followed by the expected
// printed form and the expected encoding bytes.
//
// 128-bit (xmm) forms: merge-masked and zero-masked register operands.
vpermb %xmm28, %xmm29, %xmm30 {%k7}
//CHECK: vpermb %xmm28, %xmm29, %xmm30 {%k7}
//CHECK: encoding: [0x62,0x02,0x15,0x07,0x8d,0xf4]
vpermb %xmm28, %xmm29, %xmm30 {%k7} {z}
//CHECK: vpermb %xmm28, %xmm29, %xmm30 {%k7} {z}
//CHECK: encoding: [0x62,0x02,0x15,0x87,0x8d,0xf4]
// 128-bit memory-source forms: plain base, then base + scaled index + offset.
vpermb (%rcx), %xmm29, %xmm30
//CHECK: vpermb (%rcx), %xmm29, %xmm30
//CHECK: encoding: [0x62,0x62,0x15,0x00,0x8d,0x31]
vpermb 0x123(%rax,%r14,8), %xmm29, %xmm30
//CHECK: vpermb 291(%rax,%r14,8), %xmm29, %xmm30
//CHECK: encoding: [0x62,0x22,0x15,0x00,0x8d,0xb4,0xf0,0x23,0x01,0x00,0x00]
// Displacement boundaries: 0x7f0 and -0x800 are emitted as a single
// displacement byte (0x7f / 0x80); 0x800 and -0x810 take four bytes.
vpermb 0x7f0(%rdx), %xmm29, %xmm30
//CHECK: vpermb 2032(%rdx), %xmm29, %xmm30
//CHECK: encoding: [0x62,0x62,0x15,0x00,0x8d,0x72,0x7f]
vpermb 0x800(%rdx), %xmm29, %xmm30
//CHECK: vpermb 2048(%rdx), %xmm29, %xmm30
//CHECK: encoding: [0x62,0x62,0x15,0x00,0x8d,0xb2,0x00,0x08,0x00,0x00]
vpermb -0x800(%rdx), %xmm29, %xmm30
//CHECK: vpermb -2048(%rdx), %xmm29, %xmm30
//CHECK: encoding: [0x62,0x62,0x15,0x00,0x8d,0x72,0x80]
vpermb -0x810(%rdx), %xmm29, %xmm30
//CHECK: vpermb -2064(%rdx), %xmm29, %xmm30
//CHECK: encoding: [0x62,0x62,0x15,0x00,0x8d,0xb2,0xf0,0xf7,0xff,0xff]
// 256-bit (ymm) forms: unmasked, merge-masked and zero-masked registers.
vpermb %ymm28, %ymm29, %ymm30
//CHECK: vpermb %ymm28, %ymm29, %ymm30
//CHECK: encoding: [0x62,0x02,0x15,0x20,0x8d,0xf4]
vpermb %ymm28, %ymm29, %ymm30 {%k7}
//CHECK: vpermb %ymm28, %ymm29, %ymm30 {%k7}
//CHECK: encoding: [0x62,0x02,0x15,0x27,0x8d,0xf4]
vpermb %ymm28, %ymm29, %ymm30 {%k7} {z}
//CHECK: vpermb %ymm28, %ymm29, %ymm30 {%k7} {z}
//CHECK: encoding: [0x62,0x02,0x15,0xa7,0x8d,0xf4]
// 256-bit memory-source forms.
vpermb (%rcx), %ymm29, %ymm30
//CHECK: vpermb (%rcx), %ymm29, %ymm30
//CHECK: encoding: [0x62,0x62,0x15,0x20,0x8d,0x31]
vpermb 0x123(%rax,%r14,8), %ymm29, %ymm30
//CHECK: vpermb 291(%rax,%r14,8), %ymm29, %ymm30
//CHECK: encoding: [0x62,0x22,0x15,0x20,0x8d,0xb4,0xf0,0x23,0x01,0x00,0x00]
// Displacement boundaries: 0xfe0 and -0x1000 are emitted as a single
// displacement byte (0x7f / 0x80); 0x1000 and -0x1020 take four bytes.
vpermb 0xfe0(%rdx), %ymm29, %ymm30
//CHECK: vpermb 4064(%rdx), %ymm29, %ymm30
//CHECK: encoding: [0x62,0x62,0x15,0x20,0x8d,0x72,0x7f]
vpermb 0x1000(%rdx), %ymm29, %ymm30
//CHECK: vpermb 4096(%rdx), %ymm29, %ymm30
//CHECK: encoding: [0x62,0x62,0x15,0x20,0x8d,0xb2,0x00,0x10,0x00,0x00]
vpermb -0x1000(%rdx), %ymm29, %ymm30
//CHECK: vpermb -4096(%rdx), %ymm29, %ymm30
//CHECK: encoding: [0x62,0x62,0x15,0x20,0x8d,0x72,0x80]
vpermb -0x1020(%rdx), %ymm29, %ymm30
//CHECK: vpermb -4128(%rdx), %ymm29, %ymm30
//CHECK: encoding: [0x62,0x62,0x15,0x20,0x8d,0xb2,0xe0,0xef,0xff,0xff]
// Unmasked 128-bit register form.
vpermb %xmm28, %xmm29, %xmm30
//CHECK: vpermb %xmm28, %xmm29, %xmm30
//CHECK: encoding: [0x62,0x02,0x15,0x00,0x8d,0xf4]
// Base + scaled index with a 0x1234 offset, for the xmm and ymm widths;
// both are expected to carry a four-byte displacement.
vpermb 0x1234(%rax,%r14,8), %xmm29, %xmm30
//CHECK: vpermb 4660(%rax,%r14,8), %xmm29, %xmm30
//CHECK: encoding: [0x62,0x22,0x15,0x00,0x8d,0xb4,0xf0,0x34,0x12,0x00,0x00]
vpermb 0x1234(%rax,%r14,8), %ymm29, %ymm30
//CHECK: vpermb 4660(%rax,%r14,8), %ymm29, %ymm30
//CHECK: encoding: [0x62,0x22,0x15,0x20,0x8d,0xb4,0xf0,0x34,0x12,0x00,0x00]
// 512-bit (zmm) forms: unmasked, merge-masked and zero-masked registers.
vpermb %zmm28, %zmm29, %zmm30
//CHECK: vpermb %zmm28, %zmm29, %zmm30
//CHECK: encoding: [0x62,0x02,0x15,0x40,0x8d,0xf4]
vpermb %zmm28, %zmm29, %zmm30 {%k7}
//CHECK: vpermb %zmm28, %zmm29, %zmm30 {%k7}
//CHECK: encoding: [0x62,0x02,0x15,0x47,0x8d,0xf4]
vpermb %zmm28, %zmm29, %zmm30 {%k7} {z}
//CHECK: vpermb %zmm28, %zmm29, %zmm30 {%k7} {z}
//CHECK: encoding: [0x62,0x02,0x15,0xc7,0x8d,0xf4]
// 512-bit memory-source forms.
vpermb (%rcx), %zmm29, %zmm30
//CHECK: vpermb (%rcx), %zmm29, %zmm30
//CHECK: encoding: [0x62,0x62,0x15,0x40,0x8d,0x31]
vpermb 0x123(%rax,%r14,8), %zmm29, %zmm30
//CHECK: vpermb 291(%rax,%r14,8), %zmm29, %zmm30
//CHECK: encoding: [0x62,0x22,0x15,0x40,0x8d,0xb4,0xf0,0x23,0x01,0x00,0x00]
// Displacement boundaries: 0x1fc0 and -0x2000 are emitted as a single
// displacement byte (0x7f / 0x80); 0x2000 and -0x2040 take four bytes.
vpermb 0x1fc0(%rdx), %zmm29, %zmm30
//CHECK: vpermb 8128(%rdx), %zmm29, %zmm30
//CHECK: encoding: [0x62,0x62,0x15,0x40,0x8d,0x72,0x7f]
vpermb 0x2000(%rdx), %zmm29, %zmm30
//CHECK: vpermb 8192(%rdx), %zmm29, %zmm30
//CHECK: encoding: [0x62,0x62,0x15,0x40,0x8d,0xb2,0x00,0x20,0x00,0x00]
vpermb -0x2000(%rdx), %zmm29, %zmm30
//CHECK: vpermb -8192(%rdx), %zmm29, %zmm30
//CHECK: encoding: [0x62,0x62,0x15,0x40,0x8d,0x72,0x80]
vpermb -0x2040(%rdx), %zmm29, %zmm30
//CHECK: vpermb -8256(%rdx), %zmm29, %zmm30
//CHECK: encoding: [0x62,0x62,0x15,0x40,0x8d,0xb2,0xc0,0xdf,0xff,0xff]
// Base + scaled index with a 0x1234 offset; four-byte displacement expected.
vpermb 0x1234(%rax,%r14,8), %zmm29, %zmm30
//CHECK: vpermb 4660(%rax,%r14,8), %zmm29, %zmm30
//CHECK: encoding: [0x62,0x22,0x15,0x40,0x8d,0xb4,0xf0,0x34,0x12,0x00,0x00]