[AVX512] Adding VPERMT2B and VPERMI2B instructions.

Differential Revision: http://reviews.llvm.org/D16297


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258161 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Michael Zuckerman 2016-01-19 18:47:02 +00:00
parent 477926b924
commit dee7d6fcc6
2 changed files with 261 additions and 12 deletions

View File

@ -1184,13 +1184,14 @@ multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
}
}
multiclass avx512_perm_i_sizes_w<bits<8> opc, string OpcodeStr,
multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo VTInfo,
AVX512VLVectorVTInfo Idx> {
let Predicates = [HasBWI] in
AVX512VLVectorVTInfo Idx,
Predicate Prd> {
let Predicates = [Prd] in
defm NAME: avx512_perm_i<opc, OpcodeStr, VTInfo.info512,
Idx.info512>, EVEX_V512;
let Predicates = [HasBWI, HasVLX] in {
let Predicates = [Prd, HasVLX] in {
defm NAME#128: avx512_perm_i<opc, OpcodeStr, VTInfo.info128,
Idx.info128>, EVEX_V128;
defm NAME#256: avx512_perm_i<opc, OpcodeStr, VTInfo.info256,
@ -1202,8 +1203,12 @@ defm VPERMI2D : avx512_perm_i_sizes<0x76, "vpermi2d",
avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2Q : avx512_perm_i_sizes<0x76, "vpermi2q",
avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMI2W : avx512_perm_i_sizes_w<0x75, "vpermi2w",
avx512vl_i16_info, avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMI2W : avx512_perm_i_sizes_bw<0x75, "vpermi2w",
avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMI2B : avx512_perm_i_sizes_bw<0x75, "vpermi2b",
avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
EVEX_CD8<8, CD8VF>;
defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps",
avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd",
@ -1259,13 +1264,14 @@ multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
}
}
multiclass avx512_perm_t_sizes_w<bits<8> opc, string OpcodeStr,
multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo VTInfo,
AVX512VLVectorVTInfo Idx> {
let Predicates = [HasBWI] in
AVX512VLVectorVTInfo Idx,
Predicate Prd> {
let Predicates = [Prd] in
defm NAME: avx512_perm_t<opc, OpcodeStr, VTInfo.info512,
Idx.info512>, EVEX_V512;
let Predicates = [HasBWI, HasVLX] in {
let Predicates = [Prd, HasVLX] in {
defm NAME#128: avx512_perm_t<opc, OpcodeStr, VTInfo.info128,
Idx.info128>, EVEX_V128;
defm NAME#256: avx512_perm_t<opc, OpcodeStr, VTInfo.info256,
@ -1277,8 +1283,12 @@ defm VPERMT2D : avx512_perm_t_sizes<0x7E, "vpermt2d",
avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMT2Q : avx512_perm_t_sizes<0x7E, "vpermt2q",
avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMT2W : avx512_perm_t_sizes_w<0x7D, "vpermt2w",
avx512vl_i16_info, avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMT2W : avx512_perm_t_sizes_bw<0x7D, "vpermt2w",
avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMT2B : avx512_perm_t_sizes_bw<0x7D, "vpermt2b",
avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
EVEX_CD8<8, CD8VF>;
defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps",
avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd",

View File

@ -120,4 +120,243 @@
//CHECK: vpermb 4660(%rax,%r14,8), %zmm29, %zmm30
//CHECK: encoding: [0x62,0x22,0x15,0x40,0x8d,0xb4,0xf0,0x34,0x12,0x00,0x00]
// vpermt2b encoding tests. Each instruction is followed by the expected
// printed form and the expected encoded bytes (opcode byte 0x7d).
//
// xmm register forms: unmasked, with write mask %k7, and with %k7 plus {z}.
vpermt2b %xmm28, %xmm29, %xmm30
//CHECK: vpermt2b %xmm28, %xmm29, %xmm30
//CHECK: encoding: [0x62,0x02,0x15,0x00,0x7d,0xf4]
vpermt2b %xmm28, %xmm29, %xmm30 {%k7}
//CHECK: vpermt2b %xmm28, %xmm29, %xmm30 {%k7}
//CHECK: encoding: [0x62,0x02,0x15,0x07,0x7d,0xf4]
vpermt2b %xmm28, %xmm29, %xmm30 {%k7} {z}
//CHECK: vpermt2b %xmm28, %xmm29, %xmm30 {%k7} {z}
//CHECK: encoding: [0x62,0x02,0x15,0x87,0x7d,0xf4]
// xmm memory-operand forms. 0x7f0 and -0x800 are expected as a one-byte
// displacement (0x7f / 0x80, the offset divided by 16); 0x800 and -0x810
// are expected as a four-byte displacement.
vpermt2b (%rcx), %xmm29, %xmm30
//CHECK: vpermt2b (%rcx), %xmm29, %xmm30
//CHECK: encoding: [0x62,0x62,0x15,0x00,0x7d,0x31]
vpermt2b 0x123(%rax,%r14,8), %xmm29, %xmm30
//CHECK: vpermt2b 291(%rax,%r14,8), %xmm29, %xmm30
//CHECK: encoding: [0x62,0x22,0x15,0x00,0x7d,0xb4,0xf0,0x23,0x01,0x00,0x00]
vpermt2b 0x7f0(%rdx), %xmm29, %xmm30
//CHECK: vpermt2b 2032(%rdx), %xmm29, %xmm30
//CHECK: encoding: [0x62,0x62,0x15,0x00,0x7d,0x72,0x7f]
vpermt2b 0x800(%rdx), %xmm29, %xmm30
//CHECK: vpermt2b 2048(%rdx), %xmm29, %xmm30
//CHECK: encoding: [0x62,0x62,0x15,0x00,0x7d,0xb2,0x00,0x08,0x00,0x00]
vpermt2b -0x800(%rdx), %xmm29, %xmm30
//CHECK: vpermt2b -2048(%rdx), %xmm29, %xmm30
//CHECK: encoding: [0x62,0x62,0x15,0x00,0x7d,0x72,0x80]
vpermt2b -0x810(%rdx), %xmm29, %xmm30
//CHECK: vpermt2b -2064(%rdx), %xmm29, %xmm30
//CHECK: encoding: [0x62,0x62,0x15,0x00,0x7d,0xb2,0xf0,0xf7,0xff,0xff]
// ymm register forms: unmasked, with write mask %k7, and with %k7 plus {z}.
vpermt2b %ymm28, %ymm29, %ymm30
//CHECK: vpermt2b %ymm28, %ymm29, %ymm30
//CHECK: encoding: [0x62,0x02,0x15,0x20,0x7d,0xf4]
vpermt2b %ymm28, %ymm29, %ymm30 {%k7}
//CHECK: vpermt2b %ymm28, %ymm29, %ymm30 {%k7}
//CHECK: encoding: [0x62,0x02,0x15,0x27,0x7d,0xf4]
vpermt2b %ymm28, %ymm29, %ymm30 {%k7} {z}
//CHECK: vpermt2b %ymm28, %ymm29, %ymm30 {%k7} {z}
//CHECK: encoding: [0x62,0x02,0x15,0xa7,0x7d,0xf4]
// ymm memory-operand forms. 0xfe0 and -0x1000 are expected as a one-byte
// displacement (0x7f / 0x80, the offset divided by 32); 0x1000 and -0x1020
// are expected as a four-byte displacement.
vpermt2b (%rcx), %ymm29, %ymm30
//CHECK: vpermt2b (%rcx), %ymm29, %ymm30
//CHECK: encoding: [0x62,0x62,0x15,0x20,0x7d,0x31]
vpermt2b 0x123(%rax,%r14,8), %ymm29, %ymm30
//CHECK: vpermt2b 291(%rax,%r14,8), %ymm29, %ymm30
//CHECK: encoding: [0x62,0x22,0x15,0x20,0x7d,0xb4,0xf0,0x23,0x01,0x00,0x00]
vpermt2b 0xfe0(%rdx), %ymm29, %ymm30
//CHECK: vpermt2b 4064(%rdx), %ymm29, %ymm30
//CHECK: encoding: [0x62,0x62,0x15,0x20,0x7d,0x72,0x7f]
vpermt2b 0x1000(%rdx), %ymm29, %ymm30
//CHECK: vpermt2b 4096(%rdx), %ymm29, %ymm30
//CHECK: encoding: [0x62,0x62,0x15,0x20,0x7d,0xb2,0x00,0x10,0x00,0x00]
vpermt2b -0x1000(%rdx), %ymm29, %ymm30
//CHECK: vpermt2b -4096(%rdx), %ymm29, %ymm30
//CHECK: encoding: [0x62,0x62,0x15,0x20,0x7d,0x72,0x80]
vpermt2b -0x1020(%rdx), %ymm29, %ymm30
//CHECK: vpermt2b -4128(%rdx), %ymm29, %ymm30
//CHECK: encoding: [0x62,0x62,0x15,0x20,0x7d,0xb2,0xe0,0xef,0xff,0xff]
// Base + index*8 addressing with displacement 0x1234 for the xmm and ymm
// forms; expected as a four-byte displacement.
vpermt2b 0x1234(%rax,%r14,8), %xmm29, %xmm30
//CHECK: vpermt2b 4660(%rax,%r14,8), %xmm29, %xmm30
//CHECK: encoding: [0x62,0x22,0x15,0x00,0x7d,0xb4,0xf0,0x34,0x12,0x00,0x00]
vpermt2b 0x1234(%rax,%r14,8), %ymm29, %ymm30
//CHECK: vpermt2b 4660(%rax,%r14,8), %ymm29, %ymm30
//CHECK: encoding: [0x62,0x22,0x15,0x20,0x7d,0xb4,0xf0,0x34,0x12,0x00,0x00]
// zmm register forms: unmasked, with write mask %k7, and with %k7 plus {z}.
vpermt2b %zmm28, %zmm29, %zmm30
//CHECK: vpermt2b %zmm28, %zmm29, %zmm30
//CHECK: encoding: [0x62,0x02,0x15,0x40,0x7d,0xf4]
vpermt2b %zmm28, %zmm29, %zmm30 {%k7}
//CHECK: vpermt2b %zmm28, %zmm29, %zmm30 {%k7}
//CHECK: encoding: [0x62,0x02,0x15,0x47,0x7d,0xf4]
vpermt2b %zmm28, %zmm29, %zmm30 {%k7} {z}
//CHECK: vpermt2b %zmm28, %zmm29, %zmm30 {%k7} {z}
//CHECK: encoding: [0x62,0x02,0x15,0xc7,0x7d,0xf4]
// zmm memory-operand forms. 0x1fc0 and -0x2000 are expected as a one-byte
// displacement (0x7f / 0x80, the offset divided by 64); 0x2000 and -0x2040
// are expected as a four-byte displacement.
vpermt2b (%rcx), %zmm29, %zmm30
//CHECK: vpermt2b (%rcx), %zmm29, %zmm30
//CHECK: encoding: [0x62,0x62,0x15,0x40,0x7d,0x31]
vpermt2b 0x123(%rax,%r14,8), %zmm29, %zmm30
//CHECK: vpermt2b 291(%rax,%r14,8), %zmm29, %zmm30
//CHECK: encoding: [0x62,0x22,0x15,0x40,0x7d,0xb4,0xf0,0x23,0x01,0x00,0x00]
vpermt2b 0x1fc0(%rdx), %zmm29, %zmm30
//CHECK: vpermt2b 8128(%rdx), %zmm29, %zmm30
//CHECK: encoding: [0x62,0x62,0x15,0x40,0x7d,0x72,0x7f]
vpermt2b 0x2000(%rdx), %zmm29, %zmm30
//CHECK: vpermt2b 8192(%rdx), %zmm29, %zmm30
//CHECK: encoding: [0x62,0x62,0x15,0x40,0x7d,0xb2,0x00,0x20,0x00,0x00]
vpermt2b -0x2000(%rdx), %zmm29, %zmm30
//CHECK: vpermt2b -8192(%rdx), %zmm29, %zmm30
//CHECK: encoding: [0x62,0x62,0x15,0x40,0x7d,0x72,0x80]
vpermt2b -0x2040(%rdx), %zmm29, %zmm30
//CHECK: vpermt2b -8256(%rdx), %zmm29, %zmm30
//CHECK: encoding: [0x62,0x62,0x15,0x40,0x7d,0xb2,0xc0,0xdf,0xff,0xff]
vpermt2b 0x1234(%rax,%r14,8), %zmm29, %zmm30
//CHECK: vpermt2b 4660(%rax,%r14,8), %zmm29, %zmm30
//CHECK: encoding: [0x62,0x22,0x15,0x40,0x7d,0xb4,0xf0,0x34,0x12,0x00,0x00]
// vpermi2b encoding tests. Each instruction is followed by the expected
// printed form and the expected encoded bytes (opcode byte 0x75).
//
// xmm register forms: unmasked, with write mask %k7, and with %k7 plus {z}.
vpermi2b %xmm28, %xmm29, %xmm30
//CHECK: vpermi2b %xmm28, %xmm29, %xmm30
//CHECK: encoding: [0x62,0x02,0x15,0x00,0x75,0xf4]
vpermi2b %xmm28, %xmm29, %xmm30 {%k7}
//CHECK: vpermi2b %xmm28, %xmm29, %xmm30 {%k7}
//CHECK: encoding: [0x62,0x02,0x15,0x07,0x75,0xf4]
vpermi2b %xmm28, %xmm29, %xmm30 {%k7} {z}
//CHECK: vpermi2b %xmm28, %xmm29, %xmm30 {%k7} {z}
//CHECK: encoding: [0x62,0x02,0x15,0x87,0x75,0xf4]
// xmm memory-operand forms. 0x7f0 and -0x800 are expected as a one-byte
// displacement (0x7f / 0x80, the offset divided by 16); 0x800 and -0x810
// are expected as a four-byte displacement.
vpermi2b (%rcx), %xmm29, %xmm30
//CHECK: vpermi2b (%rcx), %xmm29, %xmm30
//CHECK: encoding: [0x62,0x62,0x15,0x00,0x75,0x31]
vpermi2b 0x123(%rax,%r14,8), %xmm29, %xmm30
//CHECK: vpermi2b 291(%rax,%r14,8), %xmm29, %xmm30
//CHECK: encoding: [0x62,0x22,0x15,0x00,0x75,0xb4,0xf0,0x23,0x01,0x00,0x00]
vpermi2b 0x7f0(%rdx), %xmm29, %xmm30
//CHECK: vpermi2b 2032(%rdx), %xmm29, %xmm30
//CHECK: encoding: [0x62,0x62,0x15,0x00,0x75,0x72,0x7f]
vpermi2b 0x800(%rdx), %xmm29, %xmm30
//CHECK: vpermi2b 2048(%rdx), %xmm29, %xmm30
//CHECK: encoding: [0x62,0x62,0x15,0x00,0x75,0xb2,0x00,0x08,0x00,0x00]
vpermi2b -0x800(%rdx), %xmm29, %xmm30
//CHECK: vpermi2b -2048(%rdx), %xmm29, %xmm30
//CHECK: encoding: [0x62,0x62,0x15,0x00,0x75,0x72,0x80]
vpermi2b -0x810(%rdx), %xmm29, %xmm30
//CHECK: vpermi2b -2064(%rdx), %xmm29, %xmm30
//CHECK: encoding: [0x62,0x62,0x15,0x00,0x75,0xb2,0xf0,0xf7,0xff,0xff]
// ymm register forms: unmasked, with write mask %k7, and with %k7 plus {z}.
vpermi2b %ymm28, %ymm29, %ymm30
//CHECK: vpermi2b %ymm28, %ymm29, %ymm30
//CHECK: encoding: [0x62,0x02,0x15,0x20,0x75,0xf4]
vpermi2b %ymm28, %ymm29, %ymm30 {%k7}
//CHECK: vpermi2b %ymm28, %ymm29, %ymm30 {%k7}
//CHECK: encoding: [0x62,0x02,0x15,0x27,0x75,0xf4]
vpermi2b %ymm28, %ymm29, %ymm30 {%k7} {z}
//CHECK: vpermi2b %ymm28, %ymm29, %ymm30 {%k7} {z}
//CHECK: encoding: [0x62,0x02,0x15,0xa7,0x75,0xf4]
// ymm memory-operand forms. 0xfe0 and -0x1000 are expected as a one-byte
// displacement (0x7f / 0x80, the offset divided by 32); 0x1000 and -0x1020
// are expected as a four-byte displacement.
vpermi2b (%rcx), %ymm29, %ymm30
//CHECK: vpermi2b (%rcx), %ymm29, %ymm30
//CHECK: encoding: [0x62,0x62,0x15,0x20,0x75,0x31]
vpermi2b 0x123(%rax,%r14,8), %ymm29, %ymm30
//CHECK: vpermi2b 291(%rax,%r14,8), %ymm29, %ymm30
//CHECK: encoding: [0x62,0x22,0x15,0x20,0x75,0xb4,0xf0,0x23,0x01,0x00,0x00]
vpermi2b 0xfe0(%rdx), %ymm29, %ymm30
//CHECK: vpermi2b 4064(%rdx), %ymm29, %ymm30
//CHECK: encoding: [0x62,0x62,0x15,0x20,0x75,0x72,0x7f]
vpermi2b 0x1000(%rdx), %ymm29, %ymm30
//CHECK: vpermi2b 4096(%rdx), %ymm29, %ymm30
//CHECK: encoding: [0x62,0x62,0x15,0x20,0x75,0xb2,0x00,0x10,0x00,0x00]
vpermi2b -0x1000(%rdx), %ymm29, %ymm30
//CHECK: vpermi2b -4096(%rdx), %ymm29, %ymm30
//CHECK: encoding: [0x62,0x62,0x15,0x20,0x75,0x72,0x80]
vpermi2b -0x1020(%rdx), %ymm29, %ymm30
//CHECK: vpermi2b -4128(%rdx), %ymm29, %ymm30
//CHECK: encoding: [0x62,0x62,0x15,0x20,0x75,0xb2,0xe0,0xef,0xff,0xff]
// Base + index*8 addressing with displacement 0x1234 for the xmm and ymm
// forms; expected as a four-byte displacement.
vpermi2b 0x1234(%rax,%r14,8), %xmm29, %xmm30
//CHECK: vpermi2b 4660(%rax,%r14,8), %xmm29, %xmm30
//CHECK: encoding: [0x62,0x22,0x15,0x00,0x75,0xb4,0xf0,0x34,0x12,0x00,0x00]
vpermi2b 0x1234(%rax,%r14,8), %ymm29, %ymm30
//CHECK: vpermi2b 4660(%rax,%r14,8), %ymm29, %ymm30
//CHECK: encoding: [0x62,0x22,0x15,0x20,0x75,0xb4,0xf0,0x34,0x12,0x00,0x00]
// zmm register forms: unmasked, with write mask %k7, and with %k7 plus {z}.
vpermi2b %zmm28, %zmm29, %zmm30
//CHECK: vpermi2b %zmm28, %zmm29, %zmm30
//CHECK: encoding: [0x62,0x02,0x15,0x40,0x75,0xf4]
vpermi2b %zmm28, %zmm29, %zmm30 {%k7}
//CHECK: vpermi2b %zmm28, %zmm29, %zmm30 {%k7}
//CHECK: encoding: [0x62,0x02,0x15,0x47,0x75,0xf4]
vpermi2b %zmm28, %zmm29, %zmm30 {%k7} {z}
//CHECK: vpermi2b %zmm28, %zmm29, %zmm30 {%k7} {z}
//CHECK: encoding: [0x62,0x02,0x15,0xc7,0x75,0xf4]
// zmm memory-operand forms. 0x1fc0 and -0x2000 are expected as a one-byte
// displacement (0x7f / 0x80, the offset divided by 64); 0x2000 and -0x2040
// are expected as a four-byte displacement.
vpermi2b (%rcx), %zmm29, %zmm30
//CHECK: vpermi2b (%rcx), %zmm29, %zmm30
//CHECK: encoding: [0x62,0x62,0x15,0x40,0x75,0x31]
vpermi2b 0x123(%rax,%r14,8), %zmm29, %zmm30
//CHECK: vpermi2b 291(%rax,%r14,8), %zmm29, %zmm30
//CHECK: encoding: [0x62,0x22,0x15,0x40,0x75,0xb4,0xf0,0x23,0x01,0x00,0x00]
vpermi2b 0x1fc0(%rdx), %zmm29, %zmm30
//CHECK: vpermi2b 8128(%rdx), %zmm29, %zmm30
//CHECK: encoding: [0x62,0x62,0x15,0x40,0x75,0x72,0x7f]
vpermi2b 0x2000(%rdx), %zmm29, %zmm30
//CHECK: vpermi2b 8192(%rdx), %zmm29, %zmm30
//CHECK: encoding: [0x62,0x62,0x15,0x40,0x75,0xb2,0x00,0x20,0x00,0x00]
vpermi2b -0x2000(%rdx), %zmm29, %zmm30
//CHECK: vpermi2b -8192(%rdx), %zmm29, %zmm30
//CHECK: encoding: [0x62,0x62,0x15,0x40,0x75,0x72,0x80]
vpermi2b -0x2040(%rdx), %zmm29, %zmm30
//CHECK: vpermi2b -8256(%rdx), %zmm29, %zmm30
//CHECK: encoding: [0x62,0x62,0x15,0x40,0x75,0xb2,0xc0,0xdf,0xff,0xff]
vpermi2b 0x1234(%rax,%r14,8), %zmm29, %zmm30
//CHECK: vpermi2b 4660(%rax,%r14,8), %zmm29, %zmm30
//CHECK: encoding: [0x62,0x22,0x15,0x40,0x75,0xb4,0xf0,0x34,0x12,0x00,0x00]