mirror of
https://github.com/RPCS3/llvm.git
synced 2025-02-13 14:46:15 +00:00
[AVX512] Add DQ subvector inserts
In AVX512f we support 64x2 and 32x8 inserts via matching them to 32x4 and 64x4 respectively. These are matched by "Alt" Pat<>'s (Alt stands for alternative VTs). Since DQ has native support for these instructions, I peeled off the non-"Alt" part of the base class into vinsert_for_size_no_alt. The DQ instructions are derived from this multiclass. The "Alt" Pat<>'s are disabled with DQ. Fixes <rdar://problem/18426089> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@219874 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
ec7f30662e
commit
fb9d61a8d6
@ -347,11 +347,10 @@ def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
|
||||
// AVX-512 - VECTOR INSERT
|
||||
//
|
||||
|
||||
multiclass vinsert_for_size<int Opcode,
|
||||
X86VectorVTInfo From, X86VectorVTInfo To,
|
||||
X86VectorVTInfo AltFrom, X86VectorVTInfo AltTo,
|
||||
PatFrag vinsert_insert,
|
||||
SDNodeXForm INSERT_get_vinsert_imm> {
|
||||
multiclass vinsert_for_size_no_alt<int Opcode,
|
||||
X86VectorVTInfo From, X86VectorVTInfo To,
|
||||
PatFrag vinsert_insert,
|
||||
SDNodeXForm INSERT_get_vinsert_imm> {
|
||||
let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
|
||||
def rr : AVX512AIi8<Opcode, MRMSrcReg, (outs VR512:$dst),
|
||||
(ins VR512:$src1, From.RC:$src2, i8imm:$src3),
|
||||
@ -372,14 +371,24 @@ multiclass vinsert_for_size<int Opcode,
|
||||
[]>,
|
||||
EVEX_4V, EVEX_V512, EVEX_CD8<From.EltSize, From.CD8TupleForm>;
|
||||
}
|
||||
}
|
||||
|
||||
multiclass vinsert_for_size<int Opcode,
|
||||
X86VectorVTInfo From, X86VectorVTInfo To,
|
||||
X86VectorVTInfo AltFrom, X86VectorVTInfo AltTo,
|
||||
PatFrag vinsert_insert,
|
||||
SDNodeXForm INSERT_get_vinsert_imm> :
|
||||
vinsert_for_size_no_alt<Opcode, From, To,
|
||||
vinsert_insert, INSERT_get_vinsert_imm> {
|
||||
// Codegen pattern with the alternative types, e.g. v2i64 -> v8i64 for
|
||||
// vinserti32x4
|
||||
def : Pat<(vinsert_insert:$ins
|
||||
(AltTo.VT VR512:$src1), (AltFrom.VT From.RC:$src2), (iPTR imm)),
|
||||
(AltTo.VT (!cast<Instruction>(NAME # From.EltSize # "x4rr")
|
||||
VR512:$src1, From.RC:$src2,
|
||||
(INSERT_get_vinsert_imm VR512:$ins)))>;
|
||||
// vinserti32x4. Only add this if 64x2 and friends are not supported
|
||||
// natively via AVX512DQ.
|
||||
let Predicates = [NoDQI] in
|
||||
def : Pat<(vinsert_insert:$ins
|
||||
(AltTo.VT VR512:$src1), (AltFrom.VT From.RC:$src2), (iPTR imm)),
|
||||
(AltTo.VT (!cast<Instruction>(NAME # From.EltSize # "x4rr")
|
||||
VR512:$src1, From.RC:$src2,
|
||||
(INSERT_get_vinsert_imm VR512:$ins)))>;
|
||||
}
|
||||
|
||||
multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
|
||||
@ -391,6 +400,12 @@ multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
|
||||
X86VectorVTInfo< 8, EltVT64, VR512>,
|
||||
vinsert128_insert,
|
||||
INSERT_get_vinsert128_imm>;
|
||||
let Predicates = [HasDQI] in
|
||||
defm NAME # "64x2" : vinsert_for_size_no_alt<Opcode128,
|
||||
X86VectorVTInfo< 2, EltVT64, VR128X>,
|
||||
X86VectorVTInfo< 8, EltVT64, VR512>,
|
||||
vinsert128_insert,
|
||||
INSERT_get_vinsert128_imm>, VEX_W;
|
||||
defm NAME # "64x4" : vinsert_for_size<Opcode256,
|
||||
X86VectorVTInfo< 4, EltVT64, VR256X>,
|
||||
X86VectorVTInfo< 8, EltVT64, VR512>,
|
||||
@ -398,6 +413,12 @@ multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
|
||||
X86VectorVTInfo<16, EltVT32, VR512>,
|
||||
vinsert256_insert,
|
||||
INSERT_get_vinsert256_imm>, VEX_W;
|
||||
let Predicates = [HasDQI] in
|
||||
defm NAME # "32x8" : vinsert_for_size_no_alt<Opcode256,
|
||||
X86VectorVTInfo< 8, EltVT32, VR256X>,
|
||||
X86VectorVTInfo<16, EltVT32, VR512>,
|
||||
vinsert256_insert,
|
||||
INSERT_get_vinsert256_imm>;
|
||||
}
|
||||
|
||||
defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a>;
|
||||
|
@ -710,6 +710,7 @@ def HasCDI : Predicate<"Subtarget->hasCDI()">;
|
||||
def HasPFI : Predicate<"Subtarget->hasPFI()">;
|
||||
def HasERI : Predicate<"Subtarget->hasERI()">;
|
||||
def HasDQI : Predicate<"Subtarget->hasDQI()">;
|
||||
def NoDQI : Predicate<"!Subtarget->hasDQI()">;
|
||||
def HasBWI : Predicate<"Subtarget->hasBWI()">;
|
||||
def HasVLX : Predicate<"Subtarget->hasVLX()">,
|
||||
AssemblerPredicate<"FeatureVLX", "AVX-512 VLX ISA">;
|
||||
|
@ -13,9 +13,11 @@ define <16 x float> @test1(<16 x float> %x, float* %br, float %y) nounwind {
|
||||
}
|
||||
|
||||
;CHECK-LABEL: test2:
|
||||
;CHECK: vinsertf32x4 $0
|
||||
;KNL: vinsertf32x4 $0
|
||||
;SKX: vinsertf64x2 $0
|
||||
;CHECK: vextractf32x4 $3
|
||||
;CHECK: vinsertf32x4 $3
|
||||
;KNL: vinsertf32x4 $3
|
||||
;SKX: vinsertf64x2 $3
|
||||
;CHECK: ret
|
||||
define <8 x double> @test2(<8 x double> %x, double* %br, double %y) nounwind {
|
||||
%rrr = load double* %br
|
||||
@ -36,7 +38,8 @@ define <16 x float> @test3(<16 x float> %x) nounwind {
|
||||
|
||||
;CHECK-LABEL: test4:
|
||||
;CHECK: vextracti32x4 $2
|
||||
;CHECK: vinserti32x4 $0
|
||||
;KNL: vinserti32x4 $0
|
||||
;SKX: vinserti64x2 $0
|
||||
;CHECK: ret
|
||||
define <8 x i64> @test4(<8 x i64> %x) nounwind {
|
||||
%eee = extractelement <8 x i64> %x, i32 4
|
||||
|
@ -4205,6 +4205,14 @@ vinserti32x4 $1, %xmm21, %zmm5, %zmm17
|
||||
// CHECK: encoding: [0x62,0xe3,0x1d,0x40,0x38,0x4f,0x10,0x01]
|
||||
vinserti32x4 $1, 256(%rdi), %zmm28, %zmm17
|
||||
|
||||
// CHECK: vinserti32x8
|
||||
// CHECK: encoding: [0x62,0xd3,0x4d,0x40,0x3a,0xdb,0x01]
|
||||
vinserti32x8 $1, %ymm11, %zmm22, %zmm3
|
||||
|
||||
// CHECK: vinsertf64x2
|
||||
// CHECK: encoding: [0x62,0xf3,0xed,0x48,0x18,0x4f,0x10,0x01]
|
||||
vinsertf64x2 $1, 256(%rdi), %zmm2, %zmm1
|
||||
|
||||
// CHECK: vextracti32x4
|
||||
// CHECK: encoding: [0x62,0x33,0x7d,0x48,0x39,0xc9,0x01]
|
||||
vextracti32x4 $1, %zmm9, %xmm17
|
||||
|
Loading…
x
Reference in New Issue
Block a user