[AVX512] Use X86VectorVTInfo in the masking helper classes and the FMAs
No functionality change. Makes the code more compact (see the FMA part).

This needs a new type attribute MemOpFrag in X86VectorVTInfo. For now I only defined this in the simple cases. See the comment before the attribute.

Diff of X86.td.expanded before and after is empty except for the appearance of the new attribute.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@218637 91177308-0d34-0410-b5e6-96231b3b80d8
Parent: 4edcbaec90
Commit: e3d2fcce41
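To make the intent of the new attribute concrete, here is a small illustrative sketch (not part of the patch; the defm line and the !if logic are taken from the diff below, the expansion spelled out in the comments is my own reading):

// With _ = v16f32_info, TypeVariantName is "f", so the first !if arm fires and
//   PatFrag MemOpFrag = !cast<PatFrag>("memop" # VTName);   // i.e. memopv16f32
// A pattern written against the VT-info record, such as
//   (_.VT (OpNode _.RC:$src1, _.RC:$src2, (_.MemOpFrag addr:$src3)))
// therefore ends up as the pattern the old code spelled out by hand:
//   (v16f32 (X86Fmadd VR512:$src1, VR512:$src2, (memopv16f32 addr:$src3)))
// For a record like v32i16_info none of the !if arms match and MemOpFrag stays
// uninitialized (?), which is fine as long as no instantiation uses it.
defm VFMADD213PSZ : avx512_fma3p_rm<0xA8, "vfmadd213ps", X86Fmadd, v16f32_info>,
                    EVEX_V512, EVEX_CD8<32, CD8VF>;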
@@ -52,6 +52,14 @@ class X86VectorVTInfo<int NumElts, ValueType EltVT, RegisterClass rc,
                      VTName)), VTName));
   PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
+
+  // Load patterns used for memory operands.  We only have this defined in
+  // case of i64 element types for sub-512 integer vectors.  For now, keep
+  // MemOpFrag undefined in these cases.
+  PatFrag MemOpFrag =
+    !if (!eq (TypeVariantName, "f"), !cast<PatFrag>("memop" # VTName),
+     !if (!eq (EltTypeName, "i64"), !cast<PatFrag>("memop" # VTName),
+      !if (!eq (VTName, "v16i32"), !cast<PatFrag>("memop" # VTName), ?)));

   // The corresponding float type, e.g. v16f32 for v16i32
   // Note: For EltSize < 32, FloatVT is illegal and TableGen
   //       fails to compile, so we choose FloatVT = VT
@@ -77,6 +85,8 @@ def v64i8_info : X86VectorVTInfo<64, i8, VR512, "b">;
 def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
 def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
 def v8i64_info : X86VectorVTInfo<8, i64, VR512, "q">;
+def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
+def v8f64_info : X86VectorVTInfo<8, f64, VR512, "pd">;

 // "x" in v32i8x_info means RC = VR256X
 def v32i8x_info : X86VectorVTInfo<32, i8, VR256X, "b">;
@@ -107,24 +117,24 @@ def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,


 // Common base class of AVX512_masking and AVX512_masking_3src.
-multiclass AVX512_masking_common<bits<8> O, Format F, dag Outs, dag Ins,
-                                 dag MaskingIns, dag ZeroMaskingIns,
+multiclass AVX512_masking_common<bits<8> O, Format F, X86VectorVTInfo _,
+                                 dag Outs,
+                                 dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
-                                 dag RHS, dag MaskingRHS, ValueType OpVT,
-                                 RegisterClass RC, RegisterClass KRC,
+                                 dag RHS, dag MaskingRHS,
                                  string MaskingConstraint = ""> {
   def NAME: AVX512<O, F, Outs, Ins,
                        OpcodeStr#" \t{"#AttSrcAsm#", $dst|"#
                                      "$dst, "#IntelSrcAsm#"}",
-                       [(set RC:$dst, RHS)]>;
+                       [(set _.RC:$dst, RHS)]>;

   // Prefer over VMOV*rrk Pat<>
   let AddedComplexity = 20 in
     def NAME#k: AVX512<O, F, Outs, MaskingIns,
                        OpcodeStr#" \t{"#AttSrcAsm#", $dst {${mask}}|"#
                                      "$dst {${mask}}, "#IntelSrcAsm#"}",
-                       [(set RC:$dst, MaskingRHS)]>,
+                       [(set _.RC:$dst, MaskingRHS)]>,
               EVEX_K {
       // In case of the 3src subclass this is overridden with a let.
       string Constraints = MaskingConstraint;
@@ -133,9 +143,9 @@ multiclass AVX512_masking_common<bits<8> O, Format F, dag Outs, dag Ins,
   def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
                        OpcodeStr#" \t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
                                      "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
-                       [(set RC:$dst,
-                             (vselect KRC:$mask, RHS,
-                                      (OpVT (bitconvert
+                       [(set _.RC:$dst,
+                             (vselect _.KRCWM:$mask, RHS,
+                                      (_.VT (bitconvert
                                              (v16i32 immAllZerosV)))))]>,
                        EVEX_KZ;
 }
@@ -143,34 +153,31 @@ multiclass AVX512_masking_common<bits<8> O, Format F, dag Outs, dag Ins,
 // This multiclass generates the unconditional/non-masking, the masking and
 // the zero-masking variant of the instruction.  In the masking case, the
 // perserved vector elements come from a new dummy input operand tied to $dst.
-multiclass AVX512_masking<bits<8> O, Format F, dag Outs, dag Ins,
-                          string OpcodeStr,
+multiclass AVX512_masking<bits<8> O, Format F, X86VectorVTInfo _,
+                          dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
-                          dag RHS, ValueType OpVT, RegisterClass RC,
-                          RegisterClass KRC> :
-   AVX512_masking_common<O, F, Outs,
-                         Ins,
-                         !con((ins RC:$src0, KRC:$mask), Ins),
-                         !con((ins KRC:$mask), Ins),
+                          dag RHS> :
+   AVX512_masking_common<O, F, _, Outs, Ins,
+                         !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
+                         !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
-                         (vselect KRC:$mask, RHS, RC:$src0), OpVT, RC, KRC,
+                         (vselect _.KRCWM:$mask, RHS, _.RC:$src0),
                          "$src0 = $dst">;

 // Similar to AVX512_masking but in this case one of the source operands
 // ($src1) is already tied to $dst so we just use that for the preserved
 // vector elements.  NOTE that the NonTiedIns (the ins dag) should exclude
 // $src1.
-multiclass AVX512_masking_3src<bits<8> O, Format F, dag Outs, dag NonTiedIns,
-                               string OpcodeStr,
+multiclass AVX512_masking_3src<bits<8> O, Format F, X86VectorVTInfo _,
+                               dag Outs, dag NonTiedIns, string OpcodeStr,
                                string AttSrcAsm, string IntelSrcAsm,
-                               dag RHS, ValueType OpVT,
-                               RegisterClass RC, RegisterClass KRC> :
-   AVX512_masking_common<O, F, Outs,
-                         !con((ins RC:$src1), NonTiedIns),
-                         !con((ins RC:$src1, KRC:$mask), NonTiedIns),
-                         !con((ins RC:$src1, KRC:$mask), NonTiedIns),
+                               dag RHS> :
+   AVX512_masking_common<O, F, _, Outs,
+                         !con((ins _.RC:$src1), NonTiedIns),
+                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
+                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
-                         (vselect KRC:$mask, RHS, RC:$src1), OpVT, RC, KRC>;
+                         (vselect _.KRCWM:$mask, RHS, _.RC:$src1)>;

 // Bitcasts between 512-bit vector types.  Return the original type since
 // no instruction is needed for the conversion
@@ -3267,156 +3274,130 @@ let Predicates = [HasAVX512] in {
 // FMA - Fused Multiply Operations
 //
 let Constraints = "$src1 = $dst" in {
-multiclass avx512_fma3p_rm<bits<8> opc, string OpcodeStr,
-            RegisterClass RC, X86MemOperand x86memop,
-            PatFrag mem_frag, X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
-            string BrdcstStr, SDNode OpNode, ValueType OpVT,
-            RegisterClass KRC> {
-  defm r: AVX512_masking_3src<opc, MRMSrcReg, (outs RC:$dst),
-          (ins RC:$src2, RC:$src3),
+multiclass avx512_fma3p_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                           X86VectorVTInfo _> {
+  defm r: AVX512_masking_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
+          (ins _.RC:$src2, _.RC:$src3),
           OpcodeStr, "$src3, $src2", "$src2, $src3",
-          (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)), OpVT, RC, KRC>,
+          (_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3))>,
           AVX512FMA3Base;

   let mayLoad = 1 in
-  def m: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
-          (ins RC:$src1, RC:$src2, x86memop:$src3),
+  def m: AVX512FMA3<opc, MRMSrcMem, (outs _.RC:$dst),
+          (ins _.RC:$src1, _.RC:$src2, _.MemOp:$src3),
           !strconcat(OpcodeStr, " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
-          [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2,
-                                       (mem_frag addr:$src3))))]>;
-  def mb: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
-          (ins RC:$src1, RC:$src2, x86scalar_mop:$src3),
-          !strconcat(OpcodeStr, " \t{${src3}", BrdcstStr,
-                     ", $src2, $dst|$dst, $src2, ${src3}", BrdcstStr, "}"),
-          [(set RC:$dst, (OpNode RC:$src1, RC:$src2,
-                          (OpVT (X86VBroadcast (scalar_mfrag addr:$src3)))))]>, EVEX_B;
+          [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2,
+                                         (_.MemOpFrag addr:$src3))))]>;
+  def mb: AVX512FMA3<opc, MRMSrcMem, (outs _.RC:$dst),
+          (ins _.RC:$src1, _.RC:$src2, _.ScalarMemOp:$src3),
+          !strconcat(OpcodeStr, " \t{${src3}", _.BroadcastStr,
+                     ", $src2, $dst|$dst, $src2, ${src3}", _.BroadcastStr, "}"),
+          [(set _.RC:$dst, (OpNode _.RC:$src1, _.RC:$src2,
+                            (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))))]>, EVEX_B;
 }
 } // Constraints = "$src1 = $dst"

 let ExeDomain = SSEPackedSingle in {
-  defm VFMADD213PSZ : avx512_fma3p_rm<0xA8, "vfmadd213ps", VR512, f512mem,
-                                      memopv16f32, f32mem, loadf32, "{1to16}",
-                                      X86Fmadd, v16f32, VK16WM>, EVEX_V512,
-                                      EVEX_CD8<32, CD8VF>;
-  defm VFMSUB213PSZ : avx512_fma3p_rm<0xAA, "vfmsub213ps", VR512, f512mem,
-                                      memopv16f32, f32mem, loadf32, "{1to16}",
-                                      X86Fmsub, v16f32, VK16WM>, EVEX_V512,
-                                      EVEX_CD8<32, CD8VF>;
-  defm VFMADDSUB213PSZ : avx512_fma3p_rm<0xA6, "vfmaddsub213ps", VR512, f512mem,
-                                      memopv16f32, f32mem, loadf32, "{1to16}",
-                                      X86Fmaddsub, v16f32, VK16WM>,
-                                      EVEX_V512, EVEX_CD8<32, CD8VF>;
-  defm VFMSUBADD213PSZ : avx512_fma3p_rm<0xA7, "vfmsubadd213ps", VR512, f512mem,
-                                      memopv16f32, f32mem, loadf32, "{1to16}",
-                                      X86Fmsubadd, v16f32, VK16WM>,
-                                      EVEX_V512, EVEX_CD8<32, CD8VF>;
-  defm VFNMADD213PSZ : avx512_fma3p_rm<0xAC, "vfnmadd213ps", VR512, f512mem,
-                                      memopv16f32, f32mem, loadf32, "{1to16}",
-                                      X86Fnmadd, v16f32, VK16WM>, EVEX_V512,
-                                      EVEX_CD8<32, CD8VF>;
-  defm VFNMSUB213PSZ : avx512_fma3p_rm<0xAE, "vfnmsub213ps", VR512, f512mem,
-                                      memopv16f32, f32mem, loadf32, "{1to16}",
-                                      X86Fnmsub, v16f32, VK16WM>, EVEX_V512,
-                                      EVEX_CD8<32, CD8VF>;
+  defm VFMADD213PSZ : avx512_fma3p_rm<0xA8, "vfmadd213ps", X86Fmadd,
+                                      v16f32_info>,
+                      EVEX_V512, EVEX_CD8<32, CD8VF>;
+  defm VFMSUB213PSZ : avx512_fma3p_rm<0xAA, "vfmsub213ps", X86Fmsub,
+                                      v16f32_info>,
+                      EVEX_V512, EVEX_CD8<32, CD8VF>;
+  defm VFMADDSUB213PSZ : avx512_fma3p_rm<0xA6, "vfmaddsub213ps", X86Fmaddsub,
+                                         v16f32_info>,
+                         EVEX_V512, EVEX_CD8<32, CD8VF>;
+  defm VFMSUBADD213PSZ : avx512_fma3p_rm<0xA7, "vfmsubadd213ps", X86Fmsubadd,
+                                         v16f32_info>,
+                         EVEX_V512, EVEX_CD8<32, CD8VF>;
+  defm VFNMADD213PSZ : avx512_fma3p_rm<0xAC, "vfnmadd213ps", X86Fnmadd,
+                                       v16f32_info>,
+                       EVEX_V512, EVEX_CD8<32, CD8VF>;
+  defm VFNMSUB213PSZ : avx512_fma3p_rm<0xAE, "vfnmsub213ps", X86Fnmsub,
+                                       v16f32_info>,
+                       EVEX_V512, EVEX_CD8<32, CD8VF>;
 }
 let ExeDomain = SSEPackedDouble in {
-  defm VFMADD213PDZ : avx512_fma3p_rm<0xA8, "vfmadd213pd", VR512, f512mem,
-                                      memopv8f64, f64mem, loadf64, "{1to8}",
-                                      X86Fmadd, v8f64, VK8WM>, EVEX_V512,
-                                      VEX_W, EVEX_CD8<64, CD8VF>;
-  defm VFMSUB213PDZ : avx512_fma3p_rm<0xAA, "vfmsub213pd", VR512, f512mem,
-                                      memopv8f64, f64mem, loadf64, "{1to8}",
-                                      X86Fmsub, v8f64, VK8WM>, EVEX_V512, VEX_W,
-                                      EVEX_CD8<64, CD8VF>;
-  defm VFMADDSUB213PDZ : avx512_fma3p_rm<0xA6, "vfmaddsub213pd", VR512, f512mem,
-                                      memopv8f64, f64mem, loadf64, "{1to8}",
-                                      X86Fmaddsub, v8f64, VK8WM>,
+  defm VFMADD213PDZ : avx512_fma3p_rm<0xA8, "vfmadd213pd", X86Fmadd,
+                                      v8f64_info>,
                       EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-  defm VFMSUBADD213PDZ : avx512_fma3p_rm<0xA7, "vfmsubadd213pd", VR512, f512mem,
-                                      memopv8f64, f64mem, loadf64, "{1to8}",
-                                      X86Fmsubadd, v8f64, VK8WM>,
+  defm VFMSUB213PDZ : avx512_fma3p_rm<0xAA, "vfmsub213pd", X86Fmsub,
+                                      v8f64_info>,
                       EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+  defm VFMADDSUB213PDZ : avx512_fma3p_rm<0xA6, "vfmaddsub213pd", X86Fmaddsub,
+                                         v8f64_info>,
+                         EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+  defm VFMSUBADD213PDZ : avx512_fma3p_rm<0xA7, "vfmsubadd213pd", X86Fmsubadd,
+                                         v8f64_info>,
+                         EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+  defm VFNMADD213PDZ : avx512_fma3p_rm<0xAC, "vfnmadd213pd", X86Fnmadd,
+                                       v8f64_info>,
+                       EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+  defm VFNMSUB213PDZ : avx512_fma3p_rm<0xAE, "vfnmsub213pd", X86Fnmsub,
+                                       v8f64_info>,
+                       EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-  defm VFNMADD213PDZ : avx512_fma3p_rm<0xAC, "vfnmadd213pd", VR512, f512mem,
-                                      memopv8f64, f64mem, loadf64, "{1to8}",
-                                      X86Fnmadd, v8f64, VK8WM>, EVEX_V512, VEX_W,
-                                      EVEX_CD8<64, CD8VF>;
-  defm VFNMSUB213PDZ : avx512_fma3p_rm<0xAE, "vfnmsub213pd", VR512, f512mem,
-                                      memopv8f64, f64mem, loadf64, "{1to8}",
-                                      X86Fnmsub, v8f64, VK8WM>, EVEX_V512, VEX_W,
-                                      EVEX_CD8<64, CD8VF>;
 }

 let Constraints = "$src1 = $dst" in {
-multiclass avx512_fma3p_m132<bits<8> opc, string OpcodeStr,
-            RegisterClass RC, X86MemOperand x86memop,
-            PatFrag mem_frag, X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
-            string BrdcstStr, SDNode OpNode, ValueType OpVT> {
+multiclass avx512_fma3p_m132<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                             X86VectorVTInfo _> {
   let mayLoad = 1 in
-  def m: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
-          (ins RC:$src1, RC:$src3, x86memop:$src2),
+  def m: AVX512FMA3<opc, MRMSrcMem, (outs _.RC:$dst),
+          (ins _.RC:$src1, _.RC:$src3, _.MemOp:$src2),
           !strconcat(OpcodeStr, " \t{$src2, $src3, $dst|$dst, $src3, $src2}"),
-          [(set RC:$dst, (OpVT (OpNode RC:$src1, (mem_frag addr:$src2), RC:$src3)))]>;
-  def mb: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
-          (ins RC:$src1, RC:$src3, x86scalar_mop:$src2),
-          !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
-                     ", $src3, $dst|$dst, $src3, ${src2}", BrdcstStr, "}"),
-          [(set RC:$dst, (OpNode RC:$src1,
-                          (OpVT (X86VBroadcast (scalar_mfrag addr:$src2))), RC:$src3))]>, EVEX_B;
+          [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, (_.MemOpFrag addr:$src2),
+                                         _.RC:$src3)))]>;
+  def mb: AVX512FMA3<opc, MRMSrcMem, (outs _.RC:$dst),
+          (ins _.RC:$src1, _.RC:$src3, _.ScalarMemOp:$src2),
+          !strconcat(OpcodeStr, " \t{${src2}", _.BroadcastStr,
+                     ", $src3, $dst|$dst, $src3, ${src2}", _.BroadcastStr, "}"),
+          [(set _.RC:$dst,
+              (OpNode _.RC:$src1, (_.VT (X86VBroadcast
+                                           (_.ScalarLdFrag addr:$src2))),
+                      _.RC:$src3))]>, EVEX_B;
 }
 } // Constraints = "$src1 = $dst"


 let ExeDomain = SSEPackedSingle in {
-  defm VFMADD132PSZ : avx512_fma3p_m132<0x98, "vfmadd132ps", VR512, f512mem,
-                                      memopv16f32, f32mem, loadf32, "{1to16}",
-                                      X86Fmadd, v16f32>, EVEX_V512,
-                                      EVEX_CD8<32, CD8VF>;
-  defm VFMSUB132PSZ : avx512_fma3p_m132<0x9A, "vfmsub132ps", VR512, f512mem,
-                                      memopv16f32, f32mem, loadf32, "{1to16}",
-                                      X86Fmsub, v16f32>, EVEX_V512,
-                                      EVEX_CD8<32, CD8VF>;
-  defm VFMADDSUB132PSZ : avx512_fma3p_m132<0x96, "vfmaddsub132ps", VR512, f512mem,
-                                      memopv16f32, f32mem, loadf32, "{1to16}",
-                                      X86Fmaddsub, v16f32>,
-                                      EVEX_V512, EVEX_CD8<32, CD8VF>;
-  defm VFMSUBADD132PSZ : avx512_fma3p_m132<0x97, "vfmsubadd132ps", VR512, f512mem,
-                                      memopv16f32, f32mem, loadf32, "{1to16}",
-                                      X86Fmsubadd, v16f32>,
-                                      EVEX_V512, EVEX_CD8<32, CD8VF>;
-  defm VFNMADD132PSZ : avx512_fma3p_m132<0x9C, "vfnmadd132ps", VR512, f512mem,
-                                      memopv16f32, f32mem, loadf32, "{1to16}",
-                                      X86Fnmadd, v16f32>, EVEX_V512,
-                                      EVEX_CD8<32, CD8VF>;
-  defm VFNMSUB132PSZ : avx512_fma3p_m132<0x9E, "vfnmsub132ps", VR512, f512mem,
-                                      memopv16f32, f32mem, loadf32, "{1to16}",
-                                      X86Fnmsub, v16f32>, EVEX_V512,
-                                      EVEX_CD8<32, CD8VF>;
+  defm VFMADD132PSZ : avx512_fma3p_m132<0x98, "vfmadd132ps", X86Fmadd,
+                                        v16f32_info>,
+                      EVEX_V512, EVEX_CD8<32, CD8VF>;
+  defm VFMSUB132PSZ : avx512_fma3p_m132<0x9A, "vfmsub132ps", X86Fmsub,
+                                        v16f32_info>,
+                      EVEX_V512, EVEX_CD8<32, CD8VF>;
+  defm VFMADDSUB132PSZ : avx512_fma3p_m132<0x96, "vfmaddsub132ps", X86Fmaddsub,
+                                           v16f32_info>,
+                         EVEX_V512, EVEX_CD8<32, CD8VF>;
+  defm VFMSUBADD132PSZ : avx512_fma3p_m132<0x97, "vfmsubadd132ps", X86Fmsubadd,
+                                           v16f32_info>,
+                         EVEX_V512, EVEX_CD8<32, CD8VF>;
+  defm VFNMADD132PSZ : avx512_fma3p_m132<0x9C, "vfnmadd132ps", X86Fnmadd,
+                                         v16f32_info>,
+                       EVEX_V512, EVEX_CD8<32, CD8VF>;
+  defm VFNMSUB132PSZ : avx512_fma3p_m132<0x9E, "vfnmsub132ps", X86Fnmsub,
+                                         v16f32_info>,
+                       EVEX_V512, EVEX_CD8<32, CD8VF>;
 }
 let ExeDomain = SSEPackedDouble in {
-  defm VFMADD132PDZ : avx512_fma3p_m132<0x98, "vfmadd132pd", VR512, f512mem,
-                                      memopv8f64, f64mem, loadf64, "{1to8}",
-                                      X86Fmadd, v8f64>, EVEX_V512,
-                                      VEX_W, EVEX_CD8<64, CD8VF>;
-  defm VFMSUB132PDZ : avx512_fma3p_m132<0x9A, "vfmsub132pd", VR512, f512mem,
-                                      memopv8f64, f64mem, loadf64, "{1to8}",
-                                      X86Fmsub, v8f64>, EVEX_V512, VEX_W,
-                                      EVEX_CD8<64, CD8VF>;
-  defm VFMADDSUB132PDZ : avx512_fma3p_m132<0x96, "vfmaddsub132pd", VR512, f512mem,
-                                      memopv8f64, f64mem, loadf64, "{1to8}",
-                                      X86Fmaddsub, v8f64>, EVEX_V512, VEX_W,
-                                      EVEX_CD8<64, CD8VF>;
-  defm VFMSUBADD132PDZ : avx512_fma3p_m132<0x97, "vfmsubadd132pd", VR512, f512mem,
-                                      memopv8f64, f64mem, loadf64, "{1to8}",
-                                      X86Fmsubadd, v8f64>, EVEX_V512, VEX_W,
-                                      EVEX_CD8<64, CD8VF>;
-  defm VFNMADD132PDZ : avx512_fma3p_m132<0x9C, "vfnmadd132pd", VR512, f512mem,
-                                      memopv8f64, f64mem, loadf64, "{1to8}",
-                                      X86Fnmadd, v8f64>, EVEX_V512, VEX_W,
-                                      EVEX_CD8<64, CD8VF>;
-  defm VFNMSUB132PDZ : avx512_fma3p_m132<0x9E, "vfnmsub132pd", VR512, f512mem,
-                                      memopv8f64, f64mem, loadf64, "{1to8}",
-                                      X86Fnmsub, v8f64>, EVEX_V512, VEX_W,
-                                      EVEX_CD8<64, CD8VF>;
+  defm VFMADD132PDZ : avx512_fma3p_m132<0x98, "vfmadd132pd", X86Fmadd,
+                                        v8f64_info>,
+                      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+  defm VFMSUB132PDZ : avx512_fma3p_m132<0x9A, "vfmsub132pd", X86Fmsub,
+                                        v8f64_info>,
+                      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+  defm VFMADDSUB132PDZ : avx512_fma3p_m132<0x96, "vfmaddsub132pd", X86Fmaddsub,
+                                           v8f64_info>,
+                         EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+  defm VFMSUBADD132PDZ : avx512_fma3p_m132<0x97, "vfmsubadd132pd", X86Fmsubadd,
+                                           v8f64_info>,
+                         EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+  defm VFNMADD132PDZ : avx512_fma3p_m132<0x9C, "vfnmadd132pd", X86Fnmadd,
+                                         v8f64_info>,
+                       EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+  defm VFNMSUB132PDZ : avx512_fma3p_m132<0x9E, "vfnmsub132pd", X86Fnmsub,
+                                         v8f64_info>,
+                       EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
 }

 // Scalar FMA
@@ -4841,13 +4822,12 @@ def : Pat<(v8i64 (X86Shufp VR512:$src1,
           (VSHUFPDZrmi VR512:$src1, addr:$src2, imm:$imm)>;

 multiclass avx512_valign<X86VectorVTInfo _> {
-  defm rri : AVX512_masking<0x03, MRMSrcReg, (outs _.RC:$dst),
+  defm rri : AVX512_masking<0x03, MRMSrcReg, _, (outs _.RC:$dst),
                     (ins _.RC:$src1, _.RC:$src2, i8imm:$src3),
                     "valign"##_.Suffix,
                     "$src3, $src2, $src1", "$src1, $src2, $src3",
                     (_.VT (X86VAlign _.RC:$src2, _.RC:$src1,
-                                     (i8 imm:$src3))),
-                    _.VT, _.RC, _.KRCWM>,
+                                     (i8 imm:$src3)))>,
                     AVX512AIi8Base, EVEX_4V;

   // Also match valign of packed floats.