mirror of
https://github.com/RPCS3/llvm.git
synced 2024-11-25 21:00:00 +00:00
Add 256-bit vaddsub, vhadd, vhsub, vblend and vdpp instructions!
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@108769 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
58b8176ed3
commit
94143ee625
@ -56,6 +56,9 @@ public:
|
||||
// Print hook for i128mem operands: forwards MI, OpNo and O unchanged to
// printMemReference and writes nothing else to O itself.
void printi128mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
|
||||
printMemReference(MI, OpNo, O);
|
||||
}
|
||||
// Print hook for i256mem operands. Like the neighbouring printi128mem and
// printf32mem hooks, it only forwards MI, OpNo and O to printMemReference;
// no operand-size text is written here.
void printi256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
|
||||
printMemReference(MI, OpNo, O);
|
||||
}
|
||||
// Print hook for f32mem operands: forwards MI, OpNo and O unchanged to
// printMemReference and writes nothing else to O itself.
void printf32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
|
||||
printMemReference(MI, OpNo, O);
|
||||
}
|
||||
|
@ -64,6 +64,10 @@ public:
|
||||
O << "XMMWORD PTR ";
|
||||
printMemReference(MI, OpNo, O);
|
||||
}
|
||||
// Print hook for i256mem operands in this printer: writes the size prefix
// "YMMWORD PTR " to O first, then emits the memory reference itself through
// printMemReference (same shape as the XMMWORD/DWORD hooks around it).
void printi256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
|
||||
O << "YMMWORD PTR ";
|
||||
printMemReference(MI, OpNo, O);
|
||||
}
|
||||
void printf32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
|
||||
O << "DWORD PTR ";
|
||||
printMemReference(MI, OpNo, O);
|
||||
|
@ -213,6 +213,7 @@ def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>;
|
||||
def memopv16i8 : PatFrag<(ops node:$ptr), (v16i8 (memop node:$ptr))>;
|
||||
|
||||
// FIXME: move this to a more appropriate place after all AVX is done.
|
||||
// Pattern fragments over memop, one per vector value type: each takes a
// single $ptr node and wraps (memop node:$ptr) in the named type
// (v32i8, v8f32, v4f64), mirroring the memopv16i8-style fragments above.
def memopv32i8 : PatFrag<(ops node:$ptr), (v32i8 (memop node:$ptr))>;
|
||||
def memopv8f32 : PatFrag<(ops node:$ptr), (v8f32 (memop node:$ptr))>;
|
||||
def memopv4f64 : PatFrag<(ops node:$ptr), (v4f64 (memop node:$ptr))>;
|
||||
|
||||
|
@ -222,7 +222,7 @@ def i16mem : X86MemOperand<"printi16mem">;
|
||||
def i32mem : X86MemOperand<"printi32mem">;
|
||||
def i64mem : X86MemOperand<"printi64mem">;
|
||||
def i128mem : X86MemOperand<"printi128mem">;
|
||||
//def i256mem : X86MemOperand<"printi256mem">;
|
||||
// i256mem memory operand; passes "printi256mem" to X86MemOperand, presumably
// as the name of the printer method used for this operand (X86MemOperand is
// defined outside this view -- confirm there).
def i256mem : X86MemOperand<"printi256mem">;
|
||||
def f32mem : X86MemOperand<"printf32mem">;
|
||||
def f64mem : X86MemOperand<"printf64mem">;
|
||||
def f80mem : X86MemOperand<"printf80mem">;
|
||||
|
@ -3125,35 +3125,41 @@ let AddedComplexity = 20 in
|
||||
// SSE3 - Arithmetic
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, bit Is2Addr = 1> {
|
||||
multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, RegisterClass RC,
|
||||
X86MemOperand x86memop, bit Is2Addr = 1> {
|
||||
def rr : I<0xD0, MRMSrcReg,
|
||||
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
|
||||
(outs RC:$dst), (ins RC:$src1, RC:$src2),
|
||||
!if(Is2Addr,
|
||||
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
|
||||
[(set VR128:$dst, (Int VR128:$src1,
|
||||
VR128:$src2))]>;
|
||||
[(set RC:$dst, (Int RC:$src1, RC:$src2))]>;
|
||||
def rm : I<0xD0, MRMSrcMem,
|
||||
(outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
|
||||
(outs RC:$dst), (ins RC:$src1, x86memop:$src2),
|
||||
!if(Is2Addr,
|
||||
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
|
||||
[(set VR128:$dst, (Int VR128:$src1,
|
||||
(memop addr:$src2)))]>;
|
||||
|
||||
[(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))]>;
|
||||
}
|
||||
|
||||
let isAsmParserOnly = 1, Predicates = [HasAVX],
|
||||
ExeDomain = SSEPackedDouble in {
|
||||
defm VADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", 0>, XD,
|
||||
VEX_4V;
|
||||
defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", 0>, OpSize,
|
||||
VEX_4V;
|
||||
defm VADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", VR128,
|
||||
f128mem, 0>, XD, VEX_4V;
|
||||
defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", VR128,
|
||||
f128mem, 0>, OpSize, VEX_4V;
|
||||
let Pattern = []<dag> in {
|
||||
defm VADDSUBPSY : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", VR256,
|
||||
f256mem, 0>, XD, VEX_4V;
|
||||
defm VADDSUBPDY : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", VR256,
|
||||
f256mem, 0>, OpSize, VEX_4V;
|
||||
}
|
||||
}
|
||||
let Constraints = "$src1 = $dst", Predicates = [HasSSE3],
|
||||
ExeDomain = SSEPackedDouble in {
|
||||
defm ADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "addsubps">, XD;
|
||||
defm ADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "addsubpd">, TB, OpSize;
|
||||
defm ADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "addsubps", VR128,
|
||||
f128mem>, XD;
|
||||
defm ADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "addsubpd", VR128,
|
||||
f128mem>, TB, OpSize;
|
||||
}
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
@ -3161,51 +3167,65 @@ let Constraints = "$src1 = $dst", Predicates = [HasSSE3],
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
// Horizontal ops
|
||||
class S3D_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1>
|
||||
: S3DI<o, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
|
||||
multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
|
||||
X86MemOperand x86memop, Intrinsic IntId, bit Is2Addr = 1> {
|
||||
def rr : S3DI<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
|
||||
!if(Is2Addr,
|
||||
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
|
||||
[(set VR128:$dst, (v4f32 (IntId VR128:$src1, VR128:$src2)))]>;
|
||||
class S3D_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1>
|
||||
: S3DI<o, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
|
||||
[(set RC:$dst, (vt (IntId RC:$src1, RC:$src2)))]>;
|
||||
|
||||
def rm : S3DI<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
|
||||
!if(Is2Addr,
|
||||
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
|
||||
[(set VR128:$dst, (v4f32 (IntId VR128:$src1, (memop addr:$src2))))]>;
|
||||
class S3_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1>
|
||||
: S3I<o, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
|
||||
[(set RC:$dst, (vt (IntId RC:$src1, (memop addr:$src2))))]>;
|
||||
}
|
||||
multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
|
||||
X86MemOperand x86memop, Intrinsic IntId, bit Is2Addr = 1> {
|
||||
def rr : S3I<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
|
||||
!if(Is2Addr,
|
||||
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
|
||||
[(set VR128:$dst, (v2f64 (IntId VR128:$src1, VR128:$src2)))]>;
|
||||
class S3_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1>
|
||||
: S3I<o, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
|
||||
[(set RC:$dst, (vt (IntId RC:$src1, RC:$src2)))]>;
|
||||
|
||||
def rm : S3I<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
|
||||
!if(Is2Addr,
|
||||
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
|
||||
[(set VR128:$dst, (v2f64 (IntId VR128:$src1, (memopv2f64 addr:$src2))))]>;
|
||||
[(set RC:$dst, (vt (IntId RC:$src1, (memop addr:$src2))))]>;
|
||||
}
|
||||
|
||||
let isAsmParserOnly = 1, Predicates = [HasAVX] in {
|
||||
def VHADDPSrr : S3D_Intrr<0x7C, "vhaddps", int_x86_sse3_hadd_ps, 0>, VEX_4V;
|
||||
def VHADDPSrm : S3D_Intrm<0x7C, "vhaddps", int_x86_sse3_hadd_ps, 0>, VEX_4V;
|
||||
def VHADDPDrr : S3_Intrr <0x7C, "vhaddpd", int_x86_sse3_hadd_pd, 0>, VEX_4V;
|
||||
def VHADDPDrm : S3_Intrm <0x7C, "vhaddpd", int_x86_sse3_hadd_pd, 0>, VEX_4V;
|
||||
def VHSUBPSrr : S3D_Intrr<0x7D, "vhsubps", int_x86_sse3_hsub_ps, 0>, VEX_4V;
|
||||
def VHSUBPSrm : S3D_Intrm<0x7D, "vhsubps", int_x86_sse3_hsub_ps, 0>, VEX_4V;
|
||||
def VHSUBPDrr : S3_Intrr <0x7D, "vhsubpd", int_x86_sse3_hsub_pd, 0>, VEX_4V;
|
||||
def VHSUBPDrm : S3_Intrm <0x7D, "vhsubpd", int_x86_sse3_hsub_pd, 0>, VEX_4V;
|
||||
defm VHADDPS : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem,
|
||||
int_x86_sse3_hadd_ps, 0>, VEX_4V;
|
||||
defm VHADDPD : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem,
|
||||
int_x86_sse3_hadd_pd, 0>, VEX_4V;
|
||||
defm VHSUBPS : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem,
|
||||
int_x86_sse3_hsub_ps, 0>, VEX_4V;
|
||||
defm VHSUBPD : S3_Int <0x7D, "vhsubpd", v2f64, VR128, f128mem,
|
||||
int_x86_sse3_hsub_pd, 0>, VEX_4V;
|
||||
let Pattern = []<dag> in {
|
||||
defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem,
|
||||
int_x86_sse3_hadd_ps, 0>, VEX_4V;
|
||||
defm VHADDPDY : S3_Int <0x7C, "vhaddpd", v4f64, VR256, f256mem,
|
||||
int_x86_sse3_hadd_pd, 0>, VEX_4V;
|
||||
defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem,
|
||||
int_x86_sse3_hsub_ps, 0>, VEX_4V;
|
||||
defm VHSUBPDY : S3_Int <0x7D, "vhsubpd", v4f64, VR256, f256mem,
|
||||
int_x86_sse3_hsub_pd, 0>, VEX_4V;
|
||||
}
|
||||
}
|
||||
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
def HADDPSrr : S3D_Intrr<0x7C, "haddps", int_x86_sse3_hadd_ps>;
|
||||
def HADDPSrm : S3D_Intrm<0x7C, "haddps", int_x86_sse3_hadd_ps>;
|
||||
def HADDPDrr : S3_Intrr <0x7C, "haddpd", int_x86_sse3_hadd_pd>;
|
||||
def HADDPDrm : S3_Intrm <0x7C, "haddpd", int_x86_sse3_hadd_pd>;
|
||||
def HSUBPSrr : S3D_Intrr<0x7D, "hsubps", int_x86_sse3_hsub_ps>;
|
||||
def HSUBPSrm : S3D_Intrm<0x7D, "hsubps", int_x86_sse3_hsub_ps>;
|
||||
def HSUBPDrr : S3_Intrr <0x7D, "hsubpd", int_x86_sse3_hsub_pd>;
|
||||
def HSUBPDrm : S3_Intrm <0x7D, "hsubpd", int_x86_sse3_hsub_pd>;
|
||||
defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem,
|
||||
int_x86_sse3_hadd_ps>;
|
||||
defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem,
|
||||
int_x86_sse3_hadd_pd>;
|
||||
defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem,
|
||||
int_x86_sse3_hsub_ps>;
|
||||
defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem,
|
||||
int_x86_sse3_hsub_pd>;
|
||||
}
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
@ -4431,79 +4451,98 @@ let Constraints = "$src1 = $dst" in
|
||||
|
||||
/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate
|
||||
multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
|
||||
Intrinsic IntId128, bit Is2Addr = 1> {
|
||||
Intrinsic IntId, RegisterClass RC, PatFrag memop_frag,
|
||||
X86MemOperand x86memop, bit Is2Addr = 1> {
|
||||
let isCommutable = 1 in
|
||||
def rri : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
|
||||
(ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
|
||||
def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst),
|
||||
(ins RC:$src1, RC:$src2, i32i8imm:$src3),
|
||||
!if(Is2Addr,
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
|
||||
[(set VR128:$dst,
|
||||
(IntId128 VR128:$src1, VR128:$src2, imm:$src3))]>,
|
||||
[(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>,
|
||||
OpSize;
|
||||
def rmi : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
|
||||
(ins VR128:$src1, i128mem:$src2, i32i8imm:$src3),
|
||||
def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
|
||||
(ins RC:$src1, x86memop:$src2, i32i8imm:$src3),
|
||||
!if(Is2Addr,
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
|
||||
[(set VR128:$dst,
|
||||
(IntId128 VR128:$src1,
|
||||
(bitconvert (memopv16i8 addr:$src2)), imm:$src3))]>,
|
||||
[(set RC:$dst,
|
||||
(IntId RC:$src1,
|
||||
(bitconvert (memop_frag addr:$src2)), imm:$src3))]>,
|
||||
OpSize;
|
||||
}
|
||||
|
||||
let isAsmParserOnly = 1, Predicates = [HasAVX] in {
|
||||
let isCommutable = 0 in {
|
||||
defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps,
|
||||
0>, VEX_4V;
|
||||
VR128, memopv16i8, i128mem, 0>, VEX_4V;
|
||||
defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd,
|
||||
0>, VEX_4V;
|
||||
VR128, memopv16i8, i128mem, 0>, VEX_4V;
|
||||
let Pattern = []<dag> in {
|
||||
defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps,
|
||||
VR256, memopv32i8, i256mem, 0>, VEX_4V;
|
||||
defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd,
|
||||
VR256, memopv32i8, i256mem, 0>, VEX_4V;
|
||||
}
|
||||
defm VPBLENDW : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_sse41_pblendw,
|
||||
0>, VEX_4V;
|
||||
VR128, memopv16i8, i128mem, 0>, VEX_4V;
|
||||
defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
|
||||
0>, VEX_4V;
|
||||
VR128, memopv16i8, i128mem, 0>, VEX_4V;
|
||||
}
|
||||
defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
|
||||
0>, VEX_4V;
|
||||
VR128, memopv16i8, i128mem, 0>, VEX_4V;
|
||||
defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd,
|
||||
0>, VEX_4V;
|
||||
VR128, memopv16i8, i128mem, 0>, VEX_4V;
|
||||
let Pattern = []<dag> in
|
||||
defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
|
||||
VR256, memopv32i8, i256mem, 0>, VEX_4V;
|
||||
}
|
||||
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
let isCommutable = 0 in {
|
||||
defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps>;
|
||||
defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd>;
|
||||
defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw>;
|
||||
defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw>;
|
||||
defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps,
|
||||
VR128, memopv16i8, i128mem>;
|
||||
defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd,
|
||||
VR128, memopv16i8, i128mem>;
|
||||
defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw,
|
||||
VR128, memopv16i8, i128mem>;
|
||||
defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw,
|
||||
VR128, memopv16i8, i128mem>;
|
||||
}
|
||||
defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps>;
|
||||
defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd>;
|
||||
defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps,
|
||||
VR128, memopv16i8, i128mem>;
|
||||
defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd,
|
||||
VR128, memopv16i8, i128mem>;
|
||||
}
|
||||
|
||||
/// SS41I_quaternary_int_avx - AVX SSE 4.1 with 4 operators
|
||||
let isAsmParserOnly = 1, Predicates = [HasAVX] in {
|
||||
multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr> {
|
||||
def rr : I<opc, MRMSrcReg, (outs VR128:$dst),
|
||||
(ins VR128:$src1, VR128:$src2, VR128:$src3),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
|
||||
[], SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
|
||||
multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
|
||||
RegisterClass RC, X86MemOperand x86memop> {
|
||||
def rr : I<opc, MRMSrcReg, (outs RC:$dst),
|
||||
(ins RC:$src1, RC:$src2, RC:$src3),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
|
||||
[], SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
|
||||
|
||||
def rm : I<opc, MRMSrcMem, (outs VR128:$dst),
|
||||
(ins VR128:$src1, i128mem:$src2, VR128:$src3),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
|
||||
[], SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
|
||||
}
|
||||
def rm : I<opc, MRMSrcMem, (outs RC:$dst),
|
||||
(ins RC:$src1, x86memop:$src2, RC:$src3),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
|
||||
[], SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
|
||||
}
|
||||
}
|
||||
|
||||
defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd">;
|
||||
defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps">;
|
||||
defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb">;
|
||||
defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR128, i128mem>;
|
||||
defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, i128mem>;
|
||||
defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, i256mem>;
|
||||
defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, i256mem>;
|
||||
|
||||
defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem>;
|
||||
|
||||
/// SS41I_ternary_int - SSE 4.1 ternary operator
|
||||
let Uses = [XMM0], Constraints = "$src1 = $dst" in {
|
||||
|
@ -804,7 +804,7 @@ def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128,
|
||||
}];
|
||||
}
|
||||
|
||||
def VR256 : RegisterClass<"X86", [v8i32, v4i64, v8f32, v4f64], 256,
|
||||
def VR256 : RegisterClass<"X86", [v32i8, v8i32, v4i64, v8f32, v4f64], 256,
|
||||
[YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
|
||||
YMM8, YMM9, YMM10, YMM11,
|
||||
YMM12, YMM13, YMM14, YMM15]> {
|
||||
|
@ -12926,3 +12926,75 @@
|
||||
// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1f]
|
||||
vcmptrue_usps %ymm1, %ymm2, %ymm3
|
||||
|
||||
// CHECK: vaddsubps %ymm1, %ymm2, %ymm3
|
||||
// CHECK: encoding: [0xc5,0xef,0xd0,0xd9]
|
||||
vaddsubps %ymm1, %ymm2, %ymm3
|
||||
|
||||
// CHECK: vaddsubps (%eax), %ymm1, %ymm2
|
||||
// CHECK: encoding: [0xc5,0xf7,0xd0,0x10]
|
||||
vaddsubps (%eax), %ymm1, %ymm2
|
||||
|
||||
// CHECK: vaddsubpd %ymm1, %ymm2, %ymm3
|
||||
// CHECK: encoding: [0xc5,0xed,0xd0,0xd9]
|
||||
vaddsubpd %ymm1, %ymm2, %ymm3
|
||||
|
||||
// CHECK: vaddsubpd (%eax), %ymm1, %ymm2
|
||||
// CHECK: encoding: [0xc5,0xf5,0xd0,0x10]
|
||||
vaddsubpd (%eax), %ymm1, %ymm2
|
||||
|
||||
// CHECK: vhaddps %ymm1, %ymm2, %ymm3
|
||||
// CHECK: encoding: [0xc5,0xef,0x7c,0xd9]
|
||||
vhaddps %ymm1, %ymm2, %ymm3
|
||||
|
||||
// CHECK: vhaddps (%eax), %ymm2, %ymm3
|
||||
// CHECK: encoding: [0xc5,0xef,0x7c,0x18]
|
||||
vhaddps (%eax), %ymm2, %ymm3
|
||||
|
||||
// CHECK: vhaddpd %ymm1, %ymm2, %ymm3
|
||||
// CHECK: encoding: [0xc5,0xed,0x7c,0xd9]
|
||||
vhaddpd %ymm1, %ymm2, %ymm3
|
||||
|
||||
// CHECK: vhaddpd (%eax), %ymm2, %ymm3
|
||||
// CHECK: encoding: [0xc5,0xed,0x7c,0x18]
|
||||
vhaddpd (%eax), %ymm2, %ymm3
|
||||
|
||||
// CHECK: vhsubps %ymm1, %ymm2, %ymm3
|
||||
// CHECK: encoding: [0xc5,0xef,0x7d,0xd9]
|
||||
vhsubps %ymm1, %ymm2, %ymm3
|
||||
|
||||
// CHECK: vhsubps (%eax), %ymm2, %ymm3
|
||||
// CHECK: encoding: [0xc5,0xef,0x7d,0x18]
|
||||
vhsubps (%eax), %ymm2, %ymm3
|
||||
|
||||
// CHECK: vhsubpd %ymm1, %ymm2, %ymm3
|
||||
// CHECK: encoding: [0xc5,0xed,0x7d,0xd9]
|
||||
vhsubpd %ymm1, %ymm2, %ymm3
|
||||
|
||||
// CHECK: vhsubpd (%eax), %ymm2, %ymm3
|
||||
// CHECK: encoding: [0xc5,0xed,0x7d,0x18]
|
||||
vhsubpd (%eax), %ymm2, %ymm3
|
||||
|
||||
// CHECK: vblendps $3, %ymm2, %ymm5, %ymm1
|
||||
// CHECK: encoding: [0xc4,0xe3,0x55,0x0c,0xca,0x03]
|
||||
vblendps $3, %ymm2, %ymm5, %ymm1
|
||||
|
||||
// CHECK: vblendps $3, (%eax), %ymm5, %ymm1
|
||||
// CHECK: encoding: [0xc4,0xe3,0x55,0x0c,0x08,0x03]
|
||||
vblendps $3, (%eax), %ymm5, %ymm1
|
||||
|
||||
// CHECK: vblendpd $3, %ymm2, %ymm5, %ymm1
|
||||
// CHECK: encoding: [0xc4,0xe3,0x55,0x0d,0xca,0x03]
|
||||
vblendpd $3, %ymm2, %ymm5, %ymm1
|
||||
|
||||
// CHECK: vblendpd $3, (%eax), %ymm5, %ymm1
|
||||
// CHECK: encoding: [0xc4,0xe3,0x55,0x0d,0x08,0x03]
|
||||
vblendpd $3, (%eax), %ymm5, %ymm1
|
||||
|
||||
// CHECK: vdpps $3, %ymm2, %ymm5, %ymm1
|
||||
// CHECK: encoding: [0xc4,0xe3,0x55,0x40,0xca,0x03]
|
||||
vdpps $3, %ymm2, %ymm5, %ymm1
|
||||
|
||||
// CHECK: vdpps $3, (%eax), %ymm5, %ymm1
|
||||
// CHECK: encoding: [0xc4,0xe3,0x55,0x40,0x08,0x03]
|
||||
vdpps $3, (%eax), %ymm5, %ymm1
|
||||
|
||||
|
@ -3000,3 +3000,75 @@ pshufb CPI1_0(%rip), %xmm1
|
||||
// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1f]
|
||||
vcmptrue_usps %ymm11, %ymm12, %ymm13
|
||||
|
||||
// CHECK: vaddsubps %ymm11, %ymm12, %ymm13
|
||||
// CHECK: encoding: [0xc4,0x41,0x1f,0xd0,0xeb]
|
||||
vaddsubps %ymm11, %ymm12, %ymm13
|
||||
|
||||
// CHECK: vaddsubps (%rax), %ymm11, %ymm12
|
||||
// CHECK: encoding: [0xc5,0x27,0xd0,0x20]
|
||||
vaddsubps (%rax), %ymm11, %ymm12
|
||||
|
||||
// CHECK: vaddsubpd %ymm11, %ymm12, %ymm13
|
||||
// CHECK: encoding: [0xc4,0x41,0x1d,0xd0,0xeb]
|
||||
vaddsubpd %ymm11, %ymm12, %ymm13
|
||||
|
||||
// CHECK: vaddsubpd (%rax), %ymm11, %ymm12
|
||||
// CHECK: encoding: [0xc5,0x25,0xd0,0x20]
|
||||
vaddsubpd (%rax), %ymm11, %ymm12
|
||||
|
||||
// CHECK: vhaddps %ymm11, %ymm12, %ymm13
|
||||
// CHECK: encoding: [0xc4,0x41,0x1f,0x7c,0xeb]
|
||||
vhaddps %ymm11, %ymm12, %ymm13
|
||||
|
||||
// CHECK: vhaddps (%rax), %ymm12, %ymm13
|
||||
// CHECK: encoding: [0xc5,0x1f,0x7c,0x28]
|
||||
vhaddps (%rax), %ymm12, %ymm13
|
||||
|
||||
// CHECK: vhaddpd %ymm11, %ymm12, %ymm13
|
||||
// CHECK: encoding: [0xc4,0x41,0x1d,0x7c,0xeb]
|
||||
vhaddpd %ymm11, %ymm12, %ymm13
|
||||
|
||||
// CHECK: vhaddpd (%rax), %ymm12, %ymm13
|
||||
// CHECK: encoding: [0xc5,0x1d,0x7c,0x28]
|
||||
vhaddpd (%rax), %ymm12, %ymm13
|
||||
|
||||
// CHECK: vhsubps %ymm11, %ymm12, %ymm13
|
||||
// CHECK: encoding: [0xc4,0x41,0x1f,0x7d,0xeb]
|
||||
vhsubps %ymm11, %ymm12, %ymm13
|
||||
|
||||
// CHECK: vhsubps (%rax), %ymm12, %ymm13
|
||||
// CHECK: encoding: [0xc5,0x1f,0x7d,0x28]
|
||||
vhsubps (%rax), %ymm12, %ymm13
|
||||
|
||||
// CHECK: vhsubpd %ymm11, %ymm12, %ymm13
|
||||
// CHECK: encoding: [0xc4,0x41,0x1d,0x7d,0xeb]
|
||||
vhsubpd %ymm11, %ymm12, %ymm13
|
||||
|
||||
// CHECK: vhsubpd (%rax), %ymm12, %ymm13
|
||||
// CHECK: encoding: [0xc5,0x1d,0x7d,0x28]
|
||||
vhsubpd (%rax), %ymm12, %ymm13
|
||||
|
||||
// CHECK: vblendps $3, %ymm12, %ymm10, %ymm11
|
||||
// CHECK: encoding: [0xc4,0x43,0x2d,0x0c,0xdc,0x03]
|
||||
vblendps $3, %ymm12, %ymm10, %ymm11
|
||||
|
||||
// CHECK: vblendps $3, (%rax), %ymm10, %ymm11
|
||||
// CHECK: encoding: [0xc4,0x63,0x2d,0x0c,0x18,0x03]
|
||||
vblendps $3, (%rax), %ymm10, %ymm11
|
||||
|
||||
// CHECK: vblendpd $3, %ymm12, %ymm10, %ymm11
|
||||
// CHECK: encoding: [0xc4,0x43,0x2d,0x0d,0xdc,0x03]
|
||||
vblendpd $3, %ymm12, %ymm10, %ymm11
|
||||
|
||||
// CHECK: vblendpd $3, (%rax), %ymm10, %ymm11
|
||||
// CHECK: encoding: [0xc4,0x63,0x2d,0x0d,0x18,0x03]
|
||||
vblendpd $3, (%rax), %ymm10, %ymm11
|
||||
|
||||
// CHECK: vdpps $3, %ymm12, %ymm10, %ymm11
|
||||
// CHECK: encoding: [0xc4,0x43,0x2d,0x40,0xdc,0x03]
|
||||
vdpps $3, %ymm12, %ymm10, %ymm11
|
||||
|
||||
// CHECK: vdpps $3, (%rax), %ymm10, %ymm11
|
||||
// CHECK: encoding: [0xc4,0x63,0x2d,0x40,0x18,0x03]
|
||||
vdpps $3, (%rax), %ymm10, %ymm11
|
||||
|
||||
|
@ -339,6 +339,7 @@ static int X86TypeFromOpName(LiteralConstantEmitter *type,
|
||||
MEM("f80mem");
|
||||
MEM("opaque80mem");
|
||||
MEM("i128mem");
|
||||
MEM("i256mem");
|
||||
MEM("f128mem");
|
||||
MEM("f256mem");
|
||||
MEM("opaque512mem");
|
||||
|
Loading…
Reference in New Issue
Block a user