AVX-512: Added CMP and BLEND instructions.
Lowering for SETCC.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188265 91177308-0d34-0410-b5e6-96231b3b80d8

This commit is contained in:
parent bd980e5569
commit 4d36bd80e6
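In LLVM IR terms, the commit covers patterns of the following shape (modeled on the tests added at the end of this commit; the function name below is illustrative and not part of the change): a 512-bit vector compare now produces an i1 mask directly, and the select on that mask becomes a masked move or masked blend.

; Sketch only, assuming the same llc invocation as the new test file
; (-mtriple=x86_64-apple-darwin -mcpu=knl). The icmp is expected to lower to a
; single mask compare (vpcmpled writing a k-register) and the select to a
; masked update (vmovdqu32 {%k1}).
define <16 x i32> @select_smaller(<16 x i32> %x, <16 x i32> %y) nounwind {
  %mask = icmp sle <16 x i32> %x, %y
  %min = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %y
  ret <16 x i32> %min
}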
@@ -9705,6 +9705,42 @@ static SDValue Lower256IntVSETCC(SDValue Op, SelectionDAG &DAG) {
                     DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2, CC));
}

static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) {
  SDValue Cond;
  SDValue Op0 = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);
  SDValue CC = Op.getOperand(2);
  MVT VT = Op.getValueType().getSimpleVT();

  EVT OpVT = Op0.getValueType();
  assert(OpVT.getVectorElementType().getSizeInBits() >= 32 &&
         Op.getValueType().getScalarType() == MVT::i1 &&
         "Cannot set masked compare for this operation");

  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
  SDLoc dl(Op);

  bool Unsigned = false;
  unsigned SSECC;
  switch (SetCCOpcode) {
  default: llvm_unreachable("Unexpected SETCC condition");
  case ISD::SETNE:  SSECC = 4; break;
  case ISD::SETEQ:  SSECC = 0; break;
  case ISD::SETUGT: Unsigned = true;
  case ISD::SETGT:  SSECC = 6; break; // NLE
  case ISD::SETULT: Unsigned = true;
  case ISD::SETLT:  SSECC = 1; break;
  case ISD::SETUGE: Unsigned = true;
  case ISD::SETGE:  SSECC = 5; break; // NLT
  case ISD::SETULE: Unsigned = true;
  case ISD::SETLE:  SSECC = 2; break;
  }
  unsigned Opc = Unsigned ? X86ISD::CMPMU: X86ISD::CMPM;
  return DAG.getNode(Opc, dl, VT, Op0, Op1,
                     DAG.getConstant(SSECC, MVT::i8));

}

static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
                           SelectionDAG &DAG) {
  SDValue Cond;
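To make the immediate mapping above concrete: an unsigned "ult" sets Unsigned and falls through to the SETLT case, so the node becomes X86ISD::CMPMU with immediate 1, which the vpcmpud form added later in this commit should print as vpcmpltud. A hedged sketch, not part of the commit (the function name is illustrative):

; icmp ult <16 x i32> -> X86ISD::CMPMU, imm 1 -> expected vpcmpltud writing %k1,
; followed by a masked vmovdqu32 for the select, as in the added tests.
define <16 x i32> @ult_select(<16 x i32> %x, <16 x i32> %y) nounwind {
  %mask = icmp ult <16 x i32> %x, %y
  %min = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %y
  ret <16 x i32> %min
}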
@@ -9723,7 +9759,12 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
#endif

    unsigned SSECC = translateX86FSETCC(SetCCOpcode, Op0, Op1);

    unsigned Opc = X86ISD::CMPP;
    unsigned NumElems = VT.getVectorNumElements();
    if (Subtarget->hasAVX512() && VT.getVectorElementType() == MVT::i1) {
      assert(NumElems <=16);
      Opc = X86ISD::CMPM;
    }
    // In the two special cases we can't handle, emit two comparisons.
    if (SSECC == 8) {
      unsigned CC0, CC1;
@@ -9735,14 +9776,14 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
        CC0 = 7; CC1 = 4; CombineOpc = ISD::AND;
      }

-     SDValue Cmp0 = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
+     SDValue Cmp0 = DAG.getNode(Opc, dl, VT, Op0, Op1,
                                 DAG.getConstant(CC0, MVT::i8));
-     SDValue Cmp1 = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
+     SDValue Cmp1 = DAG.getNode(Opc, dl, VT, Op0, Op1,
                                 DAG.getConstant(CC1, MVT::i8));
      return DAG.getNode(CombineOpc, dl, VT, Cmp0, Cmp1);
    }
    // Handle all other FP comparisons here.
-   return DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
+   return DAG.getNode(Opc, dl, VT, Op0, Op1,
                       DAG.getConstant(SSECC, MVT::i8));
  }

@@ -9750,6 +9791,24 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
  if (VT.is256BitVector() && !Subtarget->hasInt256())
    return Lower256IntVSETCC(Op, DAG);

  bool MaskResult = (VT.getVectorElementType() == MVT::i1);
  EVT OpVT = Op1.getValueType();
  if (Subtarget->hasAVX512()) {
    if (Op1.getValueType().is512BitVector() ||
        (MaskResult && OpVT.getVectorElementType().getSizeInBits() >= 32))
      return LowerIntVSETCC_AVX512(Op, DAG);

    // In the AVX-512 architecture setcc returns a mask with i1 elements,
    // but there is no compare instruction for i8 and i16 elements.
    // We are not dealing with 512-bit operands in this case; those
    // types are illegal.
    if (MaskResult &&
        (OpVT.getVectorElementType().getSizeInBits() < 32 &&
         OpVT.getVectorElementType().getSizeInBits() >= 8))
      return DAG.getNode(ISD::TRUNCATE, dl, VT,
                         DAG.getNode(ISD::SETCC, dl, OpVT, Op0, Op1, CC));
  }

  // We are handling one of the integer comparisons here. Since SSE only has
  // GT and EQ comparisons for integer, swapping operands and multiple
  // operations may be required for some comparisons.
@@ -9759,15 +9818,18 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
  switch (SetCCOpcode) {
  default: llvm_unreachable("Unexpected SETCC condition");
  case ISD::SETNE:  Invert = true;
- case ISD::SETEQ:  Opc = X86ISD::PCMPEQ; break;
+ case ISD::SETEQ:  Opc = MaskResult? X86ISD::PCMPEQM: X86ISD::PCMPEQ; break;
  case ISD::SETLT:  Swap = true;
- case ISD::SETGT:  Opc = X86ISD::PCMPGT; break;
+ case ISD::SETGT:  Opc = MaskResult? X86ISD::PCMPGTM: X86ISD::PCMPGT; break;
  case ISD::SETGE:  Swap = true;
- case ISD::SETLE:  Opc = X86ISD::PCMPGT; Invert = true; break;
+ case ISD::SETLE:  Opc = MaskResult? X86ISD::PCMPGTM: X86ISD::PCMPGT;
+                   Invert = true; break;
  case ISD::SETULT: Swap = true;
- case ISD::SETUGT: Opc = X86ISD::PCMPGT; FlipSigns = true; break;
+ case ISD::SETUGT: Opc = MaskResult? X86ISD::PCMPGTM: X86ISD::PCMPGT;
+                   FlipSigns = true; break;
  case ISD::SETUGE: Swap = true;
- case ISD::SETULE: Opc = X86ISD::PCMPGT; FlipSigns = true; Invert = true; break;
+ case ISD::SETULE: Opc = MaskResult? X86ISD::PCMPGTM: X86ISD::PCMPGT;
+                   FlipSigns = true; Invert = true; break;
  }

  // Special case: Use min/max operations for SETULE/SETUGE

@@ -13201,6 +13263,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
  case X86ISD::CMP:         return "X86ISD::CMP";
  case X86ISD::COMI:        return "X86ISD::COMI";
  case X86ISD::UCOMI:       return "X86ISD::UCOMI";
  case X86ISD::CMPM:        return "X86ISD::CMPM";
  case X86ISD::CMPMU:       return "X86ISD::CMPMU";
  case X86ISD::SETCC:       return "X86ISD::SETCC";
  case X86ISD::SETCC_CARRY: return "X86ISD::SETCC_CARRY";
  case X86ISD::FSETCCsd:    return "X86ISD::FSETCCsd";
@@ -13273,6 +13337,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
  case X86ISD::CMPP:        return "X86ISD::CMPP";
  case X86ISD::PCMPEQ:      return "X86ISD::PCMPEQ";
  case X86ISD::PCMPGT:      return "X86ISD::PCMPGT";
  case X86ISD::PCMPEQM:     return "X86ISD::PCMPEQM";
  case X86ISD::PCMPGTM:     return "X86ISD::PCMPGTM";
  case X86ISD::ADD:         return "X86ISD::ADD";
  case X86ISD::SUB:         return "X86ISD::SUB";
  case X86ISD::ADC:         return "X86ISD::ADC";

@@ -274,6 +274,13 @@ namespace llvm {

      // PCMP* - Vector integer comparisons.
      PCMPEQ, PCMPGT,
      // PCMP*M - Vector integer comparisons, the result is in a mask vector.
      PCMPEQM, PCMPGTM,

      /// CMPM, CMPMU - Vector comparison generating mask bits for fp and
      /// integer signed and unsigned data types.
      CMPM,
      CMPMU,

      // ADD, SUB, SMUL, etc. - Arithmetic operations with FLAGS results.
      ADD, SUB, ADC, SBB, SMUL,

@@ -564,7 +564,195 @@ defm VPERMI2PS : avx512_perm_3src<0x77, "vpermi2ps", VR512, memopv16f32, i512me
defm VPERMI2PD : avx512_perm_3src<0x77, "vpermi2pd", VR512, memopv8f64, i512mem,
                  v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

//===----------------------------------------------------------------------===//
// AVX-512 - BLEND using mask
//
multiclass avx512_blendmask<bits<8> opc, string OpcodeStr,
                            RegisterClass KRC, RegisterClass RC,
                            X86MemOperand x86memop, PatFrag mem_frag,
                            SDNode OpNode, ValueType vt> {
  def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
             (ins KRC:$mask, RC:$src1, RC:$src2),
             !strconcat(OpcodeStr,
               "\t{$src2, $src1, ${dst}{${mask}}|${dst}{${mask}}, $src1, $src2}"),
             [(set RC:$dst, (OpNode KRC:$mask, (vt RC:$src2),
                              (vt RC:$src1)))]>, EVEX_4V, EVEX_K;

  def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
             (ins KRC:$mask, RC:$src1, x86memop:$src2),
             !strconcat(OpcodeStr,
               "\t{$src2, $src1, $mask, $dst|$dst, $mask, $src1, $src2}"),
             []>,
             EVEX_4V, EVEX_K;
}

let ExeDomain = SSEPackedSingle in
defm VBLENDMPSZ : avx512_blendmask<0x65, "vblendmps", VK16WM, VR512, f512mem,
                                   memopv16f32, vselect, v16f32>,
                                   EVEX_CD8<32, CD8VF>, EVEX_V512;
let ExeDomain = SSEPackedDouble in
defm VBLENDMPDZ : avx512_blendmask<0x65, "vblendmpd", VK8WM, VR512, f512mem,
                                   memopv8f64, vselect, v8f64>,
                                   VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V512;

defm VPBLENDMDZ : avx512_blendmask<0x64, "vpblendmd", VK16WM, VR512, f512mem,
                                   memopv8i64, vselect, v16i32>,
                                   EVEX_CD8<32, CD8VF>, EVEX_V512;

defm VPBLENDMQZ : avx512_blendmask<0x64, "vpblendmq", VK8WM, VR512, f512mem,
                                   memopv8i64, vselect, v8i64>, VEX_W,
                                   EVEX_CD8<64, CD8VF>, EVEX_V512;

let Predicates = [HasAVX512] in {
  def : Pat<(v8f32 (vselect (v8i1 VK8WM:$mask), (v8f32 VR256X:$src1),
                            (v8f32 VR256X:$src2))),
            (EXTRACT_SUBREG
              (v16f32 (VBLENDMPSZrr (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
                (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
                (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;

  def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask), (v8i32 VR256X:$src1),
                            (v8i32 VR256X:$src2))),
            (EXTRACT_SUBREG
              (v16i32 (VPBLENDMDZrr (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
                (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
                (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
}

multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, RegisterClass KRC,
              RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag,
              SDNode OpNode, ValueType vt> {
  def rr : AVX512BI<opc, MRMSrcReg,
             (outs KRC:$dst), (ins RC:$src1, RC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2)))],
             IIC_SSE_CMPP_RR>, EVEX_4V;
  def rm : AVX512BI<opc, MRMSrcMem,
             (outs KRC:$dst), (ins RC:$src1, x86memop:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set KRC:$dst, (OpNode (vt RC:$src1),
                               (bitconvert (memop_frag addr:$src2))))],
             IIC_SSE_CMPP_RM>, EVEX_4V;
}

defm VPCMPEQDZ : avx512_icmp_packed<0x76, "vpcmpeqd", VK16, VR512, i512mem,
                 memopv8i64, X86pcmpeqm, v16i32>, EVEX_V512;
defm VPCMPEQQZ : avx512_icmp_packed<0x29, "vpcmpeqq", VK8, VR512, i512mem,
                 memopv8i64, X86pcmpeqm, v8i64>, T8, EVEX_V512, VEX_W;

defm VPCMPGTDZ : avx512_icmp_packed<0x66, "vpcmpgtd", VK16, VR512, i512mem,
                 memopv8i64, X86pcmpgtm, v16i32>, EVEX_V512;
defm VPCMPGTQZ : avx512_icmp_packed<0x37, "vpcmpgtq", VK8, VR512, i512mem,
                 memopv8i64, X86pcmpgtm, v8i64>, T8, EVEX_V512, VEX_W;

def : Pat<(v8i1 (X86pcmpgtm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
          (COPY_TO_REGCLASS (VPCMPGTDZrr
            (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
            (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm))), VK8)>;

def : Pat<(v8i1 (X86pcmpeqm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
          (COPY_TO_REGCLASS (VPCMPEQDZrr
            (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
            (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm))), VK8)>;

multiclass avx512_icmp_cc<bits<8> opc, RegisterClass KRC,
              RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag,
              SDNode OpNode, ValueType vt, Operand CC, string asm,
              string asm_alt> {
  def rri : AVX512AIi8<opc, MRMSrcReg,
              (outs KRC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
              [(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2), imm:$cc))],
              IIC_SSE_CMPP_RR>, EVEX_4V;
  def rmi : AVX512AIi8<opc, MRMSrcMem,
              (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
              [(set KRC:$dst, (OpNode (vt RC:$src1),
                                (bitconvert (memop_frag addr:$src2)), imm:$cc))],
              IIC_SSE_CMPP_RM>, EVEX_4V;
  // Accept explicit immediate argument form instead of comparison code.
  let neverHasSideEffects = 1 in {
    def rri_alt : AVX512AIi8<opc, MRMSrcReg,
                    (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
                    asm_alt, [], IIC_SSE_CMPP_RR>, EVEX_4V;
    def rmi_alt : AVX512AIi8<opc, MRMSrcMem,
                    (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
                    asm_alt, [], IIC_SSE_CMPP_RM>, EVEX_4V;
  }
}

defm VPCMPDZ : avx512_icmp_cc<0x1F, VK16, VR512, i512mem, memopv8i64,
               X86cmpm, v16i32, AVXCC,
               "vpcmp${cc}d\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               "vpcmpd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
               EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPCMPUDZ : avx512_icmp_cc<0x1E, VK16, VR512, i512mem, memopv8i64,
               X86cmpmu, v16i32, AVXCC,
               "vpcmp${cc}ud\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               "vpcmpud\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
               EVEX_V512, EVEX_CD8<32, CD8VF>;

defm VPCMPQZ : avx512_icmp_cc<0x1F, VK8, VR512, i512mem, memopv8i64,
               X86cmpm, v8i64, AVXCC,
               "vpcmp${cc}q\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               "vpcmpq\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
               VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
defm VPCMPUQZ : avx512_icmp_cc<0x1E, VK8, VR512, i512mem, memopv8i64,
               X86cmpmu, v8i64, AVXCC,
               "vpcmp${cc}uq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               "vpcmpuq\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
               VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;

// avx512_cmp_packed - AVX-512 packed FP compare instructions (SSE 1 & 2 style)
multiclass avx512_cmp_packed<RegisterClass KRC, RegisterClass RC,
              X86MemOperand x86memop, Operand CC,
              SDNode OpNode, ValueType vt, string asm,
              string asm_alt, Domain d> {
  def rri : AVX512PIi8<0xC2, MRMSrcReg,
              (outs KRC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
              [(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2), imm:$cc))], d>;
  def rmi : AVX512PIi8<0xC2, MRMSrcMem,
              (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
              [(set KRC:$dst,
                 (OpNode (vt RC:$src1), (memop addr:$src2), imm:$cc))], d>;

  // Accept explicit immediate argument form instead of comparison code.
  let neverHasSideEffects = 1 in {
    def rri_alt : PIi8<0xC2, MRMSrcReg,
                    (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
                    asm_alt, [], IIC_SSE_CMPP_RR, d>;
    def rmi_alt : PIi8<0xC2, MRMSrcMem,
                    (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
                    asm_alt, [], IIC_SSE_CMPP_RM, d>;
  }
}

defm VCMPPSZ : avx512_cmp_packed<VK16, VR512, f512mem, AVXCC, X86cmpm, v16f32,
               "vcmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               "vcmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
               SSEPackedSingle>, TB, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VCMPPDZ : avx512_cmp_packed<VK8, VR512, f512mem, AVXCC, X86cmpm, v8f64,
               "vcmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               "vcmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
               SSEPackedDouble>, TB, OpSize, EVEX_4V, VEX_W, EVEX_V512,
               EVEX_CD8<64, CD8VF>;

def : Pat<(v8i1 (X86cmpm (v8f32 VR256X:$src1), (v8f32 VR256X:$src2), imm:$cc)),
          (COPY_TO_REGCLASS (VCMPPSZrri
            (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
            (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
            imm:$cc), VK8)>;
def : Pat<(v8i1 (X86cmpm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
          (COPY_TO_REGCLASS (VPCMPDZrri
            (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
            (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
            imm:$cc), VK8)>;
def : Pat<(v8i1 (X86cmpmu (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
          (COPY_TO_REGCLASS (VPCMPUDZrri
            (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
            (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
            imm:$cc), VK8)>;

// Mask register copy, including
// - copy between mask registers
// - load/store mask registers
@@ -949,4 +1137,18 @@ defm VMOVDQU32 : avx512_mov_int<0x6F, "vmovdqu32", VR512, VK16WM, memopv16i32, i
defm VMOVDQU64 : avx512_mov_int<0x6F, "vmovdqu64", VR512, VK8WM, memopv8i64, i512mem>,
                 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

let AddedComplexity = 20 in {
  def : Pat<(v16f32 (vselect VK16WM:$mask, (v16f32 VR512:$src1),
                             (v16f32 VR512:$src2))),
            (VMOVUPSZrrk VR512:$src2, VK16WM:$mask, VR512:$src1)>;
  def : Pat<(v8f64 (vselect VK8WM:$mask, (v8f64 VR512:$src1),
                            (v8f64 VR512:$src2))),
            (VMOVUPDZrrk VR512:$src2, VK8WM:$mask, VR512:$src1)>;
  def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 VR512:$src1),
                             (v16i32 VR512:$src2))),
            (VMOVDQU32rrk VR512:$src2, VK16WM:$mask, VR512:$src1)>;
  def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 VR512:$src1),
                            (v8i64 VR512:$src2))),
            (VMOVDQU64rrk VR512:$src2, VK8WM:$mask, VR512:$src1)>;
}

@@ -118,6 +118,15 @@ def X86cmpp : SDNode<"X86ISD::CMPP", SDTX86VFCMP>;
def X86pcmpeq  : SDNode<"X86ISD::PCMPEQ", SDTIntBinOp, [SDNPCommutative]>;
def X86pcmpgt  : SDNode<"X86ISD::PCMPGT", SDTIntBinOp>;

def X86IntCmpMask : SDTypeProfile<1, 2,
                      [SDTCisVec<0>, SDTCisSameAs<1, 2>, SDTCisInt<1>]>;
def X86pcmpeqm : SDNode<"X86ISD::PCMPEQM", X86IntCmpMask, [SDNPCommutative]>;
def X86pcmpgtm : SDNode<"X86ISD::PCMPGTM", X86IntCmpMask>;

def X86CmpMaskCC : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
def X86cmpm  : SDNode<"X86ISD::CMPM",  X86CmpMaskCC>;
def X86cmpmu : SDNode<"X86ISD::CMPMU", X86CmpMaskCC>;

def X86vshl    : SDNode<"X86ISD::VSHL",
                 SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                      SDTCisVec<2>]>>;

test/CodeGen/X86/avx512-vec-cmp.ll (new file, 113 lines)
@@ -0,0 +1,113 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s

; CHECK-LABEL: test1
; CHECK: vcmpleps
; CHECK: vmovups
; CHECK: ret
define <16 x float> @test1(<16 x float> %x, <16 x float> %y) nounwind {
  %mask = fcmp ole <16 x float> %x, %y
  %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %y
  ret <16 x float> %max
}

; CHECK-LABEL: test2
; CHECK: vcmplepd
; CHECK: vmovupd
; CHECK: ret
define <8 x double> @test2(<8 x double> %x, <8 x double> %y) nounwind {
  %mask = fcmp ole <8 x double> %x, %y
  %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %y
  ret <8 x double> %max
}

; CHECK-LABEL: test3
; CHECK: vpcmpeqd
; CHECK: vmovdqu32
; CHECK: ret
define <16 x i32> @test3(<16 x i32> %x, <16 x i32> %y) nounwind {
  %mask = icmp eq <16 x i32> %x, %y
  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %y
  ret <16 x i32> %max
}

; CHECK-LABEL: @test4_unsigned
; CHECK: vpcmpnltud
; CHECK: vmovdqu32
; CHECK: ret
define <16 x i32> @test4_unsigned(<16 x i32> %x, <16 x i32> %y) nounwind {
  %mask = icmp uge <16 x i32> %x, %y
  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %y
  ret <16 x i32> %max
}

; CHECK-LABEL: test5
; CHECK: vpcmpeqq {{.*}}%k1
; CHECK: vmovdqu64 {{.*}}%k1
; CHECK: ret
define <8 x i64> @test5(<8 x i64> %x, <8 x i64> %y) nounwind {
  %mask = icmp eq <8 x i64> %x, %y
  %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y
  ret <8 x i64> %max
}

; CHECK-LABEL: test6_unsigned
; CHECK: vpcmpnleuq {{.*}}%k1
; CHECK: vmovdqu64 {{.*}}%k1
; CHECK: ret
define <8 x i64> @test6_unsigned(<8 x i64> %x, <8 x i64> %y) nounwind {
  %mask = icmp ugt <8 x i64> %x, %y
  %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y
  ret <8 x i64> %max
}

; CHECK-LABEL: test7
; CHECK: xor
; CHECK: vcmpltps
; CHECK: vblendvps
; CHECK: ret
define <4 x float> @test7(<4 x float> %a, <4 x float> %b) {
  %mask = fcmp olt <4 x float> %a, zeroinitializer
  %c = select <4 x i1> %mask, <4 x float> %a, <4 x float> %b
  ret <4 x float> %c
}

; CHECK-LABEL: test8
; CHECK: xor
; CHECK: vcmpltpd
; CHECK: vblendvpd
; CHECK: ret
define <2 x double> @test8(<2 x double> %a, <2 x double> %b) {
  %mask = fcmp olt <2 x double> %a, zeroinitializer
  %c = select <2 x i1> %mask, <2 x double> %a, <2 x double> %b
  ret <2 x double> %c
}

; CHECK-LABEL: test9
; CHECK: vpcmpeqd
; CHECK: vpblendmd
; CHECK: ret
define <8 x i32> @test9(<8 x i32> %x, <8 x i32> %y) nounwind {
  %mask = icmp eq <8 x i32> %x, %y
  %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
  ret <8 x i32> %max
}

; CHECK-LABEL: test10
; CHECK: vcmpeqps
; CHECK: vblendmps
; CHECK: ret
define <8 x float> @test10(<8 x float> %x, <8 x float> %y) nounwind {
  %mask = fcmp oeq <8 x float> %x, %y
  %max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %y
  ret <8 x float> %max
}

; CHECK-LABEL: test11_unsigned
; CHECK: vpcmpnleud %zmm
; CHECK: vpblendmd %zmm
; CHECK: ret
define <8 x i32> @test11_unsigned(<8 x i32> %x, <8 x i32> %y) nounwind {
  %mask = icmp ugt <8 x i32> %x, %y
  %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
  ret <8 x i32> %max
}
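The new tests cover eq and the unsigned orderings on 512-bit integers; a signed ordered compare goes down the same CMPM path. A hedged sketch of such a case, not included in the commit (the expected mnemonics follow the vpcmp${cc}q asm string above and are not verified here):

; icmp sgt <8 x i64> -> X86ISD::CMPM, imm 6 (NLE) -> expected vpcmpnleq writing %k1,
; then a masked vmovdqu64, by analogy with test5/test6_unsigned.
define <8 x i64> @sgt_select(<8 x i64> %x, <8 x i64> %y) nounwind {
  %mask = icmp sgt <8 x i64> %x, %y
  %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y
  ret <8 x i64> %max
}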