[X86] Remove VPTESTM/VPTESTNM ISD opcodes. Use isel patterns matching cmpm eq/ne with immallzeros.
llvm-svn: 323612
This commit is contained in:
parent 03e2665282
commit 54f7fd3255
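A rough sketch of the DAG-level effect (hypothetical IR, not taken from this commit): a per-element "(x & y) != 0" is now lowered as an X86ISD::CMPM not-equal compare against an all-zeros vector instead of a dedicated TESTM node, and the updated VPTESTM isel patterns match that form directly, so IR like the following should still select vptestmd:

define <16 x i1> @vptestm_style(<16 x i32> %x, <16 x i32> %y) {
  ; (x & y) != 0 per 32-bit element; expected to select VPTESTMD on AVX-512.
  %and = and <16 x i32> %x, %y
  %cmp = icmp ne <16 x i32> %and, zeroinitializer
  ret <16 x i1> %cmp
}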
@@ -451,8 +451,7 @@ namespace {
 // type.
 static bool isLegalMaskCompare(SDNode *N, const X86Subtarget *Subtarget) {
   unsigned Opcode = N->getOpcode();
-  if (Opcode == X86ISD::CMPM || Opcode == X86ISD::TESTM ||
-      Opcode == X86ISD::TESTNM || Opcode == X86ISD::CMPMU ||
+  if (Opcode == X86ISD::CMPM || Opcode == X86ISD::CMPMU ||
       Opcode == X86ISD::CMPM_RND) {
     // We can get 256-bit 8 element types here without VLX being enabled. When
     // this happens we will use 512-bit operations and the mask will not be
@@ -5043,8 +5043,6 @@ static bool isMaskedZeroUpperBitsvXi1(unsigned int Opcode) {
   switch (Opcode) {
   default:
     return false;
-  case X86ISD::TESTM:
-  case X86ISD::TESTNM:
   case X86ISD::CMPM:
   case X86ISD::CMPMU:
   case X86ISD::CMPM_RND:
@@ -14639,9 +14637,11 @@ SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
     assert(Cond.getValueType().getScalarSizeInBits() ==
                VT.getScalarSizeInBits() &&
            "Should have a size-matched integer condition!");
-    // Build a mask by testing the condition against itself (tests for zero).
+    // Build a mask by testing the condition against zero.
     MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
-    SDValue Mask = DAG.getNode(X86ISD::TESTM, dl, MaskVT, Cond, Cond);
+    SDValue Mask = DAG.getNode(X86ISD::CMPM, dl, MaskVT, Cond,
+                               getZeroVector(VT, Subtarget, DAG, dl),
+                               DAG.getConstant(4, dl, MVT::i8));
     // Now return a new VSELECT using the mask.
     return DAG.getSelect(dl, VT, Mask, Op.getOperand(1), Op.getOperand(2));
   }
@@ -16609,7 +16609,9 @@ static SDValue LowerTruncateVecI1(SDValue Op, SelectionDAG &DAG,
     In = DAG.getNode(ISD::SHL, DL, InVT, In,
                      DAG.getConstant(ShiftInx, DL, InVT));
   }
-  return DAG.getNode(X86ISD::TESTM, DL, VT, In, In);
+  return DAG.getNode(X86ISD::CMPM, DL, VT, In,
+                     getZeroVector(InVT, Subtarget, DAG, DL),
+                     DAG.getConstant(4, DL, MVT::i8));
 }

 SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
@@ -17766,26 +17768,6 @@ static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) {
   if (Swap)
     std::swap(Op0, Op1);

-  // See if it is the case of CMP(EQ|NEQ,AND(A,B),ZERO) and change it to TESTM|NM.
-  if (SSECC == 4 || SSECC == 0) {
-    SDValue A = peekThroughBitcasts(Op0);
-    if ((A.getOpcode() == ISD::AND || A.getOpcode() == X86ISD::FAND) &&
-        ISD::isBuildVectorAllZeros(Op1.getNode())) {
-      MVT VT0 = Op0.getSimpleValueType();
-      SDValue RHS = DAG.getBitcast(VT0, A.getOperand(0));
-      SDValue LHS = DAG.getBitcast(VT0, A.getOperand(1));
-      return DAG.getNode(SSECC == 0 ? X86ISD::TESTNM : X86ISD::TESTM,
-                         dl, VT, RHS, LHS);
-    }
-
-    // If this is just a comparison with 0 without an AND, we can just use
-    // the same input twice to avoid creating a zero vector.
-    if (ISD::isBuildVectorAllZeros(Op1.getNode())) {
-      return DAG.getNode(SSECC == 0 ? X86ISD::TESTNM : X86ISD::TESTM,
-                         dl, VT, Op0, Op0);
-    }
-  }
-
   unsigned Opc = ISD::isUnsignedIntSetCC(SetCCOpcode) ? X86ISD::CMPMU
                                                       : X86ISD::CMPM;
   return DAG.getNode(Opc, dl, VT, Op0, Op1,
@@ -25365,8 +25347,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::MOVMSK:             return "X86ISD::MOVMSK";
   case X86ISD::PTEST:              return "X86ISD::PTEST";
   case X86ISD::TESTP:              return "X86ISD::TESTP";
-  case X86ISD::TESTM:              return "X86ISD::TESTM";
-  case X86ISD::TESTNM:             return "X86ISD::TESTNM";
   case X86ISD::KORTEST:            return "X86ISD::KORTEST";
   case X86ISD::KTEST:              return "X86ISD::KTEST";
   case X86ISD::KSHIFTL:            return "X86ISD::KSHIFTL";
@@ -37674,28 +37654,6 @@ static SDValue combineVSZext(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }

-static SDValue combineTestM(SDNode *N, SelectionDAG &DAG,
-                            const X86Subtarget &Subtarget) {
-  SDValue Op0 = N->getOperand(0);
-  SDValue Op1 = N->getOperand(1);
-
-  MVT VT = N->getSimpleValueType(0);
-  SDLoc DL(N);
-
-  // TEST (AND a, b) ,(AND a, b) -> TEST a, b
-  if (Op0 == Op1 && Op1->getOpcode() == ISD::AND)
-    return DAG.getNode(X86ISD::TESTM, DL, VT, Op0->getOperand(0),
-                       Op0->getOperand(1));
-
-  // TEST op0, BUILD_VECTOR(all_zero) -> BUILD_VECTOR(all_zero)
-  // TEST BUILD_VECTOR(all_zero), op1 -> BUILD_VECTOR(all_zero)
-  if (ISD::isBuildVectorAllZeros(Op0.getNode()) ||
-      ISD::isBuildVectorAllZeros(Op1.getNode()))
-    return getZeroVector(VT, Subtarget, DAG, DL);
-
-  return SDValue();
-}
-
 static SDValue combineVectorCompare(SDNode *N, SelectionDAG &DAG,
                                     const X86Subtarget &Subtarget) {
   MVT VT = N->getSimpleValueType(0);
@@ -38001,7 +37959,6 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case X86ISD::MSCATTER:
   case ISD::MGATHER:
   case ISD::MSCATTER:       return combineGatherScatter(N, DAG, DCI, Subtarget);
-  case X86ISD::TESTM:       return combineTestM(N, DAG, Subtarget);
   case X86ISD::PCMPEQ:
   case X86ISD::PCMPGT:      return combineVectorCompare(N, DAG, Subtarget);
   }
@@ -368,10 +368,6 @@ namespace llvm {
       // Vector packed fp sign bitwise comparisons.
      TESTP,

-      // Vector "test" in AVX-512, the result is in a mask vector.
-      TESTM,
-      TESTNM,
-
      // OR/AND test for masks.
      KORTEST,
      KTEST,
@@ -2084,6 +2084,8 @@ multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,

 def X86pcmpeqm : PatFrag<(ops node:$src1, node:$src2),
                          (X86cmpm node:$src1, node:$src2, (i8 0))>;
+def X86pcmpnem : PatFrag<(ops node:$src1, node:$src2),
+                         (X86cmpm node:$src1, node:$src2, (i8 4))>;
 def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
                          (X86cmpm node:$src1, node:$src2, (i8 6))>;

@@ -5197,42 +5199,57 @@ defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", X86scalef, X86scalefs
 // AVX-512  VPTESTM instructions
 //===----------------------------------------------------------------------===//

-multiclass avx512_vptest<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                         OpndItins itins, X86VectorVTInfo _> {
+multiclass avx512_vptest<bits<8> opc, string OpcodeStr, PatFrag OpNode,
+                         OpndItins itins, X86VectorVTInfo _, string Suffix> {
   let ExeDomain = _.ExeDomain in {
   let isCommutable = 1 in
   defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                       "$src2, $src1", "$src1, $src2",
-                   (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)), itins.rr>,
+                   (OpNode (bitconvert (_.i64VT (and _.RC:$src1, _.RC:$src2))),
+                           _.ImmAllZerosV), itins.rr>,
                    EVEX_4V, Sched<[itins.Sched]>;
   defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
                    (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                        "$src2, $src1", "$src1, $src2",
-                   (OpNode (_.VT _.RC:$src1),
-                    (_.VT (bitconvert (_.LdFrag addr:$src2)))), itins.rm>,
-                   EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
+                   (OpNode (bitconvert
+                            (_.i64VT (and _.RC:$src1,
+                                          (bitconvert (_.LdFrag addr:$src2))))),
+                           _.ImmAllZerosV),
+                   itins.rm>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[itins.Sched.Folded, ReadAfterLd]>;
   }
+
+  // Patterns for compare with 0 that just use the same source twice.
+  def : Pat<(_.KVT (OpNode _.RC:$src, _.ImmAllZerosV)),
+            (_.KVT (!cast<Instruction>(NAME # Suffix # _.ZSuffix # "rr")
+                                      _.RC:$src, _.RC:$src))>;
+
+  def : Pat<(_.KVT (and _.KRC:$mask, (OpNode _.RC:$src, _.ImmAllZerosV))),
+            (_.KVT (!cast<Instruction>(NAME # Suffix # _.ZSuffix # "rrk")
+                                      _.KRC:$mask, _.RC:$src, _.RC:$src))>;
 }

-multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
+multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
                             OpndItins itins, X86VectorVTInfo _> {
   let ExeDomain = _.ExeDomain in
   defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
                     (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                     "${src2}"##_.BroadcastStr##", $src1",
                     "$src1, ${src2}"##_.BroadcastStr,
-                    (OpNode (_.VT _.RC:$src1), (_.VT (X86VBroadcast
-                                                (_.ScalarLdFrag addr:$src2)))),
+                    (OpNode (and _.RC:$src1,
+                                 (X86VBroadcast
+                                  (_.ScalarLdFrag addr:$src2))),
+                            _.ImmAllZerosV),
                     itins.rm>, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                     Sched<[itins.Sched.Folded, ReadAfterLd]>;
 }

 // Use 512bit version to implement 128/256 bit in case NoVLX.
-multiclass avx512_vptest_lowering<SDNode OpNode, X86VectorVTInfo ExtendInfo,
+multiclass avx512_vptest_lowering<PatFrag OpNode, X86VectorVTInfo ExtendInfo,
                                   X86VectorVTInfo _, string Suffix> {
-  def : Pat<(_.KVT (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))),
+  def : Pat<(_.KVT (OpNode (bitconvert (_.i64VT (and _.RC:$src1, _.RC:$src2))),
+                           _.ImmAllZerosV)),
            (_.KVT (COPY_TO_REGCLASS
                     (!cast<Instruction>(NAME # Suffix # "Zrr")
                      (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
@@ -5242,7 +5259,8 @@ multiclass avx512_vptest_lowering<SDNode OpNode, X86VectorVTInfo ExtendInfo,
                     _.KRC))>;

   def : Pat<(_.KVT (and _.KRC:$mask,
-                        (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))),
+                        (OpNode (bitconvert (_.i64VT (and _.RC:$src1, _.RC:$src2))),
+                                _.ImmAllZerosV))),
             (COPY_TO_REGCLASS
              (!cast<Instruction>(NAME # Suffix # "Zrrk")
               (COPY_TO_REGCLASS _.KRC:$mask, ExtendInfo.KRC),
@@ -5251,19 +5269,38 @@ multiclass avx512_vptest_lowering<SDNode OpNode, X86VectorVTInfo ExtendInfo,
              (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                             _.RC:$src2, _.SubRegIdx)),
              _.KRC)>;
+
+  def : Pat<(_.KVT (OpNode _.RC:$src, _.ImmAllZerosV)),
+            (_.KVT (COPY_TO_REGCLASS
+                     (!cast<Instruction>(NAME # Suffix # "Zrr")
+                      (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
+                                     _.RC:$src, _.SubRegIdx),
+                      (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
+                                     _.RC:$src, _.SubRegIdx)),
+                     _.KRC))>;
+
+  def : Pat<(_.KVT (and _.KRC:$mask, (OpNode _.RC:$src, _.ImmAllZerosV))),
+            (COPY_TO_REGCLASS
+             (!cast<Instruction>(NAME # Suffix # "Zrrk")
+              (COPY_TO_REGCLASS _.KRC:$mask, ExtendInfo.KRC),
+              (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
+                             _.RC:$src, _.SubRegIdx),
+              (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
+                             _.RC:$src, _.SubRegIdx)),
+             _.KRC)>;
 }

-multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
+multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr, PatFrag OpNode,
                                   OpndItins itins, AVX512VLVectorVTInfo _,
                                   string Suffix> {
   let Predicates = [HasAVX512] in
-  defm Z : avx512_vptest<opc, OpcodeStr, OpNode, itins, _.info512>,
+  defm Z : avx512_vptest<opc, OpcodeStr, OpNode, itins, _.info512, Suffix>,
            avx512_vptest_mb<opc, OpcodeStr, OpNode, itins, _.info512>, EVEX_V512;

   let Predicates = [HasAVX512, HasVLX] in {
-  defm Z256 : avx512_vptest<opc, OpcodeStr, OpNode, itins, _.info256>,
+  defm Z256 : avx512_vptest<opc, OpcodeStr, OpNode, itins, _.info256, Suffix>,
               avx512_vptest_mb<opc, OpcodeStr, OpNode,itins, _.info256>, EVEX_V256;
-  defm Z128 : avx512_vptest<opc, OpcodeStr, OpNode, itins, _.info128>,
+  defm Z128 : avx512_vptest<opc, OpcodeStr, OpNode, itins, _.info128, Suffix>,
               avx512_vptest_mb<opc, OpcodeStr, OpNode, itins, _.info128>, EVEX_V128;
   }
   let Predicates = [HasAVX512, NoVLX] in {
@@ -5272,7 +5309,7 @@ multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
   }
 }

-multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
+multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr, PatFrag OpNode,
                             OpndItins itins> {
   defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", OpNode, itins,
                                   avx512vl_i32_info, "D">;
@@ -5281,41 +5318,41 @@ multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
 }

 multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
-                            SDNode OpNode, OpndItins itins> {
+                            PatFrag OpNode, OpndItins itins> {
   let Predicates = [HasBWI] in {
-  defm WZ:    avx512_vptest<opc, OpcodeStr#"w", OpNode, itins, v32i16_info>,
+  defm WZ:    avx512_vptest<opc, OpcodeStr#"w", OpNode, itins, v32i16_info, "W">,
              EVEX_V512, VEX_W;
-  defm BZ:    avx512_vptest<opc, OpcodeStr#"b", OpNode, itins, v64i8_info>,
+  defm BZ:    avx512_vptest<opc, OpcodeStr#"b", OpNode, itins, v64i8_info, "B">,
              EVEX_V512;
   }
   let Predicates = [HasVLX, HasBWI] in {

-  defm WZ256: avx512_vptest<opc, OpcodeStr#"w", OpNode, itins, v16i16x_info>,
+  defm WZ256: avx512_vptest<opc, OpcodeStr#"w", OpNode, itins, v16i16x_info, "W">,
              EVEX_V256, VEX_W;
-  defm WZ128: avx512_vptest<opc, OpcodeStr#"w", OpNode, itins, v8i16x_info>,
+  defm WZ128: avx512_vptest<opc, OpcodeStr#"w", OpNode, itins, v8i16x_info, "W">,
              EVEX_V128, VEX_W;
-  defm BZ256: avx512_vptest<opc, OpcodeStr#"b", OpNode, itins, v32i8x_info>,
+  defm BZ256: avx512_vptest<opc, OpcodeStr#"b", OpNode, itins, v32i8x_info, "B">,
              EVEX_V256;
-  defm BZ128: avx512_vptest<opc, OpcodeStr#"b", OpNode, itins, v16i8x_info>,
+  defm BZ128: avx512_vptest<opc, OpcodeStr#"b", OpNode, itins, v16i8x_info, "B">,
              EVEX_V128;
   }

   let Predicates = [HasAVX512, NoVLX] in {
-  defm BZ256_Alt : avx512_vptest_lowering< OpNode, v64i8_info, v32i8x_info, "B">;
-  defm BZ128_Alt : avx512_vptest_lowering< OpNode, v64i8_info, v16i8x_info, "B">;
-  defm WZ256_Alt : avx512_vptest_lowering< OpNode, v32i16_info, v16i16x_info, "W">;
-  defm WZ128_Alt : avx512_vptest_lowering< OpNode, v32i16_info, v8i16x_info, "W">;
+  defm BZ256_Alt : avx512_vptest_lowering<OpNode, v64i8_info, v32i8x_info, "B">;
+  defm BZ128_Alt : avx512_vptest_lowering<OpNode, v64i8_info, v16i8x_info, "B">;
+  defm WZ256_Alt : avx512_vptest_lowering<OpNode, v32i16_info, v16i16x_info, "W">;
+  defm WZ128_Alt : avx512_vptest_lowering<OpNode, v32i16_info, v8i16x_info, "W">;
   }
 }

 multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
-                                   SDNode OpNode, OpndItins itins> :
+                                   PatFrag OpNode, OpndItins itins> :
   avx512_vptest_wb <opc_wb, OpcodeStr, OpNode, itins>,
   avx512_vptest_dq<opc_dq, OpcodeStr, OpNode, itins>;

-defm VPTESTM  : avx512_vptest_all_forms<0x26, 0x27, "vptestm", X86testm,
+defm VPTESTM  : avx512_vptest_all_forms<0x26, 0x27, "vptestm", X86pcmpnem,
                                         SSE_BIT_ITINS_P>, T8PD;
-defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", X86testnm,
+defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", X86pcmpeqm,
                                         SSE_BIT_ITINS_P>, T8XS;


@@ -234,10 +234,6 @@ def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
                                           SDTCisVec<1>,
                                           SDTCisSameAs<2, 1>]>;

-def SDTX86Testm : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
-                                       SDTCisSameAs<2, 1>, SDTCVecEltisVT<0, i1>,
-                                       SDTCisSameNumEltsAs<0, 1>]>;
-
 def X86addus  : SDNode<"X86ISD::ADDUS", SDTIntBinOp, [SDNPCommutative]>;
 def X86subus  : SDNode<"X86ISD::SUBUS", SDTIntBinOp>;
 def X86adds   : SDNode<"X86ISD::ADDS", SDTIntBinOp, [SDNPCommutative]>;
@@ -248,8 +244,6 @@ def X86ptest   : SDNode<"X86ISD::PTEST",  SDTX86CmpPTest>;
 def X86testp   : SDNode<"X86ISD::TESTP",  SDTX86CmpPTest>;
 def X86kortest : SDNode<"X86ISD::KORTEST", SDTX86CmpPTest>;
 def X86ktest   : SDNode<"X86ISD::KTEST", SDTX86CmpPTest>;
-def X86testm   : SDNode<"X86ISD::TESTM", SDTX86Testm, [SDNPCommutative]>;
-def X86testnm  : SDNode<"X86ISD::TESTNM", SDTX86Testm, [SDNPCommutative]>;

 def X86movmsk : SDNode<"X86ISD::MOVMSK",
                        SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVec<1>]>>;
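The test updates that follow show the codegen effect: comparing a loaded value against zero can now fold the load into vpcmpeqd, while comparing a value already in a register against zero is still expected to select vptestnm through the new same-source-twice patterns. A minimal sketch of the register case (hypothetical IR, not one of the updated tests):

define <8 x i1> @testnm_same_source(<8 x i64> %x) {
  ; x == 0 per 64-bit element; expected to select "vptestnmq %zmm0, %zmm0, %k0".
  %cmp = icmp eq <8 x i64> %x, zeroinitializer
  ret <8 x i1> %cmp
}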
@@ -7,8 +7,8 @@
 define <8 x i16> @testv8i1_sext_v8i16(<8 x i32>* %p) {
 ; AVX256-LABEL: testv8i1_sext_v8i16:
 ; AVX256: # %bb.0:
-; AVX256-NEXT: vmovdqa (%rdi), %ymm0
-; AVX256-NEXT: vptestnmd %ymm0, %ymm0, %k1
+; AVX256-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; AVX256-NEXT: vpcmpeqd (%rdi), %ymm0, %k1
 ; AVX256-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
 ; AVX256-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX256-NEXT: vpmovdw %ymm0, %xmm0
@@ -17,8 +17,8 @@ define <8 x i16> @testv8i1_sext_v8i16(<8 x i32>* %p) {
 ;
 ; AVX512VL-LABEL: testv8i1_sext_v8i16:
 ; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
-; AVX512VL-NEXT: vptestnmd %ymm0, %ymm0, %k1
+; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; AVX512VL-NEXT: vpcmpeqd (%rdi), %ymm0, %k1
 ; AVX512VL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
 ; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
@@ -42,10 +42,9 @@ define <8 x i16> @testv8i1_sext_v8i16(<8 x i32>* %p) {
 define <16 x i8> @testv16i1_sext_v16i8(<8 x i32>* %p, <8 x i32>* %q) {
 ; AVX256-LABEL: testv16i1_sext_v16i8:
 ; AVX256: # %bb.0:
-; AVX256-NEXT: vmovdqa (%rdi), %ymm0
-; AVX256-NEXT: vptestnmd %ymm0, %ymm0, %k1
-; AVX256-NEXT: vmovdqa (%rsi), %ymm0
-; AVX256-NEXT: vptestnmd %ymm0, %ymm0, %k2
+; AVX256-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; AVX256-NEXT: vpcmpeqd (%rdi), %ymm0, %k1
+; AVX256-NEXT: vpcmpeqd (%rsi), %ymm0, %k2
 ; AVX256-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
 ; AVX256-NEXT: vmovdqa32 %ymm0, %ymm1 {%k2} {z}
 ; AVX256-NEXT: vpmovdw %ymm1, %xmm1
@@ -59,10 +58,9 @@ define <16 x i8> @testv16i1_sext_v16i8(<8 x i32>* %p, <8 x i32>* %q) {
 ;
 ; AVX512VL-LABEL: testv16i1_sext_v16i8:
 ; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
-; AVX512VL-NEXT: vptestnmd %ymm0, %ymm0, %k0
-; AVX512VL-NEXT: vmovdqa (%rsi), %ymm0
-; AVX512VL-NEXT: vptestnmd %ymm0, %ymm0, %k1
+; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; AVX512VL-NEXT: vpcmpeqd (%rdi), %ymm0, %k0
+; AVX512VL-NEXT: vpcmpeqd (%rsi), %ymm0, %k1
 ; AVX512VL-NEXT: kunpckbw %k0, %k1, %k1
 ; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
@@ -92,10 +90,9 @@ define <16 x i8> @testv16i1_sext_v16i8(<8 x i32>* %p, <8 x i32>* %q) {
 define <16 x i16> @testv16i1_sext_v16i16(<8 x i32>* %p, <8 x i32>* %q) {
 ; AVX256-LABEL: testv16i1_sext_v16i16:
 ; AVX256: # %bb.0:
-; AVX256-NEXT: vmovdqa (%rdi), %ymm0
-; AVX256-NEXT: vptestnmd %ymm0, %ymm0, %k1
-; AVX256-NEXT: vmovdqa (%rsi), %ymm0
-; AVX256-NEXT: vptestnmd %ymm0, %ymm0, %k2
+; AVX256-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; AVX256-NEXT: vpcmpeqd (%rdi), %ymm0, %k1
+; AVX256-NEXT: vpcmpeqd (%rsi), %ymm0, %k2
 ; AVX256-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
 ; AVX256-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} {z}
 ; AVX256-NEXT: vpmovdw %ymm1, %xmm1
@@ -106,10 +103,9 @@ define <16 x i16> @testv16i1_sext_v16i16(<8 x i32>* %p, <8 x i32>* %q) {
 ;
 ; AVX512VL-LABEL: testv16i1_sext_v16i16:
 ; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
-; AVX512VL-NEXT: vptestnmd %ymm0, %ymm0, %k0
-; AVX512VL-NEXT: vmovdqa (%rsi), %ymm0
-; AVX512VL-NEXT: vptestnmd %ymm0, %ymm0, %k1
+; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; AVX512VL-NEXT: vpcmpeqd (%rdi), %ymm0, %k0
+; AVX512VL-NEXT: vpcmpeqd (%rsi), %ymm0, %k1
 ; AVX512VL-NEXT: kunpckbw %k0, %k1, %k1
 ; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VL-NEXT: vpmovdw %zmm0, %ymm0
@@ -137,8 +133,8 @@ define <16 x i16> @testv16i1_sext_v16i16(<8 x i32>* %p, <8 x i32>* %q) {
 define <8 x i16> @testv8i1_zext_v8i16(<8 x i32>* %p) {
 ; AVX256-LABEL: testv8i1_zext_v8i16:
 ; AVX256: # %bb.0:
-; AVX256-NEXT: vmovdqa (%rdi), %ymm0
-; AVX256-NEXT: vptestnmd %ymm0, %ymm0, %k1
+; AVX256-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; AVX256-NEXT: vpcmpeqd (%rdi), %ymm0, %k1
 ; AVX256-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z}
 ; AVX256-NEXT: vpmovdw %ymm0, %xmm0
 ; AVX256-NEXT: vzeroupper
@@ -146,8 +142,8 @@ define <8 x i16> @testv8i1_zext_v8i16(<8 x i32>* %p) {
 ;
 ; AVX512VL-LABEL: testv8i1_zext_v8i16:
 ; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
-; AVX512VL-NEXT: vptestnmd %ymm0, %ymm0, %k1
+; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; AVX512VL-NEXT: vpcmpeqd (%rdi), %ymm0, %k1
 ; AVX512VL-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z}
 ; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
 ; AVX512VL-NEXT: vzeroupper
@@ -170,10 +166,9 @@ define <8 x i16> @testv8i1_zext_v8i16(<8 x i32>* %p) {
 define <16 x i8> @testv16i1_zext_v16i8(<8 x i32>* %p, <8 x i32>* %q) {
 ; AVX256-LABEL: testv16i1_zext_v16i8:
 ; AVX256: # %bb.0:
-; AVX256-NEXT: vmovdqa (%rdi), %ymm0
-; AVX256-NEXT: vptestnmd %ymm0, %ymm0, %k1
-; AVX256-NEXT: vmovdqa (%rsi), %ymm0
-; AVX256-NEXT: vptestnmd %ymm0, %ymm0, %k2
+; AVX256-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; AVX256-NEXT: vpcmpeqd (%rdi), %ymm0, %k1
+; AVX256-NEXT: vpcmpeqd (%rsi), %ymm0, %k2
 ; AVX256-NEXT: movl {{.*}}(%rip), %eax
 ; AVX256-NEXT: vpbroadcastd %eax, %ymm0 {%k2} {z}
 ; AVX256-NEXT: vpmovdw %ymm0, %xmm0
@@ -188,10 +183,9 @@ define <16 x i8> @testv16i1_zext_v16i8(<8 x i32>* %p, <8 x i32>* %q) {
 ;
 ; AVX512VL-LABEL: testv16i1_zext_v16i8:
 ; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
-; AVX512VL-NEXT: vptestnmd %ymm0, %ymm0, %k0
-; AVX512VL-NEXT: vmovdqa (%rsi), %ymm0
-; AVX512VL-NEXT: vptestnmd %ymm0, %ymm0, %k1
+; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; AVX512VL-NEXT: vpcmpeqd (%rdi), %ymm0, %k0
+; AVX512VL-NEXT: vpcmpeqd (%rsi), %ymm0, %k1
 ; AVX512VL-NEXT: kunpckbw %k0, %k1, %k1
 ; AVX512VL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
 ; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
@@ -221,10 +215,9 @@ define <16 x i8> @testv16i1_zext_v16i8(<8 x i32>* %p, <8 x i32>* %q) {
 define <16 x i16> @testv16i1_zext_v16i16(<8 x i32>* %p, <8 x i32>* %q) {
 ; AVX256-LABEL: testv16i1_zext_v16i16:
 ; AVX256: # %bb.0:
-; AVX256-NEXT: vmovdqa (%rdi), %ymm0
-; AVX256-NEXT: vptestnmd %ymm0, %ymm0, %k1
-; AVX256-NEXT: vmovdqa (%rsi), %ymm0
-; AVX256-NEXT: vptestnmd %ymm0, %ymm0, %k2
+; AVX256-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; AVX256-NEXT: vpcmpeqd (%rdi), %ymm0, %k1
+; AVX256-NEXT: vpcmpeqd (%rsi), %ymm0, %k2
 ; AVX256-NEXT: movl {{.*}}(%rip), %eax
 ; AVX256-NEXT: vpbroadcastd %eax, %ymm0 {%k1} {z}
 ; AVX256-NEXT: vpmovdw %ymm0, %xmm0
@@ -235,10 +228,9 @@ define <16 x i16> @testv16i1_zext_v16i16(<8 x i32>* %p, <8 x i32>* %q) {
 ;
 ; AVX512VL-LABEL: testv16i1_zext_v16i16:
 ; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
-; AVX512VL-NEXT: vptestnmd %ymm0, %ymm0, %k0
-; AVX512VL-NEXT: vmovdqa (%rsi), %ymm0
-; AVX512VL-NEXT: vptestnmd %ymm0, %ymm0, %k1
+; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; AVX512VL-NEXT: vpcmpeqd (%rdi), %ymm0, %k0
+; AVX512VL-NEXT: vpcmpeqd (%rsi), %ymm0, %k1
 ; AVX512VL-NEXT: kunpckbw %k0, %k1, %k1
 ; AVX512VL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
 ; AVX512VL-NEXT: vpmovdw %zmm0, %ymm0
@@ -11,10 +11,9 @@
 define <16 x i1> @shuf16i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0(<8 x i32>* %a, <8 x i32>* %b) {
 ; AVX256VL-LABEL: shuf16i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0:
 ; AVX256VL: # %bb.0:
-; AVX256VL-NEXT: vmovdqa (%rdi), %ymm0
-; AVX256VL-NEXT: vmovdqa (%rsi), %ymm1
-; AVX256VL-NEXT: vptestnmd %ymm0, %ymm0, %k1
-; AVX256VL-NEXT: vptestnmd %ymm1, %ymm1, %k2
+; AVX256VL-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; AVX256VL-NEXT: vpcmpeqd (%rdi), %ymm0, %k1
+; AVX256VL-NEXT: vpcmpeqd (%rsi), %ymm0, %k2
 ; AVX256VL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
 ; AVX256VL-NEXT: vmovdqa32 %ymm0, %ymm1 {%k2} {z}
 ; AVX256VL-NEXT: vpmovdw %ymm1, %xmm1
@@ -45,10 +44,9 @@ define <16 x i1> @shuf16i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0(<8 x i32>* %a, <8
 ;
 ; AVX512VL-LABEL: shuf16i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0:
 ; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
-; AVX512VL-NEXT: vmovdqa (%rsi), %ymm1
-; AVX512VL-NEXT: vptestnmd %ymm0, %ymm0, %k1
-; AVX512VL-NEXT: vptestnmd %ymm1, %ymm1, %k2
+; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; AVX512VL-NEXT: vpcmpeqd (%rdi), %ymm0, %k1
+; AVX512VL-NEXT: vpcmpeqd (%rsi), %ymm0, %k2
 ; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
 ; AVX512VL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
 ; AVX512VL-NEXT: vmovdqa64 {{.*#+}} zmm2 = [3,6,18,20,3,7,7,0,3,6,1,21,3,19,7,0]
@@ -61,10 +59,9 @@ define <16 x i1> @shuf16i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0(<8 x i32>* %a, <8
 ;
 ; AVX256VLBW-LABEL: shuf16i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0:
 ; AVX256VLBW: # %bb.0:
-; AVX256VLBW-NEXT: vmovdqa (%rdi), %ymm0
-; AVX256VLBW-NEXT: vmovdqa (%rsi), %ymm1
-; AVX256VLBW-NEXT: vptestnmd %ymm0, %ymm0, %k0
-; AVX256VLBW-NEXT: vptestnmd %ymm1, %ymm1, %k1
+; AVX256VLBW-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; AVX256VLBW-NEXT: vpcmpeqd (%rdi), %ymm0, %k0
+; AVX256VLBW-NEXT: vpcmpeqd (%rsi), %ymm0, %k1
 ; AVX256VLBW-NEXT: vpmovm2w %k1, %ymm0
 ; AVX256VLBW-NEXT: vpmovm2w %k0, %ymm1
 ; AVX256VLBW-NEXT: vmovdqa {{.*#+}} ymm2 = [3,6,18,20,3,7,7,0,3,6,1,21,3,19,7,0]
@@ -76,10 +73,9 @@ define <16 x i1> @shuf16i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0(<8 x i32>* %a, <8
 ;
 ; AVX512VLBW-LABEL: shuf16i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0:
 ; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vmovdqa (%rdi), %ymm0
-; AVX512VLBW-NEXT: vmovdqa (%rsi), %ymm1
-; AVX512VLBW-NEXT: vptestnmd %ymm0, %ymm0, %k1
-; AVX512VLBW-NEXT: vptestnmd %ymm1, %ymm1, %k2
+; AVX512VLBW-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; AVX512VLBW-NEXT: vpcmpeqd (%rdi), %ymm0, %k1
+; AVX512VLBW-NEXT: vpcmpeqd (%rsi), %ymm0, %k2
 ; AVX512VLBW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
 ; AVX512VLBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
 ; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [3,6,18,20,3,7,7,0,3,6,1,21,3,19,7,0]