mirror of
https://github.com/RPCS3/llvm.git
synced 2025-02-21 19:20:50 +00:00
[SelectionDAG] Add a signed integer absolute ISD node
Reduced version of D26357 - based on the discussion on llvm-dev about canonicalization of UMIN/UMAX/SMIN/SMAX as well as ABS, I've reduced that patch to just the ABS ISD node (with x86/sse support) to improve basic combines and lowering. ARM/AArch64, Hexagon, PowerPC and NVPTX all have similar instructions, allowing us to make this a generic opcode and move away from the hard-coded tablegen patterns, which make it tricky to match more complex patterns. At the moment this patch doesn't attempt legalization, as we only create an ABS node if it's legal/custom. Differential Revision: https://reviews.llvm.org/D29639 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@297780 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
25754c1409
commit
e35265b998
@ -339,6 +339,12 @@ namespace ISD {
|
||||
/// Bitwise operators - logical and, logical or, logical xor.
|
||||
AND, OR, XOR,
|
||||
|
||||
/// ABS - Determine the unsigned absolute value of a signed integer value of
|
||||
/// the same bitwidth.
|
||||
/// Note: A value of INT_MIN will return INT_MIN, no saturation or overflow
|
||||
/// is performed.
|
||||
ABS,
|
||||
|
||||
/// Shift and rotation operations. After legalization, the type of the
|
||||
/// shift amount is known to be TLI.getShiftAmountTy(). Before legalization
|
||||
/// the shift amount can be any type, but care must be taken to ensure it is
|
||||
|
@ -413,6 +413,7 @@ def sext_inreg : SDNode<"ISD::SIGN_EXTEND_INREG", SDTExtInreg>;
|
||||
def sext_invec : SDNode<"ISD::SIGN_EXTEND_VECTOR_INREG", SDTExtInvec>;
|
||||
def zext_invec : SDNode<"ISD::ZERO_EXTEND_VECTOR_INREG", SDTExtInvec>;
|
||||
|
||||
def abs : SDNode<"ISD::ABS" , SDTIntUnaryOp>;
|
||||
def bitreverse : SDNode<"ISD::BITREVERSE" , SDTIntUnaryOp>;
|
||||
def bswap : SDNode<"ISD::BSWAP" , SDTIntUnaryOp>;
|
||||
def ctlz : SDNode<"ISD::CTLZ" , SDTIntUnaryOp>;
|
||||
|
@ -262,6 +262,7 @@ namespace {
|
||||
SDValue visitSRA(SDNode *N);
|
||||
SDValue visitSRL(SDNode *N);
|
||||
SDValue visitRotate(SDNode *N);
|
||||
SDValue visitABS(SDNode *N);
|
||||
SDValue visitBSWAP(SDNode *N);
|
||||
SDValue visitBITREVERSE(SDNode *N);
|
||||
SDValue visitCTLZ(SDNode *N);
|
||||
@ -1423,6 +1424,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
|
||||
case ISD::SRL: return visitSRL(N);
|
||||
case ISD::ROTR:
|
||||
case ISD::ROTL: return visitRotate(N);
|
||||
case ISD::ABS: return visitABS(N);
|
||||
case ISD::BSWAP: return visitBSWAP(N);
|
||||
case ISD::BITREVERSE: return visitBITREVERSE(N);
|
||||
case ISD::CTLZ: return visitCTLZ(N);
|
||||
@ -5004,6 +5006,17 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
|
||||
N01C->getAPIntValue(), DL, VT));
|
||||
}
|
||||
}
|
||||
|
||||
// fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
|
||||
unsigned OpSizeInBits = VT.getScalarSizeInBits();
|
||||
if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1 &&
|
||||
N1.getOpcode() == ISD::SRA && N1.getOperand(0) == N0.getOperand(0) &&
|
||||
TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
|
||||
if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
|
||||
if (C->getAPIntValue() == (OpSizeInBits - 1))
|
||||
return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0.getOperand(0));
|
||||
}
|
||||
|
||||
// fold (xor x, x) -> 0
|
||||
if (N0 == N1)
|
||||
return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
|
||||
@ -5746,6 +5759,22 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
/// Try to simplify an ISD::ABS node \p N.
///
/// Three folds are attempted, in order: re-creating the node when the operand
/// is a constant integer (or constant integer build vector), collapsing a
/// nested ABS, and dropping the ABS entirely when the operand's sign bit is
/// known to be zero.
///
/// \returns the replacement value, or an empty SDValue when none of the folds
/// match.
SDValue DAGCombiner::visitABS(SDNode *N) {
|
||||
SDValue N0 = N->getOperand(0);
|
||||
EVT VT = N->getValueType(0);
|
||||
|
||||
// fold (abs c1) -> c2
// NOTE(review): this rebuilds the same ABS node via getNode and presumably
// relies on getNode to constant-fold the operand - confirm for build vectors.
|
||||
if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
|
||||
return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
|
||||
// fold (abs (abs x)) -> (abs x)
// The operand is already an ABS node, so it is returned in place of N.
|
||||
if (N0.getOpcode() == ISD::ABS)
|
||||
return N0;
|
||||
// fold (abs x) -> x iff not-negative
// "Not-negative" is decided by SignBitIsZero on the operand.
|
||||
if (DAG.SignBitIsZero(N0))
|
||||
return N0;
// No fold matched: return an empty SDValue (presumably "no change" to the
// caller - verify against DAGCombiner::visit's contract).
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
SDValue DAGCombiner::visitBSWAP(SDNode *N) {
|
||||
SDValue N0 = N->getOperand(0);
|
||||
EVT VT = N->getValueType(0);
|
||||
|
@ -3336,6 +3336,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
|
||||
if (VT == MVT::f128 && C->getValueType(0) == MVT::i128)
|
||||
return getConstantFP(APFloat(APFloat::IEEEquad(), Val), DL, VT);
|
||||
break;
|
||||
case ISD::ABS:
|
||||
return getConstant(Val.abs(), DL, VT, C->isTargetOpcode(),
|
||||
C->isOpaque());
|
||||
case ISD::BITREVERSE:
|
||||
return getConstant(Val.reverseBits(), DL, VT, C->isTargetOpcode(),
|
||||
C->isOpaque());
|
||||
@ -3455,6 +3458,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
|
||||
case ISD::TRUNCATE:
|
||||
case ISD::UINT_TO_FP:
|
||||
case ISD::SINT_TO_FP:
|
||||
case ISD::ABS:
|
||||
case ISD::BITREVERSE:
|
||||
case ISD::BSWAP:
|
||||
case ISD::CTLZ:
|
||||
@ -3570,6 +3574,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
|
||||
return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0));
|
||||
return Operand.getNode()->getOperand(0);
|
||||
}
|
||||
if (OpOpcode == ISD::UNDEF)
|
||||
return getUNDEF(VT);
|
||||
break;
|
||||
case ISD::ABS:
|
||||
assert(VT.isInteger() && VT == Operand.getValueType() &&
|
||||
"Invalid ABS!");
|
||||
if (OpOpcode == ISD::UNDEF)
|
||||
return getUNDEF(VT);
|
||||
break;
|
||||
|
@ -300,6 +300,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
|
||||
case ISD::GET_DYNAMIC_AREA_OFFSET: return "get.dynamic.area.offset";
|
||||
|
||||
// Bit manipulation
|
||||
case ISD::ABS: return "abs";
|
||||
case ISD::BITREVERSE: return "bitreverse";
|
||||
case ISD::BSWAP: return "bswap";
|
||||
case ISD::CTPOP: return "ctpop";
|
||||
|
@ -900,6 +900,7 @@ void TargetLoweringBase::initActions() {
|
||||
setOperationAction(ISD::SMAX, VT, Expand);
|
||||
setOperationAction(ISD::UMIN, VT, Expand);
|
||||
setOperationAction(ISD::UMAX, VT, Expand);
|
||||
setOperationAction(ISD::ABS, VT, Expand);
|
||||
|
||||
// Overflow operations default to expand
|
||||
setOperationAction(ISD::SADDO, VT, Expand);
|
||||
|
@ -896,6 +896,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
||||
}
|
||||
|
||||
if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
|
||||
setOperationAction(ISD::ABS, MVT::v16i8, Legal);
|
||||
setOperationAction(ISD::ABS, MVT::v8i16, Legal);
|
||||
setOperationAction(ISD::ABS, MVT::v4i32, Legal);
|
||||
setOperationAction(ISD::BITREVERSE, MVT::v16i8, Custom);
|
||||
setOperationAction(ISD::CTLZ, MVT::v16i8, Custom);
|
||||
setOperationAction(ISD::CTLZ, MVT::v8i16, Custom);
|
||||
@ -1081,6 +1084,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
||||
setOperationAction(ISD::MULHS, MVT::v32i8, Custom);
|
||||
|
||||
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
|
||||
setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
|
||||
setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom);
|
||||
setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom);
|
||||
setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom);
|
||||
@ -1287,6 +1291,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
||||
}
|
||||
}
|
||||
if (Subtarget.hasVLX()) {
|
||||
setOperationAction(ISD::ABS, MVT::v4i64, Legal);
|
||||
setOperationAction(ISD::ABS, MVT::v2i64, Legal);
|
||||
setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
|
||||
setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
|
||||
setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
|
||||
@ -1383,6 +1389,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
||||
setOperationAction(ISD::MUL, MVT::v16i32, Legal);
|
||||
|
||||
for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
|
||||
setOperationAction(ISD::ABS, VT, Legal);
|
||||
setOperationAction(ISD::SRL, VT, Custom);
|
||||
setOperationAction(ISD::SHL, VT, Custom);
|
||||
setOperationAction(ISD::SRA, VT, Custom);
|
||||
@ -1562,6 +1569,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
||||
for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
|
||||
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
|
||||
setOperationAction(ISD::VSELECT, VT, Legal);
|
||||
setOperationAction(ISD::ABS, VT, Legal);
|
||||
setOperationAction(ISD::SRL, VT, Custom);
|
||||
setOperationAction(ISD::SHL, VT, Custom);
|
||||
setOperationAction(ISD::SRA, VT, Custom);
|
||||
@ -21155,6 +21163,25 @@ static SDValue LowerADD_SUB(SDValue Op, SelectionDAG &DAG) {
|
||||
return Lower256IntArith(Op, DAG);
|
||||
}
|
||||
|
||||
/// Lower an ISD::ABS on a 256-bit integer vector by splitting it in two.
///
/// The operand is split at element offsets 0 and NumElems / 2 into two pieces
/// (extracted with extract128BitVector), ISD::ABS is applied to each piece
/// using a vector type with the same element type and half the element count,
/// and the two results are joined with ISD::CONCAT_VECTORS back into the
/// original type.
///
/// \param Op  the ISD::ABS node; asserted to have a 256-bit integer vector
///            type.
/// \param DAG the SelectionDAG in which the new nodes are created.
/// \returns the CONCAT_VECTORS node combining the two half-width ABS results.
static SDValue LowerABS(SDValue Op, SelectionDAG &DAG) {
|
||||
assert(Op.getSimpleValueType().is256BitVector() &&
|
||||
Op.getSimpleValueType().isInteger() &&
|
||||
"Only handle AVX 256-bit vector integer operation");
|
||||
MVT VT = Op.getSimpleValueType();
|
||||
unsigned NumElems = VT.getVectorNumElements();
|
||||
|
||||
SDLoc dl(Op);
|
||||
SDValue Src = Op.getOperand(0);
// Lo starts at element 0, Hi at the midpoint element of the source vector.
|
||||
SDValue Lo = extract128BitVector(Src, 0, DAG, dl);
|
||||
SDValue Hi = extract128BitVector(Src, NumElems / 2, DAG, dl);
|
||||
|
||||
// Half-width result type: same element type, half as many elements.
MVT EltVT = VT.getVectorElementType();
|
||||
MVT NewVT = MVT::getVectorVT(EltVT, NumElems / 2);
// Apply ABS to each half and concatenate (Lo first, then Hi) into VT.
|
||||
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
|
||||
DAG.getNode(ISD::ABS, dl, NewVT, Lo),
|
||||
DAG.getNode(ISD::ABS, dl, NewVT, Hi));
|
||||
}
|
||||
|
||||
static SDValue LowerMINMAX(SDValue Op, SelectionDAG &DAG) {
|
||||
assert(Op.getSimpleValueType().is256BitVector() &&
|
||||
Op.getSimpleValueType().isInteger() &&
|
||||
@ -23699,6 +23726,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
||||
case ISD::SMIN:
|
||||
case ISD::UMAX:
|
||||
case ISD::UMIN: return LowerMINMAX(Op, DAG);
|
||||
case ISD::ABS: return LowerABS(Op, DAG);
|
||||
case ISD::FSINCOS: return LowerFSINCOS(Op, Subtarget, DAG);
|
||||
case ISD::MLOAD: return LowerMLOAD(Op, Subtarget, DAG);
|
||||
case ISD::MSTORE: return LowerMSTORE(Op, Subtarget, DAG);
|
||||
@ -24111,7 +24139,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
case X86ISD::HSUB: return "X86ISD::HSUB";
|
||||
case X86ISD::FHADD: return "X86ISD::FHADD";
|
||||
case X86ISD::FHSUB: return "X86ISD::FHSUB";
|
||||
case X86ISD::ABS: return "X86ISD::ABS";
|
||||
case X86ISD::CONFLICT: return "X86ISD::CONFLICT";
|
||||
case X86ISD::FMAX: return "X86ISD::FMAX";
|
||||
case X86ISD::FMAXS: return "X86ISD::FMAXS";
|
||||
|
@ -239,9 +239,6 @@ namespace llvm {
|
||||
FHADD,
|
||||
FHSUB,
|
||||
|
||||
// Integer absolute value
|
||||
ABS,
|
||||
|
||||
// Detect Conflicts Within a Vector
|
||||
CONFLICT,
|
||||
|
||||
|
@ -8606,66 +8606,7 @@ multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
|
||||
HasBWI>;
|
||||
}
|
||||
|
||||
defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", X86Abs>;
|
||||
|
||||
def avx512_v16i1sextv16i8 : PatLeaf<(v16i8 (X86pcmpgt (bc_v16i8 (v4i32 immAllZerosV)),
|
||||
VR128X:$src))>;
|
||||
def avx512_v8i1sextv8i16 : PatLeaf<(v8i16 (X86vsrai VR128X:$src, (i8 15)))>;
|
||||
def avx512_v4i1sextv4i32 : PatLeaf<(v4i32 (X86vsrai VR128X:$src, (i8 31)))>;
|
||||
def avx512_v32i1sextv32i8 : PatLeaf<(v32i8 (X86pcmpgt (bc_v32i8 (v8i32 immAllZerosV)),
|
||||
VR256X:$src))>;
|
||||
def avx512_v16i1sextv16i16: PatLeaf<(v16i16 (X86vsrai VR256X:$src, (i8 15)))>;
|
||||
def avx512_v8i1sextv8i32 : PatLeaf<(v8i32 (X86vsrai VR256X:$src, (i8 31)))>;
|
||||
|
||||
let Predicates = [HasBWI, HasVLX] in {
|
||||
def : Pat<(xor
|
||||
(bc_v2i64 (avx512_v16i1sextv16i8)),
|
||||
(bc_v2i64 (add (v16i8 VR128X:$src), (avx512_v16i1sextv16i8)))),
|
||||
(VPABSBZ128rr VR128X:$src)>;
|
||||
def : Pat<(xor
|
||||
(bc_v2i64 (avx512_v8i1sextv8i16)),
|
||||
(bc_v2i64 (add (v8i16 VR128X:$src), (avx512_v8i1sextv8i16)))),
|
||||
(VPABSWZ128rr VR128X:$src)>;
|
||||
def : Pat<(xor
|
||||
(bc_v4i64 (avx512_v32i1sextv32i8)),
|
||||
(bc_v4i64 (add (v32i8 VR256X:$src), (avx512_v32i1sextv32i8)))),
|
||||
(VPABSBZ256rr VR256X:$src)>;
|
||||
def : Pat<(xor
|
||||
(bc_v4i64 (avx512_v16i1sextv16i16)),
|
||||
(bc_v4i64 (add (v16i16 VR256X:$src), (avx512_v16i1sextv16i16)))),
|
||||
(VPABSWZ256rr VR256X:$src)>;
|
||||
}
|
||||
let Predicates = [HasAVX512, HasVLX] in {
|
||||
def : Pat<(xor
|
||||
(bc_v2i64 (avx512_v4i1sextv4i32)),
|
||||
(bc_v2i64 (add (v4i32 VR128X:$src), (avx512_v4i1sextv4i32)))),
|
||||
(VPABSDZ128rr VR128X:$src)>;
|
||||
def : Pat<(xor
|
||||
(bc_v4i64 (avx512_v8i1sextv8i32)),
|
||||
(bc_v4i64 (add (v8i32 VR256X:$src), (avx512_v8i1sextv8i32)))),
|
||||
(VPABSDZ256rr VR256X:$src)>;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX512] in {
|
||||
def : Pat<(xor
|
||||
(bc_v8i64 (v16i1sextv16i32)),
|
||||
(bc_v8i64 (add (v16i32 VR512:$src), (v16i1sextv16i32)))),
|
||||
(VPABSDZrr VR512:$src)>;
|
||||
def : Pat<(xor
|
||||
(bc_v8i64 (v8i1sextv8i64)),
|
||||
(bc_v8i64 (add (v8i64 VR512:$src), (v8i1sextv8i64)))),
|
||||
(VPABSQZrr VR512:$src)>;
|
||||
}
|
||||
let Predicates = [HasBWI] in {
|
||||
def : Pat<(xor
|
||||
(bc_v8i64 (v64i1sextv64i8)),
|
||||
(bc_v8i64 (add (v64i8 VR512:$src), (v64i1sextv64i8)))),
|
||||
(VPABSBZrr VR512:$src)>;
|
||||
def : Pat<(xor
|
||||
(bc_v8i64 (v32i1sextv32i16)),
|
||||
(bc_v8i64 (add (v32i16 VR512:$src), (v32i1sextv32i16)))),
|
||||
(VPABSWZrr VR512:$src)>;
|
||||
}
|
||||
defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs>;
|
||||
|
||||
multiclass avx512_ctlz<bits<8> opc, string OpcodeStr, Predicate prd>{
|
||||
|
||||
|
@ -355,7 +355,6 @@ def SDTFmaRound : SDTypeProfile<1, 4, [SDTCisSameAs<0,1>,
|
||||
def X86PAlignr : SDNode<"X86ISD::PALIGNR", SDTShuff3OpI>;
|
||||
def X86VAlign : SDNode<"X86ISD::VALIGN", SDTShuff3OpI>;
|
||||
|
||||
def X86Abs : SDNode<"X86ISD::ABS", SDTIntUnaryOp>;
|
||||
def X86Conflict : SDNode<"X86ISD::CONFLICT", SDTIntUnaryOp>;
|
||||
|
||||
def X86PShufd : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>;
|
||||
|
@ -5266,84 +5266,24 @@ multiclass SS3I_unop_rm_y<bits<8> opc, string OpcodeStr, ValueType vt,
|
||||
Sched<[WriteVecALULd]>;
|
||||
}
|
||||
|
||||
// Helper fragments to match sext vXi1 to vXiY.
|
||||
def v16i1sextv16i8 : PatLeaf<(v16i8 (X86pcmpgt (bc_v16i8 (v4i32 immAllZerosV)),
|
||||
VR128:$src))>;
|
||||
def v8i1sextv8i16 : PatLeaf<(v8i16 (X86vsrai VR128:$src, (i8 15)))>;
|
||||
def v4i1sextv4i32 : PatLeaf<(v4i32 (X86vsrai VR128:$src, (i8 31)))>;
|
||||
def v32i1sextv32i8 : PatLeaf<(v32i8 (X86pcmpgt (bc_v32i8 (v8i32 immAllZerosV)),
|
||||
VR256:$src))>;
|
||||
def v16i1sextv16i16: PatLeaf<(v16i16 (X86vsrai VR256:$src, (i8 15)))>;
|
||||
def v8i1sextv8i32 : PatLeaf<(v8i32 (X86vsrai VR256:$src, (i8 31)))>;
|
||||
|
||||
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
|
||||
defm VPABSB : SS3I_unop_rm<0x1C, "vpabsb", v16i8, X86Abs, loadv2i64>, VEX, VEX_WIG;
|
||||
defm VPABSW : SS3I_unop_rm<0x1D, "vpabsw", v8i16, X86Abs, loadv2i64>, VEX, VEX_WIG;
|
||||
defm VPABSB : SS3I_unop_rm<0x1C, "vpabsb", v16i8, abs, loadv2i64>, VEX, VEX_WIG;
|
||||
defm VPABSW : SS3I_unop_rm<0x1D, "vpabsw", v8i16, abs, loadv2i64>, VEX, VEX_WIG;
|
||||
}
|
||||
let Predicates = [HasAVX, NoVLX] in {
|
||||
defm VPABSD : SS3I_unop_rm<0x1E, "vpabsd", v4i32, X86Abs, loadv2i64>, VEX, VEX_WIG;
|
||||
defm VPABSD : SS3I_unop_rm<0x1E, "vpabsd", v4i32, abs, loadv2i64>, VEX, VEX_WIG;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
|
||||
def : Pat<(xor
|
||||
(bc_v2i64 (v16i1sextv16i8)),
|
||||
(bc_v2i64 (add (v16i8 VR128:$src), (v16i1sextv16i8)))),
|
||||
(VPABSBrr VR128:$src)>;
|
||||
def : Pat<(xor
|
||||
(bc_v2i64 (v8i1sextv8i16)),
|
||||
(bc_v2i64 (add (v8i16 VR128:$src), (v8i1sextv8i16)))),
|
||||
(VPABSWrr VR128:$src)>;
|
||||
}
|
||||
let Predicates = [HasAVX, NoVLX] in {
|
||||
def : Pat<(xor
|
||||
(bc_v2i64 (v4i1sextv4i32)),
|
||||
(bc_v2i64 (add (v4i32 VR128:$src), (v4i1sextv4i32)))),
|
||||
(VPABSDrr VR128:$src)>;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
|
||||
defm VPABSB : SS3I_unop_rm_y<0x1C, "vpabsb", v32i8, X86Abs>, VEX, VEX_L, VEX_WIG;
|
||||
defm VPABSW : SS3I_unop_rm_y<0x1D, "vpabsw", v16i16, X86Abs>, VEX, VEX_L, VEX_WIG;
|
||||
defm VPABSB : SS3I_unop_rm_y<0x1C, "vpabsb", v32i8, abs>, VEX, VEX_L, VEX_WIG;
|
||||
defm VPABSW : SS3I_unop_rm_y<0x1D, "vpabsw", v16i16, abs>, VEX, VEX_L, VEX_WIG;
|
||||
}
|
||||
let Predicates = [HasAVX2, NoVLX] in {
|
||||
defm VPABSD : SS3I_unop_rm_y<0x1E, "vpabsd", v8i32, X86Abs>, VEX, VEX_L, VEX_WIG;
|
||||
defm VPABSD : SS3I_unop_rm_y<0x1E, "vpabsd", v8i32, abs>, VEX, VEX_L, VEX_WIG;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
|
||||
def : Pat<(xor
|
||||
(bc_v4i64 (v32i1sextv32i8)),
|
||||
(bc_v4i64 (add (v32i8 VR256:$src), (v32i1sextv32i8)))),
|
||||
(VPABSBYrr VR256:$src)>;
|
||||
def : Pat<(xor
|
||||
(bc_v4i64 (v16i1sextv16i16)),
|
||||
(bc_v4i64 (add (v16i16 VR256:$src), (v16i1sextv16i16)))),
|
||||
(VPABSWYrr VR256:$src)>;
|
||||
}
|
||||
let Predicates = [HasAVX2, NoVLX] in {
|
||||
def : Pat<(xor
|
||||
(bc_v4i64 (v8i1sextv8i32)),
|
||||
(bc_v4i64 (add (v8i32 VR256:$src), (v8i1sextv8i32)))),
|
||||
(VPABSDYrr VR256:$src)>;
|
||||
}
|
||||
|
||||
defm PABSB : SS3I_unop_rm<0x1C, "pabsb", v16i8, X86Abs, memopv2i64>;
|
||||
defm PABSW : SS3I_unop_rm<0x1D, "pabsw", v8i16, X86Abs, memopv2i64>;
|
||||
defm PABSD : SS3I_unop_rm<0x1E, "pabsd", v4i32, X86Abs, memopv2i64>;
|
||||
|
||||
let Predicates = [UseSSSE3] in {
|
||||
def : Pat<(xor
|
||||
(bc_v2i64 (v16i1sextv16i8)),
|
||||
(bc_v2i64 (add (v16i8 VR128:$src), (v16i1sextv16i8)))),
|
||||
(PABSBrr VR128:$src)>;
|
||||
def : Pat<(xor
|
||||
(bc_v2i64 (v8i1sextv8i16)),
|
||||
(bc_v2i64 (add (v8i16 VR128:$src), (v8i1sextv8i16)))),
|
||||
(PABSWrr VR128:$src)>;
|
||||
def : Pat<(xor
|
||||
(bc_v2i64 (v4i1sextv4i32)),
|
||||
(bc_v2i64 (add (v4i32 VR128:$src), (v4i1sextv4i32)))),
|
||||
(PABSDrr VR128:$src)>;
|
||||
}
|
||||
defm PABSB : SS3I_unop_rm<0x1C, "pabsb", v16i8, abs, memopv2i64>;
|
||||
defm PABSW : SS3I_unop_rm<0x1D, "pabsw", v8i16, abs, memopv2i64>;
|
||||
defm PABSD : SS3I_unop_rm<0x1E, "pabsd", v4i32, abs, memopv2i64>;
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
// SSSE3 - Packed Binary Operator Instructions
|
||||
|
@ -370,9 +370,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
||||
X86_INTRINSIC_DATA(avx_vpermilvar_pd_256, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0),
|
||||
X86_INTRINSIC_DATA(avx_vpermilvar_ps, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0),
|
||||
X86_INTRINSIC_DATA(avx_vpermilvar_ps_256, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0),
|
||||
X86_INTRINSIC_DATA(avx2_pabs_b, INTR_TYPE_1OP, X86ISD::ABS, 0),
|
||||
X86_INTRINSIC_DATA(avx2_pabs_d, INTR_TYPE_1OP, X86ISD::ABS, 0),
|
||||
X86_INTRINSIC_DATA(avx2_pabs_w, INTR_TYPE_1OP, X86ISD::ABS, 0),
|
||||
X86_INTRINSIC_DATA(avx2_pabs_b, INTR_TYPE_1OP, ISD::ABS, 0),
|
||||
X86_INTRINSIC_DATA(avx2_pabs_d, INTR_TYPE_1OP, ISD::ABS, 0),
|
||||
X86_INTRINSIC_DATA(avx2_pabs_w, INTR_TYPE_1OP, ISD::ABS, 0),
|
||||
X86_INTRINSIC_DATA(avx2_packssdw, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
|
||||
X86_INTRINSIC_DATA(avx2_packsswb, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
|
||||
X86_INTRINSIC_DATA(avx2_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
|
||||
@ -838,18 +838,18 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
||||
X86ISD::FMULS_RND, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_mul_ss_round, INTR_TYPE_SCALAR_MASK_RM,
|
||||
X86ISD::FMULS_RND, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pabs_b_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pabs_b_256, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pabs_b_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pabs_d_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pabs_d_256, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pabs_d_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pabs_q_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pabs_q_256, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pabs_q_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pabs_w_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pabs_w_256, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pabs_w_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pabs_b_128, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pabs_b_256, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pabs_b_512, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pabs_d_128, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pabs_d_256, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pabs_d_512, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pabs_q_128, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pabs_q_256, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pabs_q_512, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pabs_w_128, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pabs_w_256, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pabs_w_512, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_padds_b_128, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_padds_b_256, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_padds_b_512, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
|
||||
@ -1693,9 +1693,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
||||
X86_INTRINSIC_DATA(sse41_pmuldq, INTR_TYPE_2OP, X86ISD::PMULDQ, 0),
|
||||
X86_INTRINSIC_DATA(sse4a_extrqi, INTR_TYPE_3OP, X86ISD::EXTRQI, 0),
|
||||
X86_INTRINSIC_DATA(sse4a_insertqi, INTR_TYPE_4OP, X86ISD::INSERTQI, 0),
|
||||
X86_INTRINSIC_DATA(ssse3_pabs_b_128, INTR_TYPE_1OP, X86ISD::ABS, 0),
|
||||
X86_INTRINSIC_DATA(ssse3_pabs_d_128, INTR_TYPE_1OP, X86ISD::ABS, 0),
|
||||
X86_INTRINSIC_DATA(ssse3_pabs_w_128, INTR_TYPE_1OP, X86ISD::ABS, 0),
|
||||
X86_INTRINSIC_DATA(ssse3_pabs_b_128, INTR_TYPE_1OP, ISD::ABS, 0),
|
||||
X86_INTRINSIC_DATA(ssse3_pabs_d_128, INTR_TYPE_1OP, ISD::ABS, 0),
|
||||
X86_INTRINSIC_DATA(ssse3_pabs_w_128, INTR_TYPE_1OP, ISD::ABS, 0),
|
||||
X86_INTRINSIC_DATA(ssse3_phadd_d_128, INTR_TYPE_2OP, X86ISD::HADD, 0),
|
||||
X86_INTRINSIC_DATA(ssse3_phadd_w_128, INTR_TYPE_2OP, X86ISD::HADD, 0),
|
||||
X86_INTRINSIC_DATA(ssse3_phsub_d_128, INTR_TYPE_2OP, X86ISD::HSUB, 0),
|
||||
|
@ -1,13 +1,11 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s
|
||||
|
||||
; FIXME: Various missed opportunities to simplify integer absolute instructions.
|
||||
|
||||
; fold (abs c1) -> c2
|
||||
define <4 x i32> @combine_v4i32_abs_constant() {
|
||||
; CHECK-LABEL: combine_v4i32_abs_constant:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vpabsd {{.*}}(%rip), %xmm0
|
||||
; CHECK-NEXT: vmovaps {{.*#+}} xmm0 = [0,1,3,2147483648]
|
||||
; CHECK-NEXT: retq
|
||||
%1 = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> <i32 0, i32 -1, i32 3, i32 -2147483648>)
|
||||
ret <4 x i32> %1
|
||||
@ -16,7 +14,7 @@ define <4 x i32> @combine_v4i32_abs_constant() {
|
||||
define <16 x i16> @combine_v16i16_abs_constant() {
|
||||
; CHECK-LABEL: combine_v16i16_abs_constant:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vpabsw {{.*}}(%rip), %ymm0
|
||||
; CHECK-NEXT: vmovaps {{.*#+}} ymm0 = [0,1,1,3,3,7,7,255,255,4096,4096,32767,32767,32768,32768,0]
|
||||
; CHECK-NEXT: retq
|
||||
%1 = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> <i16 0, i16 1, i16 -1, i16 3, i16 -3, i16 7, i16 -7, i16 255, i16 -255, i16 4096, i16 -4096, i16 32767, i16 -32767, i16 -32768, i16 32768, i16 65536>)
|
||||
ret <16 x i16> %1
|
||||
@ -27,7 +25,6 @@ define <8 x i16> @combine_v8i16_abs_abs(<8 x i16> %a) {
|
||||
; CHECK-LABEL: combine_v8i16_abs_abs:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vpabsw %xmm0, %xmm0
|
||||
; CHECK-NEXT: vpabsw %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%a1 = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a)
|
||||
%n2 = sub <8 x i16> zeroinitializer, %a1
|
||||
@ -40,7 +37,6 @@ define <32 x i8> @combine_v32i8_abs_abs(<32 x i8> %a) {
|
||||
; CHECK-LABEL: combine_v32i8_abs_abs:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vpabsb %ymm0, %ymm0
|
||||
; CHECK-NEXT: vpabsb %ymm0, %ymm0
|
||||
; CHECK-NEXT: retq
|
||||
%n1 = sub <32 x i8> zeroinitializer, %a
|
||||
%b1 = icmp slt <32 x i8> %a, zeroinitializer
|
||||
|
@ -147,14 +147,10 @@ define <8 x i32> @test_abs_gt_v8i32(<8 x i32> %a) nounwind {
|
||||
;
|
||||
; AVX1-LABEL: test_abs_gt_v8i32:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; AVX1-NEXT: vpsrad $31, %xmm1, %xmm2
|
||||
; AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpsrad $31, %xmm0, %xmm3
|
||||
; AVX1-NEXT: vpaddd %xmm3, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm1
|
||||
; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vpabsd %xmm0, %xmm1
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; AVX1-NEXT: vpabsd %xmm0, %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: test_abs_gt_v8i32:
|
||||
@ -193,14 +189,10 @@ define <8 x i32> @test_abs_ge_v8i32(<8 x i32> %a) nounwind {
|
||||
;
|
||||
; AVX1-LABEL: test_abs_ge_v8i32:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; AVX1-NEXT: vpsrad $31, %xmm1, %xmm2
|
||||
; AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpsrad $31, %xmm0, %xmm3
|
||||
; AVX1-NEXT: vpaddd %xmm3, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm1
|
||||
; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vpabsd %xmm0, %xmm1
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; AVX1-NEXT: vpabsd %xmm0, %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: test_abs_ge_v8i32:
|
||||
@ -239,14 +231,10 @@ define <16 x i16> @test_abs_gt_v16i16(<16 x i16> %a) nounwind {
|
||||
;
|
||||
; AVX1-LABEL: test_abs_gt_v16i16:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; AVX1-NEXT: vpsraw $15, %xmm1, %xmm2
|
||||
; AVX1-NEXT: vpaddw %xmm2, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpsraw $15, %xmm0, %xmm3
|
||||
; AVX1-NEXT: vpaddw %xmm3, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm1
|
||||
; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vpabsw %xmm0, %xmm1
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; AVX1-NEXT: vpabsw %xmm0, %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: test_abs_gt_v16i16:
|
||||
@ -285,15 +273,10 @@ define <32 x i8> @test_abs_lt_v32i8(<32 x i8> %a) nounwind {
|
||||
;
|
||||
; AVX1-LABEL: test_abs_lt_v32i8:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpcmpgtb %xmm1, %xmm2, %xmm3
|
||||
; AVX1-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm4
|
||||
; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vxorps %ymm4, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vpabsb %xmm0, %xmm1
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; AVX1-NEXT: vpabsb %xmm0, %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: test_abs_lt_v32i8:
|
||||
@ -332,14 +315,10 @@ define <8 x i32> @test_abs_le_v8i32(<8 x i32> %a) nounwind {
|
||||
;
|
||||
; AVX1-LABEL: test_abs_le_v8i32:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; AVX1-NEXT: vpsrad $31, %xmm1, %xmm2
|
||||
; AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpsrad $31, %xmm0, %xmm3
|
||||
; AVX1-NEXT: vpaddd %xmm3, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm1
|
||||
; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vpabsd %xmm0, %xmm1
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; AVX1-NEXT: vpabsd %xmm0, %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: test_abs_le_v8i32:
|
||||
@ -388,22 +367,14 @@ define <16 x i32> @test_abs_le_16i32(<16 x i32> %a) nounwind {
|
||||
;
|
||||
; AVX1-LABEL: test_abs_le_16i32:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
||||
; AVX1-NEXT: vpsrad $31, %xmm2, %xmm3
|
||||
; AVX1-NEXT: vpaddd %xmm3, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpsrad $31, %xmm0, %xmm4
|
||||
; AVX1-NEXT: vpaddd %xmm4, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm2
|
||||
; AVX1-NEXT: vxorps %ymm2, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
|
||||
; AVX1-NEXT: vpsrad $31, %xmm2, %xmm3
|
||||
; AVX1-NEXT: vpaddd %xmm3, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpsrad $31, %xmm1, %xmm4
|
||||
; AVX1-NEXT: vpaddd %xmm4, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm2
|
||||
; AVX1-NEXT: vxorps %ymm2, %ymm1, %ymm1
|
||||
; AVX1-NEXT: vpabsd %xmm0, %xmm2
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; AVX1-NEXT: vpabsd %xmm0, %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
|
||||
; AVX1-NEXT: vpabsd %xmm1, %xmm2
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
|
||||
; AVX1-NEXT: vpabsd %xmm1, %xmm1
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: test_abs_le_16i32:
|
||||
@ -450,9 +421,7 @@ define <2 x i64> @test_abs_ge_v2i64(<2 x i64> %a) nounwind {
|
||||
;
|
||||
; AVX512-LABEL: test_abs_ge_v2i64:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vpsraq $63, %xmm0, %xmm1
|
||||
; AVX512-NEXT: vpaddq %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpabsq %xmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
%tmp1neg = sub <2 x i64> zeroinitializer, %a
|
||||
%b = icmp sge <2 x i64> %a, zeroinitializer
|
||||
@ -499,9 +468,7 @@ define <4 x i64> @test_abs_gt_v4i64(<4 x i64> %a) nounwind {
|
||||
;
|
||||
; AVX512-LABEL: test_abs_gt_v4i64:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vpsraq $63, %ymm0, %ymm1
|
||||
; AVX512-NEXT: vpaddq %ymm1, %ymm0, %ymm0
|
||||
; AVX512-NEXT: vpxor %ymm1, %ymm0, %ymm0
|
||||
; AVX512-NEXT: vpabsq %ymm0, %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
%tmp1neg = sub <4 x i64> zeroinitializer, %a
|
||||
%b = icmp sgt <4 x i64> %a, <i64 -1, i64 -1, i64 -1, i64 -1>
|
||||
@ -691,23 +658,14 @@ define <64 x i8> @test_abs_lt_v64i8(<64 x i8> %a) nounwind {
|
||||
;
|
||||
; AVX1-LABEL: test_abs_lt_v64i8:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
||||
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
|
||||
; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm4
|
||||
; AVX1-NEXT: vpcmpgtb %xmm0, %xmm3, %xmm5
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm5, %ymm6
|
||||
; AVX1-NEXT: vpaddb %xmm4, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vxorps %ymm6, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
|
||||
; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm4
|
||||
; AVX1-NEXT: vpcmpgtb %xmm1, %xmm3, %xmm3
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm5
|
||||
; AVX1-NEXT: vpaddb %xmm4, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
|
||||
; AVX1-NEXT: vxorps %ymm5, %ymm1, %ymm1
|
||||
; AVX1-NEXT: vpabsb %xmm0, %xmm2
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; AVX1-NEXT: vpabsb %xmm0, %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
|
||||
; AVX1-NEXT: vpabsb %xmm1, %xmm2
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
|
||||
; AVX1-NEXT: vpabsb %xmm1, %xmm1
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: test_abs_lt_v64i8:
|
||||
@ -763,22 +721,14 @@ define <32 x i16> @test_abs_gt_v32i16(<32 x i16> %a) nounwind {
|
||||
;
|
||||
; AVX1-LABEL: test_abs_gt_v32i16:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
||||
; AVX1-NEXT: vpsraw $15, %xmm2, %xmm3
|
||||
; AVX1-NEXT: vpaddw %xmm3, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpsraw $15, %xmm0, %xmm4
|
||||
; AVX1-NEXT: vpaddw %xmm4, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm2
|
||||
; AVX1-NEXT: vxorps %ymm2, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
|
||||
; AVX1-NEXT: vpsraw $15, %xmm2, %xmm3
|
||||
; AVX1-NEXT: vpaddw %xmm3, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpsraw $15, %xmm1, %xmm4
|
||||
; AVX1-NEXT: vpaddw %xmm4, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm2
|
||||
; AVX1-NEXT: vxorps %ymm2, %ymm1, %ymm1
|
||||
; AVX1-NEXT: vpabsw %xmm0, %xmm2
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; AVX1-NEXT: vpabsw %xmm0, %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
|
||||
; AVX1-NEXT: vpabsw %xmm1, %xmm2
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
|
||||
; AVX1-NEXT: vpabsw %xmm1, %xmm1
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: test_abs_gt_v32i16:
|
||||
|
Loading…
x
Reference in New Issue
Block a user