mirror of
https://github.com/RPCS3/llvm.git
synced 2025-02-25 21:33:25 +00:00
[AArch64] Use [SU]ABSDIFF nodes instead of intrinsics for ABD/ABA
No functional change, but it preps codegen for the future when SABSDIFF will start getting generated in anger.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@242545 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
8a8582d6e9
commit
126ab2389e
@ -683,10 +683,12 @@ void AArch64TargetLowering::addTypeForNEON(EVT VT, EVT PromotedBitwiseVT) {
|
|||||||
setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Custom);
|
setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Custom);
|
||||||
setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Custom);
|
setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Custom);
|
||||||
|
|
||||||
// [SU][MIN|MAX] are available for all NEON types apart from i64.
|
// [SU][MIN|MAX] and [SU]ABSDIFF are available for all NEON types apart from
|
||||||
|
// i64.
|
||||||
if (!VT.isFloatingPoint() &&
|
if (!VT.isFloatingPoint() &&
|
||||||
VT.getSimpleVT() != MVT::v2i64 && VT.getSimpleVT() != MVT::v1i64)
|
VT.getSimpleVT() != MVT::v2i64 && VT.getSimpleVT() != MVT::v1i64)
|
||||||
for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
|
for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX,
|
||||||
|
ISD::SABSDIFF, ISD::UABSDIFF})
|
||||||
setOperationAction(Opcode, VT.getSimpleVT(), Legal);
|
setOperationAction(Opcode, VT.getSimpleVT(), Legal);
|
||||||
|
|
||||||
if (Subtarget->isLittleEndian()) {
|
if (Subtarget->isLittleEndian()) {
|
||||||
@ -8063,14 +8065,15 @@ static SDValue performAddSubLongCombine(SDNode *N,
|
|||||||
// (aarch64_neon_umull (extract_high (v2i64 vec)))
|
// (aarch64_neon_umull (extract_high (v2i64 vec)))
|
||||||
// (extract_high (v2i64 (dup128 scalar)))))
|
// (extract_high (v2i64 (dup128 scalar)))))
|
||||||
//
|
//
|
||||||
static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N,
|
static SDValue tryCombineLongOpWithDup(SDNode *N,
|
||||||
TargetLowering::DAGCombinerInfo &DCI,
|
TargetLowering::DAGCombinerInfo &DCI,
|
||||||
SelectionDAG &DAG) {
|
SelectionDAG &DAG) {
|
||||||
if (DCI.isBeforeLegalizeOps())
|
if (DCI.isBeforeLegalizeOps())
|
||||||
return SDValue();
|
return SDValue();
|
||||||
|
|
||||||
SDValue LHS = N->getOperand(1);
|
bool IsIntrinsic = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN;
|
||||||
SDValue RHS = N->getOperand(2);
|
SDValue LHS = N->getOperand(IsIntrinsic ? 1 : 0);
|
||||||
|
SDValue RHS = N->getOperand(IsIntrinsic ? 2 : 1);
|
||||||
assert(LHS.getValueType().is64BitVector() &&
|
assert(LHS.getValueType().is64BitVector() &&
|
||||||
RHS.getValueType().is64BitVector() &&
|
RHS.getValueType().is64BitVector() &&
|
||||||
"unexpected shape for long operation");
|
"unexpected shape for long operation");
|
||||||
@ -8088,8 +8091,13 @@ static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N,
|
|||||||
return SDValue();
|
return SDValue();
|
||||||
}
|
}
|
||||||
|
|
||||||
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), N->getValueType(0),
|
// N could either be an intrinsic or a sabsdiff/uabsdiff node.
|
||||||
N->getOperand(0), LHS, RHS);
|
if (IsIntrinsic)
|
||||||
|
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), N->getValueType(0),
|
||||||
|
N->getOperand(0), LHS, RHS);
|
||||||
|
else
|
||||||
|
return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
|
||||||
|
LHS, RHS);
|
||||||
}
|
}
|
||||||
|
|
||||||
static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) {
|
static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) {
|
||||||
@ -8208,11 +8216,17 @@ static SDValue performIntrinsicCombine(SDNode *N,
|
|||||||
case Intrinsic::aarch64_neon_fmin:
|
case Intrinsic::aarch64_neon_fmin:
|
||||||
return DAG.getNode(AArch64ISD::FMIN, SDLoc(N), N->getValueType(0),
|
return DAG.getNode(AArch64ISD::FMIN, SDLoc(N), N->getValueType(0),
|
||||||
N->getOperand(1), N->getOperand(2));
|
N->getOperand(1), N->getOperand(2));
|
||||||
|
case Intrinsic::aarch64_neon_sabd:
|
||||||
|
return DAG.getNode(ISD::SABSDIFF, SDLoc(N), N->getValueType(0),
|
||||||
|
N->getOperand(1), N->getOperand(2));
|
||||||
|
case Intrinsic::aarch64_neon_uabd:
|
||||||
|
return DAG.getNode(ISD::UABSDIFF, SDLoc(N), N->getValueType(0),
|
||||||
|
N->getOperand(1), N->getOperand(2));
|
||||||
case Intrinsic::aarch64_neon_smull:
|
case Intrinsic::aarch64_neon_smull:
|
||||||
case Intrinsic::aarch64_neon_umull:
|
case Intrinsic::aarch64_neon_umull:
|
||||||
case Intrinsic::aarch64_neon_pmull:
|
case Intrinsic::aarch64_neon_pmull:
|
||||||
case Intrinsic::aarch64_neon_sqdmull:
|
case Intrinsic::aarch64_neon_sqdmull:
|
||||||
return tryCombineLongOpWithDup(IID, N, DCI, DAG);
|
return tryCombineLongOpWithDup(N, DCI, DAG);
|
||||||
case Intrinsic::aarch64_neon_sqshl:
|
case Intrinsic::aarch64_neon_sqshl:
|
||||||
case Intrinsic::aarch64_neon_uqshl:
|
case Intrinsic::aarch64_neon_uqshl:
|
||||||
case Intrinsic::aarch64_neon_sqshlu:
|
case Intrinsic::aarch64_neon_sqshlu:
|
||||||
@ -8237,18 +8251,15 @@ static SDValue performExtendCombine(SDNode *N,
|
|||||||
// helps the backend to decide that an sabdl2 would be useful, saving a real
|
// helps the backend to decide that an sabdl2 would be useful, saving a real
|
||||||
// extract_high operation.
|
// extract_high operation.
|
||||||
if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ZERO_EXTEND &&
|
if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ZERO_EXTEND &&
|
||||||
N->getOperand(0).getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
|
(N->getOperand(0).getOpcode() == ISD::SABSDIFF ||
|
||||||
|
N->getOperand(0).getOpcode() == ISD::UABSDIFF)) {
|
||||||
SDNode *ABDNode = N->getOperand(0).getNode();
|
SDNode *ABDNode = N->getOperand(0).getNode();
|
||||||
unsigned IID = getIntrinsicID(ABDNode);
|
SDValue NewABD = tryCombineLongOpWithDup(ABDNode, DCI, DAG);
|
||||||
if (IID == Intrinsic::aarch64_neon_sabd ||
|
if (!NewABD.getNode())
|
||||||
IID == Intrinsic::aarch64_neon_uabd) {
|
return SDValue();
|
||||||
SDValue NewABD = tryCombineLongOpWithDup(IID, ABDNode, DCI, DAG);
|
|
||||||
if (!NewABD.getNode())
|
|
||||||
return SDValue();
|
|
||||||
|
|
||||||
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0),
|
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0),
|
||||||
NewABD);
|
NewABD);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// This is effectively a custom type legalization for AArch64.
|
// This is effectively a custom type legalization for AArch64.
|
||||||
|
@ -2843,8 +2843,8 @@ defm MLS : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls",
|
|||||||
defm MUL : SIMDThreeSameVectorBHS<0, 0b10011, "mul", mul>;
|
defm MUL : SIMDThreeSameVectorBHS<0, 0b10011, "mul", mul>;
|
||||||
defm PMUL : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_aarch64_neon_pmul>;
|
defm PMUL : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_aarch64_neon_pmul>;
|
||||||
defm SABA : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba",
|
defm SABA : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba",
|
||||||
TriOpFrag<(add node:$LHS, (int_aarch64_neon_sabd node:$MHS, node:$RHS))> >;
|
TriOpFrag<(add node:$LHS, (sabsdiff node:$MHS, node:$RHS))> >;
|
||||||
defm SABD : SIMDThreeSameVectorBHS<0,0b01110,"sabd", int_aarch64_neon_sabd>;
|
defm SABD : SIMDThreeSameVectorBHS<0,0b01110,"sabd", sabsdiff>;
|
||||||
defm SHADD : SIMDThreeSameVectorBHS<0,0b00000,"shadd", int_aarch64_neon_shadd>;
|
defm SHADD : SIMDThreeSameVectorBHS<0,0b00000,"shadd", int_aarch64_neon_shadd>;
|
||||||
defm SHSUB : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_aarch64_neon_shsub>;
|
defm SHSUB : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_aarch64_neon_shsub>;
|
||||||
defm SMAXP : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_aarch64_neon_smaxp>;
|
defm SMAXP : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_aarch64_neon_smaxp>;
|
||||||
@ -2862,8 +2862,8 @@ defm SRSHL : SIMDThreeSameVector<0,0b01010,"srshl", int_aarch64_neon_srshl>;
|
|||||||
defm SSHL : SIMDThreeSameVector<0,0b01000,"sshl", int_aarch64_neon_sshl>;
|
defm SSHL : SIMDThreeSameVector<0,0b01000,"sshl", int_aarch64_neon_sshl>;
|
||||||
defm SUB : SIMDThreeSameVector<1,0b10000,"sub", sub>;
|
defm SUB : SIMDThreeSameVector<1,0b10000,"sub", sub>;
|
||||||
defm UABA : SIMDThreeSameVectorBHSTied<1, 0b01111, "uaba",
|
defm UABA : SIMDThreeSameVectorBHSTied<1, 0b01111, "uaba",
|
||||||
TriOpFrag<(add node:$LHS, (int_aarch64_neon_uabd node:$MHS, node:$RHS))> >;
|
TriOpFrag<(add node:$LHS, (uabsdiff node:$MHS, node:$RHS))> >;
|
||||||
defm UABD : SIMDThreeSameVectorBHS<1,0b01110,"uabd", int_aarch64_neon_uabd>;
|
defm UABD : SIMDThreeSameVectorBHS<1,0b01110,"uabd", uabsdiff>;
|
||||||
defm UHADD : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", int_aarch64_neon_uhadd>;
|
defm UHADD : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", int_aarch64_neon_uhadd>;
|
||||||
defm UHSUB : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_aarch64_neon_uhsub>;
|
defm UHSUB : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_aarch64_neon_uhsub>;
|
||||||
defm UMAXP : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_aarch64_neon_umaxp>;
|
defm UMAXP : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_aarch64_neon_umaxp>;
|
||||||
@ -3381,9 +3381,9 @@ defm RADDHN : SIMDNarrowThreeVectorBHS<1,0b0100,"raddhn",int_aarch64_neon_raddhn
|
|||||||
defm RSUBHN : SIMDNarrowThreeVectorBHS<1,0b0110,"rsubhn",int_aarch64_neon_rsubhn>;
|
defm RSUBHN : SIMDNarrowThreeVectorBHS<1,0b0110,"rsubhn",int_aarch64_neon_rsubhn>;
|
||||||
defm PMULL : SIMDDifferentThreeVectorBD<0,0b1110,"pmull",int_aarch64_neon_pmull>;
|
defm PMULL : SIMDDifferentThreeVectorBD<0,0b1110,"pmull",int_aarch64_neon_pmull>;
|
||||||
defm SABAL : SIMDLongThreeVectorTiedBHSabal<0,0b0101,"sabal",
|
defm SABAL : SIMDLongThreeVectorTiedBHSabal<0,0b0101,"sabal",
|
||||||
int_aarch64_neon_sabd>;
|
sabsdiff>;
|
||||||
defm SABDL : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl",
|
defm SABDL : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl",
|
||||||
int_aarch64_neon_sabd>;
|
sabsdiff>;
|
||||||
defm SADDL : SIMDLongThreeVectorBHS< 0, 0b0000, "saddl",
|
defm SADDL : SIMDLongThreeVectorBHS< 0, 0b0000, "saddl",
|
||||||
BinOpFrag<(add (sext node:$LHS), (sext node:$RHS))>>;
|
BinOpFrag<(add (sext node:$LHS), (sext node:$RHS))>>;
|
||||||
defm SADDW : SIMDWideThreeVectorBHS< 0, 0b0001, "saddw",
|
defm SADDW : SIMDWideThreeVectorBHS< 0, 0b0001, "saddw",
|
||||||
@ -3404,9 +3404,9 @@ defm SSUBL : SIMDLongThreeVectorBHS<0, 0b0010, "ssubl",
|
|||||||
defm SSUBW : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw",
|
defm SSUBW : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw",
|
||||||
BinOpFrag<(sub node:$LHS, (sext node:$RHS))>>;
|
BinOpFrag<(sub node:$LHS, (sext node:$RHS))>>;
|
||||||
defm UABAL : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal",
|
defm UABAL : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal",
|
||||||
int_aarch64_neon_uabd>;
|
uabsdiff>;
|
||||||
defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl",
|
defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl",
|
||||||
int_aarch64_neon_uabd>;
|
uabsdiff>;
|
||||||
defm UADDL : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl",
|
defm UADDL : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl",
|
||||||
BinOpFrag<(add (zext node:$LHS), (zext node:$RHS))>>;
|
BinOpFrag<(add (zext node:$LHS), (zext node:$RHS))>>;
|
||||||
defm UADDW : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw",
|
defm UADDW : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw",
|
||||||
|
Loading…
x
Reference in New Issue
Block a user