mirror of
https://github.com/RPCS3/llvm.git
synced 2024-12-15 07:59:57 +00:00
Improve ISel across lane float min/max reduction
In vectorized float min/max reduction code, the final "reduce" step is sub-optimal. In AArch64, this change will combine:
    svn0 = vector_shuffle t0, undef<2,3,u,u>
    fmin = fminnum t0, svn0
    svn1 = vector_shuffle fmin, undef<1,u,u,u>
    cc = setcc fmin, svn1, ole
    n0 = extract_vector_elt cc, #0
    n1 = extract_vector_elt fmin, #0
    n2 = extract_vector_elt fmin, #1
    result = select n0, n1, n2
into:
    result = llvm.aarch64.neon.fminnmv t0
This change extends r247575. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@249834 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
0425965d3d
commit
6bdac546b9
@ -8750,8 +8750,13 @@ static SDValue tryMatchAcrossLaneShuffleForReduction(SDNode *N, SDValue OpV,
|
||||
return SDValue();
|
||||
|
||||
int NumVecElts = VTy.getVectorNumElements();
|
||||
if (NumVecElts != 4 && NumVecElts != 8 && NumVecElts != 16)
|
||||
return SDValue();
|
||||
if (Op == ISD::FMAXNUM || Op == ISD::FMINNUM) {
|
||||
if (NumVecElts != 4)
|
||||
return SDValue();
|
||||
} else {
|
||||
if (NumVecElts != 4 && NumVecElts != 8 && NumVecElts != 16)
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
int NumExpectedSteps = APInt(8, NumVecElts).logBase2();
|
||||
SDValue PreOp = OpV;
|
||||
@ -8802,6 +8807,8 @@ static SDValue tryMatchAcrossLaneShuffleForReduction(SDNode *N, SDValue OpV,
|
||||
PreOp = CurOp;
|
||||
}
|
||||
unsigned Opcode;
|
||||
bool IsIntrinsic = false;
|
||||
|
||||
switch (Op) {
|
||||
default:
|
||||
llvm_unreachable("Unexpected operator for across vector reduction");
|
||||
@ -8820,11 +8827,24 @@ static SDValue tryMatchAcrossLaneShuffleForReduction(SDNode *N, SDValue OpV,
|
||||
case ISD::UMIN:
|
||||
Opcode = AArch64ISD::UMINV;
|
||||
break;
|
||||
case ISD::FMAXNUM:
|
||||
Opcode = Intrinsic::aarch64_neon_fmaxnmv;
|
||||
IsIntrinsic = true;
|
||||
break;
|
||||
case ISD::FMINNUM:
|
||||
Opcode = Intrinsic::aarch64_neon_fminnmv;
|
||||
IsIntrinsic = true;
|
||||
break;
|
||||
}
|
||||
SDLoc DL(N);
|
||||
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0),
|
||||
DAG.getNode(Opcode, DL, PreOp.getSimpleValueType(), PreOp),
|
||||
DAG.getConstant(0, DL, MVT::i64));
|
||||
|
||||
return IsIntrinsic
|
||||
? DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, N->getValueType(0),
|
||||
DAG.getConstant(Opcode, DL, MVT::i32), PreOp)
|
||||
: DAG.getNode(
|
||||
ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0),
|
||||
DAG.getNode(Opcode, DL, PreOp.getSimpleValueType(), PreOp),
|
||||
DAG.getConstant(0, DL, MVT::i64));
|
||||
}
|
||||
|
||||
/// Target-specific DAG combine for the across vector min/max reductions.
|
||||
@ -8848,9 +8868,6 @@ static SDValue tryMatchAcrossLaneShuffleForReduction(SDNode *N, SDValue OpV,
|
||||
/// becomes :
|
||||
/// %1 = smaxv %0
|
||||
/// %result = extract_vector_elt %1, 0
|
||||
/// FIXME: Currently this function matches only SMAXV, UMAXV, SMINV, and UMINV.
|
||||
/// We could also support other types of across lane reduction available
|
||||
/// in AArch64, including FMAXNMV, FMAXV, FMINNMV, and FMINV.
|
||||
static SDValue
|
||||
performAcrossLaneMinMaxReductionCombine(SDNode *N, SelectionDAG &DAG,
|
||||
const AArch64Subtarget *Subtarget) {
|
||||
@ -8878,17 +8895,26 @@ performAcrossLaneMinMaxReductionCombine(SDNode *N, SelectionDAG &DAG,
|
||||
SDValue VectorOp = SetCC.getOperand(0);
|
||||
unsigned Op = VectorOp->getOpcode();
|
||||
// Check if the input vector is fed by the operator we want to handle.
|
||||
if (Op != ISD::SMAX && Op != ISD::UMAX && Op != ISD::SMIN && Op != ISD::UMIN)
|
||||
if (Op != ISD::SMAX && Op != ISD::UMAX && Op != ISD::SMIN &&
|
||||
Op != ISD::UMIN && Op != ISD::FMAXNUM && Op != ISD::FMINNUM)
|
||||
return SDValue();
|
||||
|
||||
EVT VTy = VectorOp.getValueType();
|
||||
if (!VTy.isVector())
|
||||
return SDValue();
|
||||
|
||||
EVT EltTy = VTy.getVectorElementType();
|
||||
if (EltTy != MVT::i32 && EltTy != MVT::i16 && EltTy != MVT::i8)
|
||||
if (VTy.getSizeInBits() < 64)
|
||||
return SDValue();
|
||||
|
||||
EVT EltTy = VTy.getVectorElementType();
|
||||
if (Op == ISD::FMAXNUM || Op == ISD::FMINNUM) {
|
||||
if (EltTy != MVT::f32)
|
||||
return SDValue();
|
||||
} else {
|
||||
if (EltTy != MVT::i32 && EltTy != MVT::i16 && EltTy != MVT::i8)
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
// Check if extracting from the same vector.
|
||||
// For example,
|
||||
// %sc = setcc %vector, %svn1, gt
|
||||
@ -8904,7 +8930,13 @@ performAcrossLaneMinMaxReductionCombine(SDNode *N, SelectionDAG &DAG,
|
||||
if ((Op == ISD::SMAX && CC != ISD::SETGT && CC != ISD::SETGE) ||
|
||||
(Op == ISD::UMAX && CC != ISD::SETUGT && CC != ISD::SETUGE) ||
|
||||
(Op == ISD::SMIN && CC != ISD::SETLT && CC != ISD::SETLE) ||
|
||||
(Op == ISD::UMIN && CC != ISD::SETULT && CC != ISD::SETULE))
|
||||
(Op == ISD::UMIN && CC != ISD::SETULT && CC != ISD::SETULE) ||
|
||||
(Op == ISD::FMAXNUM && CC != ISD::SETOGT && CC != ISD::SETOGE &&
|
||||
CC != ISD::SETUGT && CC != ISD::SETUGE && CC != ISD::SETGT &&
|
||||
CC != ISD::SETGE) ||
|
||||
(Op == ISD::FMINNUM && CC != ISD::SETOLT && CC != ISD::SETOLE &&
|
||||
CC != ISD::SETULT && CC != ISD::SETULE && CC != ISD::SETLT &&
|
||||
CC != ISD::SETLE))
|
||||
return SDValue();
|
||||
|
||||
// Expect to check only lane 0 from the vector SETCC.
|
||||
@ -8963,6 +8995,9 @@ performAcrossLaneAddReductionCombine(SDNode *N, SelectionDAG &DAG,
|
||||
if (EltTy != MVT::i32 && EltTy != MVT::i16 && EltTy != MVT::i8)
|
||||
return SDValue();
|
||||
|
||||
if (VTy.getSizeInBits() < 64)
|
||||
return SDValue();
|
||||
|
||||
return tryMatchAcrossLaneShuffleForReduction(N, N0, ISD::ADD, DAG);
|
||||
}
|
||||
|
||||
|
@ -285,3 +285,35 @@ define i64 @umin_D(<2 x i64>* nocapture readonly %arr) {
|
||||
%r = select i1 %rdx.minmax.cmp18.elt, i64 %rdx.minmax.select.elt, i64 %rdx.shuf.elt
|
||||
ret i64 %r
|
||||
}
|
||||
|
||||
; CHECK-LABEL: f_fmaxnmv
|
||||
; CHECK: fmaxnmv
|
||||
; Test input: a hand-written 4-lane float max reduction, built from two
; shuffle / fcmp / select steps and finished by a scalar select on lane 0.
; The FileCheck directives just above require an fmaxnmv instruction in the
; output generated for this function.
define float @f_fmaxnmv(<4 x float>* nocapture readonly %arr) {
|
||||
; Load the <4 x float> vector that is going to be reduced.
%rdx.minmax.select = load <4 x float>, <4 x float>* %arr
|
||||
; Step 1: bring lanes 2 and 3 down into lanes 0 and 1 (upper lanes are undef).
%rdx.shuf = shufflevector <4 x float> %rdx.minmax.select, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
|
||||
; Lane-wise "fast oge" compare; the select keeps the original lane where it
; compares greater-or-equal and the shuffled lane otherwise.
%rdx.minmax.cmp = fcmp fast oge <4 x float> %rdx.minmax.select, %rdx.shuf
|
||||
%rdx.minmax.select1 = select <4 x i1> %rdx.minmax.cmp, <4 x float> %rdx.minmax.select, <4 x float> %rdx.shuf
|
||||
; Step 2: bring lane 1 of the partial result down into lane 0.
%rdx.shuf1 = shufflevector <4 x float> %rdx.minmax.select1, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
%rdx.minmax.cmp1 = fcmp fast oge <4 x float> %rdx.minmax.select1, %rdx.shuf1
|
||||
; Only lane 0 of the second compare is used, as a scalar condition.
%rdx.minmax.cmp1.elt = extractelement <4 x i1> %rdx.minmax.cmp1, i32 0
|
||||
%rdx.minmax.select1.elt = extractelement <4 x float> %rdx.minmax.select1, i32 0
|
||||
; Despite its name, this reads lane 1 of %rdx.minmax.select1 directly; that is
; the same value the second shuffle placed in lane 0 of %rdx.shuf1.
%rdx.shuf1.elt = extractelement <4 x float> %rdx.minmax.select1, i32 1
|
||||
; Scalar select between lane 0 and lane 1 produces the reduction result.
%r = select i1 %rdx.minmax.cmp1.elt, float %rdx.minmax.select1.elt, float %rdx.shuf1.elt
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; CHECK-LABEL: f_fminnmv
|
||||
; CHECK: fminnmv
|
||||
; Test input: a hand-written 4-lane float min reduction, built from two
; shuffle / fcmp / select steps and finished by a scalar select on lane 0.
; The FileCheck directives just above require an fminnmv instruction in the
; output generated for this function.
define float @f_fminnmv(<4 x float>* nocapture readonly %arr) {
|
||||
; Load the <4 x float> vector that is going to be reduced.
%rdx.minmax.select = load <4 x float>, <4 x float>* %arr
|
||||
; Step 1: bring lanes 2 and 3 down into lanes 0 and 1 (upper lanes are undef).
%rdx.shuf = shufflevector <4 x float> %rdx.minmax.select, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
|
||||
; Lane-wise "fast ole" compare; the select keeps the original lane where it
; compares less-or-equal and the shuffled lane otherwise.
%rdx.minmax.cmp = fcmp fast ole <4 x float> %rdx.minmax.select, %rdx.shuf
|
||||
%rdx.minmax.select1 = select <4 x i1> %rdx.minmax.cmp, <4 x float> %rdx.minmax.select, <4 x float> %rdx.shuf
|
||||
; Step 2: bring lane 1 of the partial result down into lane 0.
%rdx.shuf1 = shufflevector <4 x float> %rdx.minmax.select1, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
%rdx.minmax.cmp1 = fcmp fast ole <4 x float> %rdx.minmax.select1, %rdx.shuf1
|
||||
; Only lane 0 of the second compare is used, as a scalar condition.
%rdx.minmax.cmp1.elt = extractelement <4 x i1> %rdx.minmax.cmp1, i32 0
|
||||
%rdx.minmax.select1.elt = extractelement <4 x float> %rdx.minmax.select1, i32 0
|
||||
; Despite its name, this reads lane 1 of %rdx.minmax.select1 directly; that is
; the same value the second shuffle placed in lane 0 of %rdx.shuf1.
%rdx.shuf1.elt = extractelement <4 x float> %rdx.minmax.select1, i32 1
|
||||
; Scalar select between lane 0 and lane 1 produces the reduction result.
%r = select i1 %rdx.minmax.cmp1.elt, float %rdx.minmax.select1.elt, float %rdx.shuf1.elt
|
||||
ret float %r
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user