mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-27 13:42:24 +00:00
SelectionDAG: Teach the legalizer to split SETCC if VSELECT needs splitting too.
The Type Legalizer recognizes that VSELECT needs to be split, because the type is too wide for the given target. The same does not always apply to SETCC, because less space is required to encode the result of a comparison. As a result VSELECT is split and SETCC is unrolled into scalar comparisons. This commit fixes the issue by checking for VSELECT-SETCC patterns in the DAG Combiner. If a matching pattern is found, then the result mask of SETCC is promoted to the expected vector mask type for the given target. This mask usually has the same size as the VSELECT return type (except for Intel KNL). Now the type legalizer will split both VSELECT and SETCC. This allows the following X86 DAG Combine code to successfully detect the MIN/MAX pattern. This fixes PR16695, PR17002, and <rdar://problem/14594431>. Reviewed by Nadav. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193676 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
6a4860af7a
commit
4eced19c50
@ -4346,6 +4346,28 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Treat SETCC as a vector mask and promote the result type based on the
|
||||||
|
// target's expected SETCC result type. This will ensure that SETCC and VSELECT
|
||||||
|
// are both split by the type legalizer. This is done to prevent the type
|
||||||
|
// legalizer from unrolling SETCC into scalar comparisons.
|
||||||
|
EVT SelectVT = N->getValueType(0);
|
||||||
|
EVT MaskVT = getSetCCResultType(SelectVT);
|
||||||
|
if (N0.getOpcode() == ISD::SETCC && N0.getValueType() != MaskVT) {
|
||||||
|
SDLoc MaskDL(N0);
|
||||||
|
|
||||||
|
// Extend the mask to the desired value type.
|
||||||
|
ISD::NodeType ExtendCode =
|
||||||
|
TargetLowering::getExtendForContent(TLI.getBooleanContents(true));
|
||||||
|
SDValue Mask = DAG.getNode(ExtendCode, MaskDL, MaskVT, N0);
|
||||||
|
|
||||||
|
AddToWorkList(Mask.getNode());
|
||||||
|
|
||||||
|
SDValue LHS = N->getOperand(1);
|
||||||
|
SDValue RHS = N->getOperand(2);
|
||||||
|
|
||||||
|
return DAG.getNode(ISD::VSELECT, DL, SelectVT, Mask, LHS, RHS);
|
||||||
|
}
|
||||||
|
|
||||||
return SDValue();
|
return SDValue();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -492,14 +492,19 @@ void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo,
|
|||||||
SDValue Cond = N->getOperand(0);
|
SDValue Cond = N->getOperand(0);
|
||||||
CL = CH = Cond;
|
CL = CH = Cond;
|
||||||
if (Cond.getValueType().isVector()) {
|
if (Cond.getValueType().isVector()) {
|
||||||
assert(Cond.getValueType().getVectorElementType() == MVT::i1 &&
|
if (Cond.getOpcode() == ISD::SETCC) {
|
||||||
"Condition legalized before result?");
|
assert(Cond.getValueType() == getSetCCResultType(N->getValueType(0)) &&
|
||||||
unsigned NumElements = Cond.getValueType().getVectorNumElements();
|
"Condition has not been prepared for split!");
|
||||||
EVT VCondTy = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElements / 2);
|
GetSplitVector(Cond, CL, CH);
|
||||||
CL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VCondTy, Cond,
|
} else {
|
||||||
DAG.getConstant(0, TLI.getVectorIdxTy()));
|
EVT ETy = Cond.getValueType().getVectorElementType();
|
||||||
CH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VCondTy, Cond,
|
unsigned NumElements = Cond.getValueType().getVectorNumElements();
|
||||||
DAG.getConstant(NumElements / 2, TLI.getVectorIdxTy()));
|
EVT VCondTy = EVT::getVectorVT(*DAG.getContext(), ETy, NumElements / 2);
|
||||||
|
CL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VCondTy, Cond,
|
||||||
|
DAG.getConstant(0, TLI.getVectorIdxTy()));
|
||||||
|
CH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VCondTy, Cond,
|
||||||
|
DAG.getConstant(NumElements / 2, TLI.getVectorIdxTy()));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), CL, LL, RL);
|
Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), CL, LL, RL);
|
||||||
|
@ -1546,7 +1546,16 @@ void X86TargetLowering::resetOperationActions() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
EVT X86TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
|
EVT X86TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
|
||||||
if (!VT.isVector()) return MVT::i8;
|
if (!VT.isVector())
|
||||||
|
return MVT::i8;
|
||||||
|
|
||||||
|
const TargetMachine &TM = getTargetMachine();
|
||||||
|
if (!TM.Options.UseSoftFloat && Subtarget->hasAVX512())
|
||||||
|
switch(VT.getVectorNumElements()) {
|
||||||
|
case 8: return MVT::v8i1;
|
||||||
|
case 16: return MVT::v16i1;
|
||||||
|
}
|
||||||
|
|
||||||
return VT.changeVectorElementTypeToInteger();
|
return VT.changeVectorElementTypeToInteger();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
42
test/CodeGen/X86/vec_split.ll
Normal file
42
test/CodeGen/X86/vec_split.ll
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
; RUN: llc -march=x86-64 -mcpu=corei7 < %s | FileCheck %s -check-prefix=SSE4
|
||||||
|
; RUN: llc -march=x86-64 -mcpu=corei7-avx < %s | FileCheck %s -check-prefix=AVX1
|
||||||
|
; RUN: llc -march=x86-64 -mcpu=core-avx2 < %s | FileCheck %s -check-prefix=AVX2
|
||||||
|
|
||||||
|
define <16 x i16> @split16(<16 x i16> %a, <16 x i16> %b, <16 x i8> %__mask) {
|
||||||
|
; SSE4-LABEL: split16:
|
||||||
|
; SSE4: pminuw
|
||||||
|
; SSE4: pminuw
|
||||||
|
; SSE4: ret
|
||||||
|
; AVX1-LABEL: split16:
|
||||||
|
; AVX1: vpminuw
|
||||||
|
; AVX1: vpminuw
|
||||||
|
; AVX1: ret
|
||||||
|
; AVX2-LABEL: split16:
|
||||||
|
; AVX2: vpminuw
|
||||||
|
; AVX2: ret
|
||||||
|
%1 = icmp ult <16 x i16> %a, %b
|
||||||
|
%2 = select <16 x i1> %1, <16 x i16> %a, <16 x i16> %b
|
||||||
|
ret <16 x i16> %2
|
||||||
|
}
|
||||||
|
|
||||||
|
define <32 x i16> @split32(<32 x i16> %a, <32 x i16> %b, <32 x i8> %__mask) {
|
||||||
|
; SSE4-LABEL: split32:
|
||||||
|
; SSE4: pminuw
|
||||||
|
; SSE4: pminuw
|
||||||
|
; SSE4: pminuw
|
||||||
|
; SSE4: pminuw
|
||||||
|
; SSE4: ret
|
||||||
|
; AVX1-LABEL: split32:
|
||||||
|
; AVX1: vpminuw
|
||||||
|
; AVX1: vpminuw
|
||||||
|
; AVX1: vpminuw
|
||||||
|
; AVX1: vpminuw
|
||||||
|
; AVX1: ret
|
||||||
|
; AVX2-LABEL: split32:
|
||||||
|
; AVX2: vpminuw
|
||||||
|
; AVX2: vpminuw
|
||||||
|
; AVX2: ret
|
||||||
|
%1 = icmp ult <32 x i16> %a, %b
|
||||||
|
%2 = select <32 x i1> %1, <32 x i16> %a, <32 x i16> %b
|
||||||
|
ret <32 x i16> %2
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user