mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-12-19 17:47:38 +00:00
Add wider vector/integer support for PR12312
- Enhance the fix for PR12312 to support wider integers, such as 256-bit integers. If more than one fully evaluated vector is found, POR them together first, then emit the final PTEST. llvm-svn: 163832
This commit is contained in:
parent
7c620b0d5f
commit
0c0da113c5
@ -8347,6 +8347,98 @@ static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) {
|
||||
return DAG.getNode(ISD::AND, dl, VT, xFGETSIGN, DAG.getConstant(1, VT));
|
||||
}
|
||||
|
||||
// LowerVectorAllZeroTest - Check whether an OR'd tree is PTEST-able.
|
||||
//
|
||||
SDValue X86TargetLowering::LowerVectorAllZeroTest(SDValue Op, SelectionDAG &DAG) const {
|
||||
assert(Op.getOpcode() == ISD::OR && "Only check OR'd tree.");
|
||||
|
||||
if (!Subtarget->hasSSE41())
|
||||
return SDValue();
|
||||
|
||||
if (!Op->hasOneUse())
|
||||
return SDValue();
|
||||
|
||||
SDNode *N = Op.getNode();
|
||||
DebugLoc DL = N->getDebugLoc();
|
||||
|
||||
SmallVector<SDValue, 8> Opnds;
|
||||
DenseMap<SDValue, unsigned> VecInMap;
|
||||
EVT VT = MVT::Other;
|
||||
|
||||
// Recognize a special case where a vector is casted into wide integer to
|
||||
// test all 0s.
|
||||
Opnds.push_back(N->getOperand(0));
|
||||
Opnds.push_back(N->getOperand(1));
|
||||
|
||||
for (unsigned Slot = 0, e = Opnds.size(); Slot < e; ++Slot) {
|
||||
SmallVector<SDValue, 8>::const_iterator I = Opnds.begin() + Slot;
|
||||
// BFS traverse all OR'd operands.
|
||||
if (I->getOpcode() == ISD::OR) {
|
||||
Opnds.push_back(I->getOperand(0));
|
||||
Opnds.push_back(I->getOperand(1));
|
||||
// Re-evaluate the number of nodes to be traversed.
|
||||
e += 2; // 2 more nodes (LHS and RHS) are pushed.
|
||||
continue;
|
||||
}
|
||||
|
||||
// Quit if a non-EXTRACT_VECTOR_ELT
|
||||
if (I->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
|
||||
return SDValue();
|
||||
|
||||
// Quit if without a constant index.
|
||||
SDValue Idx = I->getOperand(1);
|
||||
if (!isa<ConstantSDNode>(Idx))
|
||||
return SDValue();
|
||||
|
||||
SDValue ExtractedFromVec = I->getOperand(0);
|
||||
DenseMap<SDValue, unsigned>::iterator M = VecInMap.find(ExtractedFromVec);
|
||||
if (M == VecInMap.end()) {
|
||||
VT = ExtractedFromVec.getValueType();
|
||||
// Quit if not 128/256-bit vector.
|
||||
if (!VT.is128BitVector() && !VT.is256BitVector())
|
||||
return SDValue();
|
||||
// Quit if not the same type.
|
||||
if (VecInMap.begin() != VecInMap.end() &&
|
||||
VT != VecInMap.begin()->first.getValueType())
|
||||
return SDValue();
|
||||
M = VecInMap.insert(std::make_pair(ExtractedFromVec, 0)).first;
|
||||
}
|
||||
M->second |= 1U << cast<ConstantSDNode>(Idx)->getZExtValue();
|
||||
}
|
||||
|
||||
assert((VT.is128BitVector() || VT.is256BitVector()) &&
|
||||
"Not extracted from 128-bit vector.");
|
||||
|
||||
unsigned FullMask = (1U << VT.getVectorNumElements()) - 1U;
|
||||
SmallVector<SDValue, 8> VecIns;
|
||||
|
||||
for (DenseMap<SDValue, unsigned>::const_iterator
|
||||
I = VecInMap.begin(), E = VecInMap.end(); I != E; ++I) {
|
||||
// Quit if not all elements are used.
|
||||
if (I->second != FullMask)
|
||||
return SDValue();
|
||||
VecIns.push_back(I->first);
|
||||
}
|
||||
|
||||
EVT TestVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
|
||||
|
||||
// Cast all vectors into TestVT for PTEST.
|
||||
for (unsigned i = 0, e = VecIns.size(); i < e; ++i)
|
||||
VecIns[i] = DAG.getNode(ISD::BITCAST, DL, TestVT, VecIns[i]);
|
||||
|
||||
// If more than one full vectors are evaluated, OR them first before PTEST.
|
||||
for (unsigned Slot = 0, e = VecIns.size(); e - Slot > 1; Slot += 2, e += 1) {
|
||||
// Each iteration will OR 2 nodes and append the result until there is only
|
||||
// 1 node left, i.e. the final OR'd value of all vectors.
|
||||
SDValue LHS = VecIns[Slot];
|
||||
SDValue RHS = VecIns[Slot + 1];
|
||||
VecIns.push_back(DAG.getNode(ISD::OR, DL, TestVT, LHS, RHS));
|
||||
}
|
||||
|
||||
return DAG.getNode(X86ISD::PTEST, DL, MVT::i32,
|
||||
VecIns.back(), VecIns.back());
|
||||
}
|
||||
|
||||
/// Emit nodes that will be selected as "test Op0,Op0", or something
|
||||
/// equivalent.
|
||||
SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
|
||||
@ -8486,9 +8578,17 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
|
||||
switch (ArithOp.getOpcode()) {
|
||||
default: llvm_unreachable("unexpected operator!");
|
||||
case ISD::SUB: Opcode = X86ISD::SUB; break;
|
||||
case ISD::OR: Opcode = X86ISD::OR; break;
|
||||
case ISD::XOR: Opcode = X86ISD::XOR; break;
|
||||
case ISD::AND: Opcode = X86ISD::AND; break;
|
||||
case ISD::OR: {
|
||||
if (!NeedTruncation && (X86CC == X86::COND_E || X86CC == X86::COND_NE)) {
|
||||
SDValue EFLAGS = LowerVectorAllZeroTest(Op, DAG);
|
||||
if (EFLAGS.getNode())
|
||||
return EFLAGS;
|
||||
}
|
||||
Opcode = X86ISD::OR;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
NumOperands = 2;
|
||||
@ -14205,84 +14305,6 @@ static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) {
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
/// checkFlaggedOrCombine - DAG combination on X86ISD::OR, i.e. with EFLAGS
|
||||
/// updated. If only flag result is used and the result is evaluated from a
|
||||
/// series of element extraction, try to combine it into a PTEST.
|
||||
static SDValue checkFlaggedOrCombine(SDValue Or, X86::CondCode &CC,
|
||||
SelectionDAG &DAG,
|
||||
const X86Subtarget *Subtarget) {
|
||||
SDNode *N = Or.getNode();
|
||||
DebugLoc DL = N->getDebugLoc();
|
||||
|
||||
// Only SSE4.1 and beyond supports PTEST or like.
|
||||
if (!Subtarget->hasSSE41())
|
||||
return SDValue();
|
||||
|
||||
if (N->getOpcode() != X86ISD::OR)
|
||||
return SDValue();
|
||||
|
||||
// Quit if the value result of OR is used.
|
||||
if (N->hasAnyUseOfValue(0))
|
||||
return SDValue();
|
||||
|
||||
// Quit if not used as a boolean value.
|
||||
if (CC != X86::COND_E && CC != X86::COND_NE)
|
||||
return SDValue();
|
||||
|
||||
SmallVector<SDValue, 8> Opnds;
|
||||
SDValue VecIn;
|
||||
EVT VT = MVT::Other;
|
||||
unsigned Mask = 0;
|
||||
|
||||
// Recognize a special case where a vector is casted into wide integer to
|
||||
// test all 0s.
|
||||
Opnds.push_back(N->getOperand(0));
|
||||
Opnds.push_back(N->getOperand(1));
|
||||
|
||||
for (unsigned Slot = 0, e = Opnds.size(); Slot < e; ++Slot) {
|
||||
SmallVector<SDValue, 8>::const_iterator I = Opnds.begin() + Slot;
|
||||
// BFS traverse all OR'd operands.
|
||||
if (I->getOpcode() == ISD::OR) {
|
||||
Opnds.push_back(I->getOperand(0));
|
||||
Opnds.push_back(I->getOperand(1));
|
||||
// Re-evaluate the number of nodes to be traversed.
|
||||
e += 2; // 2 more nodes (LHS and RHS) are pushed.
|
||||
continue;
|
||||
}
|
||||
|
||||
// Quit if a non-EXTRACT_VECTOR_ELT
|
||||
if (I->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
|
||||
return SDValue();
|
||||
|
||||
// Quit if without a constant index.
|
||||
SDValue Idx = I->getOperand(1);
|
||||
if (!isa<ConstantSDNode>(Idx))
|
||||
return SDValue();
|
||||
|
||||
// Check if all elements are extracted from the same vector.
|
||||
SDValue ExtractedFromVec = I->getOperand(0);
|
||||
if (VecIn.getNode() == 0) {
|
||||
VT = ExtractedFromVec.getValueType();
|
||||
// FIXME: only 128-bit vector is supported so far.
|
||||
if (!VT.is128BitVector())
|
||||
return SDValue();
|
||||
VecIn = ExtractedFromVec;
|
||||
} else if (VecIn != ExtractedFromVec)
|
||||
return SDValue();
|
||||
|
||||
// Record the constant index.
|
||||
Mask |= 1U << cast<ConstantSDNode>(Idx)->getZExtValue();
|
||||
}
|
||||
|
||||
assert(VT.is128BitVector() && "Only 128-bit vector PTEST is supported so far.");
|
||||
|
||||
// Quit if not all elements are used.
|
||||
if (Mask != (1U << VT.getVectorNumElements()) - 1U)
|
||||
return SDValue();
|
||||
|
||||
return DAG.getNode(X86ISD::PTEST, DL, MVT::i32, VecIn, VecIn);
|
||||
}
|
||||
|
||||
/// Optimize X86ISD::CMOV [LHS, RHS, CONDCODE (e.g. X86::COND_NE), CONDVAL]
|
||||
static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
|
||||
TargetLowering::DAGCombinerInfo &DCI,
|
||||
@ -14321,14 +14343,6 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
|
||||
Ops, array_lengthof(Ops));
|
||||
}
|
||||
|
||||
Flags = checkFlaggedOrCombine(Cond, CC, DAG, Subtarget);
|
||||
if (Flags.getNode()) {
|
||||
SDValue Ops[] = { FalseOp, TrueOp,
|
||||
DAG.getConstant(CC, MVT::i8), Flags };
|
||||
return DAG.getNode(X86ISD::CMOV, DL, N->getVTList(),
|
||||
Ops, array_lengthof(Ops));
|
||||
}
|
||||
|
||||
// If this is a select between two integer constants, try to do some
|
||||
// optimizations. Note that the operands are ordered the opposite of SELECT
|
||||
// operands.
|
||||
@ -15860,12 +15874,6 @@ static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG,
|
||||
return DAG.getNode(X86ISD::SETCC, DL, N->getVTList(), Cond, Flags);
|
||||
}
|
||||
|
||||
Flags = checkFlaggedOrCombine(EFLAGS, CC, DAG, Subtarget);
|
||||
if (Flags.getNode()) {
|
||||
SDValue Cond = DAG.getConstant(CC, MVT::i8);
|
||||
return DAG.getNode(X86ISD::SETCC, DL, N->getVTList(), Cond, Flags);
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
@ -15889,13 +15897,6 @@ static SDValue PerformBrCondCombine(SDNode *N, SelectionDAG &DAG,
|
||||
Flags);
|
||||
}
|
||||
|
||||
Flags = checkFlaggedOrCombine(EFLAGS, CC, DAG, Subtarget);
|
||||
if (Flags.getNode()) {
|
||||
SDValue Cond = DAG.getConstant(CC, MVT::i8);
|
||||
return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), Chain, Dest, Cond,
|
||||
Flags);
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
|
@ -811,6 +811,8 @@ namespace llvm {
|
||||
SDValue LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
SDValue LowerVectorAllZeroTest(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
SDValue LowerVectorFpExtend(SDValue &Op, SelectionDAG &DAG) const;
|
||||
|
||||
virtual SDValue
|
||||
|
@ -1,7 +1,7 @@
|
||||
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse41,-avx < %s | FileCheck %s --check-prefix SSE41
|
||||
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s --check-prefix AVX
|
||||
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx,-avx2 < %s | FileCheck %s --check-prefix AVX
|
||||
|
||||
define i32 @veccond(<4 x i32> %input) {
|
||||
define i32 @veccond128(<4 x i32> %input) {
|
||||
entry:
|
||||
%0 = bitcast <4 x i32> %input to i128
|
||||
%1 = icmp ne i128 %0, 0
|
||||
@ -11,38 +11,145 @@ if-true-block: ; preds = %entry
|
||||
ret i32 0
|
||||
endif-block: ; preds = %entry,
|
||||
ret i32 1
|
||||
; SSE41: veccond
|
||||
; SSE41: veccond128
|
||||
; SSE41: ptest
|
||||
; SSE41: ret
|
||||
; AVX: veccond
|
||||
; AVX: vptest
|
||||
; AVX: veccond128
|
||||
; AVX: vptest %xmm{{.*}}, %xmm{{.*}}
|
||||
; AVX: ret
|
||||
}
|
||||
|
||||
define i32 @vectest(<4 x i32> %input) {
|
||||
define i32 @veccond256(<8 x i32> %input) {
|
||||
entry:
|
||||
%0 = bitcast <8 x i32> %input to i256
|
||||
%1 = icmp ne i256 %0, 0
|
||||
br i1 %1, label %if-true-block, label %endif-block
|
||||
|
||||
if-true-block: ; preds = %entry
|
||||
ret i32 0
|
||||
endif-block: ; preds = %entry,
|
||||
ret i32 1
|
||||
; SSE41: veccond256
|
||||
; SSE41: por
|
||||
; SSE41: ptest
|
||||
; SSE41: ret
|
||||
; AVX: veccond256
|
||||
; AVX: vptest %ymm{{.*}}, %ymm{{.*}}
|
||||
; AVX: ret
|
||||
}
|
||||
|
||||
define i32 @veccond512(<16 x i32> %input) {
|
||||
entry:
|
||||
%0 = bitcast <16 x i32> %input to i512
|
||||
%1 = icmp ne i512 %0, 0
|
||||
br i1 %1, label %if-true-block, label %endif-block
|
||||
|
||||
if-true-block: ; preds = %entry
|
||||
ret i32 0
|
||||
endif-block: ; preds = %entry,
|
||||
ret i32 1
|
||||
; SSE41: veccond512
|
||||
; SSE41: por
|
||||
; SSE41: por
|
||||
; SSE41: por
|
||||
; SSE41: ptest
|
||||
; SSE41: ret
|
||||
; AVX: veccond512
|
||||
; AVX: vorps
|
||||
; AVX: vptest %ymm{{.*}}, %ymm{{.*}}
|
||||
; AVX: ret
|
||||
}
|
||||
|
||||
define i32 @vectest128(<4 x i32> %input) {
|
||||
entry:
|
||||
%0 = bitcast <4 x i32> %input to i128
|
||||
%1 = icmp ne i128 %0, 0
|
||||
%2 = zext i1 %1 to i32
|
||||
ret i32 %2
|
||||
; SSE41: vectest
|
||||
; SSE41: vectest128
|
||||
; SSE41: ptest
|
||||
; SSE41: ret
|
||||
; AVX: vectest
|
||||
; AVX: vptest
|
||||
; AVX: vectest128
|
||||
; AVX: vptest %xmm{{.*}}, %xmm{{.*}}
|
||||
; AVX: ret
|
||||
}
|
||||
|
||||
define i32 @vecsel(<4 x i32> %input, i32 %a, i32 %b) {
|
||||
define i32 @vectest256(<8 x i32> %input) {
|
||||
entry:
|
||||
%0 = bitcast <8 x i32> %input to i256
|
||||
%1 = icmp ne i256 %0, 0
|
||||
%2 = zext i1 %1 to i32
|
||||
ret i32 %2
|
||||
; SSE41: vectest256
|
||||
; SSE41: por
|
||||
; SSE41: ptest
|
||||
; SSE41: ret
|
||||
; AVX: vectest256
|
||||
; AVX: vptest %ymm{{.*}}, %ymm{{.*}}
|
||||
; AVX: ret
|
||||
}
|
||||
|
||||
define i32 @vectest512(<16 x i32> %input) {
|
||||
entry:
|
||||
%0 = bitcast <16 x i32> %input to i512
|
||||
%1 = icmp ne i512 %0, 0
|
||||
%2 = zext i1 %1 to i32
|
||||
ret i32 %2
|
||||
; SSE41: vectest512
|
||||
; SSE41: por
|
||||
; SSE41: por
|
||||
; SSE41: por
|
||||
; SSE41: ptest
|
||||
; SSE41: ret
|
||||
; AVX: vectest512
|
||||
; AVX: vorps
|
||||
; AVX: vptest %ymm{{.*}}, %ymm{{.*}}
|
||||
; AVX: ret
|
||||
}
|
||||
|
||||
define i32 @vecsel128(<4 x i32> %input, i32 %a, i32 %b) {
|
||||
entry:
|
||||
%0 = bitcast <4 x i32> %input to i128
|
||||
%1 = icmp ne i128 %0, 0
|
||||
%2 = select i1 %1, i32 %a, i32 %b
|
||||
ret i32 %2
|
||||
; SSE41: vecsel
|
||||
; SSE41: vecsel128
|
||||
; SSE41: ptest
|
||||
; SSE41: ret
|
||||
; AVX: vecsel
|
||||
; AVX: vptest
|
||||
; AVX: vecsel128
|
||||
; AVX: vptest %xmm{{.*}}, %xmm{{.*}}
|
||||
; AVX: ret
|
||||
}
|
||||
|
||||
define i32 @vecsel256(<8 x i32> %input, i32 %a, i32 %b) {
|
||||
entry:
|
||||
%0 = bitcast <8 x i32> %input to i256
|
||||
%1 = icmp ne i256 %0, 0
|
||||
%2 = select i1 %1, i32 %a, i32 %b
|
||||
ret i32 %2
|
||||
; SSE41: vecsel256
|
||||
; SSE41: por
|
||||
; SSE41: ptest
|
||||
; SSE41: ret
|
||||
; AVX: vecsel256
|
||||
; AVX: vptest %ymm{{.*}}, %ymm{{.*}}
|
||||
; AVX: ret
|
||||
}
|
||||
|
||||
define i32 @vecsel512(<16 x i32> %input, i32 %a, i32 %b) {
|
||||
entry:
|
||||
%0 = bitcast <16 x i32> %input to i512
|
||||
%1 = icmp ne i512 %0, 0
|
||||
%2 = select i1 %1, i32 %a, i32 %b
|
||||
ret i32 %2
|
||||
; SSE41: vecsel512
|
||||
; SSE41: por
|
||||
; SSE41: por
|
||||
; SSE41: por
|
||||
; SSE41: ptest
|
||||
; SSE41: ret
|
||||
; AVX: vecsel512
|
||||
; AVX: vorps
|
||||
; AVX: vptest %ymm{{.*}}, %ymm{{.*}}
|
||||
; AVX: ret
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user