mirror of
https://github.com/RPCSX/llvm.git
synced 2024-11-26 21:20:37 +00:00
Lower BUILD_VECTOR to SHUFFLE + INSERT_VECTOR_ELT for X86
- If INSERT_VECTOR_ELT is supported (above SSE2, either by a custom sequence or by a legal instruction), transform BUILD_VECTOR into SHUFFLE + INSERT_VECTOR_ELT when most of the elements can be built from a SHUFFLE and only a few (so far 1) elements need to be inserted. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@166288 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
239fd44f7a
commit
facace808c
@ -5175,6 +5175,80 @@ X86TargetLowering::LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const {
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
/// Try to lower the BUILD_VECTOR \p Op as a vector shuffle of at most two
/// source vectors, followed by INSERT_VECTOR_ELT nodes for the operands that
/// do not come from an extract.
///
/// Every operand of \p Op is classified as one of:
///  - UNDEF: its lane stays undefined (-1) in the shuffle mask;
///  - EXTRACT_VECTOR_ELT with a constant, in-range index from a vector whose
///    type equals the result type: its lane is routed through the shuffle;
///  - anything else: the operand is queued and inserted after the shuffle.
///
/// \returns the replacement node, or an empty SDValue when the pattern does
/// not apply: INSERT_VECTOR_ELT is neither legal nor custom for the type, a
/// third operand would need inserting, an extract reads from a vector of a
/// different type or uses a non-constant index, more than two distinct source
/// vectors are involved, or no operand can be taken from a shuffle at all.
SDValue
X86TargetLowering::buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();

  // Skip if insert_vec_elt is not supported.
  if (!isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT))
    return SDValue();

  DebugLoc DL = Op.getDebugLoc();
  unsigned NumElems = Op.getNumOperands();

  SDValue VecIn1;
  SDValue VecIn2;
  SmallVector<unsigned, 4> InsertIndices;
  // -1 marks a lane the shuffle leaves undefined; lanes that are inserted
  // later keep this value as well.
  SmallVector<int, 8> Mask(NumElems, -1);

  for (unsigned i = 0; i != NumElems; ++i) {
    unsigned Opc = Op.getOperand(i).getOpcode();

    if (Opc == ISD::UNDEF)
      continue;

    if (Opc != ISD::EXTRACT_VECTOR_ELT) {
      // The test runs before the push_back, so up to two operands may be
      // queued; quit when a third one would need inserting.
      if (InsertIndices.size() > 1)
        return SDValue();

      InsertIndices.push_back(i);
      continue;
    }

    SDValue ExtractedFromVec = Op.getOperand(i).getOperand(0);
    SDValue ExtIdx = Op.getOperand(i).getOperand(1);

    // Quit if extracted from vector of different type.
    if (ExtractedFromVec.getValueType() != VT)
      return SDValue();

    // Quit if non-constant index.
    if (!isa<ConstantSDNode>(ExtIdx))
      return SDValue();

    // Keep the full 64-bit value so that a huge index cannot wrap around to a
    // valid-looking lane number.
    uint64_t Idx = cast<ConstantSDNode>(ExtIdx)->getZExtValue();

    // The source vector has the same type as the result, hence NumElems
    // lanes. An index beyond that cannot be encoded in the shuffle mask
    // without either selecting a lane of the other operand or producing an
    // invalid mask entry. Such an extract has no defined value, so leave the
    // lane undefined and do not spend a shuffle operand on it.
    if (Idx >= NumElems)
      continue;

    if (VecIn1.getNode() == 0)
      VecIn1 = ExtractedFromVec;
    else if (VecIn1 != ExtractedFromVec) {
      if (VecIn2.getNode() == 0)
        VecIn2 = ExtractedFromVec;
      else if (VecIn2 != ExtractedFromVec)
        // Quit if more than 2 vectors to shuffle
        return SDValue();
    }

    // Lanes of the first operand are numbered [0, NumElems), lanes of the
    // second operand [NumElems, 2 * NumElems).
    if (ExtractedFromVec == VecIn1)
      Mask[i] = static_cast<int>(Idx);
    else if (ExtractedFromVec == VecIn2)
      Mask[i] = static_cast<int>(Idx + NumElems);
  }

  // Without any usable extract there is nothing to shuffle.
  if (VecIn1.getNode() == 0)
    return SDValue();

  // A single-source shuffle uses UNDEF as its second operand.
  VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
  SDValue NV = DAG.getVectorShuffle(VT, DL, VecIn1, VecIn2, &Mask[0]);

  // Put the remaining (non-extract) operands into their lanes.
  for (unsigned i = 0, e = InsertIndices.size(); i != e; ++i) {
    unsigned Idx = InsertIndices[i];
    NV = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, NV, Op.getOperand(Idx),
                     DAG.getIntPtrConstant(Idx));
  }

  return NV;
}
|
||||
|
||||
SDValue
|
||||
X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
|
||||
DebugLoc dl = Op.getDebugLoc();
|
||||
@ -5451,6 +5525,11 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
|
||||
if (LD.getNode())
|
||||
return LD;
|
||||
|
||||
// Check for a build vector from mostly shuffle plus few inserting.
|
||||
SDValue Sh = buildFromShuffleMostly(Op, DAG);
|
||||
if (Sh.getNode())
|
||||
return Sh;
|
||||
|
||||
// For SSE 4.1, use insertps to put the high elements into the low element.
|
||||
if (getSubtarget()->hasSSE41()) {
|
||||
SDValue Result;
|
||||
|
@ -825,9 +825,10 @@ namespace llvm {
|
||||
|
||||
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
// Utility functions to help LowerVECTOR_SHUFFLE
|
||||
// Utility functions to help LowerVECTOR_SHUFFLE & LowerBUILD_VECTOR
|
||||
SDValue LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const;
|
||||
// Tries to lower a BUILD_VECTOR whose operands are mostly constant-index
// EXTRACT_VECTOR_ELTs from at most two vectors of the result type into a
// vector shuffle plus INSERT_VECTOR_ELT nodes for the remaining operands.
// Returns an empty SDValue when the pattern does not apply.
SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
SDValue LowerVectorAllZeroTest(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
|
15
test/CodeGen/X86/buildvec-insertvec.ll
Normal file
15
test/CodeGen/X86/buildvec-insertvec.ll
Normal file
@ -0,0 +1,15 @@
|
||||
; RUN: llc < %s -mcpu=corei7 -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
|
||||
|
||||
; Converts a <3 x float> to <3 x i8>, widens it to <4 x i8> with an undef
; last lane, overwrites lane 3 with the constant -1 and stores the result.
; The FileCheck lines below expect the conversion (cvttps2dq), then a pinsrd
; immediately followed by a pshufb, with no pextrd between the conversion and
; the pinsrd.
define void @foo(<3 x float> %in, <4 x i8>* nocapture %out) nounwind {
|
||||
%t0 = fptoui <3 x float> %in to <3 x i8>
|
||||
%t1 = shufflevector <3 x i8> %t0, <3 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
|
||||
%t2 = insertelement <4 x i8> %t1, i8 -1, i32 3
|
||||
store <4 x i8> %t2, <4 x i8>* %out, align 4
|
||||
ret void
|
||||
; CHECK: foo
|
||||
; CHECK: cvttps2dq
|
||||
; CHECK-NOT: pextrd
|
||||
; CHECK: pinsrd
|
||||
; CHECK-NEXT: pshufb
|
||||
; CHECK: ret
|
||||
|
||||
}
|
Loading…
Reference in New Issue
Block a user