Lower BUILD_VECTOR to SHUFFLE + INSERT_VECTOR_ELT for X86

- If INSERT_VECTOR_ELT is supported (above SSE2, either by custom
  sequence of legal insn), transform BUILD_VECTOR into SHUFFLE +
  INSERT_VECTOR_ELT if most of elements could be built from SHUFFLE with few
  (so far 1) elements being inserted.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@166288 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Michael Liao 2012-10-19 17:15:18 +00:00
parent 239fd44f7a
commit facace808c
3 changed files with 96 additions and 1 deletions

View File

@ -5175,6 +5175,80 @@ X86TargetLowering::LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const {
return SDValue(); return SDValue();
} }
SDValue
X86TargetLowering::buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
// Skip if insert_vec_elt is not supported.
if (!isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT))
return SDValue();
DebugLoc DL = Op.getDebugLoc();
unsigned NumElems = Op.getNumOperands();
SDValue VecIn1;
SDValue VecIn2;
SmallVector<unsigned, 4> InsertIndices;
SmallVector<int, 8> Mask(NumElems, -1);
for (unsigned i = 0; i != NumElems; ++i) {
unsigned Opc = Op.getOperand(i).getOpcode();
if (Opc == ISD::UNDEF)
continue;
if (Opc != ISD::EXTRACT_VECTOR_ELT) {
// Quit if more than 1 elements need inserting.
if (InsertIndices.size() > 1)
return SDValue();
InsertIndices.push_back(i);
continue;
}
SDValue ExtractedFromVec = Op.getOperand(i).getOperand(0);
SDValue ExtIdx = Op.getOperand(i).getOperand(1);
// Quit if extracted from vector of different type.
if (ExtractedFromVec.getValueType() != VT)
return SDValue();
// Quit if non-constant index.
if (!isa<ConstantSDNode>(ExtIdx))
return SDValue();
if (VecIn1.getNode() == 0)
VecIn1 = ExtractedFromVec;
else if (VecIn1 != ExtractedFromVec) {
if (VecIn2.getNode() == 0)
VecIn2 = ExtractedFromVec;
else if (VecIn2 != ExtractedFromVec)
// Quit if more than 2 vectors to shuffle
return SDValue();
}
unsigned Idx = cast<ConstantSDNode>(ExtIdx)->getZExtValue();
if (ExtractedFromVec == VecIn1)
Mask[i] = Idx;
else if (ExtractedFromVec == VecIn2)
Mask[i] = Idx + NumElems;
}
if (VecIn1.getNode() == 0)
return SDValue();
VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
SDValue NV = DAG.getVectorShuffle(VT, DL, VecIn1, VecIn2, &Mask[0]);
for (unsigned i = 0, e = InsertIndices.size(); i != e; ++i) {
unsigned Idx = InsertIndices[i];
NV = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, NV, Op.getOperand(Idx),
DAG.getIntPtrConstant(Idx));
}
return NV;
}
SDValue SDValue
X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc(); DebugLoc dl = Op.getDebugLoc();
@ -5451,6 +5525,11 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (LD.getNode()) if (LD.getNode())
return LD; return LD;
// Check for a build vector from mostly shuffle plus few inserting.
SDValue Sh = buildFromShuffleMostly(Op, DAG);
if (Sh.getNode())
return Sh;
// For SSE 4.1, use insertps to put the high elements into the low element. // For SSE 4.1, use insertps to put the high elements into the low element.
if (getSubtarget()->hasSSE41()) { if (getSubtarget()->hasSSE41()) {
SDValue Result; SDValue Result;

View File

@ -825,9 +825,10 @@ namespace llvm {
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
// Utility functions to help LowerVECTOR_SHUFFLE // Utility functions to help LowerVECTOR_SHUFFLE & LowerBUILD_VECTOR
SDValue LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const;
SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const; SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const;
SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorAllZeroTest(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVectorAllZeroTest(SDValue Op, SelectionDAG &DAG) const;

View File

@ -0,0 +1,15 @@
; RUN: llc < %s -mcpu=corei7 -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
define void @foo(<3 x float> %in, <4 x i8>* nocapture %out) nounwind {
%t0 = fptoui <3 x float> %in to <3 x i8>
%t1 = shufflevector <3 x i8> %t0, <3 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
%t2 = insertelement <4 x i8> %t1, i8 -1, i32 3
store <4 x i8> %t2, <4 x i8>* %out, align 4
ret void
; CHECK: foo
; CHECK: cvttps2dq
; CHECK-NOT: pextrd
; CHECK: pinsrd
; CHECK-NEXT: pshufb
; CHECK: ret
}