From 1a5cc710ee78bf55c3799b49c6fd37619d2dbb7b Mon Sep 17 00:00:00 2001 From: Michael Liao Date: Wed, 24 Oct 2012 04:14:18 +0000 Subject: [PATCH] Teach DAG combine to fold (buildvec (Xint2fp x)) to (Xint2fp (buildvec x)) - If more than 1 element is defined and the target supports the vectorized conversion, use the vectorized one instead to reduce the strength of the conversion operation. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@166546 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 71 ++++++++++++++++++++++++ test/CodeGen/X86/cvtv2f32.ll | 14 +++++ 2 files changed, 85 insertions(+) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index da4e1bad06a..8cc35bbdc24 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -271,6 +271,7 @@ namespace { SDValue ReduceLoadOpStoreWidth(SDNode *N); SDValue TransformFPLoadStorePair(SDNode *N); SDValue reduceBuildVecExtToExtBuildVec(SDNode *N); + SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N); SDValue GetDemandedBits(SDValue V, const APInt &Mask); @@ -8467,6 +8468,72 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { return DAG.getNode(ISD::BITCAST, dl, VT, BV); } +SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) { + EVT VT = N->getValueType(0); + + unsigned NumInScalars = N->getNumOperands(); + DebugLoc dl = N->getDebugLoc(); + + EVT SrcVT = MVT::Other; + unsigned Opcode = ISD::DELETED_NODE; + unsigned NumDefs = 0; + + for (unsigned i = 0; i != NumInScalars; ++i) { + SDValue In = N->getOperand(i); + unsigned Opc = In.getOpcode(); + + if (Opc == ISD::UNDEF) + continue; + + // If all scalar values are floats and converted from integers. + if (Opcode == ISD::DELETED_NODE && + (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) { + Opcode = Opc; + // If not supported by target, bail out. 
+ if (TLI.getOperationAction(Opcode, VT) != TargetLowering::Legal && + TLI.getOperationAction(Opcode, VT) != TargetLowering::Custom) + return SDValue(); + } + if (Opc != Opcode) + return SDValue(); + + EVT InVT = In.getOperand(0).getValueType(); + + // If the scalar values are not all converted from the same type, bail out. + // This restriction keeps the BUILD_VECTOR of integer types simple. + if (SrcVT == MVT::Other) + SrcVT = InVT; + if (SrcVT != InVT) + return SDValue(); + NumDefs++; + } + + // If the vector has just one element defined, it's not worth folding it into + // a vectorized one. + if (NumDefs < 2) + return SDValue(); + + assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP) + && "Should only handle conversion from integer to float."); + assert(SrcVT != MVT::Other && "Cannot determine source type!"); + + EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars); + SmallVector Opnds; + for (unsigned i = 0; i != NumInScalars; ++i) { + SDValue In = N->getOperand(i); + + if (In.getOpcode() == ISD::UNDEF) + Opnds.push_back(DAG.getUNDEF(SrcVT)); + else + Opnds.push_back(In.getOperand(0)); + } + SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, + &Opnds[0], Opnds.size()); + AddToWorkList(BV.getNode()); + + return DAG.getNode(Opcode, dl, VT, BV); +} + SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { unsigned NumInScalars = N->getNumOperands(); DebugLoc dl = N->getDebugLoc(); @@ -8480,6 +8547,10 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { if (V.getNode()) return V; + V = reduceBuildVecConvertToConvertBuildVec(N); + if (V.getNode()) + return V; + // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT // operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from // at most two distinct vectors, turn this into a shuffle node. 
diff --git a/test/CodeGen/X86/cvtv2f32.ll b/test/CodeGen/X86/cvtv2f32.ll index 5a27a489105..466b0960678 100644 --- a/test/CodeGen/X86/cvtv2f32.ll +++ b/test/CodeGen/X86/cvtv2f32.ll @@ -1,5 +1,19 @@ ; RUN: llc < %s -mtriple=i686-linux-pc -mcpu=corei7 | FileCheck %s +define <2 x float> @foo(i32 %x, i32 %y, <2 x float> %v) { + %t1 = uitofp i32 %x to float + %t2 = insertelement <2 x float> undef, float %t1, i32 0 + %t3 = uitofp i32 %y to float + %t4 = insertelement <2 x float> %t2, float %t3, i32 1 + %t5 = fmul <2 x float> %v, %t4 + ret <2 x float> %t5 +; CHECK: foo +; CHECK: or +; CHECK: subpd +; CHECK: cvtpd2ps +; CHECK: ret +} + define <2 x float> @bar(<2 x i32> %in) { %r = uitofp <2 x i32> %in to <2 x float> ret <2 x float> %r