Teach DAG combine to fold (buildvec (Xint2fp x)) to (Xint2fp (buildvec x))

- If more than one element is defined and the target supports the vectorized conversion, use the vectorized one instead to reduce the strength of the conversion operation. llvm-svn: 166546
2024-12-16 08:08:01 +00:00 · 2012-10-24 04:14:18 +00:00 · 2012-10-24 04:14:18 +00:00 · 18e40965aa
commit 18e40965aa
parent 70bb8004bd
2 changed files with 85 additions and 0 deletions
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@ -271,6 +271,7 @@ namespace {
    SDValue ReduceLoadOpStoreWidth(SDNode *N);
    SDValue TransformFPLoadStorePair(SDNode *N);
    SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
+    SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);

    SDValue GetDemandedBits(SDValue V, const APInt &Mask);

@ -8467,6 +8468,72 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
  return DAG.getNode(ISD::BITCAST, dl, VT, BV);
 }

+/// Try to fold a BUILD_VECTOR of scalar integer-to-float conversions,
+///   (build_vector (Xint2fp x0), (Xint2fp x1), ...)
+/// into a single conversion of a BUILD_VECTOR of the integer sources,
+///   (Xint2fp (build_vector x0, x1, ...))
+/// where Xint2fp is ISD::UINT_TO_FP or ISD::SINT_TO_FP.
+///
+/// The fold is only performed when every non-UNDEF operand of N uses the same
+/// conversion opcode, every converted source has the same value type, at
+/// least two operands are defined, and the target reports the conversion as
+/// Legal or Custom for N's result type.
+///
+/// \param N the node being combined (visitBUILD_VECTOR passes its own node).
+/// \returns the new conversion node, or an empty SDValue if the fold does not
+/// apply.
+SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
+  EVT VT = N->getValueType(0);
+
+  unsigned NumInScalars = N->getNumOperands();
+  DebugLoc dl = N->getDebugLoc();
+
+  // State gathered while scanning the operands: the common source type of the
+  // conversions, the common conversion opcode, and the number of non-UNDEF
+  // operands. MVT::Other / ISD::DELETED_NODE serve as "not yet seen" markers.
+  EVT SrcVT = MVT::Other;
+  unsigned Opcode = ISD::DELETED_NODE;
+  unsigned NumDefs = 0;
+
+  for (unsigned i = 0; i != NumInScalars; ++i) {
+    SDValue In = N->getOperand(i);
+    unsigned Opc = In.getOpcode();
+
+    // UNDEF operands place no constraint on the fold; skip them.
+    if (Opc == ISD::UNDEF)
+      continue;
+
+    // The first defined operand that is an integer-to-float conversion
+    // selects the opcode every other defined operand has to match.
+    if (Opcode == ISD::DELETED_NODE &&
+        (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
+      Opcode = Opc;
+      // If not supported by target, bail out.
+      // NOTE(review): the action is queried with the result vector type VT;
+      // confirm whether the target keys int-to-fp actions on the source
+      // vector type instead.
+      if (TLI.getOperationAction(Opcode, VT) != TargetLowering::Legal &&
+          TLI.getOperationAction(Opcode, VT) != TargetLowering::Custom)
+        return SDValue();
+    }
+    // Reject any defined operand whose opcode differs from the selected one.
+    // Opcode is still ISD::DELETED_NODE if the first defined operand was not
+    // an int-to-fp conversion, so that case is rejected here as well
+    // (assuming a live operand never has opcode DELETED_NODE).
+    if (Opc != Opcode)
+      return SDValue();
+
+    EVT InVT = In.getOperand(0).getValueType();
+
+    // All conversion sources must share one value type; bail out if they do
+    // not. This restriction is chosen to keep the BUILD_VECTOR of integer
+    // sources simple.
+    if (SrcVT == MVT::Other)
+      SrcVT = InVT;
+    if (SrcVT != InVT)
+      return SDValue();
+    NumDefs++;
+  }
+
+  // If the vector has fewer than two elements defined, it is not worth
+  // folding it into a vectorized conversion.
+  if (NumDefs < 2)
+    return SDValue();
+
+  assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP)
+         && "Should only handle conversion from integer to float.");
+  assert(SrcVT != MVT::Other && "Cannot determine source type!");
+
+  // Build the source vector: same element count as N, element type SrcVT.
+  // UNDEF operands of N become UNDEF values of type SrcVT; every other
+  // operand is replaced by the value it was converted from.
+  EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);
+  SmallVector<SDValue, 8> Opnds;
+  for (unsigned i = 0; i != NumInScalars; ++i) {
+    SDValue In = N->getOperand(i);
+
+    if (In.getOpcode() == ISD::UNDEF)
+      Opnds.push_back(DAG.getUNDEF(SrcVT));
+    else
+      Opnds.push_back(In.getOperand(0));
+  }
+  SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT,
+                           &Opnds[0], Opnds.size());
+  // Add the newly created BUILD_VECTOR node to the combiner worklist.
+  AddToWorkList(BV.getNode());
+
+  // Apply the common conversion once to the whole source vector, producing
+  // a value of N's original type.
+  return DAG.getNode(Opcode, dl, VT, BV);
+}
+
 SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
  unsigned NumInScalars = N->getNumOperands();
  DebugLoc dl = N->getDebugLoc();
@ -8480,6 +8547,10 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
  if (V.getNode())
    return V;

+  V = reduceBuildVecConvertToConvertBuildVec(N);
+  if (V.getNode())
+    return V;
+
  // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
  // operations.  If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
  // at most two distinct vectors, turn this into a shuffle node.
--- a/test/CodeGen/X86/cvtv2f32.ll
+++ b/test/CodeGen/X86/cvtv2f32.ll
@ -1,5 +1,19 @@
 ; RUN: llc < %s -mtriple=i686-linux-pc -mcpu=corei7 | FileCheck %s

+; Two i32 scalars are converted with scalar uitofp and inserted into a
+; <2 x float> vector, which is then multiplied by %v. The CHECK lines expect
+; an or / subpd / cvtpd2ps sequence before the return (presumably the lowering
+; of a single vectorized uitofp -- confirm against llc output).
+define <2 x float> @foo(i32 %x, i32 %y, <2 x float> %v) {
+  %t1 = uitofp i32 %x to float
+  %t2 = insertelement <2 x float> undef, float %t1, i32 0
+  %t3 = uitofp i32 %y to float
+  %t4 = insertelement <2 x float> %t2, float %t3, i32 1
+  %t5 = fmul <2 x float> %v, %t4
+  ret <2 x float> %t5
+; CHECK: foo
+; CHECK: or
+; CHECK: subpd
+; CHECK: cvtpd2ps
+; CHECK: ret
+}
+
 define <2 x float> @bar(<2 x i32> %in) {
  %r = uitofp <2 x i32> %in to <2 x float>
  ret <2 x float> %r