[x86] Hoist a function up to the rest of the non-type-specific lowering

helpers, and re-flow the logic to use early exit and be a bit more readable. No functionality changed. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@218155 91177308-0d34-0410-b5e6-96231b3b80d8
2024-12-26 14:15:53 +00:00 · 2014-09-19 21:52:10 +00:00 · 2014-09-19 21:52:10 +00:00 · f7ca3552ff
commit f7ca3552ff
parent 73f6823621
1 changed files with 74 additions and 75 deletions
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@ -7549,6 +7549,80 @@ static SDValue lowerVectorShuffleAsZeroOrAnyExtend(
  return SDValue();
 }

+/// \brief Try to lower insertion of a single element into a zero vector.
+///
+/// This is a common pattern that we have especially efficient patterns to lower
+/// across all subtarget feature sets.
+static SDValue lowerIntegerElementInsertionVectorShuffle(
+    MVT VT, SDLoc DL, SDValue V1, SDValue V2, ArrayRef<int> Mask,
+    const X86Subtarget *Subtarget, SelectionDAG &DAG) {
+  int V2Index = std::find_if(Mask.begin(), Mask.end(),
+                             [&Mask](int M) { return M >= (int)Mask.size(); }) -
+                Mask.begin();
+
+  // Check for a single input from a SCALAR_TO_VECTOR node.
+  // FIXME: All of this should be canonicalized into INSERT_VECTOR_ELT and
+  // all the smarts here sunk into that routine. However, the current
+  // lowering of BUILD_VECTOR makes that nearly impossible until the old
+  // vector shuffle lowering is dead.
+  if (!((V2.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+         Mask[V2Index] == (int)Mask.size()) ||
+        V2.getOpcode() == ISD::BUILD_VECTOR))
+    return SDValue();
+
+  SDValue V2S = V2.getOperand(Mask[V2Index] - Mask.size());
+
+  if (V1.getOpcode() == ISD::BUILD_VECTOR) {
+    for (int M : Mask) {
+      if (M < 0 || M >= (int)Mask.size())
+        continue;
+      SDValue Input = V1.getOperand(M);
+      if (Input.getOpcode() != ISD::UNDEF && !X86::isZeroNode(Input))
+        // A non-zero input!
+        return SDValue();
+    }
+  } else if (!ISD::isBuildVectorAllZeros(V1.getNode())) {
+    return SDValue();
+  }
+
+  // First, we need to zext the scalar if it is smaller than an i32.
+  MVT EltVT = VT.getVectorElementType();
+  assert(EltVT == V2S.getSimpleValueType() &&
+         "Different scalar and element types!");
+  MVT ExtVT = VT;
+  if (EltVT == MVT::i8 || EltVT == MVT::i16) {
+    // Zero-extend directly to i32.
+    ExtVT = MVT::v4i32;
+    V2S = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, V2S);
+  }
+
+  V2 = DAG.getNode(X86ISD::VZEXT_MOVL, DL, ExtVT,
+                   DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ExtVT, V2S));
+  if (ExtVT != VT)
+    V2 = DAG.getNode(ISD::BITCAST, DL, VT, V2);
+
+  if (V2Index != 0) {
+    // If we have 4 or fewer lanes we can cheaply shuffle the element into
+    // the desired position. Otherwise it is more efficient to do a vector
+    // shift left. We know that we can do a vector shift left because all
+    // the inputs are zero.
+    if (VT.isFloatingPoint() || VT.getVectorNumElements() <= 4) {
+      SmallVector<int, 4> V2Shuffle(Mask.size(), 1);
+      V2Shuffle[V2Index] = 0;
+      V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Shuffle);
+    } else {
+      V2 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, V2);
+      V2 = DAG.getNode(
+          X86ISD::VSHLDQ, DL, MVT::v2i64, V2,
+          DAG.getConstant(
+              V2Index * EltVT.getSizeInBits(),
+              DAG.getTargetLoweringInfo().getScalarShiftAmountTy(MVT::v2i64)));
+      V2 = DAG.getNode(ISD::BITCAST, DL, VT, V2);
+    }
+  }
+  return V2;
+}
+
 /// \brief Handle lowering of 2-lane 64-bit floating point shuffles.
 ///
 /// This is the basis function for the 2-lane 64-bit shuffles as we have full
@ -7807,81 +7881,6 @@ static SDValue lowerV4F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
                     getV4X86ShuffleImm8ForMask(NewMask, DAG));
 }

-static SDValue lowerIntegerElementInsertionVectorShuffle(
-    MVT VT, SDLoc DL, SDValue V1, SDValue V2, ArrayRef<int> Mask,
-    const X86Subtarget *Subtarget, SelectionDAG &DAG) {
-  int V2Index = std::find_if(Mask.begin(), Mask.end(),
-                             [&Mask](int M) { return M >= (int)Mask.size(); }) -
-                Mask.begin();
-
-  // Check for a single input from a SCALAR_TO_VECTOR node.
-  // FIXME: All of this should be canonicalized into INSERT_VECTOR_ELT and
-  // all the smarts here sunk into that routine. However, the current
-  // lowering of BUILD_VECTOR makes that nearly impossible until the old
-  // vector shuffle lowering is dead.
-  if ((Mask[V2Index] == (int)Mask.size() &&
-       V2.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
-      V2.getOpcode() == ISD::BUILD_VECTOR) {
-    SDValue V2S = V2.getOperand(Mask[V2Index] - Mask.size());
-
-    bool V1IsAllZero = false;
-    if (ISD::isBuildVectorAllZeros(V1.getNode())) {
-      V1IsAllZero = true;
-    } else if (V1.getOpcode() == ISD::BUILD_VECTOR) {
-      V1IsAllZero = true;
-      for (int M : Mask) {
-        if (M < 0 || M >= (int)Mask.size())
-          continue;
-        SDValue Input = V1.getOperand(M);
-        if (Input.getOpcode() != ISD::UNDEF && !X86::isZeroNode(Input)) {
-          // A non-zero input!
-          V1IsAllZero = false;
-          break;
-        }
-      }
-    }
-    if (V1IsAllZero) {
-      // First, we need to zext the scalar if it is smaller than an i32.
-      MVT EltVT = VT.getVectorElementType();
-      assert(EltVT == V2S.getSimpleValueType() &&
-             "Different scalar and element types!");
-      MVT ExtVT = VT;
-      if (EltVT == MVT::i8 || EltVT == MVT::i16) {
-        // Zero-extend directly to i32.
-        ExtVT = MVT::v4i32;
-        V2S = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, V2S);
-      }
-
-      V2 = DAG.getNode(X86ISD::VZEXT_MOVL, DL, ExtVT,
-                       DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ExtVT, V2S));
-      if (ExtVT != VT)
-        V2 = DAG.getNode(ISD::BITCAST, DL, VT, V2);
-
-      if (V2Index != 0) {
-        // If we have 4 or fewer lanes we can cheaply shuffle the element into
-        // the desired position. Otherwise it is more efficient to do a vector
-        // shift left. We know that we can do a vector shift left because all
-        // the inputs are zero.
-        if (VT.getVectorNumElements() <= 4) {
-          SmallVector<int, 4> V2Shuffle(Mask.size(), 1);
-          V2Shuffle[V2Index] = 0;
-          V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Shuffle);
-        } else {
-          V2 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, V2);
-          V2 = DAG.getNode(
-              X86ISD::VSHLDQ, DL, MVT::v2i64, V2,
-              DAG.getConstant(
-                  V2Index * EltVT.getSizeInBits(),
-                  DAG.getTargetLoweringInfo().getScalarShiftAmountTy(MVT::v2i64)));
-          V2 = DAG.getNode(ISD::BITCAST, DL, VT, V2);
-        }
-      }
-      return V2;
-    }
-  }
-  return SDValue();
-}
-
 /// \brief Lower 4-lane i32 vector shuffles.
 ///
 /// We try to handle these with integer-domain shuffles where we can, but for