Fix a crash in AVX2 when trying to broadcast a double into a 128-bit vector. There is no vbroadcastsd xmm, but we do need to support 64-bit integers broadcast into xmm. Also factor the AVX check into the isVectorBroadcast function; this makes more sense since the AVX2 check was already inside it.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@147844 91177308-0d34-0410-b5e6-96231b3b80d8
2024-12-23 12:40:17 +00:00 · 2012-01-10 08:23:59 +00:00 · 2012-01-10 08:23:59 +00:00 · a937633893
commit a937633893
parent 1accb7ed98
3 changed files with 40 additions and 18 deletions
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@ -4963,7 +4963,10 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
 /// a scalar load.
 /// The scalar load node is returned when a pattern is found,
 /// or SDValue() otherwise.
-static SDValue isVectorBroadcast(SDValue &Op, bool hasAVX2) {
+static SDValue isVectorBroadcast(SDValue &Op, const X86Subtarget *Subtarget) {
  if (!Subtarget->hasAVX())
    return SDValue();
  EVT VT = Op.getValueType();
  SDValue V = Op;
@ -5022,18 +5025,6 @@ static SDValue isVectorBroadcast(SDValue &Op, bool hasAVX2) {
  bool Is128 = VT.getSizeInBits() == 128;
  unsigned ScalarSize = Ld.getValueType().getSizeInBits();
  if (hasAVX2) {
    // VBroadcast to YMM
    if (Is256 && (ScalarSize == 8  || ScalarSize == 16 ||
                  ScalarSize == 32 || ScalarSize == 64 ))
      return Ld;
    // VBroadcast to XMM
    if (Is128 && (ScalarSize ==  8 || ScalarSize == 32 ||
                  ScalarSize == 16 || ScalarSize == 64 ))
      return Ld;
  }
  // VBroadcast to YMM
  if (Is256 && (ScalarSize == 32 || ScalarSize == 64))
    return Ld;
@ -5042,6 +5033,17 @@ static SDValue isVectorBroadcast(SDValue &Op, bool hasAVX2) {
  if (Is128 && (ScalarSize == 32))
    return Ld;
  // The integer check is needed for the 64-bit into 128-bit case so it doesn't
  // match double, since there is no vbroadcastsd xmm.
  if (Subtarget->hasAVX2() && Ld.getValueType().isInteger()) {
    // VBroadcast to YMM
    if (Is256 && (ScalarSize == 8 || ScalarSize == 16))
      return Ld;
    // VBroadcast to XMM
    if (Is128 && (ScalarSize ==  8 || ScalarSize == 16 || ScalarSize == 64))
      return Ld;
  }
  // Unsupported broadcast.
  return SDValue();
@ -5077,9 +5079,9 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
    return getOnesVector(Op.getValueType(), Subtarget->hasAVX2(), DAG, dl);
  }
-  SDValue LD = isVectorBroadcast(Op, Subtarget->hasAVX2());
+  SDValue LD = isVectorBroadcast(Op, Subtarget);
-  if (Subtarget->hasAVX() && LD.getNode())
+  if (LD.getNode())
-      return DAG.getNode(X86ISD::VBROADCAST, dl, VT, LD);
+    return DAG.getNode(X86ISD::VBROADCAST, dl, VT, LD);
  unsigned EVTBits = ExtVT.getSizeInBits();
@ -6416,8 +6418,8 @@ SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG,
      return Op;
    // Use vbroadcast whenever the splat comes from a foldable load
-    SDValue LD = isVectorBroadcast(Op, Subtarget->hasAVX2());
+    SDValue LD = isVectorBroadcast(Op, Subtarget);
-    if (Subtarget->hasAVX() && LD.getNode())
+    if (LD.getNode())
      return DAG.getNode(X86ISD::VBROADCAST, dl, VT, LD);
    // Handle splats by matching through known shuffle masks
--- a/test/CodeGen/X86/avx-vbroadcast.ll
+++ b/test/CodeGen/X86/avx-vbroadcast.ll
@ -89,3 +89,13 @@ define <4 x i32> @H(<4 x i32> %a) {
  ret <4 x i32> %x
 }
 ; CHECK: _I
 ; CHECK-NOT: vbroadcastsd (%
 ; CHECK: ret
 define <2 x double> @I(double* %ptr) nounwind uwtable readnone ssp {
 entry:
  %q = load double* %ptr, align 4
  %vecinit.i = insertelement <2 x double> undef, double %q, i32 0
  %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
  ret <2 x double> %vecinit2.i
 }
--- a/test/CodeGen/X86/avx2-vbroadcast.ll
+++ b/test/CodeGen/X86/avx2-vbroadcast.ll
@ -140,3 +140,13 @@ entry:
  %q3 = insertelement <4 x i64> %q2, i64 %q, i32 3
  ret <4 x i64> %q3
 }
 ; Make sure that we still don't support broadcasting a double into a 128-bit
 ; vector; this used to crash.
 define <2 x double> @I(double* %ptr) nounwind uwtable readnone ssp {
 entry:
  %q = load double* %ptr, align 4
  %vecinit.i = insertelement <2 x double> undef, double %q, i32 0
  %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
  ret <2 x double> %vecinit2.i
 }