[CostModel] Fixed AVX integer shift costs

Targets with AVX but without AVX2 were incorrectly reporting costs of 256-bit integer shifts. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@250611 91177308-0d34-0410-b5e6-96231b3b80d8
2024-12-03 01:12:59 +00:00 · 2015-10-17 13:23:38 +00:00 · 2015-10-17 13:23:38 +00:00 · 913c649b16
commit 913c649b16
parent 1157bfddb9
4 changed files with 70 additions and 46 deletions
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@ -248,19 +248,31 @@ int X86TTIImpl::getArithmeticInstrCost(
    // custom.
    // Constant splats are cheaper for the following instructions.
    { ISD::SHL,  MVT::v16i8,  1 }, // psllw.
+    { ISD::SHL,  MVT::v32i8,  2 }, // psllw.
    { ISD::SHL,  MVT::v8i16,  1 }, // psllw.
+    { ISD::SHL,  MVT::v16i16, 2 }, // psllw.
    { ISD::SHL,  MVT::v4i32,  1 }, // pslld
+    { ISD::SHL,  MVT::v8i32,  2 }, // pslld
    { ISD::SHL,  MVT::v2i64,  1 }, // psllq.
+    { ISD::SHL,  MVT::v4i64,  2 }, // psllq.

    { ISD::SRL,  MVT::v16i8,  1 }, // psrlw.
+    { ISD::SRL,  MVT::v32i8,  2 }, // psrlw.
    { ISD::SRL,  MVT::v8i16,  1 }, // psrlw.
+    { ISD::SRL,  MVT::v16i16, 2 }, // psrlw.
    { ISD::SRL,  MVT::v4i32,  1 }, // psrld.
+    { ISD::SRL,  MVT::v8i32,  2 }, // psrld.
    { ISD::SRL,  MVT::v2i64,  1 }, // psrlq.
+    { ISD::SRL,  MVT::v4i64,  2 }, // psrlq.

    { ISD::SRA,  MVT::v16i8,  4 }, // psrlw, pand, pxor, psubb.
+    { ISD::SRA,  MVT::v32i8,  8 }, // psrlw, pand, pxor, psubb.
    { ISD::SRA,  MVT::v8i16,  1 }, // psraw.
+    { ISD::SRA,  MVT::v16i16, 2 }, // psraw.
    { ISD::SRA,  MVT::v4i32,  1 }, // psrad.
+    { ISD::SRA,  MVT::v8i32,  2 }, // psrad.
    { ISD::SRA,  MVT::v2i64,  4 }, // 2 x psrad + shuffle.
+    { ISD::SRA,  MVT::v4i64,  8 }, // 2 x psrad + shuffle.

    { ISD::SDIV, MVT::v8i16,  6 }, // pmulhw sequence
    { ISD::UDIV, MVT::v8i16,  6 }, // pmulhuw sequence
@ -282,15 +294,22 @@ int X86TTIImpl::getArithmeticInstrCost(
  if (ISD == ISD::SHL &&
      Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) {
    EVT VT = LT.second;
+    // Vector shift left by non uniform constant can be lowered
+    // into vector multiply (pmullw/pmulld).
    if ((VT == MVT::v8i16 && ST->hasSSE2()) ||
        (VT == MVT::v4i32 && ST->hasSSE41()))
-      // Vector shift left by non uniform constant can be lowered
-      // into vector multiply (pmullw/pmulld).
      return LT.first;
+
+    // v16i16 and v8i32 shifts by non-uniform constants are lowered into a
+    // sequence of extract + two vector multiply + insert.
+    if ((VT == MVT::v8i32 || VT == MVT::v16i16) &&
+       (ST->hasAVX() && !ST->hasAVX2()))
+      ISD = ISD::MUL;
+
+    // A vector shift left by non uniform constant is converted
+    // into a vector multiply; the new multiply is eventually
+    // lowered into a sequence of shuffles and 2 x pmuludq.
    if (VT == MVT::v4i32 && ST->hasSSE2())
-      // A vector shift left by non uniform constant is converted
-      // into a vector multiply; the new multiply is eventually
-      // lowered into a sequence of shuffles and 2 x pmuludq.
      ISD = ISD::MUL;
  }

@ -304,20 +323,31 @@ int X86TTIImpl::getArithmeticInstrCost(
    // used for vectorization and we don't want to make vectorized code worse
    // than scalar code.
    { ISD::SHL,  MVT::v16i8,    26 }, // cmpgtb sequence.
+    { ISD::SHL,  MVT::v32i8,  2*26 }, // cmpgtb sequence.
    { ISD::SHL,  MVT::v8i16,    32 }, // cmpgtb sequence.
+    { ISD::SHL,  MVT::v16i16, 2*32 }, // cmpgtb sequence.
    { ISD::SHL,  MVT::v4i32,   2*5 }, // We optimized this using mul.
+    { ISD::SHL,  MVT::v8i32, 2*2*5 }, // We optimized this using mul.
    { ISD::SHL,  MVT::v2i64,     4 }, // splat+shuffle sequence.
-    { ISD::SHL,  MVT::v4i64,     8 }, // splat+shuffle sequence.
+    { ISD::SHL,  MVT::v4i64,   2*4 }, // splat+shuffle sequence.

    { ISD::SRL,  MVT::v16i8,    26 }, // cmpgtb sequence.
+    { ISD::SRL,  MVT::v32i8,  2*26 }, // cmpgtb sequence.
    { ISD::SRL,  MVT::v8i16,    32 }, // cmpgtb sequence.
+    { ISD::SRL,  MVT::v16i16, 2*32 }, // cmpgtb sequence.
    { ISD::SRL,  MVT::v4i32,    16 }, // Shift each lane + blend.
+    { ISD::SRL,  MVT::v8i32,  2*16 }, // Shift each lane + blend.
    { ISD::SRL,  MVT::v2i64,     4 }, // splat+shuffle sequence.
+    { ISD::SRL,  MVT::v4i64,   2*4 }, // splat+shuffle sequence.

    { ISD::SRA,  MVT::v16i8,    54 }, // unpacked cmpgtb sequence.
+    { ISD::SRA,  MVT::v32i8,  2*54 }, // unpacked cmpgtb sequence.
    { ISD::SRA,  MVT::v8i16,    32 }, // cmpgtb sequence.
+    { ISD::SRA,  MVT::v16i16, 2*32 }, // cmpgtb sequence.
    { ISD::SRA,  MVT::v4i32,    16 }, // Shift each lane + blend.
+    { ISD::SRA,  MVT::v8i32,  2*16 }, // Shift each lane + blend.
    { ISD::SRA,  MVT::v2i64,    12 }, // srl/xor/sub sequence.
+    { ISD::SRA,  MVT::v4i64,  2*12 }, // srl/xor/sub sequence.

    // It is not a good idea to vectorize division. We have to scalarize it and
    // in the process we will often end up having to spilling regular
@ -363,12 +393,6 @@ int X86TTIImpl::getArithmeticInstrCost(
  if (ST->hasAVX() && !ST->hasAVX2()) {
    EVT VT = LT.second;

-    // v16i16 and v8i32 shifts by non-uniform constants are lowered into a
-    // sequence of extract + two vector multiply + insert.
-    if (ISD == ISD::SHL && (VT == MVT::v8i32 || VT == MVT::v16i16) &&
-        Op2Info == TargetTransformInfo::OK_NonUniformConstantValue)
-      ISD = ISD::MUL;
-
    int Idx = CostTableLookup(AVX1CostTable, ISD, VT);
    if (Idx != -1)
      return LT.first * AVX1CostTable[Idx].Cost;
--- a/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
+++ b/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
@ -26,7 +26,7 @@ define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i64':
 ; SSE2: Found an estimated cost of 24 for instruction:   %shift
 ; SSE41: Found an estimated cost of 24 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 24 for instruction:   %shift
 ; AVX2: Found an estimated cost of 4 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
  %shift = ashr <4 x i64> %a, %b
@ -49,7 +49,7 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i32':
 ; SSE2: Found an estimated cost of 32 for instruction:   %shift
 ; SSE41: Found an estimated cost of 32 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 32 for instruction:   %shift
 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
 ; XOPAVX: Found an estimated cost of 4 for instruction:   %shift
 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
@ -72,7 +72,7 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i16':
 ; SSE2: Found an estimated cost of 64 for instruction:   %shift
 ; SSE41: Found an estimated cost of 64 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 64 for instruction:   %shift
 ; AVX2: Found an estimated cost of 10 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
  %shift = ashr <16 x i16> %a, %b
@ -94,7 +94,7 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i8':
 ; SSE2: Found an estimated cost of 108 for instruction:   %shift
 ; SSE41: Found an estimated cost of 108 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 108 for instruction:   %shift
 ; AVX2: Found an estimated cost of 24 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
  %shift = ashr <32 x i8> %a, %b
@ -121,7 +121,7 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i64':
 ; SSE2: Found an estimated cost of 24 for instruction:   %shift
 ; SSE41: Found an estimated cost of 24 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 24 for instruction:   %shift
 ; AVX2: Found an estimated cost of 4 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
  %splat = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> zeroinitializer
@ -146,7 +146,7 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i32':
 ; SSE2: Found an estimated cost of 32 for instruction:   %shift
 ; SSE41: Found an estimated cost of 32 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 32 for instruction:   %shift
 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
 ; XOPAVX: Found an estimated cost of 4 for instruction:   %shift
 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
@ -171,7 +171,7 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i16':
 ; SSE2: Found an estimated cost of 64 for instruction:   %shift
 ; SSE41: Found an estimated cost of 64 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 64 for instruction:   %shift
 ; AVX2: Found an estimated cost of 10 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
  %splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
@ -195,7 +195,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i8':
 ; SSE2: Found an estimated cost of 108 for instruction:   %shift
 ; SSE41: Found an estimated cost of 108 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 108 for instruction:   %shift
 ; AVX2: Found an estimated cost of 24 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
  %splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
@ -222,7 +222,7 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i64':
 ; SSE2: Found an estimated cost of 24 for instruction:   %shift
 ; SSE41: Found an estimated cost of 24 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 24 for instruction:   %shift
 ; AVX2: Found an estimated cost of 4 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
  %shift = ashr <4 x i64> %a, <i64 1, i64 7, i64 15, i64 31>
@ -245,7 +245,7 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i32':
 ; SSE2: Found an estimated cost of 32 for instruction:   %shift
 ; SSE41: Found an estimated cost of 32 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 32 for instruction:   %shift
 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
 ; XOPAVX: Found an estimated cost of 4 for instruction:   %shift
 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
@ -268,7 +268,7 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i16':
 ; SSE2: Found an estimated cost of 64 for instruction:   %shift
 ; SSE41: Found an estimated cost of 64 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 64 for instruction:   %shift
 ; AVX2: Found an estimated cost of 10 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
  %shift = ashr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
@ -290,7 +290,7 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i8':
 ; SSE2: Found an estimated cost of 108 for instruction:   %shift
 ; SSE41: Found an estimated cost of 108 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 108 for instruction:   %shift
 ; AVX2: Found an estimated cost of 24 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
  %shift = ashr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
@ -316,7 +316,7 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v4i64':
 ; SSE2: Found an estimated cost of 8 for instruction:   %shift
 ; SSE41: Found an estimated cost of 8 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 8 for instruction:   %shift
 ; AVX2: Found an estimated cost of 4 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
  %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
@ -384,7 +384,7 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v32i8':
 ; SSE2: Found an estimated cost of 8 for instruction:   %shift
 ; SSE41: Found an estimated cost of 8 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 8 for instruction:   %shift
 ; AVX2: Found an estimated cost of 24 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
  %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
--- a/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
+++ b/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
@ -27,7 +27,7 @@ define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i64':
 ; SSE2: Found an estimated cost of 8 for instruction:   %shift
 ; SSE41: Found an estimated cost of 8 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 8 for instruction:   %shift
 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
 ; XOPAVX: Found an estimated cost of 4 for instruction:   %shift
 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
@ -51,7 +51,7 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i32':
 ; SSE2: Found an estimated cost of 32 for instruction:   %shift
 ; SSE41: Found an estimated cost of 32 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 32 for instruction:   %shift
 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
 ; XOPAVX: Found an estimated cost of 4 for instruction:   %shift
 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
@ -74,7 +74,7 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i16':
 ; SSE2: Found an estimated cost of 64 for instruction:   %shift
 ; SSE41: Found an estimated cost of 64 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 64 for instruction:   %shift
 ; AVX2: Found an estimated cost of 10 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
  %shift = lshr <16 x i16> %a, %b
@ -96,7 +96,7 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i8':
 ; SSE2: Found an estimated cost of 52 for instruction:   %shift
 ; SSE41: Found an estimated cost of 52 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 52 for instruction:   %shift
 ; AVX2: Found an estimated cost of 11 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
  %shift = lshr <32 x i8> %a, %b
@ -124,7 +124,7 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i64':
 ; SSE2: Found an estimated cost of 8 for instruction:   %shift
 ; SSE41: Found an estimated cost of 8 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 8 for instruction:   %shift
 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
 ; XOPAVX: Found an estimated cost of 4 for instruction:   %shift
 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
@ -150,7 +150,7 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i32':
 ; SSE2: Found an estimated cost of 32 for instruction:   %shift
 ; SSE41: Found an estimated cost of 32 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 32 for instruction:   %shift
 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
 ; XOPAVX: Found an estimated cost of 4 for instruction:   %shift
 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
@ -175,7 +175,7 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i16':
 ; SSE2: Found an estimated cost of 64 for instruction:   %shift
 ; SSE41: Found an estimated cost of 64 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 64 for instruction:   %shift
 ; AVX2: Found an estimated cost of 10 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
  %splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
@ -199,7 +199,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i8':
 ; SSE2: Found an estimated cost of 52 for instruction:   %shift
 ; SSE41: Found an estimated cost of 52 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 52 for instruction:   %shift
 ; AVX2: Found an estimated cost of 11 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
  %splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
@ -227,7 +227,7 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i64':
 ; SSE2: Found an estimated cost of 8 for instruction:   %shift
 ; SSE41: Found an estimated cost of 8 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 8 for instruction:   %shift
 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
 ; XOPAVX: Found an estimated cost of 4 for instruction:   %shift
 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
@ -251,7 +251,7 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i32':
 ; SSE2: Found an estimated cost of 32 for instruction:   %shift
 ; SSE41: Found an estimated cost of 32 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 32 for instruction:   %shift
 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
 ; XOPAVX: Found an estimated cost of 4 for instruction:   %shift
 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
@ -274,7 +274,7 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i16':
 ; SSE2: Found an estimated cost of 64 for instruction:   %shift
 ; SSE41: Found an estimated cost of 64 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 64 for instruction:   %shift
 ; AVX2: Found an estimated cost of 10 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
  %shift = lshr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
@ -296,7 +296,7 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i8':
 ; SSE2: Found an estimated cost of 52 for instruction:   %shift
 ; SSE41: Found an estimated cost of 52 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 52 for instruction:   %shift
 ; AVX2: Found an estimated cost of 11 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
  %shift = lshr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
--- a/test/Analysis/CostModel/X86/vshift-shl-cost.ll
+++ b/test/Analysis/CostModel/X86/vshift-shl-cost.ll
@ -52,7 +52,7 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i32':
 ; SSE2: Found an estimated cost of 20 for instruction:   %shift
 ; SSE41: Found an estimated cost of 20 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 20 for instruction:   %shift
 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
 ; XOPAVX: Found an estimated cost of 2 for instruction:   %shift
 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
@ -75,7 +75,7 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i16':
 ; SSE2: Found an estimated cost of 64 for instruction:   %shift
 ; SSE41: Found an estimated cost of 64 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 64 for instruction:   %shift
 ; AVX2: Found an estimated cost of 10 for instruction:   %shift
 ; XOP: Found an estimated cost of 2 for instruction:   %shift
  %shift = shl <16 x i16> %a, %b
@ -97,7 +97,7 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i8':
 ; SSE2: Found an estimated cost of 52 for instruction:   %shift
 ; SSE41: Found an estimated cost of 52 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 52 for instruction:   %shift
 ; AVX2: Found an estimated cost of 11 for instruction:   %shift
 ; XOP: Found an estimated cost of 2 for instruction:   %shift
  %shift = shl <32 x i8> %a, %b
@ -151,7 +151,7 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i32':
 ; SSE2: Found an estimated cost of 20 for instruction:   %shift
 ; SSE41: Found an estimated cost of 20 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 20 for instruction:   %shift
 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
 ; XOPAVX: Found an estimated cost of 2 for instruction:   %shift
 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
@ -176,7 +176,7 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i16':
 ; SSE2: Found an estimated cost of 64 for instruction:   %shift
 ; SSE41: Found an estimated cost of 64 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 64 for instruction:   %shift
 ; AVX2: Found an estimated cost of 10 for instruction:   %shift
 ; XOP: Found an estimated cost of 2 for instruction:   %shift
  %splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
@ -200,7 +200,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i8':
 ; SSE2: Found an estimated cost of 52 for instruction:   %shift
 ; SSE41: Found an estimated cost of 52 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 52 for instruction:   %shift
 ; AVX2: Found an estimated cost of 11 for instruction:   %shift
 ; XOP: Found an estimated cost of 2 for instruction:   %shift
  %splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
@ -298,7 +298,7 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i8':
 ; SSE2: Found an estimated cost of 52 for instruction:   %shift
 ; SSE41: Found an estimated cost of 52 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 52 for instruction:   %shift
 ; AVX2: Found an estimated cost of 11 for instruction:   %shift
 ; XOP: Found an estimated cost of 2 for instruction:   %shift
  %shift = shl <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
@ -325,7 +325,7 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v4i64':
 ; SSE2: Found an estimated cost of 2 for instruction:   %shift
 ; SSE41: Found an estimated cost of 2 for instruction:   %shift
-; AVX: Found an estimated cost of 8 for instruction:   %shift
+; AVX: Found an estimated cost of 2 for instruction:   %shift
 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
 ; XOPAVX: Found an estimated cost of 2 for instruction:   %shift
 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift