diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index 4a5c7e395fd..80e18161a94 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -589,28 +589,28 @@ int X86TTIImpl::getArithmeticInstrCost( return LT.first * Entry->Cost; static const CostTblEntry SSE41CostTable[] = { - { ISD::SHL, MVT::v16i8, 11 }, // pblendvb sequence. - { ISD::SHL, MVT::v32i8, 2*11 }, // pblendvb sequence. - { ISD::SHL, MVT::v8i16, 14 }, // pblendvb sequence. - { ISD::SHL, MVT::v16i16, 2*14 }, // pblendvb sequence. - { ISD::SHL, MVT::v4i32, 4 }, // pslld/paddd/cvttps2dq/pmulld - { ISD::SHL, MVT::v8i32, 2*4 }, // pslld/paddd/cvttps2dq/pmulld + { ISD::SHL, MVT::v16i8, 11 }, // pblendvb sequence. + { ISD::SHL, MVT::v32i8, 2*11+2 }, // pblendvb sequence + split. + { ISD::SHL, MVT::v8i16, 14 }, // pblendvb sequence. + { ISD::SHL, MVT::v16i16, 2*14+2 }, // pblendvb sequence + split. + { ISD::SHL, MVT::v4i32, 4 }, // pslld/paddd/cvttps2dq/pmulld + { ISD::SHL, MVT::v8i32, 2*4+2 }, // pslld/paddd/cvttps2dq/pmulld + split - { ISD::SRL, MVT::v16i8, 12 }, // pblendvb sequence. - { ISD::SRL, MVT::v32i8, 2*12 }, // pblendvb sequence. - { ISD::SRL, MVT::v8i16, 14 }, // pblendvb sequence. - { ISD::SRL, MVT::v16i16, 2*14 }, // pblendvb sequence. - { ISD::SRL, MVT::v4i32, 11 }, // Shift each lane + blend. - { ISD::SRL, MVT::v8i32, 2*11 }, // Shift each lane + blend. + { ISD::SRL, MVT::v16i8, 12 }, // pblendvb sequence. + { ISD::SRL, MVT::v32i8, 2*12+2 }, // pblendvb sequence + split. + { ISD::SRL, MVT::v8i16, 14 }, // pblendvb sequence. + { ISD::SRL, MVT::v16i16, 2*14+2 }, // pblendvb sequence + split. + { ISD::SRL, MVT::v4i32, 11 }, // Shift each lane + blend. + { ISD::SRL, MVT::v8i32, 2*11+2 }, // Shift each lane + blend + split. - { ISD::SRA, MVT::v16i8, 24 }, // pblendvb sequence. - { ISD::SRA, MVT::v32i8, 2*24 }, // pblendvb sequence. - { ISD::SRA, MVT::v8i16, 14 }, // pblendvb sequence. 
- { ISD::SRA, MVT::v16i16, 2*14 }, // pblendvb sequence. - { ISD::SRA, MVT::v4i32, 12 }, // Shift each lane + blend. - { ISD::SRA, MVT::v8i32, 2*12 }, // Shift each lane + blend. + { ISD::SRA, MVT::v16i8, 24 }, // pblendvb sequence. + { ISD::SRA, MVT::v32i8, 2*24+2 }, // pblendvb sequence + split. + { ISD::SRA, MVT::v8i16, 14 }, // pblendvb sequence. + { ISD::SRA, MVT::v16i16, 2*14+2 }, // pblendvb sequence + split. + { ISD::SRA, MVT::v4i32, 12 }, // Shift each lane + blend. + { ISD::SRA, MVT::v8i32, 2*12+2 }, // Shift each lane + blend + split. - { ISD::MUL, MVT::v4i32, 1 } // pmulld + { ISD::MUL, MVT::v4i32, 1 } // pmulld }; if (ST->hasSSE41()) @@ -620,33 +620,33 @@ int X86TTIImpl::getArithmeticInstrCost( static const CostTblEntry SSE2CostTable[] = { // We don't correctly identify costs of casts because they are marked as // custom. - { ISD::SHL, MVT::v16i8, 26 }, // cmpgtb sequence. - { ISD::SHL, MVT::v8i16, 32 }, // cmpgtb sequence. - { ISD::SHL, MVT::v4i32, 2*5 }, // We optimized this using mul. - { ISD::SHL, MVT::v2i64, 4 }, // splat+shuffle sequence. - { ISD::SHL, MVT::v4i64, 2*4 }, // splat+shuffle sequence. + { ISD::SHL, MVT::v16i8, 26 }, // cmpgtb sequence. + { ISD::SHL, MVT::v8i16, 32 }, // cmpgtb sequence. + { ISD::SHL, MVT::v4i32, 2*5 }, // We optimized this using mul. + { ISD::SHL, MVT::v2i64, 4 }, // splat+shuffle sequence. + { ISD::SHL, MVT::v4i64, 2*4+2 }, // splat+shuffle sequence + split. - { ISD::SRL, MVT::v16i8, 26 }, // cmpgtb sequence. - { ISD::SRL, MVT::v8i16, 32 }, // cmpgtb sequence. - { ISD::SRL, MVT::v4i32, 16 }, // Shift each lane + blend. - { ISD::SRL, MVT::v2i64, 4 }, // splat+shuffle sequence. - { ISD::SRL, MVT::v4i64, 2*4 }, // splat+shuffle sequence. + { ISD::SRL, MVT::v16i8, 26 }, // cmpgtb sequence. + { ISD::SRL, MVT::v8i16, 32 }, // cmpgtb sequence. + { ISD::SRL, MVT::v4i32, 16 }, // Shift each lane + blend. + { ISD::SRL, MVT::v2i64, 4 }, // splat+shuffle sequence. 
+ { ISD::SRL, MVT::v4i64, 2*4+2 }, // splat+shuffle sequence + split. - { ISD::SRA, MVT::v16i8, 54 }, // unpacked cmpgtb sequence. - { ISD::SRA, MVT::v8i16, 32 }, // cmpgtb sequence. - { ISD::SRA, MVT::v4i32, 16 }, // Shift each lane + blend. - { ISD::SRA, MVT::v2i64, 12 }, // srl/xor/sub sequence. - { ISD::SRA, MVT::v4i64, 2*12 }, // srl/xor/sub sequence. + { ISD::SRA, MVT::v16i8, 54 }, // unpacked cmpgtb sequence. + { ISD::SRA, MVT::v8i16, 32 }, // cmpgtb sequence. + { ISD::SRA, MVT::v4i32, 16 }, // Shift each lane + blend. + { ISD::SRA, MVT::v2i64, 12 }, // srl/xor/sub sequence. + { ISD::SRA, MVT::v4i64, 2*12+2 }, // srl/xor/sub sequence + split. - { ISD::MUL, MVT::v16i8, 12 }, // extend/pmullw/trunc sequence. - { ISD::MUL, MVT::v8i16, 1 }, // pmullw - { ISD::MUL, MVT::v4i32, 6 }, // 3*pmuludq/4*shuffle - { ISD::MUL, MVT::v2i64, 8 }, // 3*pmuludq/3*shift/2*add + { ISD::MUL, MVT::v16i8, 12 }, // extend/pmullw/trunc sequence. + { ISD::MUL, MVT::v8i16, 1 }, // pmullw + { ISD::MUL, MVT::v4i32, 6 }, // 3*pmuludq/4*shuffle + { ISD::MUL, MVT::v2i64, 8 }, // 3*pmuludq/3*shift/2*add - { ISD::FDIV, MVT::f32, 23 }, // Pentium IV from http://www.agner.org/ - { ISD::FDIV, MVT::v4f32, 39 }, // Pentium IV from http://www.agner.org/ - { ISD::FDIV, MVT::f64, 38 }, // Pentium IV from http://www.agner.org/ - { ISD::FDIV, MVT::v2f64, 69 }, // Pentium IV from http://www.agner.org/ + { ISD::FDIV, MVT::f32, 23 }, // Pentium IV from http://www.agner.org/ + { ISD::FDIV, MVT::v4f32, 39 }, // Pentium IV from http://www.agner.org/ + { ISD::FDIV, MVT::f64, 38 }, // Pentium IV from http://www.agner.org/ + { ISD::FDIV, MVT::v2f64, 69 }, // Pentium IV from http://www.agner.org/ // It is not a good idea to vectorize division.
We have to scalarize it and // in the process we will often end up having to spilling regular diff --git a/test/Analysis/CostModel/X86/vshift-ashr-cost.ll b/test/Analysis/CostModel/X86/vshift-ashr-cost.ll index 9b3b58a42b1..eabc2330ddc 100644 --- a/test/Analysis/CostModel/X86/vshift-ashr-cost.ll +++ b/test/Analysis/CostModel/X86/vshift-ashr-cost.ll @@ -33,7 +33,7 @@ define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i64': ; SSE2: Found an estimated cost of 24 for instruction: %shift ; SSE41: Found an estimated cost of 24 for instruction: %shift -; AVX: Found an estimated cost of 24 for instruction: %shift +; AVX: Found an estimated cost of 26 for instruction: %shift ; AVX2: Found an estimated cost of 4 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOP: Found an estimated cost of 6 for instruction: %shift @@ -45,7 +45,7 @@ define <8 x i64> @var_shift_v8i64(<8 x i64> %a, <8 x i64> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i64': ; SSE2: Found an estimated cost of 48 for instruction: %shift ; SSE41: Found an estimated cost of 48 for instruction: %shift -; AVX: Found an estimated cost of 48 for instruction: %shift +; AVX: Found an estimated cost of 52 for instruction: %shift ; AVX2: Found an estimated cost of 8 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOP: Found an estimated cost of 12 for instruction: %shift @@ -70,7 +70,7 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i32': ; SSE2: Found an estimated cost of 32 for instruction: %shift ; SSE41: Found an estimated cost of 24 for instruction: %shift -; AVX: Found an estimated cost of 24 for instruction: %shift +; AVX: Found an estimated cost of 26 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; AVX512: Found an estimated cost of 
1 for instruction: %shift ; XOPAVX: Found an estimated cost of 6 for instruction: %shift @@ -83,7 +83,7 @@ define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i32': ; SSE2: Found an estimated cost of 64 for instruction: %shift ; SSE41: Found an estimated cost of 48 for instruction: %shift -; AVX: Found an estimated cost of 48 for instruction: %shift +; AVX: Found an estimated cost of 52 for instruction: %shift ; AVX2: Found an estimated cost of 2 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 12 for instruction: %shift @@ -109,7 +109,7 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i16': ; SSE2: Found an estimated cost of 64 for instruction: %shift ; SSE41: Found an estimated cost of 28 for instruction: %shift -; AVX: Found an estimated cost of 28 for instruction: %shift +; AVX: Found an estimated cost of 30 for instruction: %shift ; AVX2: Found an estimated cost of 10 for instruction: %shift ; AVX512F: Found an estimated cost of 10 for instruction: %shift ; AVX512BW: Found an estimated cost of 1 for instruction: %shift @@ -122,7 +122,7 @@ define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i16': ; SSE2: Found an estimated cost of 128 for instruction: %shift ; SSE41: Found an estimated cost of 56 for instruction: %shift -; AVX: Found an estimated cost of 56 for instruction: %shift +; AVX: Found an estimated cost of 60 for instruction: %shift ; AVX2: Found an estimated cost of 20 for instruction: %shift ; AVX512F: Found an estimated cost of 20 for instruction: %shift ; AVX512BW: Found an estimated cost of 1 for instruction: %shift @@ -147,7 +147,7 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) { ; CHECK: 'Cost Model Analysis' for function 
'var_shift_v32i8': ; SSE2: Found an estimated cost of 108 for instruction: %shift ; SSE41: Found an estimated cost of 48 for instruction: %shift -; AVX: Found an estimated cost of 48 for instruction: %shift +; AVX: Found an estimated cost of 50 for instruction: %shift ; AVX2: Found an estimated cost of 24 for instruction: %shift ; AVX512F: Found an estimated cost of 24 for instruction: %shift ; AVX512BW: Found an estimated cost of 24 for instruction: %shift @@ -160,7 +160,7 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v64i8': ; SSE2: Found an estimated cost of 216 for instruction: %shift ; SSE41: Found an estimated cost of 96 for instruction: %shift -; AVX: Found an estimated cost of 96 for instruction: %shift +; AVX: Found an estimated cost of 100 for instruction: %shift ; AVX2: Found an estimated cost of 48 for instruction: %shift ; AVX512F: Found an estimated cost of 48 for instruction: %shift ; AVX512BW: Found an estimated cost of 24 for instruction: %shift @@ -322,7 +322,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i8': ; SSE2: Found an estimated cost of 108 for instruction: %shift ; SSE41: Found an estimated cost of 48 for instruction: %shift -; AVX: Found an estimated cost of 48 for instruction: %shift +; AVX: Found an estimated cost of 50 for instruction: %shift ; AVX2: Found an estimated cost of 24 for instruction: %shift ; AVX512: Found an estimated cost of 24 for instruction: %shift ; XOP: Found an estimated cost of 6 for instruction: %shift @@ -336,7 +336,7 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v64i8': ; SSE2: Found an estimated cost of 216 for instruction: %shift ; SSE41: Found an estimated cost of 96 for instruction: %shift -; AVX: Found an estimated cost of 96 for instruction: %shift +; AVX: Found an 
estimated cost of 100 for instruction: %shift ; AVX2: Found an estimated cost of 48 for instruction: %shift ; AVX512F: Found an estimated cost of 48 for instruction: %shift ; AVX512BW: Found an estimated cost of 24 for instruction: %shift @@ -367,7 +367,7 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i64': ; SSE2: Found an estimated cost of 24 for instruction: %shift ; SSE41: Found an estimated cost of 24 for instruction: %shift -; AVX: Found an estimated cost of 24 for instruction: %shift +; AVX: Found an estimated cost of 26 for instruction: %shift ; AVX2: Found an estimated cost of 4 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOP: Found an estimated cost of 6 for instruction: %shift @@ -379,7 +379,7 @@ define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i64': ; SSE2: Found an estimated cost of 48 for instruction: %shift ; SSE41: Found an estimated cost of 48 for instruction: %shift -; AVX: Found an estimated cost of 48 for instruction: %shift +; AVX: Found an estimated cost of 52 for instruction: %shift ; AVX2: Found an estimated cost of 8 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOP: Found an estimated cost of 12 for instruction: %shift @@ -404,7 +404,7 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i32': ; SSE2: Found an estimated cost of 32 for instruction: %shift ; SSE41: Found an estimated cost of 24 for instruction: %shift -; AVX: Found an estimated cost of 24 for instruction: %shift +; AVX: Found an estimated cost of 26 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 6 for instruction: %shift @@ -417,7 +417,7 @@ define 
<16 x i32> @constant_shift_v16i32(<16 x i32> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i32': ; SSE2: Found an estimated cost of 64 for instruction: %shift ; SSE41: Found an estimated cost of 48 for instruction: %shift -; AVX: Found an estimated cost of 48 for instruction: %shift +; AVX: Found an estimated cost of 52 for instruction: %shift ; AVX2: Found an estimated cost of 2 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 12 for instruction: %shift @@ -443,7 +443,7 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i16': ; SSE2: Found an estimated cost of 64 for instruction: %shift ; SSE41: Found an estimated cost of 28 for instruction: %shift -; AVX: Found an estimated cost of 28 for instruction: %shift +; AVX: Found an estimated cost of 30 for instruction: %shift ; AVX2: Found an estimated cost of 10 for instruction: %shift ; AVX512F: Found an estimated cost of 10 for instruction: %shift ; AVX512BW: Found an estimated cost of 1 for instruction: %shift @@ -456,7 +456,7 @@ define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i16': ; SSE2: Found an estimated cost of 128 for instruction: %shift ; SSE41: Found an estimated cost of 56 for instruction: %shift -; AVX: Found an estimated cost of 56 for instruction: %shift +; AVX: Found an estimated cost of 60 for instruction: %shift ; AVX2: Found an estimated cost of 20 for instruction: %shift ; AVX512F: Found an estimated cost of 20 for instruction: %shift ; AVX512BW: Found an estimated cost of 1 for instruction: %shift @@ -481,7 +481,7 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i8': ; SSE2: Found an estimated cost of 108 for instruction: %shift ; SSE41: Found an estimated cost of 48 for instruction: 
%shift -; AVX: Found an estimated cost of 48 for instruction: %shift +; AVX: Found an estimated cost of 50 for instruction: %shift ; AVX2: Found an estimated cost of 24 for instruction: %shift ; AVX512: Found an estimated cost of 24 for instruction: %shift ; XOP: Found an estimated cost of 6 for instruction: %shift @@ -493,7 +493,7 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v64i8': ; SSE2: Found an estimated cost of 216 for instruction: %shift ; SSE41: Found an estimated cost of 96 for instruction: %shift -; AVX: Found an estimated cost of 96 for instruction: %shift +; AVX: Found an estimated cost of 100 for instruction: %shift ; AVX2: Found an estimated cost of 48 for instruction: %shift ; AVX512F: Found an estimated cost of 48 for instruction: %shift ; AVX512BW: Found an estimated cost of 24 for instruction: %shift diff --git a/test/Analysis/CostModel/X86/vshift-lshr-cost.ll b/test/Analysis/CostModel/X86/vshift-lshr-cost.ll index d5c5d23efdf..6e890369d67 100644 --- a/test/Analysis/CostModel/X86/vshift-lshr-cost.ll +++ b/test/Analysis/CostModel/X86/vshift-lshr-cost.ll @@ -34,7 +34,7 @@ define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i64': ; SSE2: Found an estimated cost of 8 for instruction: %shift ; SSE41: Found an estimated cost of 8 for instruction: %shift -; AVX: Found an estimated cost of 8 for instruction: %shift +; AVX: Found an estimated cost of 10 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 6 for instruction: %shift @@ -47,7 +47,7 @@ define <8 x i64> @var_shift_v8i64(<8 x i64> %a, <8 x i64> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i64': ; SSE2: Found an estimated cost of 16 for instruction: %shift ; SSE41: Found an estimated cost of 16 for 
instruction: %shift -; AVX: Found an estimated cost of 16 for instruction: %shift +; AVX: Found an estimated cost of 20 for instruction: %shift ; AVX2: Found an estimated cost of 2 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 12 for instruction: %shift @@ -73,7 +73,7 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i32': ; SSE2: Found an estimated cost of 32 for instruction: %shift ; SSE41: Found an estimated cost of 22 for instruction: %shift -; AVX: Found an estimated cost of 22 for instruction: %shift +; AVX: Found an estimated cost of 24 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 6 for instruction: %shift @@ -86,7 +86,7 @@ define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i32': ; SSE2: Found an estimated cost of 64 for instruction: %shift ; SSE41: Found an estimated cost of 44 for instruction: %shift -; AVX: Found an estimated cost of 44 for instruction: %shift +; AVX: Found an estimated cost of 48 for instruction: %shift ; AVX2: Found an estimated cost of 2 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 12 for instruction: %shift @@ -112,7 +112,7 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i16': ; SSE2: Found an estimated cost of 64 for instruction: %shift ; SSE41: Found an estimated cost of 28 for instruction: %shift -; AVX: Found an estimated cost of 28 for instruction: %shift +; AVX: Found an estimated cost of 30 for instruction: %shift ; AVX2: Found an estimated cost of 10 for instruction: %shift ; AVX512F: Found an estimated cost 
of 10 for instruction: %shift ; AVX512BW: Found an estimated cost of 1 for instruction: %shift @@ -125,7 +125,7 @@ define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i16': ; SSE2: Found an estimated cost of 128 for instruction: %shift ; SSE41: Found an estimated cost of 56 for instruction: %shift -; AVX: Found an estimated cost of 56 for instruction: %shift +; AVX: Found an estimated cost of 60 for instruction: %shift ; AVX2: Found an estimated cost of 20 for instruction: %shift ; AVX512F: Found an estimated cost of 20 for instruction: %shift ; AVX512BW: Found an estimated cost of 1 for instruction: %shift @@ -150,7 +150,7 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i8': ; SSE2: Found an estimated cost of 52 for instruction: %shift ; SSE41: Found an estimated cost of 24 for instruction: %shift -; AVX: Found an estimated cost of 24 for instruction: %shift +; AVX: Found an estimated cost of 26 for instruction: %shift ; AVX2: Found an estimated cost of 11 for instruction: %shift ; AVX512: Found an estimated cost of 11 for instruction: %shift ; XOP: Found an estimated cost of 6 for instruction: %shift @@ -162,7 +162,7 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v64i8': ; SSE2: Found an estimated cost of 104 for instruction: %shift ; SSE41: Found an estimated cost of 48 for instruction: %shift -; AVX: Found an estimated cost of 48 for instruction: %shift +; AVX: Found an estimated cost of 52 for instruction: %shift ; AVX2: Found an estimated cost of 22 for instruction: %shift ; AVX512F: Found an estimated cost of 22 for instruction: %shift ; AVX512BW: Found an estimated cost of 11 for instruction: %shift @@ -326,7 +326,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; CHECK: 'Cost Model Analysis' for function 
'splatvar_shift_v32i8': ; SSE2: Found an estimated cost of 52 for instruction: %shift ; SSE41: Found an estimated cost of 24 for instruction: %shift -; AVX: Found an estimated cost of 24 for instruction: %shift +; AVX: Found an estimated cost of 26 for instruction: %shift ; AVX2: Found an estimated cost of 11 for instruction: %shift ; AVX512: Found an estimated cost of 11 for instruction: %shift ; XOP: Found an estimated cost of 6 for instruction: %shift @@ -340,7 +340,7 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v64i8': ; SSE2: Found an estimated cost of 104 for instruction: %shift ; SSE41: Found an estimated cost of 48 for instruction: %shift -; AVX: Found an estimated cost of 48 for instruction: %shift +; AVX: Found an estimated cost of 52 for instruction: %shift ; AVX2: Found an estimated cost of 22 for instruction: %shift ; AVX512F: Found an estimated cost of 22 for instruction: %shift ; AVX512BW: Found an estimated cost of 11 for instruction: %shift @@ -372,7 +372,7 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i64': ; SSE2: Found an estimated cost of 8 for instruction: %shift ; SSE41: Found an estimated cost of 8 for instruction: %shift -; AVX: Found an estimated cost of 8 for instruction: %shift +; AVX: Found an estimated cost of 10 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 6 for instruction: %shift @@ -385,7 +385,7 @@ define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i64': ; SSE2: Found an estimated cost of 16 for instruction: %shift ; SSE41: Found an estimated cost of 16 for instruction: %shift -; AVX: Found an estimated cost of 16 for instruction: %shift +; AVX: Found an estimated cost of 20 for 
instruction: %shift ; AVX2: Found an estimated cost of 2 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 12 for instruction: %shift @@ -411,7 +411,7 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i32': ; SSE2: Found an estimated cost of 32 for instruction: %shift ; SSE41: Found an estimated cost of 22 for instruction: %shift -; AVX: Found an estimated cost of 22 for instruction: %shift +; AVX: Found an estimated cost of 24 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 6 for instruction: %shift @@ -424,7 +424,7 @@ define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i32': ; SSE2: Found an estimated cost of 64 for instruction: %shift ; SSE41: Found an estimated cost of 44 for instruction: %shift -; AVX: Found an estimated cost of 44 for instruction: %shift +; AVX: Found an estimated cost of 48 for instruction: %shift ; AVX2: Found an estimated cost of 2 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 12 for instruction: %shift @@ -450,7 +450,7 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i16': ; SSE2: Found an estimated cost of 64 for instruction: %shift ; SSE41: Found an estimated cost of 28 for instruction: %shift -; AVX: Found an estimated cost of 28 for instruction: %shift +; AVX: Found an estimated cost of 30 for instruction: %shift ; AVX2: Found an estimated cost of 10 for instruction: %shift ; AVX512F: Found an estimated cost of 10 for instruction: %shift ; AVX512BW: Found an estimated cost of 1 for instruction: %shift @@ -463,7 +463,7 @@ define <32 x i16> 
@constant_shift_v32i16(<32 x i16> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i16': ; SSE2: Found an estimated cost of 128 for instruction: %shift ; SSE41: Found an estimated cost of 56 for instruction: %shift -; AVX: Found an estimated cost of 56 for instruction: %shift +; AVX: Found an estimated cost of 60 for instruction: %shift ; AVX2: Found an estimated cost of 20 for instruction: %shift ; AVX512F: Found an estimated cost of 20 for instruction: %shift ; AVX512BW: Found an estimated cost of 1 for instruction: %shift @@ -488,7 +488,7 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i8': ; SSE2: Found an estimated cost of 52 for instruction: %shift ; SSE41: Found an estimated cost of 24 for instruction: %shift -; AVX: Found an estimated cost of 24 for instruction: %shift +; AVX: Found an estimated cost of 26 for instruction: %shift ; AVX2: Found an estimated cost of 11 for instruction: %shift ; AVX512: Found an estimated cost of 11 for instruction: %shift ; XOP: Found an estimated cost of 6 for instruction: %shift @@ -500,7 +500,7 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v64i8': ; SSE2: Found an estimated cost of 104 for instruction: %shift ; SSE41: Found an estimated cost of 48 for instruction: %shift -; AVX: Found an estimated cost of 48 for instruction: %shift +; AVX: Found an estimated cost of 52 for instruction: %shift ; AVX2: Found an estimated cost of 22 for instruction: %shift ; AVX512F: Found an estimated cost of 22 for instruction: %shift ; AVX512BW: Found an estimated cost of 11 for instruction: %shift diff --git a/test/Analysis/CostModel/X86/vshift-shl-cost.ll b/test/Analysis/CostModel/X86/vshift-shl-cost.ll index ddcd2ecc727..5e604bb7983 100644 --- a/test/Analysis/CostModel/X86/vshift-shl-cost.ll +++ b/test/Analysis/CostModel/X86/vshift-shl-cost.ll @@ -35,7 +35,7 @@ define <4 x 
i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i64': ; SSE2: Found an estimated cost of 8 for instruction: %shift ; SSE41: Found an estimated cost of 8 for instruction: %shift -; AVX: Found an estimated cost of 8 for instruction: %shift +; AVX: Found an estimated cost of 10 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 4 for instruction: %shift @@ -48,7 +48,7 @@ define <8 x i64> @var_shift_v8i64(<8 x i64> %a, <8 x i64> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i64': ; SSE2: Found an estimated cost of 16 for instruction: %shift ; SSE41: Found an estimated cost of 16 for instruction: %shift -; AVX: Found an estimated cost of 16 for instruction: %shift +; AVX: Found an estimated cost of 20 for instruction: %shift ; AVX2: Found an estimated cost of 2 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 8 for instruction: %shift @@ -74,7 +74,7 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i32': ; SSE2: Found an estimated cost of 20 for instruction: %shift ; SSE41: Found an estimated cost of 8 for instruction: %shift -; AVX: Found an estimated cost of 8 for instruction: %shift +; AVX: Found an estimated cost of 10 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 4 for instruction: %shift @@ -87,7 +87,7 @@ define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i32': ; SSE2: Found an estimated cost of 40 for instruction: %shift ; SSE41: Found an estimated cost of 16 for instruction: %shift -; AVX: Found an 
estimated cost of 16 for instruction: %shift +; AVX: Found an estimated cost of 20 for instruction: %shift ; AVX2: Found an estimated cost of 2 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 8 for instruction: %shift @@ -113,7 +113,7 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i16': ; SSE2: Found an estimated cost of 64 for instruction: %shift ; SSE41: Found an estimated cost of 28 for instruction: %shift -; AVX: Found an estimated cost of 28 for instruction: %shift +; AVX: Found an estimated cost of 30 for instruction: %shift ; AVX2: Found an estimated cost of 10 for instruction: %shift ; AVX512F: Found an estimated cost of 10 for instruction: %shift ; AVX512BW: Found an estimated cost of 1 for instruction: %shift @@ -126,7 +126,7 @@ define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i16': ; SSE2: Found an estimated cost of 128 for instruction: %shift ; SSE41: Found an estimated cost of 56 for instruction: %shift -; AVX: Found an estimated cost of 56 for instruction: %shift +; AVX: Found an estimated cost of 60 for instruction: %shift ; AVX2: Found an estimated cost of 20 for instruction: %shift ; AVX512F: Found an estimated cost of 20 for instruction: %shift ; AVX512BW: Found an estimated cost of 1 for instruction: %shift @@ -151,7 +151,7 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i8': ; SSE2: Found an estimated cost of 52 for instruction: %shift ; SSE41: Found an estimated cost of 22 for instruction: %shift -; AVX: Found an estimated cost of 22 for instruction: %shift +; AVX: Found an estimated cost of 24 for instruction: %shift ; AVX2: Found an estimated cost of 11 for instruction: %shift ; AVX512: Found an estimated cost of 11 for instruction: 
%shift ; XOP: Found an estimated cost of 4 for instruction: %shift @@ -163,7 +163,7 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v64i8': ; SSE2: Found an estimated cost of 104 for instruction: %shift ; SSE41: Found an estimated cost of 44 for instruction: %shift -; AVX: Found an estimated cost of 44 for instruction: %shift +; AVX: Found an estimated cost of 48 for instruction: %shift ; AVX2: Found an estimated cost of 22 for instruction: %shift ; AVX512F: Found an estimated cost of 22 for instruction: %shift ; AVX512BW: Found an estimated cost of 11 for instruction: %shift @@ -327,7 +327,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i8': ; SSE2: Found an estimated cost of 52 for instruction: %shift ; SSE41: Found an estimated cost of 22 for instruction: %shift -; AVX: Found an estimated cost of 22 for instruction: %shift +; AVX: Found an estimated cost of 24 for instruction: %shift ; AVX2: Found an estimated cost of 11 for instruction: %shift ; AVX512: Found an estimated cost of 11 for instruction: %shift ; XOP: Found an estimated cost of 4 for instruction: %shift @@ -341,7 +341,7 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v64i8': ; SSE2: Found an estimated cost of 104 for instruction: %shift ; SSE41: Found an estimated cost of 44 for instruction: %shift -; AVX: Found an estimated cost of 44 for instruction: %shift +; AVX: Found an estimated cost of 48 for instruction: %shift ; AVX2: Found an estimated cost of 22 for instruction: %shift ; AVX512F: Found an estimated cost of 22 for instruction: %shift ; AVX512BW: Found an estimated cost of 11 for instruction: %shift @@ -373,7 +373,7 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i64': ; SSE2: Found an 
estimated cost of 8 for instruction: %shift ; SSE41: Found an estimated cost of 8 for instruction: %shift -; AVX: Found an estimated cost of 8 for instruction: %shift +; AVX: Found an estimated cost of 10 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 4 for instruction: %shift @@ -386,7 +386,7 @@ define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i64': ; SSE2: Found an estimated cost of 16 for instruction: %shift ; SSE41: Found an estimated cost of 16 for instruction: %shift -; AVX: Found an estimated cost of 16 for instruction: %shift +; AVX: Found an estimated cost of 20 for instruction: %shift ; AVX2: Found an estimated cost of 2 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 8 for instruction: %shift @@ -489,7 +489,7 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i8': ; SSE2: Found an estimated cost of 52 for instruction: %shift ; SSE41: Found an estimated cost of 22 for instruction: %shift -; AVX: Found an estimated cost of 22 for instruction: %shift +; AVX: Found an estimated cost of 24 for instruction: %shift ; AVX2: Found an estimated cost of 11 for instruction: %shift ; AVX512: Found an estimated cost of 11 for instruction: %shift ; XOP: Found an estimated cost of 4 for instruction: %shift @@ -501,7 +501,7 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v64i8': ; SSE2: Found an estimated cost of 104 for instruction: %shift ; SSE41: Found an estimated cost of 44 for instruction: %shift -; AVX: Found an estimated cost of 44 for instruction: %shift +; AVX: Found an estimated cost of 48 for instruction: %shift ; AVX2: Found an estimated cost 
of 22 for instruction: %shift ; AVX512F: Found an estimated cost of 22 for instruction: %shift ; AVX512BW: Found an estimated cost of 11 for instruction: %shift @@ -794,7 +794,7 @@ define <4 x i64> @test8(<4 x i64> %a) { ; CHECK: 'Cost Model Analysis' for function 'test8': ; SSE2: Found an estimated cost of 8 for instruction: %shl ; SSE41: Found an estimated cost of 8 for instruction: %shl -; AVX: Found an estimated cost of 8 for instruction: %shl +; AVX: Found an estimated cost of 10 for instruction: %shl ; AVX2: Found an estimated cost of 1 for instruction: %shl ; XOPAVX: Found an estimated cost of 4 for instruction: %shl ; XOPAVX2: Found an estimated cost of 1 for instruction: %shl @@ -842,7 +842,7 @@ define <8 x i64> @test11(<8 x i64> %a) { ; CHECK: 'Cost Model Analysis' for function 'test11': ; SSE2: Found an estimated cost of 16 for instruction: %shl ; SSE41: Found an estimated cost of 16 for instruction: %shl -; AVX: Found an estimated cost of 16 for instruction: %shl +; AVX: Found an estimated cost of 20 for instruction: %shl ; AVX2: Found an estimated cost of 2 for instruction: %shl ; XOPAVX: Found an estimated cost of 8 for instruction: %shl ; XOPAVX2: Found an estimated cost of 2 for instruction: %shl