From c7432f9ad36823e5958e5d56868ca6804f977edd Mon Sep 17 00:00:00 2001 From: Michael Kuperstein Date: Wed, 6 Jul 2016 17:30:56 +0000 Subject: [PATCH] [TTI] The cost model should not assume vector casts get completely scalarized The cost model should not assume vector casts get completely scalarized, since on targets that have vector support, the common case is a partial split up to the legal vector size. So, when a vector cast gets split, the resulting casts end up legal and cheap. Instead of pessimistically assuming scalarization, base TTI can use the costs the concrete TTI provides for the split vector, plus a fudge factor to account for the cost of the split itself. This fudge factor is currently 1 by default, except on AMDGPU where inserts and extracts are considered free. Differential Revision: http://reviews.llvm.org/D21251 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@274642 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/CodeGen/BasicTTIImpl.h | 34 +++- lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h | 2 + test/Analysis/CostModel/ARM/cast.ll | 176 +++++++++--------- test/Analysis/CostModel/PowerPC/ext.ll | 2 +- test/Analysis/CostModel/X86/sitofp.ll | 118 ++++++------ test/Analysis/CostModel/X86/uitofp.ll | 124 ++++++------ .../LoopVectorize/X86/gather_scatter.ll | 6 +- 7 files changed, 241 insertions(+), 221 deletions(-) diff --git a/include/llvm/CodeGen/BasicTTIImpl.h b/include/llvm/CodeGen/BasicTTIImpl.h index 78cb0af1699..1aa13fb7359 100644 --- a/include/llvm/CodeGen/BasicTTIImpl.h +++ b/include/llvm/CodeGen/BasicTTIImpl.h @@ -315,6 +315,8 @@ public: } // Else, assume that we need to scalarize this op. + // TODO: If one of the types get legalized by splitting, handle this + // similarly to what getCastInstrCost() does. 
if (Ty->isVectorTy()) { unsigned Num = Ty->getVectorNumElements(); unsigned Cost = static_cast<T *>(this) @@ -409,12 +411,25 @@ public: return SrcLT.first * 1; } - // If we are converting vectors and the operation is illegal, or - // if the vectors are legalized to different types, estimate the - // scalarization costs. - // TODO: This is probably a big overestimate. For splits, we should have - // something like getTypeLegalizationCost() + 2 * getCastInstrCost(). - // The same applies to getCmpSelInstrCost() and getArithmeticInstrCost() + // If we are legalizing by splitting, query the concrete TTI for the cost + // of casting the original vector twice. We also need to factor in the + // cost of the split itself. Count that as 1, to be consistent with + // TLI->getTypeLegalizationCost(). + if ((TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) == + TargetLowering::TypeSplitVector) || + (TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) == + TargetLowering::TypeSplitVector)) { + Type *SplitDst = VectorType::get(Dst->getVectorElementType(), + Dst->getVectorNumElements() / 2); + Type *SplitSrc = VectorType::get(Src->getVectorElementType(), + Src->getVectorNumElements() / 2); + T *TTI = static_cast<T *>(this); + return TTI->getVectorSplitCost() + + (2 * TTI->getCastInstrCost(Opcode, SplitDst, SplitSrc)); + } + + // In other cases where the source or destination are illegal, assume + // the operation will get scalarized. unsigned Num = Dst->getVectorNumElements(); unsigned Cost = static_cast<T *>(this)->getCastInstrCost( Opcode, Dst->getScalarType(), Src->getScalarType()); @@ -472,6 +487,8 @@ public: } // Otherwise, assume that the cast is scalarized. + // TODO: If one of the types get legalized by splitting, handle this + // similarly to what getCastInstrCost() does. 
if (ValTy->isVectorTy()) { unsigned Num = ValTy->getVectorNumElements(); if (CondTy) @@ -480,8 +497,7 @@ public: Opcode, ValTy->getScalarType(), CondTy); // Return the cost of multiple scalar invocation plus the cost of - // inserting - // and extracting the values. + // inserting and extracting the values. return getScalarizationOverhead(ValTy, true, false) + Num * Cost; } @@ -906,6 +922,8 @@ public: return ShuffleCost + ArithCost + getScalarizationOverhead(Ty, false, true); } + unsigned getVectorSplitCost() { return 1; } + /// @} }; diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h index d7efaa502e6..a82a0745808 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -96,6 +96,8 @@ public: int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index); bool isSourceOfDivergence(const Value *V) const; + + unsigned getVectorSplitCost() { return 0; } }; } // end namespace llvm diff --git a/test/Analysis/CostModel/ARM/cast.ll b/test/Analysis/CostModel/ARM/cast.ll index 21e29282209..72308834c53 100644 --- a/test/Analysis/CostModel/ARM/cast.ll +++ b/test/Analysis/CostModel/ARM/cast.ll @@ -264,39 +264,39 @@ define i32 @casts() { %r116 = fptoui <4 x float> undef to <4 x i32> ; CHECK: Found an estimated cost of 1 for instruction: %r117 = fptosi <4 x float> undef to <4 x i32> %r117 = fptosi <4 x float> undef to <4 x i32> - ; CHECK: Found an estimated cost of 64 for instruction: %r118 = fptoui <4 x float> undef to <4 x i64> + ; CHECK: Found an estimated cost of 65 for instruction: %r118 = fptoui <4 x float> undef to <4 x i64> %r118 = fptoui <4 x float> undef to <4 x i64> - ; CHECK: Found an estimated cost of 64 for instruction: %r119 = fptosi <4 x float> undef to <4 x i64> + ; CHECK: Found an estimated cost of 65 for instruction: %r119 = fptosi <4 x float> undef to <4 x i64> %r119 = fptosi <4 x float> undef to <4 x i64> - ; CHECK: Found an estimated cost 
of 32 for instruction: %r120 = fptoui <4 x double> undef to <4 x i1> + ; CHECK: Found an estimated cost of 33 for instruction: %r120 = fptoui <4 x double> undef to <4 x i1> %r120 = fptoui <4 x double> undef to <4 x i1> - ; CHECK: Found an estimated cost of 32 for instruction: %r121 = fptosi <4 x double> undef to <4 x i1> + ; CHECK: Found an estimated cost of 33 for instruction: %r121 = fptosi <4 x double> undef to <4 x i1> %r121 = fptosi <4 x double> undef to <4 x i1> - ; CHECK: Found an estimated cost of 32 for instruction: %r122 = fptoui <4 x double> undef to <4 x i8> + ; CHECK: Found an estimated cost of 33 for instruction: %r122 = fptoui <4 x double> undef to <4 x i8> %r122 = fptoui <4 x double> undef to <4 x i8> - ; CHECK: Found an estimated cost of 32 for instruction: %r123 = fptosi <4 x double> undef to <4 x i8> + ; CHECK: Found an estimated cost of 33 for instruction: %r123 = fptosi <4 x double> undef to <4 x i8> %r123 = fptosi <4 x double> undef to <4 x i8> - ; CHECK: Found an estimated cost of 32 for instruction: %r124 = fptoui <4 x double> undef to <4 x i16> + ; CHECK: Found an estimated cost of 33 for instruction: %r124 = fptoui <4 x double> undef to <4 x i16> %r124 = fptoui <4 x double> undef to <4 x i16> - ; CHECK: Found an estimated cost of 32 for instruction: %r125 = fptosi <4 x double> undef to <4 x i16> + ; CHECK: Found an estimated cost of 33 for instruction: %r125 = fptosi <4 x double> undef to <4 x i16> %r125 = fptosi <4 x double> undef to <4 x i16> - ; CHECK: Found an estimated cost of 32 for instruction: %r126 = fptoui <4 x double> undef to <4 x i32> + ; CHECK: Found an estimated cost of 5 for instruction: %r126 = fptoui <4 x double> undef to <4 x i32> %r126 = fptoui <4 x double> undef to <4 x i32> - ; CHECK: Found an estimated cost of 32 for instruction: %r127 = fptosi <4 x double> undef to <4 x i32> + ; CHECK: Found an estimated cost of 5 for instruction: %r127 = fptosi <4 x double> undef to <4 x i32> %r127 = fptosi <4 x double> undef to <4 
x i32> - ; CHECK: Found an estimated cost of 64 for instruction: %r128 = fptoui <4 x double> undef to <4 x i64> + ; CHECK: Found an estimated cost of 65 for instruction: %r128 = fptoui <4 x double> undef to <4 x i64> %r128 = fptoui <4 x double> undef to <4 x i64> - ; CHECK: Found an estimated cost of 64 for instruction: %r129 = fptosi <4 x double> undef to <4 x i64> + ; CHECK: Found an estimated cost of 65 for instruction: %r129 = fptosi <4 x double> undef to <4 x i64> %r129 = fptosi <4 x double> undef to <4 x i64> - ; CHECK: Found an estimated cost of 64 for instruction: %r130 = fptoui <8 x float> undef to <8 x i1> + ; CHECK: Found an estimated cost of 65 for instruction: %r130 = fptoui <8 x float> undef to <8 x i1> %r130 = fptoui <8 x float> undef to <8 x i1> - ; CHECK: Found an estimated cost of 64 for instruction: %r131 = fptosi <8 x float> undef to <8 x i1> + ; CHECK: Found an estimated cost of 65 for instruction: %r131 = fptosi <8 x float> undef to <8 x i1> %r131 = fptosi <8 x float> undef to <8 x i1> - ; CHECK: Found an estimated cost of 64 for instruction: %r132 = fptoui <8 x float> undef to <8 x i8> + ; CHECK: Found an estimated cost of 7 for instruction: %r132 = fptoui <8 x float> undef to <8 x i8> %r132 = fptoui <8 x float> undef to <8 x i8> - ; CHECK: Found an estimated cost of 64 for instruction: %r133 = fptosi <8 x float> undef to <8 x i8> + ; CHECK: Found an estimated cost of 7 for instruction: %r133 = fptosi <8 x float> undef to <8 x i8> %r133 = fptosi <8 x float> undef to <8 x i8> ; CHECK: Found an estimated cost of 4 for instruction: %r134 = fptoui <8 x float> undef to <8 x i16> %r134 = fptoui <8 x float> undef to <8 x i16> @@ -306,39 +306,39 @@ define i32 @casts() { %r136 = fptoui <8 x float> undef to <8 x i32> ; CHECK: Found an estimated cost of 2 for instruction: %r137 = fptosi <8 x float> undef to <8 x i32> %r137 = fptosi <8 x float> undef to <8 x i32> - ; CHECK: Found an estimated cost of 128 for instruction: %r138 = fptoui <8 x float> undef 
to <8 x i64> + ; CHECK: Found an estimated cost of 131 for instruction: %r138 = fptoui <8 x float> undef to <8 x i64> %r138 = fptoui <8 x float> undef to <8 x i64> - ; CHECK: Found an estimated cost of 128 for instruction: %r139 = fptosi <8 x float> undef to <8 x i64> + ; CHECK: Found an estimated cost of 131 for instruction: %r139 = fptosi <8 x float> undef to <8 x i64> %r139 = fptosi <8 x float> undef to <8 x i64> - ; CHECK: Found an estimated cost of 64 for instruction: %r140 = fptoui <8 x double> undef to <8 x i1> + ; CHECK: Found an estimated cost of 67 for instruction: %r140 = fptoui <8 x double> undef to <8 x i1> %r140 = fptoui <8 x double> undef to <8 x i1> - ; CHECK: Found an estimated cost of 64 for instruction: %r141 = fptosi <8 x double> undef to <8 x i1> + ; CHECK: Found an estimated cost of 67 for instruction: %r141 = fptosi <8 x double> undef to <8 x i1> %r141 = fptosi <8 x double> undef to <8 x i1> - ; CHECK: Found an estimated cost of 64 for instruction: %r142 = fptoui <8 x double> undef to <8 x i8> + ; CHECK: Found an estimated cost of 67 for instruction: %r142 = fptoui <8 x double> undef to <8 x i8> %r142 = fptoui <8 x double> undef to <8 x i8> - ; CHECK: Found an estimated cost of 64 for instruction: %r143 = fptosi <8 x double> undef to <8 x i8> + ; CHECK: Found an estimated cost of 67 for instruction: %r143 = fptosi <8 x double> undef to <8 x i8> %r143 = fptosi <8 x double> undef to <8 x i8> - ; CHECK: Found an estimated cost of 64 for instruction: %r144 = fptoui <8 x double> undef to <8 x i16> + ; CHECK: Found an estimated cost of 67 for instruction: %r144 = fptoui <8 x double> undef to <8 x i16> %r144 = fptoui <8 x double> undef to <8 x i16> - ; CHECK: Found an estimated cost of 64 for instruction: %r145 = fptosi <8 x double> undef to <8 x i16> + ; CHECK: Found an estimated cost of 67 for instruction: %r145 = fptosi <8 x double> undef to <8 x i16> %r145 = fptosi <8 x double> undef to <8 x i16> - ; CHECK: Found an estimated cost of 64 for 
instruction: %r146 = fptoui <8 x double> undef to <8 x i32> + ; CHECK: Found an estimated cost of 11 for instruction: %r146 = fptoui <8 x double> undef to <8 x i32> %r146 = fptoui <8 x double> undef to <8 x i32> - ; CHECK: Found an estimated cost of 64 for instruction: %r147 = fptosi <8 x double> undef to <8 x i32> + ; CHECK: Found an estimated cost of 11 for instruction: %r147 = fptosi <8 x double> undef to <8 x i32> %r147 = fptosi <8 x double> undef to <8 x i32> - ; CHECK: Found an estimated cost of 128 for instruction: %r148 = fptoui <8 x double> undef to <8 x i64> + ; CHECK: Found an estimated cost of 131 for instruction: %r148 = fptoui <8 x double> undef to <8 x i64> %r148 = fptoui <8 x double> undef to <8 x i64> - ; CHECK: Found an estimated cost of 128 for instruction: %r149 = fptosi <8 x double> undef to <8 x i64> + ; CHECK: Found an estimated cost of 131 for instruction: %r149 = fptosi <8 x double> undef to <8 x i64> %r149 = fptosi <8 x double> undef to <8 x i64> - ; CHECK: Found an estimated cost of 128 for instruction: %r150 = fptoui <16 x float> undef to <16 x i1> + ; CHECK: Found an estimated cost of 131 for instruction: %r150 = fptoui <16 x float> undef to <16 x i1> %r150 = fptoui <16 x float> undef to <16 x i1> - ; CHECK: Found an estimated cost of 128 for instruction: %r151 = fptosi <16 x float> undef to <16 x i1> + ; CHECK: Found an estimated cost of 131 for instruction: %r151 = fptosi <16 x float> undef to <16 x i1> %r151 = fptosi <16 x float> undef to <16 x i1> - ; CHECK: Found an estimated cost of 128 for instruction: %r152 = fptoui <16 x float> undef to <16 x i8> + ; CHECK: Found an estimated cost of 15 for instruction: %r152 = fptoui <16 x float> undef to <16 x i8> %r152 = fptoui <16 x float> undef to <16 x i8> - ; CHECK: Found an estimated cost of 128 for instruction: %r153 = fptosi <16 x float> undef to <16 x i8> + ; CHECK: Found an estimated cost of 15 for instruction: %r153 = fptosi <16 x float> undef to <16 x i8> %r153 = fptosi <16 x 
float> undef to <16 x i8> ; CHECK: Found an estimated cost of 8 for instruction: %r154 = fptoui <16 x float> undef to <16 x i16> %r154 = fptoui <16 x float> undef to <16 x i16> @@ -348,30 +348,30 @@ define i32 @casts() { %r156 = fptoui <16 x float> undef to <16 x i32> ; CHECK: Found an estimated cost of 4 for instruction: %r157 = fptosi <16 x float> undef to <16 x i32> %r157 = fptosi <16 x float> undef to <16 x i32> - ; CHECK: Found an estimated cost of 256 for instruction: %r158 = fptoui <16 x float> undef to <16 x i64> + ; CHECK: Found an estimated cost of 263 for instruction: %r158 = fptoui <16 x float> undef to <16 x i64> %r158 = fptoui <16 x float> undef to <16 x i64> - ; CHECK: Found an estimated cost of 256 for instruction: %r159 = fptosi <16 x float> undef to <16 x i64> + ; CHECK: Found an estimated cost of 263 for instruction: %r159 = fptosi <16 x float> undef to <16 x i64> %r159 = fptosi <16 x float> undef to <16 x i64> - ; CHECK: Found an estimated cost of 128 for instruction: %r160 = fptoui <16 x double> undef to <16 x i1> + ; CHECK: Found an estimated cost of 135 for instruction: %r160 = fptoui <16 x double> undef to <16 x i1> %r160 = fptoui <16 x double> undef to <16 x i1> - ; CHECK: Found an estimated cost of 128 for instruction: %r161 = fptosi <16 x double> undef to <16 x i1> + ; CHECK: Found an estimated cost of 135 for instruction: %r161 = fptosi <16 x double> undef to <16 x i1> %r161 = fptosi <16 x double> undef to <16 x i1> - ; CHECK: Found an estimated cost of 128 for instruction: %r162 = fptoui <16 x double> undef to <16 x i8> + ; CHECK: Found an estimated cost of 135 for instruction: %r162 = fptoui <16 x double> undef to <16 x i8> %r162 = fptoui <16 x double> undef to <16 x i8> - ; CHECK: Found an estimated cost of 128 for instruction: %r163 = fptosi <16 x double> undef to <16 x i8> + ; CHECK: Found an estimated cost of 135 for instruction: %r163 = fptosi <16 x double> undef to <16 x i8> %r163 = fptosi <16 x double> undef to <16 x i8> - ; 
CHECK: Found an estimated cost of 128 for instruction: %r164 = fptoui <16 x double> undef to <16 x i16> + ; CHECK: Found an estimated cost of 135 for instruction: %r164 = fptoui <16 x double> undef to <16 x i16> %r164 = fptoui <16 x double> undef to <16 x i16> - ; CHECK: Found an estimated cost of 128 for instruction: %r165 = fptosi <16 x double> undef to <16 x i16> + ; CHECK: Found an estimated cost of 135 for instruction: %r165 = fptosi <16 x double> undef to <16 x i16> %r165 = fptosi <16 x double> undef to <16 x i16> - ; CHECK: Found an estimated cost of 128 for instruction: %r166 = fptoui <16 x double> undef to <16 x i32> + ; CHECK: Found an estimated cost of 23 for instruction: %r166 = fptoui <16 x double> undef to <16 x i32> %r166 = fptoui <16 x double> undef to <16 x i32> - ; CHECK: Found an estimated cost of 128 for instruction: %r167 = fptosi <16 x double> undef to <16 x i32> + ; CHECK: Found an estimated cost of 23 for instruction: %r167 = fptosi <16 x double> undef to <16 x i32> %r167 = fptosi <16 x double> undef to <16 x i32> - ; CHECK: Found an estimated cost of 256 for instruction: %r168 = fptoui <16 x double> undef to <16 x i64> + ; CHECK: Found an estimated cost of 263 for instruction: %r168 = fptoui <16 x double> undef to <16 x i64> %r168 = fptoui <16 x double> undef to <16 x i64> - ; CHECK: Found an estimated cost of 256 for instruction: %r169 = fptosi <16 x double> undef to <16 x i64> + ; CHECK: Found an estimated cost of 263 for instruction: %r169 = fptosi <16 x double> undef to <16 x i64> %r169 = fptosi <16 x double> undef to <16 x i64> ; CHECK: Found an estimated cost of 12 for instruction: %r170 = uitofp <2 x i1> undef to <2 x float> @@ -432,39 +432,39 @@ define i32 @casts() { %r196 = uitofp <4 x i32> undef to <4 x float> ; CHECK: Found an estimated cost of 1 for instruction: %r197 = sitofp <4 x i32> undef to <4 x float> %r197 = sitofp <4 x i32> undef to <4 x float> - ; CHECK: Found an estimated cost of 56 for instruction: %r198 = uitofp <4 x 
i64> undef to <4 x float> + ; CHECK: Found an estimated cost of 57 for instruction: %r198 = uitofp <4 x i64> undef to <4 x float> %r198 = uitofp <4 x i64> undef to <4 x float> - ; CHECK: Found an estimated cost of 56 for instruction: %r199 = sitofp <4 x i64> undef to <4 x float> + ; CHECK: Found an estimated cost of 57 for instruction: %r199 = sitofp <4 x i64> undef to <4 x float> %r199 = sitofp <4 x i64> undef to <4 x float> - ; CHECK: Found an estimated cost of 16 for instruction: %r200 = uitofp <4 x i1> undef to <4 x double> + ; CHECK: Found an estimated cost of 17 for instruction: %r200 = uitofp <4 x i1> undef to <4 x double> %r200 = uitofp <4 x i1> undef to <4 x double> - ; CHECK: Found an estimated cost of 16 for instruction: %r201 = sitofp <4 x i1> undef to <4 x double> + ; CHECK: Found an estimated cost of 17 for instruction: %r201 = sitofp <4 x i1> undef to <4 x double> %r201 = sitofp <4 x i1> undef to <4 x double> - ; CHECK: Found an estimated cost of 16 for instruction: %r202 = uitofp <4 x i8> undef to <4 x double> + ; CHECK: Found an estimated cost of 9 for instruction: %r202 = uitofp <4 x i8> undef to <4 x double> %r202 = uitofp <4 x i8> undef to <4 x double> - ; CHECK: Found an estimated cost of 16 for instruction: %r203 = sitofp <4 x i8> undef to <4 x double> + ; CHECK: Found an estimated cost of 9 for instruction: %r203 = sitofp <4 x i8> undef to <4 x double> %r203 = sitofp <4 x i8> undef to <4 x double> - ; CHECK: Found an estimated cost of 16 for instruction: %r204 = uitofp <4 x i16> undef to <4 x double> + ; CHECK: Found an estimated cost of 7 for instruction: %r204 = uitofp <4 x i16> undef to <4 x double> %r204 = uitofp <4 x i16> undef to <4 x double> - ; CHECK: Found an estimated cost of 16 for instruction: %r205 = sitofp <4 x i16> undef to <4 x double> + ; CHECK: Found an estimated cost of 7 for instruction: %r205 = sitofp <4 x i16> undef to <4 x double> %r205 = sitofp <4 x i16> undef to <4 x double> - ; CHECK: Found an estimated cost of 16 
for instruction: %r206 = uitofp <4 x i32> undef to <4 x double> + ; CHECK: Found an estimated cost of 5 for instruction: %r206 = uitofp <4 x i32> undef to <4 x double> %r206 = uitofp <4 x i32> undef to <4 x double> - ; CHECK: Found an estimated cost of 16 for instruction: %r207 = sitofp <4 x i32> undef to <4 x double> + ; CHECK: Found an estimated cost of 5 for instruction: %r207 = sitofp <4 x i32> undef to <4 x double> %r207 = sitofp <4 x i32> undef to <4 x double> - ; CHECK: Found an estimated cost of 48 for instruction: %r208 = uitofp <4 x i64> undef to <4 x double> + ; CHECK: Found an estimated cost of 49 for instruction: %r208 = uitofp <4 x i64> undef to <4 x double> %r208 = uitofp <4 x i64> undef to <4 x double> - ; CHECK: Found an estimated cost of 48 for instruction: %r209 = sitofp <4 x i64> undef to <4 x double> + ; CHECK: Found an estimated cost of 49 for instruction: %r209 = sitofp <4 x i64> undef to <4 x double> %r209 = sitofp <4 x i64> undef to <4 x double> - ; CHECK: Found an estimated cost of 48 for instruction: %r210 = uitofp <8 x i1> undef to <8 x float> + ; CHECK: Found an estimated cost of 7 for instruction: %r210 = uitofp <8 x i1> undef to <8 x float> %r210 = uitofp <8 x i1> undef to <8 x float> - ; CHECK: Found an estimated cost of 48 for instruction: %r211 = sitofp <8 x i1> undef to <8 x float> + ; CHECK: Found an estimated cost of 7 for instruction: %r211 = sitofp <8 x i1> undef to <8 x float> %r211 = sitofp <8 x i1> undef to <8 x float> - ; CHECK: Found an estimated cost of 48 for instruction: %r212 = uitofp <8 x i8> undef to <8 x float> + ; CHECK: Found an estimated cost of 7 for instruction: %r212 = uitofp <8 x i8> undef to <8 x float> %r212 = uitofp <8 x i8> undef to <8 x float> - ; CHECK: Found an estimated cost of 48 for instruction: %r213 = sitofp <8 x i8> undef to <8 x float> + ; CHECK: Found an estimated cost of 7 for instruction: %r213 = sitofp <8 x i8> undef to <8 x float> %r213 = sitofp <8 x i8> undef to <8 x float> ; CHECK: Found 
an estimated cost of 4 for instruction: %r214 = uitofp <8 x i16> undef to <8 x float> %r214 = uitofp <8 x i16> undef to <8 x float> @@ -474,39 +474,39 @@ define i32 @casts() { %r216 = uitofp <8 x i32> undef to <8 x float> ; CHECK: Found an estimated cost of 2 for instruction: %r217 = sitofp <8 x i32> undef to <8 x float> %r217 = sitofp <8 x i32> undef to <8 x float> - ; CHECK: Found an estimated cost of 112 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float> + ; CHECK: Found an estimated cost of 115 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float> %r218 = uitofp <8 x i64> undef to <8 x float> - ; CHECK: Found an estimated cost of 112 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float> + ; CHECK: Found an estimated cost of 115 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float> %r219 = sitofp <8 x i64> undef to <8 x float> - ; CHECK: Found an estimated cost of 32 for instruction: %r220 = uitofp <8 x i1> undef to <8 x double> + ; CHECK: Found an estimated cost of 35 for instruction: %r220 = uitofp <8 x i1> undef to <8 x double> %r220 = uitofp <8 x i1> undef to <8 x double> - ; CHECK: Found an estimated cost of 32 for instruction: %r221 = sitofp <8 x i1> undef to <8 x double> + ; CHECK: Found an estimated cost of 35 for instruction: %r221 = sitofp <8 x i1> undef to <8 x double> %r221 = sitofp <8 x i1> undef to <8 x double> - ; CHECK: Found an estimated cost of 32 for instruction: %r222 = uitofp <8 x i8> undef to <8 x double> + ; CHECK: Found an estimated cost of 19 for instruction: %r222 = uitofp <8 x i8> undef to <8 x double> %r222 = uitofp <8 x i8> undef to <8 x double> - ; CHECK: Found an estimated cost of 32 for instruction: %r223 = sitofp <8 x i8> undef to <8 x double> + ; CHECK: Found an estimated cost of 19 for instruction: %r223 = sitofp <8 x i8> undef to <8 x double> %r223 = sitofp <8 x i8> undef to <8 x double> - ; CHECK: Found an estimated cost of 32 for instruction: %r224 = uitofp <8 x i16> undef to <8 x 
double> + ; CHECK: Found an estimated cost of 15 for instruction: %r224 = uitofp <8 x i16> undef to <8 x double> %r224 = uitofp <8 x i16> undef to <8 x double> - ; CHECK: Found an estimated cost of 32 for instruction: %r225 = sitofp <8 x i16> undef to <8 x double> + ; CHECK: Found an estimated cost of 15 for instruction: %r225 = sitofp <8 x i16> undef to <8 x double> %r225 = sitofp <8 x i16> undef to <8 x double> - ; CHECK: Found an estimated cost of 32 for instruction: %r226 = uitofp <8 x i16> undef to <8 x double> + ; CHECK: Found an estimated cost of 15 for instruction: %r226 = uitofp <8 x i16> undef to <8 x double> %r226 = uitofp <8 x i16> undef to <8 x double> - ; CHECK: Found an estimated cost of 32 for instruction: %r227 = sitofp <8 x i16> undef to <8 x double> + ; CHECK: Found an estimated cost of 15 for instruction: %r227 = sitofp <8 x i16> undef to <8 x double> %r227 = sitofp <8 x i16> undef to <8 x double> - ; CHECK: Found an estimated cost of 96 for instruction: %r228 = uitofp <8 x i64> undef to <8 x double> + ; CHECK: Found an estimated cost of 99 for instruction: %r228 = uitofp <8 x i64> undef to <8 x double> %r228 = uitofp <8 x i64> undef to <8 x double> - ; CHECK: Found an estimated cost of 96 for instruction: %r229 = sitofp <8 x i64> undef to <8 x double> + ; CHECK: Found an estimated cost of 99 for instruction: %r229 = sitofp <8 x i64> undef to <8 x double> %r229 = sitofp <8 x i64> undef to <8 x double> - ; CHECK: Found an estimated cost of 96 for instruction: %r230 = uitofp <16 x i1> undef to <16 x float> + ; CHECK: Found an estimated cost of 15 for instruction: %r230 = uitofp <16 x i1> undef to <16 x float> %r230 = uitofp <16 x i1> undef to <16 x float> - ; CHECK: Found an estimated cost of 96 for instruction: %r231 = sitofp <16 x i1> undef to <16 x float> + ; CHECK: Found an estimated cost of 15 for instruction: %r231 = sitofp <16 x i1> undef to <16 x float> %r231 = sitofp <16 x i1> undef to <16 x float> - ; CHECK: Found an estimated cost of 96 
for instruction: %r232 = uitofp <16 x i8> undef to <16 x float> + ; CHECK: Found an estimated cost of 15 for instruction: %r232 = uitofp <16 x i8> undef to <16 x float> %r232 = uitofp <16 x i8> undef to <16 x float> - ; CHECK: Found an estimated cost of 96 for instruction: %r233 = sitofp <16 x i8> undef to <16 x float> + ; CHECK: Found an estimated cost of 15 for instruction: %r233 = sitofp <16 x i8> undef to <16 x float> %r233 = sitofp <16 x i8> undef to <16 x float> ; CHECK: Found an estimated cost of 8 for instruction: %r234 = uitofp <16 x i16> undef to <16 x float> %r234 = uitofp <16 x i16> undef to <16 x float> @@ -516,30 +516,30 @@ define i32 @casts() { %r236 = uitofp <16 x i32> undef to <16 x float> ; CHECK: Found an estimated cost of 4 for instruction: %r237 = sitofp <16 x i32> undef to <16 x float> %r237 = sitofp <16 x i32> undef to <16 x float> - ; CHECK: Found an estimated cost of 224 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float> + ; CHECK: Found an estimated cost of 231 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float> %r238 = uitofp <16 x i64> undef to <16 x float> - ; CHECK: Found an estimated cost of 224 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float> + ; CHECK: Found an estimated cost of 231 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float> %r239 = sitofp <16 x i64> undef to <16 x float> - ; CHECK: Found an estimated cost of 64 for instruction: %r240 = uitofp <16 x i1> undef to <16 x double> + ; CHECK: Found an estimated cost of 71 for instruction: %r240 = uitofp <16 x i1> undef to <16 x double> %r240 = uitofp <16 x i1> undef to <16 x double> - ; CHECK: Found an estimated cost of 64 for instruction: %r241 = sitofp <16 x i1> undef to <16 x double> + ; CHECK: Found an estimated cost of 71 for instruction: %r241 = sitofp <16 x i1> undef to <16 x double> %r241 = sitofp <16 x i1> undef to <16 x double> - ; CHECK: Found an estimated cost of 64 for instruction: %r242 = uitofp <16 x i8> undef 
to <16 x double> + ; CHECK: Found an estimated cost of 39 for instruction: %r242 = uitofp <16 x i8> undef to <16 x double> %r242 = uitofp <16 x i8> undef to <16 x double> - ; CHECK: Found an estimated cost of 64 for instruction: %r243 = sitofp <16 x i8> undef to <16 x double> + ; CHECK: Found an estimated cost of 39 for instruction: %r243 = sitofp <16 x i8> undef to <16 x double> %r243 = sitofp <16 x i8> undef to <16 x double> - ; CHECK: Found an estimated cost of 64 for instruction: %r244 = uitofp <16 x i16> undef to <16 x double> + ; CHECK: Found an estimated cost of 31 for instruction: %r244 = uitofp <16 x i16> undef to <16 x double> %r244 = uitofp <16 x i16> undef to <16 x double> - ; CHECK: Found an estimated cost of 64 for instruction: %r245 = sitofp <16 x i16> undef to <16 x double> + ; CHECK: Found an estimated cost of 31 for instruction: %r245 = sitofp <16 x i16> undef to <16 x double> %r245 = sitofp <16 x i16> undef to <16 x double> - ; CHECK: Found an estimated cost of 64 for instruction: %r246 = uitofp <16 x i16> undef to <16 x double> + ; CHECK: Found an estimated cost of 31 for instruction: %r246 = uitofp <16 x i16> undef to <16 x double> %r246 = uitofp <16 x i16> undef to <16 x double> - ; CHECK: Found an estimated cost of 64 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> + ; CHECK: Found an estimated cost of 31 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> %r247 = sitofp <16 x i16> undef to <16 x double> - ; CHECK: Found an estimated cost of 192 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> + ; CHECK: Found an estimated cost of 199 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> %r248 = uitofp <16 x i64> undef to <16 x double> - ; CHECK: Found an estimated cost of 192 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> + ; CHECK: Found an estimated cost of 199 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> %r249 = sitofp <16 x i64> undef to 
<16 x double> ; CHECK: Found an estimated cost of 0 for instruction: ret i32 undef diff --git a/test/Analysis/CostModel/PowerPC/ext.ll b/test/Analysis/CostModel/PowerPC/ext.ll index 7d6a14e93cd..df9c53e6e57 100644 --- a/test/Analysis/CostModel/PowerPC/ext.ll +++ b/test/Analysis/CostModel/PowerPC/ext.ll @@ -13,7 +13,7 @@ define void @exts() { ; CHECK: cost of 1 {{.*}} sext %v3 = sext <4 x i16> undef to <4 x i32> - ; CHECK: cost of 112 {{.*}} sext + ; CHECK: cost of 3 {{.*}} sext %v4 = sext <8 x i16> undef to <8 x i32> ret void diff --git a/test/Analysis/CostModel/X86/sitofp.ll b/test/Analysis/CostModel/X86/sitofp.ll index 9f0c4065c17..d5fa0b96842 100644 --- a/test/Analysis/CostModel/X86/sitofp.ll +++ b/test/Analysis/CostModel/X86/sitofp.ll @@ -40,10 +40,10 @@ define <8 x double> @sitofpv8i8v8double(<8 x i8> %a) { ; SSE2: cost of 80 {{.*}} sitofp ; ; AVX1-LABEL: sitofpv8i8v8double - ; AVX1: cost of 20 {{.*}} sitofp + ; AVX1: cost of 7 {{.*}} sitofp ; ; AVX2-LABEL: sitofpv8i8v8double - ; AVX2: cost of 20 {{.*}} sitofp + ; AVX2: cost of 7 {{.*}} sitofp ; ; AVX512F-LABEL: sitofpv8i8v8double ; AVX512F: cost of 2 {{.*}} sitofp @@ -56,13 +56,13 @@ define <16 x double> @sitofpv16i8v16double(<16 x i8> %a) { ; SSE2: cost of 160 {{.*}} sitofp ; ; AVX1-LABEL: sitofpv16i8v16double - ; AVX1: cost of 40 {{.*}} sitofp + ; AVX1: cost of 15 {{.*}} sitofp ; ; AVX2-LABEL: sitofpv16i8v16double - ; AVX2: cost of 40 {{.*}} sitofp + ; AVX2: cost of 15 {{.*}} sitofp ; ; AVX512F-LABEL: sitofpv16i8v16double - ; AVX512F: cost of 44 {{.*}} sitofp + ; AVX512F: cost of 5 {{.*}} sitofp %1 = sitofp <16 x i8> %a to <16 x double> ret <16 x double> %1 } @@ -72,13 +72,13 @@ define <32 x double> @sitofpv32i8v32double(<32 x i8> %a) { ; SSE2: cost of 320 {{.*}} sitofp ; ; AVX1-LABEL: sitofpv32i8v32double - ; AVX1: cost of 80 {{.*}} sitofp + ; AVX1: cost of 31 {{.*}} sitofp ; ; AVX2-LABEL: sitofpv32i8v32double - ; AVX2: cost of 80 {{.*}} sitofp + ; AVX2: cost of 31 {{.*}} sitofp ; ; AVX512F-LABEL: 
sitofpv32i8v32double - ; AVX512F: cost of 88 {{.*}} sitofp + ; AVX512F: cost of 11 {{.*}} sitofp %1 = sitofp <32 x i8> %a to <32 x double> ret <32 x double> %1 } @@ -120,10 +120,10 @@ define <8 x double> @sitofpv8i16v8double(<8 x i16> %a) { ; SSE2: cost of 80 {{.*}} sitofp ; ; AVX1-LABEL: sitofpv8i16v8double - ; AVX1: cost of 20 {{.*}} sitofp + ; AVX1: cost of 7 {{.*}} sitofp ; ; AVX2-LABEL: sitofpv8i16v8double - ; AVX2: cost of 20 {{.*}} sitofp + ; AVX2: cost of 7 {{.*}} sitofp ; ; AVX512F-LABEL: sitofpv8i16v8double ; AVX512F: cost of 2 {{.*}} sitofp @@ -136,13 +136,13 @@ define <16 x double> @sitofpv16i16v16double(<16 x i16> %a) { ; SSE2: cost of 160 {{.*}} sitofp ; ; AVX1-LABEL: sitofpv16i16v16double - ; AVX1: cost of 40 {{.*}} sitofp + ; AVX1: cost of 15 {{.*}} sitofp ; ; AVX2-LABEL: sitofpv16i16v16double - ; AVX2: cost of 40 {{.*}} sitofp + ; AVX2: cost of 15 {{.*}} sitofp ; ; AVX512F-LABEL: sitofpv16i16v16double - ; AVX512F: cost of 44 {{.*}} sitofp + ; AVX512F: cost of 5 {{.*}} sitofp %1 = sitofp <16 x i16> %a to <16 x double> ret <16 x double> %1 } @@ -152,13 +152,13 @@ define <32 x double> @sitofpv32i16v32double(<32 x i16> %a) { ; SSE2: cost of 320 {{.*}} sitofp ; ; AVX1-LABEL: sitofpv32i16v32double - ; AVX1: cost of 80 {{.*}} sitofp + ; AVX1: cost of 31 {{.*}} sitofp ; ; AVX2-LABEL: sitofpv32i16v32double - ; AVX2: cost of 80 {{.*}} sitofp + ; AVX2: cost of 31 {{.*}} sitofp ; ; AVX512F-LABEL: sitofpv32i16v32double - ; AVX512F: cost of 88 {{.*}} sitofp + ; AVX512F: cost of 11 {{.*}} sitofp %1 = sitofp <32 x i16> %a to <32 x double> ret <32 x double> %1 } @@ -200,10 +200,10 @@ define <8 x double> @sitofpv8i32v8double(<8 x i32> %a) { ; SSE2: cost of 80 {{.*}} sitofp ; ; AVX1-LABEL: sitofpv8i32v8double - ; AVX1: cost of 20 {{.*}} sitofp + ; AVX1: cost of 3 {{.*}} sitofp ; ; AVX2-LABEL: sitofpv8i32v8double - ; AVX2: cost of 20 {{.*}} sitofp + ; AVX2: cost of 3 {{.*}} sitofp ; ; AVX512F-LABEL: sitofpv8i32v8double ; AVX512F: cost of 1 {{.*}} sitofp @@ -216,13 
+216,13 @@ define <16 x double> @sitofpv16i32v16double(<16 x i32> %a) { ; SSE2: cost of 160 {{.*}} sitofp ; ; AVX1-LABEL: sitofpv16i32v16double - ; AVX1: cost of 40 {{.*}} sitofp + ; AVX1: cost of 7 {{.*}} sitofp ; ; AVX2-LABEL: sitofpv16i32v16double - ; AVX2: cost of 40 {{.*}} sitofp + ; AVX2: cost of 7 {{.*}} sitofp ; ; AVX512F-LABEL: sitofpv16i32v16double - ; AVX512F: cost of 44 {{.*}} sitofp + ; AVX512F: cost of 3 {{.*}} sitofp %1 = sitofp <16 x i32> %a to <16 x double> ret <16 x double> %1 } @@ -232,13 +232,13 @@ define <32 x double> @sitofpv32i32v32double(<32 x i32> %a) { ; SSE2: cost of 320 {{.*}} sitofp ; ; AVX1-LABEL: sitofpv32i32v32double - ; AVX1: cost of 80 {{.*}} sitofp + ; AVX1: cost of 15 {{.*}} sitofp ; ; AVX2-LABEL: sitofpv32i32v32double - ; AVX2: cost of 80 {{.*}} sitofp + ; AVX2: cost of 15 {{.*}} sitofp ; ; AVX512F-LABEL: sitofpv32i32v32double - ; AVX512F: cost of 88 {{.*}} sitofp + ; AVX512F: cost of 7 {{.*}} sitofp %1 = sitofp <32 x i32> %a to <32 x double> ret <32 x double> %1 } @@ -280,10 +280,10 @@ define <8 x double> @sitofpv8i64v8double(<8 x i64> %a) { ; SSE2: cost of 80 {{.*}} sitofp ; ; AVX1-LABEL: sitofpv8i64v8double - ; AVX1: cost of 20 {{.*}} sitofp + ; AVX1: cost of 21 {{.*}} sitofp ; ; AVX2-LABEL: sitofpv8i64v8double - ; AVX2: cost of 20 {{.*}} sitofp + ; AVX2: cost of 21 {{.*}} sitofp ; ; AVX512F-LABEL: sitofpv8i64v8double ; AVX512F: cost of 22 {{.*}} sitofp @@ -296,13 +296,13 @@ define <16 x double> @sitofpv16i64v16double(<16 x i64> %a) { ; SSE2: cost of 160 {{.*}} sitofp ; ; AVX1-LABEL: sitofpv16i64v16double - ; AVX1: cost of 40 {{.*}} sitofp + ; AVX1: cost of 43 {{.*}} sitofp ; ; AVX2-LABEL: sitofpv16i64v16double - ; AVX2: cost of 40 {{.*}} sitofp + ; AVX2: cost of 43 {{.*}} sitofp ; ; AVX512F-LABEL: sitofpv16i64v16double - ; AVX512F: cost of 44 {{.*}} sitofp + ; AVX512F: cost of 45 {{.*}} sitofp %1 = sitofp <16 x i64> %a to <16 x double> ret <16 x double> %1 } @@ -312,13 +312,13 @@ define <32 x double> 
@sitofpv32i64v32double(<32 x i64> %a) { ; SSE2: cost of 320 {{.*}} sitofp ; ; AVX1-LABEL: sitofpv32i64v32double - ; AVX1: cost of 80 {{.*}} sitofp + ; AVX1: cost of 87 {{.*}} sitofp ; ; AVX2-LABEL: sitofpv32i64v32double - ; AVX2: cost of 80 {{.*}} sitofp + ; AVX2: cost of 87 {{.*}} sitofp ; ; AVX512F-LABEL: sitofpv32i64v32double - ; AVX512F: cost of 88 {{.*}} sitofp + ; AVX512F: cost of 91 {{.*}} sitofp %1 = sitofp <32 x i64> %a to <32 x double> ret <32 x double> %1 } @@ -376,10 +376,10 @@ define <16 x float> @sitofpv16i8v16float(<16 x i8> %a) { ; SSE2: cost of 8 {{.*}} sitofp ; ; AVX1-LABEL: sitofpv16i8v16float - ; AVX1: cost of 44 {{.*}} sitofp + ; AVX1: cost of 17 {{.*}} sitofp ; ; AVX2-LABEL: sitofpv16i8v16float - ; AVX2: cost of 44 {{.*}} sitofp + ; AVX2: cost of 17 {{.*}} sitofp ; ; AVX512F-LABEL: sitofpv16i8v16float ; AVX512F: cost of 2 {{.*}} sitofp @@ -392,13 +392,13 @@ define <32 x float> @sitofpv32i8v32float(<32 x i8> %a) { ; SSE2: cost of 16 {{.*}} sitofp ; ; AVX1-LABEL: sitofpv32i8v32float - ; AVX1: cost of 88 {{.*}} sitofp + ; AVX1: cost of 35 {{.*}} sitofp ; ; AVX2-LABEL: sitofpv32i8v32float - ; AVX2: cost of 88 {{.*}} sitofp + ; AVX2: cost of 35 {{.*}} sitofp ; ; AVX512F-LABEL: sitofpv32i8v32float - ; AVX512F: cost of 92 {{.*}} sitofp + ; AVX512F: cost of 5 {{.*}} sitofp %1 = sitofp <32 x i8> %a to <32 x float> ret <32 x float> %1 } @@ -456,10 +456,10 @@ define <16 x float> @sitofpv16i16v16float(<16 x i16> %a) { ; SSE2: cost of 30 {{.*}} sitofp ; ; AVX1-LABEL: sitofpv16i16v16float - ; AVX1: cost of 44 {{.*}} sitofp + ; AVX1: cost of 11 {{.*}} sitofp ; ; AVX2-LABEL: sitofpv16i16v16float - ; AVX2: cost of 44 {{.*}} sitofp + ; AVX2: cost of 11 {{.*}} sitofp ; ; AVX512F-LABEL: sitofpv16i16v16float ; AVX512F: cost of 2 {{.*}} sitofp @@ -472,13 +472,13 @@ define <32 x float> @sitofpv32i16v32float(<32 x i16> %a) { ; SSE2: cost of 60 {{.*}} sitofp ; ; AVX1-LABEL: sitofpv32i16v32float - ; AVX1: cost of 88 {{.*}} sitofp + ; AVX1: cost of 23 {{.*}} sitofp ; ; 
AVX2-LABEL: sitofpv32i16v32float - ; AVX2: cost of 88 {{.*}} sitofp + ; AVX2: cost of 23 {{.*}} sitofp ; ; AVX512F-LABEL: sitofpv32i16v32float - ; AVX512F: cost of 92 {{.*}} sitofp + ; AVX512F: cost of 5 {{.*}} sitofp %1 = sitofp <32 x i16> %a to <32 x float> ret <32 x float> %1 } @@ -536,10 +536,10 @@ define <16 x float> @sitofpv16i32v16float(<16 x i32> %a) { ; SSE2: cost of 60 {{.*}} sitofp ; ; AVX1-LABEL: sitofpv16i32v16float - ; AVX1: cost of 44 {{.*}} sitofp + ; AVX1: cost of 3 {{.*}} sitofp ; ; AVX2-LABEL: sitofpv16i32v16float - ; AVX2: cost of 44 {{.*}} sitofp + ; AVX2: cost of 3 {{.*}} sitofp ; ; AVX512F-LABEL: sitofpv16i32v16float ; AVX512F: cost of 1 {{.*}} sitofp @@ -552,13 +552,13 @@ define <32 x float> @sitofpv32i32v32float(<32 x i32> %a) { ; SSE2: cost of 120 {{.*}} sitofp ; ; AVX1-LABEL: sitofpv32i32v32float - ; AVX1: cost of 88 {{.*}} sitofp + ; AVX1: cost of 7 {{.*}} sitofp ; ; AVX2-LABEL: sitofpv32i32v32float - ; AVX2: cost of 88 {{.*}} sitofp + ; AVX2: cost of 7 {{.*}} sitofp ; ; AVX512F-LABEL: sitofpv32i32v32float - ; AVX512F: cost of 92 {{.*}} sitofp + ; AVX512F: cost of 3 {{.*}} sitofp %1 = sitofp <32 x i32> %a to <32 x float> ret <32 x float> %1 } @@ -600,10 +600,10 @@ define <8 x float> @sitofpv8i64v8float(<8 x i64> %a) { ; SSE2: cost of 60 {{.*}} sitofp ; ; AVX1-LABEL: sitofpv8i64v8float - ; AVX1: cost of 22 {{.*}} sitofp + ; AVX1: cost of 21 {{.*}} sitofp ; ; AVX2-LABEL: sitofpv8i64v8float - ; AVX2: cost of 22 {{.*}} sitofp + ; AVX2: cost of 21 {{.*}} sitofp ; ; AVX512F-LABEL: sitofpv8i64v8float ; AVX512F: cost of 22 {{.*}} sitofp @@ -616,13 +616,13 @@ define <16 x float> @sitofpv16i64v16float(<16 x i64> %a) { ; SSE2: cost of 120 {{.*}} sitofp ; ; AVX1-LABEL: sitofpv16i64v16float - ; AVX1: cost of 44 {{.*}} sitofp + ; AVX1: cost of 43 {{.*}} sitofp ; ; AVX2-LABEL: sitofpv16i64v16float - ; AVX2: cost of 44 {{.*}} sitofp + ; AVX2: cost of 43 {{.*}} sitofp ; ; AVX512F-LABEL: sitofpv16i64v16float - ; AVX512F: cost of 46 {{.*}} sitofp + ; 
AVX512F: cost of 45 {{.*}} sitofp %1 = sitofp <16 x i64> %a to <16 x float> ret <16 x float> %1 } @@ -632,13 +632,13 @@ define <32 x float> @sitofpv32i64v32float(<32 x i64> %a) { ; SSE2: cost of 240 {{.*}} sitofp ; ; AVX1-LABEL: sitofpv32i64v32float - ; AVX1: cost of 88 {{.*}} sitofp + ; AVX1: cost of 87 {{.*}} sitofp ; ; AVX2-LABEL: sitofpv32i64v32float - ; AVX2: cost of 88 {{.*}} sitofp + ; AVX2: cost of 87 {{.*}} sitofp ; ; AVX512F-LABEL: sitofpv32i64v32float - ; AVX512F: cost of 92 {{.*}} sitofp + ; AVX512F: cost of 91 {{.*}} sitofp %1 = sitofp <32 x i64> %a to <32 x float> ret <32 x float> %1 } @@ -648,10 +648,10 @@ define <8 x double> @sitofpv8i1v8double(<8 x double> %a) { ; SSE2: cost of 80 {{.*}} sitofp ; ; AVX1-LABEL: sitofpv8i1v8double - ; AVX1: cost of 20 {{.*}} sitofp + ; AVX1: cost of 7 {{.*}} sitofp ; ; AVX2-LABEL: sitofpv8i1v8double - ; AVX2: cost of 20 {{.*}} sitofp + ; AVX2: cost of 7 {{.*}} sitofp ; ; AVX512F-LABEL: sitofpv8i1v8double ; AVX512F: cost of 4 {{.*}} sitofp @@ -665,10 +665,10 @@ define <16 x float> @sitofpv16i1v16float(<16 x float> %a) { ; SSE2: cost of 8 {{.*}} sitofp ; ; AVX1-LABEL: sitofpv16i1v16float - ; AVX1: cost of 44 {{.*}} sitofp + ; AVX1: cost of 17 {{.*}} sitofp ; ; AVX2-LABEL: sitofpv16i1v16float - ; AVX2: cost of 44 {{.*}} sitofp + ; AVX2: cost of 17 {{.*}} sitofp ; ; AVX512F-LABEL: sitofpv16i1v16float ; AVX512F: cost of 3 {{.*}} sitofp diff --git a/test/Analysis/CostModel/X86/uitofp.ll b/test/Analysis/CostModel/X86/uitofp.ll index 08e36650bec..35f5d15ccab 100644 --- a/test/Analysis/CostModel/X86/uitofp.ll +++ b/test/Analysis/CostModel/X86/uitofp.ll @@ -41,10 +41,10 @@ define <8 x double> @uitofpv8i8v8double(<8 x i8> %a) { ; SSE2: cost of 80 {{.*}} uitofp ; ; AVX1-LABEL: uitofpv8i8v8double - ; AVX1: cost of 20 {{.*}} uitofp + ; AVX1: cost of 5 {{.*}} uitofp ; ; AVX2-LABEL: uitofpv8i8v8double - ; AVX2: cost of 20 {{.*}} uitofp + ; AVX2: cost of 5 {{.*}} uitofp ; ; AVX512F-LABEL: uitofpv8i8v8double ; AVX512F: cost of 2 
{{.*}} uitofp @@ -57,13 +57,13 @@ define <16 x double> @uitofpv16i8v16double(<16 x i8> %a) { ; SSE2: cost of 160 {{.*}} uitofp ; ; AVX1-LABEL: uitofpv16i8v16double - ; AVX1: cost of 40 {{.*}} uitofp + ; AVX1: cost of 11 {{.*}} uitofp ; ; AVX2-LABEL: uitofpv16i8v16double - ; AVX2: cost of 40 {{.*}} uitofp + ; AVX2: cost of 11 {{.*}} uitofp ; ; AVX512F-LABEL: uitofpv16i8v16double - ; AVX512F: cost of 44 {{.*}} uitofp + ; AVX512F: cost of 5 {{.*}} uitofp %1 = uitofp <16 x i8> %a to <16 x double> ret <16 x double> %1 } @@ -73,13 +73,13 @@ define <32 x double> @uitofpv32i8v32double(<32 x i8> %a) { ; SSE2: cost of 320 {{.*}} uitofp ; ; AVX1-LABEL: uitofpv32i8v32double - ; AVX1: cost of 80 {{.*}} uitofp + ; AVX1: cost of 23 {{.*}} uitofp ; ; AVX2-LABEL: uitofpv32i8v32double - ; AVX2: cost of 80 {{.*}} uitofp + ; AVX2: cost of 23 {{.*}} uitofp ; ; AVX512F-LABEL: uitofpv32i8v32double - ; AVX512F: cost of 88 {{.*}} uitofp + ; AVX512F: cost of 11 {{.*}} uitofp %1 = uitofp <32 x i8> %a to <32 x double> ret <32 x double> %1 } @@ -121,10 +121,10 @@ define <8 x double> @uitofpv8i16v8double(<8 x i16> %a) { ; SSE2: cost of 80 {{.*}} uitofp ; ; AVX1-LABEL: uitofpv8i16v8double - ; AVX1: cost of 20 {{.*}} uitofp + ; AVX1: cost of 5 {{.*}} uitofp ; ; AVX2-LABEL: uitofpv8i16v8double - ; AVX2: cost of 20 {{.*}} uitofp + ; AVX2: cost of 5 {{.*}} uitofp ; ; AVX512F-LABEL: uitofpv8i16v8double ; AVX512F: cost of 2 {{.*}} uitofp @@ -137,13 +137,13 @@ define <16 x double> @uitofpv16i16v16double(<16 x i16> %a) { ; SSE2: cost of 160 {{.*}} uitofp ; ; AVX1-LABEL: uitofpv16i16v16double - ; AVX1: cost of 40 {{.*}} uitofp + ; AVX1: cost of 11 {{.*}} uitofp ; ; AVX2-LABEL: uitofpv16i16v16double - ; AVX2: cost of 40 {{.*}} uitofp + ; AVX2: cost of 11 {{.*}} uitofp ; ; AVX512F-LABEL: uitofpv16i16v16double - ; AVX512F: cost of 44 {{.*}} uitofp + ; AVX512F: cost of 5 {{.*}} uitofp %1 = uitofp <16 x i16> %a to <16 x double> ret <16 x double> %1 } @@ -153,13 +153,13 @@ define <32 x double> 
@uitofpv32i16v32double(<32 x i16> %a) { ; SSE2: cost of 320 {{.*}} uitofp ; ; AVX1-LABEL: uitofpv32i16v32double - ; AVX1: cost of 80 {{.*}} uitofp + ; AVX1: cost of 23 {{.*}} uitofp ; ; AVX2-LABEL: uitofpv32i16v32double - ; AVX2: cost of 80 {{.*}} uitofp + ; AVX2: cost of 23 {{.*}} uitofp ; ; AVX512F-LABEL: uitofpv32i16v32double - ; AVX512F: cost of 88 {{.*}} uitofp + ; AVX512F: cost of 11 {{.*}} uitofp %1 = uitofp <32 x i16> %a to <32 x double> ret <32 x double> %1 } @@ -201,10 +201,10 @@ define <8 x double> @uitofpv8i32v8double(<8 x i32> %a) { ; SSE2: cost of 80 {{.*}} uitofp ; ; AVX1-LABEL: uitofpv8i32v8double - ; AVX1: cost of 20 {{.*}} uitofp + ; AVX1: cost of 13 {{.*}} uitofp ; ; AVX2-LABEL: uitofpv8i32v8double - ; AVX2: cost of 20 {{.*}} uitofp + ; AVX2: cost of 13 {{.*}} uitofp ; ; AVX512F-LABEL: uitofpv8i32v8double ; AVX512F: cost of 1 {{.*}} uitofp @@ -217,13 +217,13 @@ define <16 x double> @uitofpv16i32v16double(<16 x i32> %a) { ; SSE2: cost of 160 {{.*}} uitofp ; ; AVX1-LABEL: uitofpv16i32v16double - ; AVX1: cost of 40 {{.*}} uitofp + ; AVX1: cost of 27 {{.*}} uitofp ; ; AVX2-LABEL: uitofpv16i32v16double - ; AVX2: cost of 40 {{.*}} uitofp + ; AVX2: cost of 27 {{.*}} uitofp ; ; AVX512F-LABEL: uitofpv16i32v16double - ; AVX512F: cost of 44 {{.*}} uitofp + ; AVX512F: cost of 3 {{.*}} uitofp %1 = uitofp <16 x i32> %a to <16 x double> ret <16 x double> %1 } @@ -233,13 +233,13 @@ define <32 x double> @uitofpv32i32v32double(<32 x i32> %a) { ; SSE2: cost of 320 {{.*}} uitofp ; ; AVX1-LABEL: uitofpv32i32v32double - ; AVX1: cost of 80 {{.*}} uitofp + ; AVX1: cost of 55 {{.*}} uitofp ; ; AVX2-LABEL: uitofpv32i32v32double - ; AVX2: cost of 80 {{.*}} uitofp + ; AVX2: cost of 55 {{.*}} uitofp ; ; AVX512F-LABEL: uitofpv32i32v32double - ; AVX512F: cost of 88 {{.*}} uitofp + ; AVX512F: cost of 7 {{.*}} uitofp %1 = uitofp <32 x i32> %a to <32 x double> ret <32 x double> %1 } @@ -257,7 +257,7 @@ define <2 x double> @uitofpv2i64v2double(<2 x i64> %a) { ; AVX512F-LABEL: 
uitofpv2i64v2double ; AVX512F: cost of 5 {{.*}} uitofp ; - ; AVX512DQ: uitofpv2i64v2double + ; AVX512DQ-LABEL: uitofpv2i64v2double ; AVX512DQ: cost of 1 {{.*}} uitofp %1 = uitofp <2 x i64> %a to <2 x double> ret <2 x double> %1 @@ -276,7 +276,7 @@ define <4 x double> @uitofpv4i64v4double(<4 x i64> %a) { ; AVX512F-LABEL: uitofpv4i64v4double ; AVX512F: cost of 12 {{.*}} uitofp ; - ; AVX512DQ: uitofpv4i64v4double + ; AVX512DQ-LABEL: uitofpv4i64v4double ; AVX512DQ: cost of 1 {{.*}} uitofp %1 = uitofp <4 x i64> %a to <4 x double> ret <4 x double> %1 @@ -287,15 +287,15 @@ define <8 x double> @uitofpv8i64v8double(<8 x i64> %a) { ; SSE2: cost of 80 {{.*}} uitofp ; ; AVX1-LABEL: uitofpv8i64v8double - ; AVX1: cost of 20 {{.*}} uitofp + ; AVX1: cost of 81 {{.*}} uitofp ; ; AVX2-LABEL: uitofpv8i64v8double - ; AVX2: cost of 20 {{.*}} uitofp + ; AVX2: cost of 81 {{.*}} uitofp ; ; AVX512F-LABEL: uitofpv8i64v8double ; AVX512F: cost of 26 {{.*}} uitofp ; - ; AVX512DQ: uitofpv8i64v8double + ; AVX512DQ-LABEL: uitofpv8i64v8double ; AVX512DQ: cost of 1 {{.*}} uitofp %1 = uitofp <8 x i64> %a to <8 x double> ret <8 x double> %1 @@ -306,16 +306,16 @@ define <16 x double> @uitofpv16i64v16double(<16 x i64> %a) { ; SSE2: cost of 160 {{.*}} uitofp ; ; AVX1-LABEL: uitofpv16i64v16double - ; AVX1: cost of 40 {{.*}} uitofp + ; AVX1: cost of 163 {{.*}} uitofp ; ; AVX2-LABEL: uitofpv16i64v16double - ; AVX2: cost of 40 {{.*}} uitofp + ; AVX2: cost of 163 {{.*}} uitofp ; ; AVX512F-LABEL: uitofpv16i64v16double - ; AVX512F: cost of 44 {{.*}} uitofp + ; AVX512F: cost of 53 {{.*}} uitofp ; - ; AVX512DQ: uitofpv16i64v16double - ; AVX512DQ: cost of 44 {{.*}} uitofp + ; AVX512DQ-LABEL: uitofpv16i64v16double + ; AVX512DQ: cost of 3 {{.*}} uitofp %1 = uitofp <16 x i64> %a to <16 x double> ret <16 x double> %1 } @@ -325,16 +325,16 @@ define <32 x double> @uitofpv32i64v32double(<32 x i64> %a) { ; SSE2: cost of 320 {{.*}} uitofp ; ; AVX1-LABEL: uitofpv32i64v32double - ; AVX1: cost of 80 {{.*}} uitofp + ; AVX1: 
cost of 327 {{.*}} uitofp ; ; AVX2-LABEL: uitofpv32i64v32double - ; AVX2: cost of 80 {{.*}} uitofp + ; AVX2: cost of 327 {{.*}} uitofp ; ; AVX512F-LABEL: uitofpv32i64v32double - ; AVX512F: cost of 88 {{.*}} uitofp + ; AVX512F: cost of 107 {{.*}} uitofp ; - ; AVX512DQ: uitofpv32i64v32double - ; AVX512DQ: cost of 88 {{.*}} uitofp + ; AVX512DQ-LABEL: uitofpv32i64v32double + ; AVX512DQ: cost of 7 {{.*}} uitofp %1 = uitofp <32 x i64> %a to <32 x double> ret <32 x double> %1 } @@ -392,10 +392,10 @@ define <16 x float> @uitofpv16i8v16float(<16 x i8> %a) { ; SSE2: cost of 8 {{.*}} uitofp ; ; AVX1-LABEL: uitofpv16i8v16float - ; AVX1: cost of 44 {{.*}} uitofp + ; AVX1: cost of 11 {{.*}} uitofp ; ; AVX2-LABEL: uitofpv16i8v16float - ; AVX2: cost of 44 {{.*}} uitofp + ; AVX2: cost of 11 {{.*}} uitofp ; ; AVX512F-LABEL: uitofpv16i8v16float ; AVX512F: cost of 2 {{.*}} uitofp @@ -408,13 +408,13 @@ define <32 x float> @uitofpv32i8v32float(<32 x i8> %a) { ; SSE2: cost of 16 {{.*}} uitofp ; ; AVX1-LABEL: uitofpv32i8v32float - ; AVX1: cost of 88 {{.*}} uitofp + ; AVX1: cost of 23 {{.*}} uitofp ; ; AVX2-LABEL: uitofpv32i8v32float - ; AVX2: cost of 88 {{.*}} uitofp + ; AVX2: cost of 23 {{.*}} uitofp ; ; AVX512F-LABEL: uitofpv32i8v32float - ; AVX512F: cost of 92 {{.*}} uitofp + ; AVX512F: cost of 5 {{.*}} uitofp %1 = uitofp <32 x i8> %a to <32 x float> ret <32 x float> %1 } @@ -472,10 +472,10 @@ define <16 x float> @uitofpv16i16v16float(<16 x i16> %a) { ; SSE2: cost of 30 {{.*}} uitofp ; ; AVX1-LABEL: uitofpv16i16v16float - ; AVX1: cost of 44 {{.*}} uitofp + ; AVX1: cost of 11 {{.*}} uitofp ; ; AVX2-LABEL: uitofpv16i16v16float - ; AVX2: cost of 44 {{.*}} uitofp + ; AVX2: cost of 11 {{.*}} uitofp ; ; AVX512F-LABEL: uitofpv16i16v16float ; AVX512F: cost of 2 {{.*}} uitofp @@ -488,13 +488,13 @@ define <32 x float> @uitofpv32i16v32float(<32 x i16> %a) { ; SSE2: cost of 60 {{.*}} uitofp ; ; AVX1-LABEL: uitofpv32i16v32float - ; AVX1: cost of 88 {{.*}} uitofp + ; AVX1: cost of 23 {{.*}} uitofp ;
; AVX2-LABEL: uitofpv32i16v32float - ; AVX2: cost of 88 {{.*}} uitofp + ; AVX2: cost of 23 {{.*}} uitofp ; ; AVX512F-LABEL: uitofpv32i16v32float - ; AVX512F: cost of 92 {{.*}} uitofp + ; AVX512F: cost of 5 {{.*}} uitofp %1 = uitofp <32 x i16> %a to <32 x float> ret <32 x float> %1 } @@ -552,10 +552,10 @@ define <16 x float> @uitofpv16i32v16float(<16 x i32> %a) { ; SSE2: cost of 32 {{.*}} uitofp ; ; AVX1-LABEL: uitofpv16i32v16float - ; AVX1: cost of 44 {{.*}} uitofp + ; AVX1: cost of 19 {{.*}} uitofp ; ; AVX2-LABEL: uitofpv16i32v16float - ; AVX2: cost of 44 {{.*}} uitofp + ; AVX2: cost of 17 {{.*}} uitofp ; ; AVX512F-LABEL: uitofpv16i32v16float ; AVX512F: cost of 1 {{.*}} uitofp @@ -568,13 +568,13 @@ define <32 x float> @uitofpv32i32v32float(<32 x i32> %a) { ; SSE2: cost of 64 {{.*}} uitofp ; ; AVX1-LABEL: uitofpv32i32v32float - ; AVX1: cost of 88 {{.*}} uitofp + ; AVX1: cost of 39 {{.*}} uitofp ; ; AVX2-LABEL: uitofpv32i32v32float - ; AVX2: cost of 88 {{.*}} uitofp + ; AVX2: cost of 35 {{.*}} uitofp ; ; AVX512F-LABEL: uitofpv32i32v32float - ; AVX512F: cost of 92 {{.*}} uitofp + ; AVX512F: cost of 3 {{.*}} uitofp %1 = uitofp <32 x i32> %a to <32 x float> ret <32 x float> %1 } @@ -616,10 +616,10 @@ define <8 x float> @uitofpv8i64v8float(<8 x i64> %a) { ; SSE2: cost of 60 {{.*}} uitofp ; ; AVX1-LABEL: uitofpv8i64v8float - ; AVX1: cost of 22 {{.*}} uitofp + ; AVX1: cost of 21 {{.*}} uitofp ; ; AVX2-LABEL: uitofpv8i64v8float - ; AVX2: cost of 22 {{.*}} uitofp + ; AVX2: cost of 21 {{.*}} uitofp ; ; AVX512F-LABEL: uitofpv8i64v8float ; AVX512F: cost of 22 {{.*}} uitofp @@ -632,13 +632,13 @@ define <16 x float> @uitofpv16i64v16float(<16 x i64> %a) { ; SSE2: cost of 120 {{.*}} uitofp ; ; AVX1-LABEL: uitofpv16i64v16float - ; AVX1: cost of 44 {{.*}} uitofp + ; AVX1: cost of 43 {{.*}} uitofp ; ; AVX2-LABEL: uitofpv16i64v16float - ; AVX2: cost of 44 {{.*}} uitofp + ; AVX2: cost of 43 {{.*}} uitofp ; ; AVX512F-LABEL: uitofpv16i64v16float - ; AVX512F: cost of 46 {{.*}} uitofp + ; 
AVX512F: cost of 45 {{.*}} uitofp %1 = uitofp <16 x i64> %a to <16 x float> ret <16 x float> %1 } @@ -648,13 +648,13 @@ define <32 x float> @uitofpv32i64v32float(<32 x i64> %a) { ; SSE2: cost of 240 {{.*}} uitofp ; ; AVX1-LABEL: uitofpv32i64v32float - ; AVX1: cost of 88 {{.*}} uitofp + ; AVX1: cost of 87 {{.*}} uitofp ; ; AVX2-LABEL: uitofpv32i64v32float - ; AVX2: cost of 88 {{.*}} uitofp + ; AVX2: cost of 87 {{.*}} uitofp ; ; AVX512F-LABEL: uitofpv32i64v32float - ; AVX512F: cost of 92 {{.*}} uitofp + ; AVX512F: cost of 91 {{.*}} uitofp %1 = uitofp <32 x i64> %a to <32 x float> ret <32 x float> %1 } diff --git a/test/Transforms/LoopVectorize/X86/gather_scatter.ll b/test/Transforms/LoopVectorize/X86/gather_scatter.ll index 23e363eae02..ec7ad1183ee 100644 --- a/test/Transforms/LoopVectorize/X86/gather_scatter.ll +++ b/test/Transforms/LoopVectorize/X86/gather_scatter.ll @@ -17,9 +17,9 @@ target triple = "x86_64-pc_linux" ;} ;AVX512-LABEL: @foo1 -;AVX512: llvm.masked.load.v8i32 -;AVX512: llvm.masked.gather.v8f32 -;AVX512: llvm.masked.store.v8f32 +;AVX512: llvm.masked.load.v16i32 +;AVX512: llvm.masked.gather.v16f32 +;AVX512: llvm.masked.store.v16f32 ;AVX512: ret void ; Function Attrs: nounwind uwtable