From 8d04a8701ddaff1f4e41875ee0c6febf56b7c0c7 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Thu, 23 Feb 2017 09:59:29 +0000 Subject: [PATCH] Revert "[SLP] Fix for PR32036: Vectorized horizontal reduction returning wrong" This reverts commit d83c81ee6a8dea662808ac22b396d1bb0595c89d. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@295951 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/SLPVectorizer.cpp | 32 ++++++++----------- .../SLPVectorizer/X86/horizontal-list.ll | 10 +++--- 2 files changed, 17 insertions(+), 25 deletions(-) diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index 20cc6384f07..cbb0e7a0c31 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -304,7 +304,6 @@ public: typedef SmallVector InstrList; typedef SmallPtrSet ValueSet; typedef SmallVector StoreList; - typedef MapVector> ExtraValueToDebugLocsMap; BoUpSLP(Function *Func, ScalarEvolution *Se, TargetTransformInfo *Tti, TargetLibraryInfo *TLi, AliasAnalysis *Aa, LoopInfo *Li, @@ -334,7 +333,7 @@ public: /// Vectorize the tree but with the list of externally used values \p /// ExternallyUsedValues. Values in this MapVector can be replaced but the /// generated extractvalue instructions. - Value *vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues); + Value *vectorizeTree(MapVector &ExternallyUsedValues); /// \returns the cost incurred by unwanted spills and fills, caused by /// holding live values over call sites. @@ -353,7 +352,7 @@ public: /// into account (anf updating it, if required) list of externally used /// values stored in \p ExternallyUsedValues. void buildTree(ArrayRef Roots, - ExtraValueToDebugLocsMap &ExternallyUsedValues, + MapVector &ExternallyUsedValues, ArrayRef UserIgnoreLst = None); /// Clear the internal data structures that are created by 'buildTree'. @@ -954,11 +953,11 @@ private: void BoUpSLP::buildTree(ArrayRef Roots, ArrayRef UserIgnoreLst) { - ExtraValueToDebugLocsMap ExternallyUsedValues; + MapVector ExternallyUsedValues; buildTree(Roots, ExternallyUsedValues, UserIgnoreLst); } void BoUpSLP::buildTree(ArrayRef Roots, - ExtraValueToDebugLocsMap &ExternallyUsedValues, + MapVector &ExternallyUsedValues, ArrayRef UserIgnoreLst) { deleteTree(); UserIgnoreList = UserIgnoreLst; @@ -2802,12 +2801,12 @@ Value *BoUpSLP::vectorizeTree(ArrayRef VL, TreeEntry *E) { } Value *BoUpSLP::vectorizeTree() { - ExtraValueToDebugLocsMap ExternallyUsedValues; + MapVector ExternallyUsedValues; return vectorizeTree(ExternallyUsedValues); } Value * -BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) { +BoUpSLP::vectorizeTree(MapVector &ExternallyUsedValues) { // All blocks must be scheduled before any instructions are inserted. for (auto &BSIter : BlocksSchedules) { @@ -2869,6 +2868,7 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) { assert(ExternallyUsedValues.count(Scalar) && "Scalar with nullptr as an external user must be registered in " "ExternallyUsedValues map"); + DebugLoc DL = ExternallyUsedValues[Scalar]; if (auto *VecI = dyn_cast(Vec)) { Builder.SetInsertPoint(VecI->getParent(), std::next(VecI->getIterator())); @@ -2878,8 +2878,8 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) { Value *Ex = Builder.CreateExtractElement(Vec, Lane); Ex = extend(ScalarRoot, Ex, Scalar->getType()); CSEBlocks.insert(cast(Scalar)->getParent()); - std::swap(ExternallyUsedValues[Ex], ExternallyUsedValues[Scalar]); ExternallyUsedValues.erase(Scalar); + ExternallyUsedValues[Ex] = DL; continue; } @@ -4439,11 +4439,9 @@ public: Builder.setFastMathFlags(Unsafe); unsigned i = 0; - BoUpSLP::ExtraValueToDebugLocsMap ExternallyUsedValues; - // The same extra argument may be used several time, so log each attempt - // to use it. + MapVector ExternallyUsedValues; for (auto &Pair : ExtraArgs) - ExternallyUsedValues[Pair.second].push_back(Pair.first->getDebugLoc()); + ExternallyUsedValues[Pair.second] = Pair.first->getDebugLoc(); while (i < NumReducedVals - ReduxWidth + 1 && ReduxWidth > 2) { auto VL = makeArrayRef(&ReducedVals[i], ReduxWidth); V.buildTree(VL, ExternallyUsedValues, ReductionOps); @@ -4491,13 +4489,9 @@ public: Builder.CreateBinOp(ReductionOpcode, VectorizedTree, I); } for (auto &Pair : ExternallyUsedValues) { - // Add each externally used value to the final reduction. - assert(!Pair.second.empty() && "At least one DebugLoc must be added."); - for (auto &DL : Pair.second) { - Builder.SetCurrentDebugLocation(DL); - VectorizedTree = Builder.CreateBinOp(ReductionOpcode, VectorizedTree, - Pair.first, "bin.extra"); - } + Builder.SetCurrentDebugLocation(Pair.second); + VectorizedTree = Builder.CreateBinOp(ReductionOpcode, VectorizedTree, + Pair.first, "bin.extra"); } // Update users. if (ReductionPHI && !isa(ReductionPHI)) { diff --git a/test/Transforms/SLPVectorizer/X86/horizontal-list.ll b/test/Transforms/SLPVectorizer/X86/horizontal-list.ll index 2eb0b2234ef..814c3a60f56 100644 --- a/test/Transforms/SLPVectorizer/X86/horizontal-list.ll +++ b/test/Transforms/SLPVectorizer/X86/horizontal-list.ll @@ -1473,10 +1473,9 @@ define float @extra_args_same_several_times(float* nocapture readonly %x, i32 %a ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0 ; CHECK-NEXT: [[BIN_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]] ; CHECK-NEXT: [[BIN_EXTRA5:%.*]] = fadd fast float [[BIN_EXTRA]], 5.000000e+00 -; CHECK-NEXT: [[BIN_EXTRA6:%.*]] = fadd fast float [[BIN_EXTRA5]], 5.000000e+00 -; CHECK-NEXT: [[BIN_EXTRA7:%.*]] = fadd fast float [[BIN_EXTRA6]], [[CONV]] +; CHECK-NEXT: [[BIN_EXTRA6:%.*]] = fadd fast float [[BIN_EXTRA5]], [[CONV]] ; CHECK-NEXT: [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]] -; CHECK-NEXT: ret float [[BIN_EXTRA7]] +; CHECK-NEXT: ret float [[BIN_EXTRA6]] ; ; THRESHOLD-LABEL: @extra_args_same_several_times( ; THRESHOLD-NEXT: entry: @@ -1511,10 +1510,9 @@ define float @extra_args_same_several_times(float* nocapture readonly %x, i32 %a ; THRESHOLD-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0 ; THRESHOLD-NEXT: [[BIN_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]] ; THRESHOLD-NEXT: [[BIN_EXTRA5:%.*]] = fadd fast float [[BIN_EXTRA]], 5.000000e+00 -; THRESHOLD-NEXT: [[BIN_EXTRA6:%.*]] = fadd fast float [[BIN_EXTRA5]], 5.000000e+00 -; THRESHOLD-NEXT: [[BIN_EXTRA7:%.*]] = fadd fast float [[BIN_EXTRA6]], [[CONV]] +; THRESHOLD-NEXT: [[BIN_EXTRA6:%.*]] = fadd fast float [[BIN_EXTRA5]], [[CONV]] ; THRESHOLD-NEXT: [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]] -; THRESHOLD-NEXT: ret float [[BIN_EXTRA7]] +; THRESHOLD-NEXT: ret float [[BIN_EXTRA6]] ; entry: %mul = mul nsw i32 %b, %a