[SLP]Fix a crash when trying to find operand with re-vectorized main

instruction.

We need to check whether the operand scalars are vectorized in a different
vector node if the main instruction has already been vectorized in another
vector node.
This commit is contained in:
Alexey Bataev 2023-09-20 09:25:20 -07:00
parent a590ff589c
commit ebed4692f8
2 changed files with 77 additions and 12 deletions

View File

@ -2916,8 +2916,11 @@ private:
const TreeEntry *TE = getTreeEntry(V);
assert((!TE || TE == Last || doesNotNeedToBeScheduled(V)) &&
"Scalar already in tree!");
if (TE)
if (TE) {
if (TE != Last)
MultiNodeScalars.insert(V);
continue;
}
ScalarToTreeEntry[V] = Last;
}
// Update the scheduler bundle to point to this TreeEntry.
@ -2976,6 +2979,9 @@ private:
/// Maps a specific scalar to its tree entry.
SmallDenseMap<Value *, TreeEntry *> ScalarToTreeEntry;
/// Set of scalars that are used in several vectorized nodes.
SmallDenseSet<Value *> MultiNodeScalars;
/// Maps a value to the proposed vectorizable size.
SmallDenseMap<Value *, unsigned> InstrElementSize;
@ -9843,17 +9849,32 @@ Value *BoUpSLP::vectorizeOperand(TreeEntry *E, unsigned NodeIdx) {
S = getSameOpcode(*It, *TLI);
}
if (S.getOpcode()) {
if (TreeEntry *VE = getTreeEntry(S.OpValue);
VE && VE->isSame(VL) &&
(any_of(VE->UserTreeIndices,
[E, NodeIdx](const EdgeInfo &EI) {
return EI.UserTE == E && EI.EdgeIdx == NodeIdx;
}) ||
any_of(VectorizableTree,
[E, NodeIdx, VE](const std::unique_ptr<TreeEntry> &TE) {
return TE->isOperandGatherNode({E, NodeIdx}) &&
VE->isSame(TE->Scalars);
}))) {
auto CheckSameVE = [&](const TreeEntry *VE) {
return VE->isSame(VL) &&
(any_of(VE->UserTreeIndices,
[E, NodeIdx](const EdgeInfo &EI) {
return EI.UserTE == E && EI.EdgeIdx == NodeIdx;
}) ||
any_of(VectorizableTree,
[E, NodeIdx, VE](const std::unique_ptr<TreeEntry> &TE) {
return TE->isOperandGatherNode({E, NodeIdx}) &&
VE->isSame(TE->Scalars);
}));
};
TreeEntry *VE = getTreeEntry(S.OpValue);
bool IsSameVE = VE && CheckSameVE(VE);
if (!IsSameVE && MultiNodeScalars.contains(S.OpValue)) {
auto *I =
find_if(VectorizableTree, [&](const std::unique_ptr<TreeEntry> &TE) {
return TE->State != TreeEntry::NeedToGather && TE.get() != VE &&
CheckSameVE(TE.get());
});
if (I != VectorizableTree.end()) {
VE = I->get();
IsSameVE = true;
}
}
if (IsSameVE) {
auto FinalShuffle = [&](Value *V, ArrayRef<int> Mask) {
ShuffleInstructionBuilder ShuffleBuilder(Builder, *this);
ShuffleBuilder.add(V, Mask);

View File

@ -31,3 +31,47 @@ define void @test(double %0) {
%9 = fcmp olt double %8, 0.000000e+00
br label %2
}
; Regression test for an SLP vectorizer crash when looking up the vectorized
; operand of a node whose main instruction is also vectorized in another node.
; The scalar %m1 is an operand of two of the four fmul instructions below:
; %6 (paired with %m2) and %8 (paired with %3).
define void @test1(double %0, <4 x double> %v) {
; CHECK-LABEL: define void @test1(
; CHECK-SAME: double [[TMP0:%.*]], <4 x double> [[V:%.*]]) {
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[V]], <4 x double> poison, <2 x i32> <i32 poison, i32 0>
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[TMP0]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 0>
; CHECK-NEXT: br label [[TMP5:%.*]]
; CHECK: 5:
; CHECK-NEXT: [[TMP6:%.*]] = fsub <4 x double> <double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00>, [[V]]
; CHECK-NEXT: [[TMP7:%.*]] = fsub <4 x double> <double 0.000000e+00, double 1.000000e+00, double 0.000000e+00, double 0.000000e+00>, [[TMP4]]
; CHECK-NEXT: br label [[DOTBACKEDGE:%.*]]
; CHECK: .backedge:
; CHECK-NEXT: [[TMP8:%.*]] = fmul <4 x double> [[TMP7]], [[TMP6]]
; CHECK-NEXT: [[TMP9:%.*]] = fcmp olt <4 x double> [[TMP8]], zeroinitializer
; CHECK-NEXT: br label [[TMP5]]
;
; Scalarize all four lanes of %v so each lane feeds its own fsub below.
%e0 = extractelement <4 x double> %v, i32 0
%e1 = extractelement <4 x double> %v, i32 1
%e2 = extractelement <4 x double> %v, i32 2
%e3 = extractelement <4 x double> %v, i32 3
br label %2
2:
; %m1..%m4 subtract lanes 0..3 of %v from the constants 1.0..4.0.
%m1 = fsub double 1.000000e+00, %e0
%m2 = fsub double 2.000000e+00, %e1
%m3 = fsub double 3.000000e+00, %e2
%m4 = fsub double 4.000000e+00, %e3
; %3, %4 and %5 are three identical computations of 0.0 - %0.
%3 = fsub double 0.000000e+00, %0
%4 = fsub double 0.000000e+00, %0
%5 = fsub double 0.000000e+00, %0
br label %.backedge
.backedge:
; Four fmul/fcmp pairs; %m1 appears as the first operand of %6 and as the
; second operand of %8. The compare results are unused.
%6 = fmul double %m1, %m2
%7 = fcmp olt double %6, 0.000000e+00
%8 = fmul double %3, %m1
%9 = fcmp olt double %8, 0.000000e+00
%10 = fmul double %4, %m3
%11 = fcmp olt double %10, 0.000000e+00
%12 = fmul double %5, %m4
%13 = fcmp olt double %12, 0.000000e+00
br label %2
}