[SLP] Fix PR77916: transform the whole mask, not only the elements for the second vector.

We need to transform all elements in the long mask. If we decide to
produce the shorter version, some elements may otherwise still have incorrect
indices after the transformation for the first vector in the permutation.
This commit is contained in:
Alexey Bataev 2024-01-12 06:56:14 -08:00
parent 5dbf178154
commit 6fdc2ce8c5
2 changed files with 47 additions and 0 deletions

View File

@ -7379,6 +7379,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
continue;
if (Idx >= static_cast<int>(CommonVF))
Idx = E1Mask[Idx - CommonVF] + VF;
else
Idx = E1Mask[Idx];
}
CommonVF = VF;
}

View File

@ -0,0 +1,45 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux -slp-threshold=-10 < %s | FileCheck %s
; Regression test for PR77916 (SLP vectorizer).
;
; Input shape: bb1 holds two loop-carried scalar phis, %phi (fed by %or) and
; %phi2 (fed by %add). The exit block bb4 re-reads them through eight
; single-entry phis: six of %phi2 followed by two of %phi.
;
; Expected output: the two scalar phis become one <2 x i32> phi, and the eight
; exit values are built from it by a single shufflevector whose mask is
; <0, 0, 0, 0, 0, 0, 1, 1> (lane 0 six times, lane 1 twice), feeding one
; <8 x i32> phi in bb4.
define i32 @test() {
; CHECK-LABEL: define i32 @test() {
; CHECK-NEXT: bb:
; CHECK-NEXT: br label [[BB1:%.*]]
; CHECK: bb1:
; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ [[TMP5:%.*]], [[BB3:%.*]] ], [ zeroinitializer, [[BB:%.*]] ]
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1>
; CHECK-NEXT: br i1 false, label [[BB4:%.*]], label [[BB3]]
; CHECK: bb3:
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> <i32 0, i32 poison>, <2 x i32> <i32 2, i32 1>
; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i32> zeroinitializer, [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i32> zeroinitializer, [[TMP2]]
; CHECK-NEXT: [[TMP5]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: br label [[BB1]]
; CHECK: bb4:
; CHECK-NEXT: [[TMP6:%.*]] = phi <8 x i32> [ [[TMP1]], [[BB1]] ]
; CHECK-NEXT: ret i32 0
;
bb:
br label %bb1
; Loop header: both phis start at 0 and are updated from bb3.
bb1:
%phi = phi i32 [ %or, %bb3 ], [ 0, %bb ]
%phi2 = phi i32 [ %add, %bb3 ], [ 0, %bb ]
br i1 false, label %bb4, label %bb3
; Loop latch: %or depends on %phi, while %add has only constant operands.
bb3:
%or = or i32 0, %phi
%add = add i32 0, 0
br label %bb1
; Exit block: eight phis reuse the two loop values (six of %phi2, then two of
; %phi); none of them feeds the returned constant.
bb4:
%phi5 = phi i32 [ %phi2, %bb1 ]
%phi6 = phi i32 [ %phi2, %bb1 ]
%phi7 = phi i32 [ %phi2, %bb1 ]
%phi8 = phi i32 [ %phi2, %bb1 ]
%phi9 = phi i32 [ %phi2, %bb1 ]
%phi10 = phi i32 [ %phi2, %bb1 ]
%phi11 = phi i32 [ %phi, %bb1 ]
%phi12 = phi i32 [ %phi, %bb1 ]
ret i32 0
}