[SCEV] Don't store AddRec loop when simplifying multiplication of AddRecs

When multiplying several AddRecs, we do the following simplification:

{A1,+,A2,+,...,+,An}<L> * {B1,+,B2,+,...,+,Bn}<L> = {x=1 in [ sum y=x..2x
	[ sum z=max(y-x, y-n)..min(x,n) [
		choose(x, 2x)*choose(2x-y, x-z)*A_{y-z}*B_z]]
	],+,...up to x=2n}

This is done iteratively, pair by pair. So if we try to multiply three AddRecs
A1, A2, A3, then we'd try to simplify A1 * A2 to A1' and then try to
simplify A1' * A3 if A1' is also an AddRec.
The transform is only legal if the loops of the two AddRecs are the same.
It is checked in the code, but the loop of one of the AddRecs is stored
in a local variable and doesn't get updated when we simplify a pair to a new AddRec.
In the motivating test the new AddRec A1' was created for a different loop and,
as the loop variable didn't get updated, the check for different loops passed and
the transform worked for two AddRecs from different loops.
So it created a wrong SCEV. And it caused LSR to replace an instruction with another
one that had the same SCEV as the incorrectly computed one.

Differential Revision: https://reviews.llvm.org/D153254
This commit is contained in:
Dmitry Makogon 2023-06-19 16:33:46 +07:00
parent efc290ce9c
commit ce1ac1cf18
2 changed files with 13 additions and 30 deletions

View File

@ -3261,9 +3261,8 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
// if they are loop invariant w.r.t. the recurrence.
SmallVector<const SCEV *, 8> LIOps;
const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
const Loop *AddRecLoop = AddRec->getLoop();
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
if (isAvailableAtLoopEntry(Ops[i], AddRecLoop)) {
if (isAvailableAtLoopEntry(Ops[i], AddRec->getLoop())) {
LIOps.push_back(Ops[i]);
Ops.erase(Ops.begin()+i);
--i; --e;
@ -3286,7 +3285,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
// will be inferred if either NUW or NSW is true.
SCEV::NoWrapFlags Flags = ComputeFlags({Scale, AddRec});
const SCEV *NewRec = getAddRecExpr(
NewOps, AddRecLoop, AddRec->getNoWrapFlags(Flags));
NewOps, AddRec->getLoop(), AddRec->getNoWrapFlags(Flags));
// If all of the other operands were loop invariant, we are done.
if (Ops.size() == 1) return NewRec;
@ -3320,7 +3319,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
++OtherIdx) {
const SCEVAddRecExpr *OtherAddRec =
dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]);
if (!OtherAddRec || OtherAddRec->getLoop() != AddRecLoop)
if (!OtherAddRec || OtherAddRec->getLoop() != AddRec->getLoop())
continue;
// Limit max number of arguments to avoid creation of unreasonably big
@ -3359,7 +3358,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
AddRecOps.push_back(getAddExpr(SumOps, SCEV::FlagAnyWrap, Depth + 1));
}
if (!Overflow) {
const SCEV *NewAddRec = getAddRecExpr(AddRecOps, AddRecLoop,
const SCEV *NewAddRec = getAddRecExpr(AddRecOps, AddRec->getLoop(),
SCEV::FlagAnyWrap);
if (Ops.size() == 2) return NewAddRec;
Ops[Idx] = NewAddRec;

View File

@ -9,38 +9,22 @@ define void @test() {
; CHECK-NEXT: bb:
; CHECK-NEXT: br label [[BB3:%.*]]
; CHECK: bb3:
; CHECK-NEXT: [[LSR_IV13:%.*]] = phi i32 [ [[LSR_IV_NEXT14:%.*]], [[BB7:%.*]] ], [ 0, [[BB:%.*]] ]
; CHECK-NEXT: [[LSR_IV11:%.*]] = phi i32 [ [[LSR_IV_NEXT12:%.*]], [[BB7]] ], [ 98304, [[BB]] ]
; CHECK-NEXT: [[LSR_IV9:%.*]] = phi i32 [ [[LSR_IV_NEXT10:%.*]], [[BB7]] ], [ 0, [[BB]] ]
; CHECK-NEXT: [[LSR_IV7:%.*]] = phi i32 [ [[LSR_IV_NEXT8:%.*]], [[BB7]] ], [ 147456, [[BB]] ]
; CHECK-NEXT: [[LSR_IV5:%.*]] = phi i32 [ [[LSR_IV_NEXT6:%.*]], [[BB7]] ], [ 0, [[BB]] ]
; CHECK-NEXT: [[LSR_IV3:%.*]] = phi i32 [ [[LSR_IV_NEXT4:%.*]], [[BB7]] ], [ 57344, [[BB]] ]
; CHECK-NEXT: [[LSR_IV1:%.*]] = phi i32 [ [[LSR_IV_NEXT2:%.*]], [[BB7]] ], [ 0, [[BB]] ]
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i32 [ [[LSR_IV_NEXT:%.*]], [[BB7]] ], [ 4096, [[BB]] ]
; CHECK-NEXT: [[LSR_IV1:%.*]] = phi i32 [ [[LSR_IV_NEXT2:%.*]], [[BB7:%.*]] ], [ 0, [[BB:%.*]] ]
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i32 [ [[LSR_IV_NEXT:%.*]], [[BB7]] ], [ 16, [[BB]] ]
; CHECK-NEXT: br label [[BB4:%.*]]
; CHECK: bb4:
; CHECK-NEXT: [[LSR_IV21:%.*]] = phi i32 [ [[LSR_IV_NEXT22:%.*]], [[BB6:%.*]] ], [ 0, [[BB3]] ]
; CHECK-NEXT: [[LSR_IV19:%.*]] = phi i32 [ [[LSR_IV_NEXT20:%.*]], [[BB6]] ], [ [[LSR_IV1]], [[BB3]] ]
; CHECK-NEXT: [[LSR_IV17:%.*]] = phi i32 [ [[LSR_IV_NEXT18:%.*]], [[BB6]] ], [ [[LSR_IV5]], [[BB3]] ]
; CHECK-NEXT: [[LSR_IV15:%.*]] = phi i32 [ [[LSR_IV_NEXT16:%.*]], [[BB6]] ], [ [[LSR_IV9]], [[BB3]] ]
; CHECK-NEXT: [[LSR_IV3:%.*]] = phi i32 [ [[LSR_IV_NEXT4:%.*]], [[BB6:%.*]] ], [ [[LSR_IV1]], [[BB3]] ]
; CHECK-NEXT: br i1 true, label [[BB7]], label [[BB6]]
; CHECK: bb6:
; CHECK-NEXT: [[LSR_IV_NEXT16]] = add i32 [[LSR_IV15]], [[LSR_IV13]]
; CHECK-NEXT: [[LSR_IV_NEXT18]] = add i32 [[LSR_IV17]], [[LSR_IV15]]
; CHECK-NEXT: [[LSR_IV_NEXT20]] = add i32 [[LSR_IV19]], [[LSR_IV17]]
; CHECK-NEXT: [[LSR_IV_NEXT22]] = add i32 [[LSR_IV21]], [[LSR_IV19]]
; CHECK-NEXT: [[LSR_IV_NEXT4]] = add i32 [[LSR_IV3]], 268435456
; CHECK-NEXT: br label [[BB4]]
; CHECK: bb7:
; CHECK-NEXT: call void @foo(i32 [[LSR_IV21]])
; CHECK-NEXT: [[SEXT:%.*]] = sext i32 [[LSR_IV21]] to i64
; CHECK-NEXT: [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 8192
; CHECK-NEXT: [[MUL9:%.*]] = mul i32 [[LSR_IV3]], [[LSR_IV3]]
; CHECK-NEXT: [[MUL10:%.*]] = mul i32 [[MUL9]], [[LSR_IV3]]
; CHECK-NEXT: call void @foo(i32 [[MUL10]])
; CHECK-NEXT: [[SEXT:%.*]] = sext i32 [[MUL10]] to i64
; CHECK-NEXT: [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 32
; CHECK-NEXT: [[LSR_IV_NEXT2]] = add i32 [[LSR_IV1]], [[LSR_IV]]
; CHECK-NEXT: [[LSR_IV_NEXT4]] = add i32 [[LSR_IV3]], 114688
; CHECK-NEXT: [[LSR_IV_NEXT6]] = add i32 [[LSR_IV5]], [[LSR_IV3]]
; CHECK-NEXT: [[LSR_IV_NEXT8]] = add i32 [[LSR_IV7]], 294912
; CHECK-NEXT: [[LSR_IV_NEXT10]] = add i32 [[LSR_IV9]], [[LSR_IV7]]
; CHECK-NEXT: [[LSR_IV_NEXT12]] = add i32 [[LSR_IV11]], 196608
; CHECK-NEXT: [[LSR_IV_NEXT14]] = add i32 [[LSR_IV13]], [[LSR_IV11]]
; CHECK-NEXT: br label [[BB3]]
;
bb: