[SCEVExpander] Expand umin_seq using freeze

%x umin_seq %y is currently expanded to %x == 0 ? 0 : umin(%x, %y).
This patch changes the expansion to umin(%x, freeze %y) instead
(https://alive2.llvm.org/ce/z/wujUhp).

The motivation for this change is the set of test cases affected by
D124910, where the freeze expansion ultimately produces better
optimization results. This is largely because
`(%x umin_seq %y) == %x` is a common expansion pattern, which
reliably optimizes in the freeze representation, but only sometimes
with the zero comparison (in particular, if %x == 0 can fold to
something else, we generally won't be able to recover reasonable
code from this).

Differential Revision: https://reviews.llvm.org/D125372
This commit is contained in:
Nikita Popov 2022-05-11 12:34:16 +02:00
parent 323514de58
commit e9a1c82d69
3 changed files with 24 additions and 39 deletions

View File

@ -449,7 +449,7 @@ private:
const Loop *getRelevantLoop(const SCEV *);
Value *expandMinMaxExpr(const SCEVNAryExpr *S, Intrinsic::ID IntrinID,
Twine Name);
Twine Name, bool IsSequential = false);
Value *visitConstant(const SCEVConstant *S) { return S->getValue(); }

View File

@ -1671,11 +1671,16 @@ Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) {
}
Value *SCEVExpander::expandMinMaxExpr(const SCEVNAryExpr *S,
Intrinsic::ID IntrinID, Twine Name) {
Intrinsic::ID IntrinID, Twine Name,
bool IsSequential) {
Value *LHS = expand(S->getOperand(S->getNumOperands() - 1));
Type *Ty = LHS->getType();
if (IsSequential)
LHS = Builder.CreateFreeze(LHS);
for (int i = S->getNumOperands() - 2; i >= 0; --i) {
Value *RHS = expandCodeForImpl(S->getOperand(i), Ty, false);
if (IsSequential && i != 0)
RHS = Builder.CreateFreeze(RHS);
Value *Sel;
if (Ty->isIntegerTy())
Sel = Builder.CreateIntrinsic(IntrinID, {Ty}, {LHS, RHS},
@ -1707,21 +1712,7 @@ Value *SCEVExpander::visitUMinExpr(const SCEVUMinExpr *S) {
}
Value *SCEVExpander::visitSequentialUMinExpr(const SCEVSequentialUMinExpr *S) {
SmallVector<Value *> Ops;
for (const SCEV *Op : S->operands())
Ops.emplace_back(expand(Op));
Value *SaturationPoint =
MinMaxIntrinsic::getSaturationPoint(Intrinsic::umin, S->getType());
SmallVector<Value *> OpIsZero;
for (Value *Op : ArrayRef<Value *>(Ops).drop_back())
OpIsZero.emplace_back(Builder.CreateICmpEQ(Op, SaturationPoint));
Value *AnyOpIsZero = Builder.CreateLogicalOr(OpIsZero);
Value *NaiveUMin = expandMinMaxExpr(S, Intrinsic::umin, "umin");
return Builder.CreateSelect(AnyOpIsZero, SaturationPoint, NaiveUMin);
return expandMinMaxExpr(S, Intrinsic::umin, "umin", /*IsSequential*/true);
}
Value *SCEVExpander::expandCodeForImpl(const SCEV *SH, Type *Ty,

View File

@ -4,14 +4,13 @@
define i32 @logical_and_2ops(i32 %n, i32 %m) {
; CHECK-LABEL: @logical_and_2ops(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[M:%.*]], i32 [[N:%.*]])
; CHECK-NEXT: [[TMP0:%.*]] = freeze i32 [[M:%.*]]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: br i1 false, label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[N]], 0
; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i32 0, i32 [[UMIN]]
; CHECK-NEXT: ret i32 [[TMP1]]
; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[N:%.*]])
; CHECK-NEXT: ret i32 [[UMIN]]
;
entry:
br label %loop
@ -29,14 +28,13 @@ exit:
define i32 @logical_or_2ops(i32 %n, i32 %m) {
; CHECK-LABEL: @logical_or_2ops(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[M:%.*]], i32 [[N:%.*]])
; CHECK-NEXT: [[TMP0:%.*]] = freeze i32 [[M:%.*]]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[N]], 0
; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i32 0, i32 [[UMIN]]
; CHECK-NEXT: ret i32 [[TMP1]]
; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[N:%.*]])
; CHECK-NEXT: ret i32 [[UMIN]]
;
entry:
br label %loop
@ -54,17 +52,15 @@ exit:
define i32 @logical_and_3ops(i32 %n, i32 %m, i32 %k) {
; CHECK-LABEL: @logical_and_3ops(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[M:%.*]], 0
; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[K:%.*]], i32 [[M]])
; CHECK-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[N:%.*]])
; CHECK-NEXT: [[TMP0:%.*]] = freeze i32 [[K:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = freeze i32 [[M:%.*]]
; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[TMP1]])
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: br i1 false, label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[N]], 0
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i1 true, i1 [[TMP0]]
; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 0, i32 [[UMIN1]]
; CHECK-NEXT: ret i32 [[TMP3]]
; CHECK-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[N:%.*]])
; CHECK-NEXT: ret i32 [[UMIN1]]
;
entry:
br label %loop
@ -84,17 +80,15 @@ exit:
define i32 @logical_or_3ops(i32 %n, i32 %m, i32 %k) {
; CHECK-LABEL: @logical_or_3ops(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[M:%.*]], 0
; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[K:%.*]], i32 [[M]])
; CHECK-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[N:%.*]])
; CHECK-NEXT: [[TMP0:%.*]] = freeze i32 [[K:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = freeze i32 [[M:%.*]]
; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[TMP1]])
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[N]], 0
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i1 true, i1 [[TMP0]]
; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 0, i32 [[UMIN1]]
; CHECK-NEXT: ret i32 [[TMP3]]
; CHECK-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[N:%.*]])
; CHECK-NEXT: ret i32 [[UMIN1]]
;
entry:
br label %loop