[InstCombine] Fold '(-1 u/ %x) u< %y' to '@llvm.umul.with.overflow' + overflow bit extraction

Summary:
`(-1 u/ %x) u< %y` is one of (3?) common ways to check that
some unsigned multiplication (will not) overflow.
Currently, we don't catch it. We could:
```
----------------------------------------
Name: no overflow
  %o0 = udiv i4 -1, %x
  %r = icmp ult i4 %o0, %y
=>
  %o0 = udiv i4 -1, %x
  %n0 = umul_overflow i4 %x, %y
  %r = extractvalue {i4, i1} %n0, 1

Done: 1
Optimization is correct!

----------------------------------------
Name: no overflow, swapped
  %o0 = udiv i4 -1, %x
  %r = icmp ugt i4 %y, %o0
=>
  %o0 = udiv i4 -1, %x
  %n0 = umul_overflow i4 %x, %y
  %r = extractvalue {i4, i1} %n0, 1

Done: 1
Optimization is correct!

----------------------------------------
Name: overflow
  %o0 = udiv i4 -1, %x
  %r = icmp uge i4 %o0, %y
=>
  %o0 = udiv i4 -1, %x
  %n0 = umul_overflow i4 %x, %y
  %n1 = extractvalue {i4, i1} %n0, 1
  %r = xor %n1, -1

Done: 1
Optimization is correct!

----------------------------------------
Name: overflow
  %o0 = udiv i4 -1, %x
  %r = icmp ule i4 %y, %o0
=>
  %o0 = udiv i4 -1, %x
  %n0 = umul_overflow i4 %x, %y
  %n1 = extractvalue {i4, i1} %n0, 1
  %r = xor %n1, -1

Done: 1
Optimization is correct!
```

As it can be observed from tests, while simply forming the `@llvm.umul.with.overflow`
is easy, if we were looking for the inverted answer, then more work needs to be done
to cleanup the now-pointless control-flow that was guarding against division-by-zero.
This is being addressed in follow-up patches.

Reviewers: nikic, spatel, efriedma, xbolva00, RKSimon

Reviewed By: nikic, xbolva00

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D65143

llvm-svn: 370347
This commit is contained in:
Roman Lebedev 2019-08-29 12:47:08 +00:00
parent dd43ea60e5
commit dae81771b4
4 changed files with 134 additions and 38 deletions

View File

@ -3525,6 +3525,50 @@ foldShiftIntoShiftInAnotherHandOfAndInICmp(ICmpInst &I, const SimplifyQuery SQ,
Constant::getNullValue(WidestTy));
}
/// Fold
/// (-1 u/ x) u< y
/// to
/// @llvm.umul.with.overflow(x, y) plus extraction of overflow bit
/// Note that the comparison is commutative, while inverted (u>=) predicate
/// will mean that we are looking for the opposite answer.
static Value *
foldUnsignedMultiplicationOverflowCheck(ICmpInst &I,
InstCombiner::BuilderTy &Builder) {
ICmpInst::Predicate Pred;
Value *X, *Y;
bool NeedNegation;
// Look for: (-1 u/ x) u</u>= y
if (!I.isEquality() &&
match(&I, m_c_ICmp(Pred, m_OneUse(m_UDiv(m_AllOnes(), m_Value(X))),
m_Value(Y)))) {
// Canonicalize as-if y was on RHS.
if (I.getOperand(1) != Y)
Pred = I.getSwappedPredicate();
// Are we checking that overflow does not happen, or does happen?
switch (Pred) {
case ICmpInst::Predicate::ICMP_ULT:
NeedNegation = false;
break; // OK
case ICmpInst::Predicate::ICMP_UGE:
NeedNegation = true;
break; // OK
default:
return nullptr; // Wrong predicate.
}
} else
return nullptr;
Function *F = Intrinsic::getDeclaration(
I.getModule(), Intrinsic::umul_with_overflow, X->getType());
CallInst *Call = Builder.CreateCall(F, {X, Y}, "umul");
Value *Res = Builder.CreateExtractValue(Call, 1, "umul.ov");
if (NeedNegation) // This technically increases instruction count.
Res = Builder.CreateNot(Res, "umul.not.ov");
return Res;
}
/// Try to fold icmp (binop), X or icmp X, (binop).
/// TODO: A large part of this logic is duplicated in InstSimplify's
/// simplifyICmpWithBinOp(). We should be able to share that and avoid the code
@ -3874,6 +3918,9 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) {
}
}
if (Value *V = foldUnsignedMultiplicationOverflowCheck(I, Builder))
return replaceInstUsesWith(I, V);
if (Value *V = foldICmpWithLowBitMaskedVal(I, Builder))
return replaceInstUsesWith(I, V);

View File

@ -8,9 +8,10 @@
define i1 @t0_basic(i8 %x, i8 %y) {
; CHECK-LABEL: @t0_basic(
; CHECK-NEXT: [[T0:%.*]] = udiv i8 -1, [[X:%.*]]
; CHECK-NEXT: [[R:%.*]] = icmp uge i8 [[T0]], [[Y:%.*]]
; CHECK-NEXT: ret i1 [[R]]
; CHECK-NEXT: [[UMUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
; CHECK-NEXT: [[UMUL_OV:%.*]] = extractvalue { i8, i1 } [[UMUL]], 1
; CHECK-NEXT: [[UMUL_NOT_OV:%.*]] = xor i1 [[UMUL_OV]], true
; CHECK-NEXT: ret i1 [[UMUL_NOT_OV]]
;
%t0 = udiv i8 -1, %x
%r = icmp uge i8 %t0, %y
@ -19,9 +20,10 @@ define i1 @t0_basic(i8 %x, i8 %y) {
define <2 x i1> @t1_vec(<2 x i8> %x, <2 x i8> %y) {
; CHECK-LABEL: @t1_vec(
; CHECK-NEXT: [[T0:%.*]] = udiv <2 x i8> <i8 -1, i8 -1>, [[X:%.*]]
; CHECK-NEXT: [[R:%.*]] = icmp uge <2 x i8> [[T0]], [[Y:%.*]]
; CHECK-NEXT: ret <2 x i1> [[R]]
; CHECK-NEXT: [[UMUL:%.*]] = call { <2 x i8>, <2 x i1> } @llvm.umul.with.overflow.v2i8(<2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]])
; CHECK-NEXT: [[UMUL_OV:%.*]] = extractvalue { <2 x i8>, <2 x i1> } [[UMUL]], 1
; CHECK-NEXT: [[UMUL_NOT_OV:%.*]] = xor <2 x i1> [[UMUL_OV]], <i1 true, i1 true>
; CHECK-NEXT: ret <2 x i1> [[UMUL_NOT_OV]]
;
%t0 = udiv <2 x i8> <i8 -1, i8 -1>, %x
%r = icmp uge <2 x i8> %t0, %y
@ -30,9 +32,10 @@ define <2 x i1> @t1_vec(<2 x i8> %x, <2 x i8> %y) {
define <3 x i1> @t2_vec_undef(<3 x i8> %x, <3 x i8> %y) {
; CHECK-LABEL: @t2_vec_undef(
; CHECK-NEXT: [[T0:%.*]] = udiv <3 x i8> <i8 -1, i8 undef, i8 -1>, [[X:%.*]]
; CHECK-NEXT: [[R:%.*]] = icmp uge <3 x i8> [[T0]], [[Y:%.*]]
; CHECK-NEXT: ret <3 x i1> [[R]]
; CHECK-NEXT: [[UMUL:%.*]] = call { <3 x i8>, <3 x i1> } @llvm.umul.with.overflow.v3i8(<3 x i8> [[X:%.*]], <3 x i8> [[Y:%.*]])
; CHECK-NEXT: [[UMUL_OV:%.*]] = extractvalue { <3 x i8>, <3 x i1> } [[UMUL]], 1
; CHECK-NEXT: [[UMUL_NOT_OV:%.*]] = xor <3 x i1> [[UMUL_OV]], <i1 true, i1 true, i1 true>
; CHECK-NEXT: ret <3 x i1> [[UMUL_NOT_OV]]
;
%t0 = udiv <3 x i8> <i8 -1, i8 undef, i8 -1>, %x
%r = icmp uge <3 x i8> %t0, %y
@ -43,10 +46,11 @@ declare i8 @gen8()
define i1 @t3_commutative(i8 %x) {
; CHECK-LABEL: @t3_commutative(
; CHECK-NEXT: [[T0:%.*]] = udiv i8 -1, [[X:%.*]]
; CHECK-NEXT: [[Y:%.*]] = call i8 @gen8()
; CHECK-NEXT: [[R:%.*]] = icmp ule i8 [[Y]], [[T0]]
; CHECK-NEXT: ret i1 [[R]]
; CHECK-NEXT: [[UMUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[X:%.*]], i8 [[Y]])
; CHECK-NEXT: [[UMUL_OV:%.*]] = extractvalue { i8, i1 } [[UMUL]], 1
; CHECK-NEXT: [[UMUL_NOT_OV:%.*]] = xor i1 [[UMUL_OV]], true
; CHECK-NEXT: ret i1 [[UMUL_NOT_OV]]
;
%t0 = udiv i8 -1, %x
%y = call i8 @gen8()

View File

@ -8,9 +8,9 @@
define i1 @t0_basic(i8 %x, i8 %y) {
; CHECK-LABEL: @t0_basic(
; CHECK-NEXT: [[T0:%.*]] = udiv i8 -1, [[X:%.*]]
; CHECK-NEXT: [[R:%.*]] = icmp ult i8 [[T0]], [[Y:%.*]]
; CHECK-NEXT: ret i1 [[R]]
; CHECK-NEXT: [[UMUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
; CHECK-NEXT: [[UMUL_OV:%.*]] = extractvalue { i8, i1 } [[UMUL]], 1
; CHECK-NEXT: ret i1 [[UMUL_OV]]
;
%t0 = udiv i8 -1, %x
%r = icmp ult i8 %t0, %y
@ -19,9 +19,9 @@ define i1 @t0_basic(i8 %x, i8 %y) {
define <2 x i1> @t1_vec(<2 x i8> %x, <2 x i8> %y) {
; CHECK-LABEL: @t1_vec(
; CHECK-NEXT: [[T0:%.*]] = udiv <2 x i8> <i8 -1, i8 -1>, [[X:%.*]]
; CHECK-NEXT: [[R:%.*]] = icmp ult <2 x i8> [[T0]], [[Y:%.*]]
; CHECK-NEXT: ret <2 x i1> [[R]]
; CHECK-NEXT: [[UMUL:%.*]] = call { <2 x i8>, <2 x i1> } @llvm.umul.with.overflow.v2i8(<2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]])
; CHECK-NEXT: [[UMUL_OV:%.*]] = extractvalue { <2 x i8>, <2 x i1> } [[UMUL]], 1
; CHECK-NEXT: ret <2 x i1> [[UMUL_OV]]
;
%t0 = udiv <2 x i8> <i8 -1, i8 -1>, %x
%r = icmp ult <2 x i8> %t0, %y
@ -30,9 +30,9 @@ define <2 x i1> @t1_vec(<2 x i8> %x, <2 x i8> %y) {
define <3 x i1> @t2_vec_undef(<3 x i8> %x, <3 x i8> %y) {
; CHECK-LABEL: @t2_vec_undef(
; CHECK-NEXT: [[T0:%.*]] = udiv <3 x i8> <i8 -1, i8 undef, i8 -1>, [[X:%.*]]
; CHECK-NEXT: [[R:%.*]] = icmp ult <3 x i8> [[T0]], [[Y:%.*]]
; CHECK-NEXT: ret <3 x i1> [[R]]
; CHECK-NEXT: [[UMUL:%.*]] = call { <3 x i8>, <3 x i1> } @llvm.umul.with.overflow.v3i8(<3 x i8> [[X:%.*]], <3 x i8> [[Y:%.*]])
; CHECK-NEXT: [[UMUL_OV:%.*]] = extractvalue { <3 x i8>, <3 x i1> } [[UMUL]], 1
; CHECK-NEXT: ret <3 x i1> [[UMUL_OV]]
;
%t0 = udiv <3 x i8> <i8 -1, i8 undef, i8 -1>, %x
%r = icmp ult <3 x i8> %t0, %y
@ -43,10 +43,10 @@ declare i8 @gen8()
define i1 @t3_commutative(i8 %x) {
; CHECK-LABEL: @t3_commutative(
; CHECK-NEXT: [[T0:%.*]] = udiv i8 -1, [[X:%.*]]
; CHECK-NEXT: [[Y:%.*]] = call i8 @gen8()
; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[Y]], [[T0]]
; CHECK-NEXT: ret i1 [[R]]
; CHECK-NEXT: [[UMUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[X:%.*]], i8 [[Y]])
; CHECK-NEXT: [[UMUL_OV:%.*]] = extractvalue { i8, i1 } [[UMUL]], 1
; CHECK-NEXT: ret i1 [[UMUL_OV]]
;
%t0 = udiv i8 -1, %x
%y = call i8 @gen8()

View File

@ -18,17 +18,61 @@ target triple = "x86_64-pc-linux-gnu"
; }
define i1 @will_not_overflow(i64 %arg, i64 %arg1) {
; ALL-LABEL: @will_not_overflow(
; ALL-NEXT: bb:
; ALL-NEXT: [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
; ALL-NEXT: br i1 [[T0]], label [[BB5:%.*]], label [[BB2:%.*]]
; ALL: bb2:
; ALL-NEXT: [[T3:%.*]] = udiv i64 -1, [[ARG]]
; ALL-NEXT: [[T4:%.*]] = icmp ult i64 [[T3]], [[ARG1:%.*]]
; ALL-NEXT: br label [[BB5]]
; ALL: bb5:
; ALL-NEXT: [[T6:%.*]] = phi i1 [ false, [[BB:%.*]] ], [ [[T4]], [[BB2]] ]
; ALL-NEXT: ret i1 [[T6]]
; SIMPLIFYCFG-LABEL: @will_not_overflow(
; SIMPLIFYCFG-NEXT: bb:
; SIMPLIFYCFG-NEXT: [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
; SIMPLIFYCFG-NEXT: br i1 [[T0]], label [[BB5:%.*]], label [[BB2:%.*]]
; SIMPLIFYCFG: bb2:
; SIMPLIFYCFG-NEXT: [[T3:%.*]] = udiv i64 -1, [[ARG]]
; SIMPLIFYCFG-NEXT: [[T4:%.*]] = icmp ult i64 [[T3]], [[ARG1:%.*]]
; SIMPLIFYCFG-NEXT: br label [[BB5]]
; SIMPLIFYCFG: bb5:
; SIMPLIFYCFG-NEXT: [[T6:%.*]] = phi i1 [ false, [[BB:%.*]] ], [ [[T4]], [[BB2]] ]
; SIMPLIFYCFG-NEXT: ret i1 [[T6]]
;
; INSTCOMBINEONLY-LABEL: @will_not_overflow(
; INSTCOMBINEONLY-NEXT: bb:
; INSTCOMBINEONLY-NEXT: [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
; INSTCOMBINEONLY-NEXT: br i1 [[T0]], label [[BB5:%.*]], label [[BB2:%.*]]
; INSTCOMBINEONLY: bb2:
; INSTCOMBINEONLY-NEXT: [[UMUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
; INSTCOMBINEONLY-NEXT: [[UMUL_OV:%.*]] = extractvalue { i64, i1 } [[UMUL]], 1
; INSTCOMBINEONLY-NEXT: br label [[BB5]]
; INSTCOMBINEONLY: bb5:
; INSTCOMBINEONLY-NEXT: [[T6:%.*]] = phi i1 [ false, [[BB:%.*]] ], [ [[UMUL_OV]], [[BB2]] ]
; INSTCOMBINEONLY-NEXT: ret i1 [[T6]]
;
; INSTCOMBINESIMPLIFYCFGONLY-LABEL: @will_not_overflow(
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: bb:
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[UMUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[UMUL_OV:%.*]] = extractvalue { i64, i1 } [[UMUL]], 1
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[T6:%.*]] = select i1 [[T0]], i1 false, i1 [[UMUL_OV]]
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: ret i1 [[T6]]
;
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-LABEL: @will_not_overflow(
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: bb:
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[T0:%.*]] = icmp ne i64 [[ARG:%.*]], 0
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[UMUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[UMUL_OV:%.*]] = extractvalue { i64, i1 } [[UMUL]], 1
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[T6:%.*]] = and i1 [[UMUL_OV]], [[T0]]
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: ret i1 [[T6]]
;
; INSTCOMBINESIMPLIFYCFGCOSTLYONLY-LABEL: @will_not_overflow(
; INSTCOMBINESIMPLIFYCFGCOSTLYONLY-NEXT: bb:
; INSTCOMBINESIMPLIFYCFGCOSTLYONLY-NEXT: [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
; INSTCOMBINESIMPLIFYCFGCOSTLYONLY-NEXT: [[UMUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
; INSTCOMBINESIMPLIFYCFGCOSTLYONLY-NEXT: [[UMUL_OV:%.*]] = extractvalue { i64, i1 } [[UMUL]], 1
; INSTCOMBINESIMPLIFYCFGCOSTLYONLY-NEXT: [[T6:%.*]] = select i1 [[T0]], i1 false, i1 [[UMUL_OV]]
; INSTCOMBINESIMPLIFYCFGCOSTLYONLY-NEXT: ret i1 [[T6]]
;
; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-LABEL: @will_not_overflow(
; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-NEXT: bb:
; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-NEXT: [[T0:%.*]] = icmp ne i64 [[ARG:%.*]], 0
; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-NEXT: [[UMUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-NEXT: [[UMUL_OV:%.*]] = extractvalue { i64, i1 } [[UMUL]], 1
; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-NEXT: [[T6:%.*]] = and i1 [[UMUL_OV]], [[T0]]
; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-NEXT: ret i1 [[T6]]
;
bb:
%t0 = icmp eq i64 %arg, 0
@ -65,11 +109,12 @@ define i1 @will_overflow(i64 %arg, i64 %arg1) {
; INSTCOMBINE-NEXT: [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
; INSTCOMBINE-NEXT: br i1 [[T0]], label [[BB5:%.*]], label [[BB2:%.*]]
; INSTCOMBINE: bb2:
; INSTCOMBINE-NEXT: [[T3:%.*]] = udiv i64 -1, [[ARG]]
; INSTCOMBINE-NEXT: [[T4:%.*]] = icmp uge i64 [[T3]], [[ARG1:%.*]]
; INSTCOMBINE-NEXT: [[UMUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
; INSTCOMBINE-NEXT: [[UMUL_OV:%.*]] = extractvalue { i64, i1 } [[UMUL]], 1
; INSTCOMBINE-NEXT: [[PHITMP:%.*]] = xor i1 [[UMUL_OV]], true
; INSTCOMBINE-NEXT: br label [[BB5]]
; INSTCOMBINE: bb5:
; INSTCOMBINE-NEXT: [[T6:%.*]] = phi i1 [ true, [[BB:%.*]] ], [ [[T4]], [[BB2]] ]
; INSTCOMBINE-NEXT: [[T6:%.*]] = phi i1 [ true, [[BB:%.*]] ], [ [[PHITMP]], [[BB2]] ]
; INSTCOMBINE-NEXT: ret i1 [[T6]]
;
bb: