[SCCP] Use constant ranges for binary operators.

If one of the operands of a binary operator is a constant range, we can use ConstantRange::binaryOp to approximate the result.

We still handle single-element constant ranges as we did previously, with ConstantExpr::get(), because ConstantRange::binaryOp still gives worse results in a few cases for single-element ranges.

Also note that we bail out early if any of the operands is still unknown.

Reviewers: davide, efriedma, mssimpso

Reviewed By: efriedma

Differential Revision: https://reviews.llvm.org/D71936
2024-12-11 05:24:16 +00:00 · 2020-03-19 09:24:09 +00:00 · 2020-03-19 09:24:09 +00:00 · b3d85ce4e1
commit b3d85ce4e1
parent b875f44290
5 changed files with 165 additions and 56 deletions
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@ -977,9 +977,18 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) {
  LatticeVal V2State = getValueState(I.getOperand(1));

  LatticeVal &IV = ValueState[&I];
-  if (isOverdefined(IV))
+  if (IV.isOverdefined())
+    return;
+
+  // If either operand is still unknown or undef, wait for it to resolve.
+  if (V1State.isUnknownOrUndef() || V2State.isUnknownOrUndef())
+    return;
+
+  if (V1State.isOverdefined() && V2State.isOverdefined())
    return (void)markOverdefined(&I);

+  // Both operands are non-integer constants or constant expressions.
+  // TODO: Use information from notconstant better.
  if (isConstant(V1State) && isConstant(V2State)) {
    Constant *C = ConstantExpr::get(I.getOpcode(), getConstant(V1State),
                                    getConstant(V2State));
@ -989,50 +998,21 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) {
    return (void)markConstant(IV, &I, C);
  }

-  // If something is undef, wait for it to resolve.
-  if (V1State.isUnknownOrUndef() || V2State.isUnknownOrUndef())
-    return;
+  // Operands are either constant ranges, notconstant, or overdefined, or one of
+  // the operands is a constant.
+  ConstantRange A = ConstantRange::getFull(I.getType()->getScalarSizeInBits());
+  ConstantRange B = ConstantRange::getFull(I.getType()->getScalarSizeInBits());
+  if (V1State.isConstantRange())
+    A = V1State.getConstantRange();
+  if (V2State.isConstantRange())
+    B = V2State.getConstantRange();

-  // Otherwise, one of our operands is overdefined.  Try to produce something
-  // better than overdefined with some tricks.
-  // If this is 0 / Y, it doesn't matter that the second operand is
-  // overdefined, and we can replace it with zero.
-  if (I.getOpcode() == Instruction::UDiv || I.getOpcode() == Instruction::SDiv)
-    if (isConstant(V1State) && getConstant(V1State)->isNullValue())
-      return (void)markConstant(IV, &I, getConstant(V1State));
+  ConstantRange R = A.binaryOp(cast<BinaryOperator>(&I)->getOpcode(), B);
+  mergeInValue(&I, LatticeVal::getRange(R));

-  // If this is:
-  // -> AND/MUL with 0
-  // -> OR with -1
-  // it doesn't matter that the other operand is overdefined.
-  if (I.getOpcode() == Instruction::And || I.getOpcode() == Instruction::Mul ||
-      I.getOpcode() == Instruction::Or) {
-    LatticeVal *NonOverdefVal = nullptr;
-    if (!isOverdefined(V1State))
-      NonOverdefVal = &V1State;
-
-    else if (!isOverdefined(V2State))
-      NonOverdefVal = &V2State;
-    if (NonOverdefVal) {
-      if (!isConstant(*NonOverdefVal))
-        return;
-
-      if (I.getOpcode() == Instruction::And ||
-          I.getOpcode() == Instruction::Mul) {
-        // X and 0 = 0
-        // X * 0 = 0
-        if (getConstant(*NonOverdefVal)->isNullValue())
-          return (void)markConstant(IV, &I, getConstant(*NonOverdefVal));
-      } else {
-        // X or -1 = -1
-        if (ConstantInt *CI = getConstantInt(*NonOverdefVal))
-          if (CI->isMinusOne())
-            return (void)markConstant(IV, &I, CI);
-      }
-    }
-  }
-
-  markOverdefined(&I);
+  // TODO: Currently we do not exploit special values that produce something
+  // better than overdefined when one operand is overdefined for vector or
+  // floating point types, e.g. `and <4 x i32> overdefined, zeroinitializer`.
 }

 // Handle ICmpInst instruction.
--- a/test/Transforms/SCCP/binaryops-range-special-cases.ll
+++ b/test/Transforms/SCCP/binaryops-range-special-cases.ll
@ -7,16 +7,13 @@ define void @sdiv1_cmp_constants(i32 %x) {
 ; CHECK-NEXT:    [[D:%.*]] = sdiv i32 1, [[X:%.*]]
 ; CHECK-NEXT:    [[C_0:%.*]] = icmp slt i32 0, [[D]]
 ; CHECK-NEXT:    call void @use(i1 [[C_0]])
-; CHECK-NEXT:    [[C_1:%.*]] = icmp slt i32 1, [[D]]
-; CHECK-NEXT:    call void @use(i1 [[C_1]])
-; CHECK-NEXT:    [[C_2:%.*]] = icmp slt i32 2, [[D]]
-; CHECK-NEXT:    call void @use(i1 [[C_2]])
+; CHECK-NEXT:    call void @use(i1 false)
+; CHECK-NEXT:    call void @use(i1 false)
 ; CHECK-NEXT:    [[C_3:%.*]] = icmp eq i32 1, [[D]]
 ; CHECK-NEXT:    call void @use(i1 [[C_3]])
 ; CHECK-NEXT:    [[C_4:%.*]] = icmp eq i32 0, [[D]]
 ; CHECK-NEXT:    call void @use(i1 [[C_4]])
-; CHECK-NEXT:    [[C_5:%.*]] = icmp eq i32 2, [[D]]
-; CHECK-NEXT:    call void @use(i1 [[C_5]])
+; CHECK-NEXT:    call void @use(i1 false)
 ; CHECK-NEXT:    ret void
 ;
  %d = sdiv i32 1, %x
--- a/test/Transforms/SCCP/ip-ranges-binaryops.ll
+++ b/test/Transforms/SCCP/ip-ranges-binaryops.ll
@ -0,0 +1,134 @@
+; RUN: opt < %s -ipsccp -S | FileCheck %s
+
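+; Callers pass x in {10, 20} and y in {100, 200}: x = [10, 21), y = [100, 201)
+; x + y = [110, 221), so %c.1 (sgt 220) and %c.3 (slt 110) fold to false.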
+define internal i1 @f.add(i32 %x, i32 %y) {
+; CHECK-LABEL: define internal i1 @f.add(i32 %x, i32 %y) {
+; CHECK-NEXT:    %a.1 = add i32 %x, %y
+; CHECK-NEXT:    %c.2 = icmp sgt i32 %a.1, 219
+; CHECK-NEXT:    %c.4 = icmp slt i32 %a.1, 111
+; CHECK-NEXT:    %c.5 = icmp eq i32 %a.1, 150
+; CHECK-NEXT:    %c.6 = icmp slt i32 %a.1, 150
+; CHECK-NEXT:    %res.1 = add i1 false, %c.2
+; CHECK-NEXT:    %res.2 = add i1 %res.1, false
+; CHECK-NEXT:    %res.3 = add i1 %res.2, %c.4
+; CHECK-NEXT:    %res.4 = add i1 %res.3, %c.5
+; CHECK-NEXT:    %res.5 = add i1 %res.4, %c.6
+; CHECK-NEXT:    ret i1 %res.5
+;
+  %a.1 = add i32 %x, %y
+  %c.1 = icmp sgt i32 %a.1, 220
+  %c.2 = icmp sgt i32 %a.1, 219
+  %c.3 = icmp slt i32 %a.1, 110
+  %c.4 = icmp slt i32 %a.1, 111
+  %c.5 = icmp eq i32 %a.1, 150
+  %c.6 = icmp slt i32 %a.1, 150
+  %res.1 = add i1 %c.1, %c.2
+  %res.2 = add i1 %res.1, %c.3
+  %res.3 = add i1 %res.2, %c.4
+  %res.4 = add i1 %res.3, %c.5
+  %res.5 = add i1 %res.4, %c.6
+  ret i1 %res.5
+}
+
+define i1 @caller.add() {
+; CHECK-LABEL:  define i1 @caller.add() {
+; CHECK-NEXT:    %call.1 = tail call i1 @f.add(i32 10, i32 100)
+; CHECK-NEXT:    %call.2 = tail call i1 @f.add(i32 20, i32 200)
+; CHECK-NEXT:    %res = and i1 %call.1, %call.2
+; CHECK-NEXT:    ret i1 %res
+;
+  %call.1 = tail call i1 @f.add(i32 10, i32 100)
+  %call.2 = tail call i1 @f.add(i32 20, i32 200)
+  %res = and i1 %call.1, %call.2
+  ret i1 %res
+}
+
+
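+; Callers pass x in {10, 20} and y in {100, 200}: x = [10, 21), y = [100, 201)
+; x - y = [-190, -79), so %c.1 (sgt -80) and %c.3 (slt -190) fold to false.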
+define internal i1 @f.sub(i32 %x, i32 %y) {
+; CHECK-LABEL: define internal i1 @f.sub(i32 %x, i32 %y) {
+; CHECK-NEXT:    %a.1 = sub i32 %x, %y
+; CHECK-NEXT:    %c.2 = icmp sgt i32 %a.1, -81
+; CHECK-NEXT:    %c.4 = icmp slt i32 %a.1, -189
+; CHECK-NEXT:    %c.5 = icmp eq i32 %a.1, -150
+; CHECK-NEXT:    %c.6 = icmp slt i32 %a.1, -150
+; CHECK-NEXT:    %res.1 = add i1 false, %c.2
+; CHECK-NEXT:    %res.2 = add i1 %res.1, false
+; CHECK-NEXT:    %res.3 = add i1 %res.2, %c.4
+; CHECK-NEXT:    %res.4 = add i1 %res.3, %c.5
+; CHECK-NEXT:    %res.5 = add i1 %res.4, %c.6
+; CHECK-NEXT:    ret i1 %res.5
+;
+  %a.1 = sub i32 %x, %y
+  %c.1 = icmp sgt i32 %a.1, -80
+  %c.2 = icmp sgt i32 %a.1, -81
+  %c.3 = icmp slt i32 %a.1, -190
+  %c.4 = icmp slt i32 %a.1, -189
+  %c.5 = icmp eq i32 %a.1, -150
+  %c.6 = icmp slt i32 %a.1, -150
+  %res.1 = add i1 %c.1, %c.2
+  %res.2 = add i1 %res.1, %c.3
+  %res.3 = add i1 %res.2, %c.4
+  %res.4 = add i1 %res.3, %c.5
+  %res.5 = add i1 %res.4, %c.6
+  ret i1 %res.5
+}
+
+define i1 @caller.sub() {
+; CHECK-LABEL:  define i1 @caller.sub() {
+; CHECK-NEXT:    %call.1 = tail call i1 @f.sub(i32 10, i32 100)
+; CHECK-NEXT:    %call.2 = tail call i1 @f.sub(i32 20, i32 200)
+; CHECK-NEXT:    %res = and i1 %call.1, %call.2
+; CHECK-NEXT:    ret i1 %res
+;
+  %call.1 = tail call i1 @f.sub(i32 10, i32 100)
+  %call.2 = tail call i1 @f.sub(i32 20, i32 200)
+  %res = and i1 %call.1, %call.2
+  ret i1 %res
+}
+
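+; Callers pass x in {10, 20} and y in {100, 200}: x = [10, 21), y = [100, 201)
+; x * y = [1000, 4001), so %c.1 (sgt 4000) and %c.3 (slt 1000) fold to false.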
+define internal i1 @f.mul(i32 %x, i32 %y) {
+; CHECK-LABEL: define internal i1 @f.mul(i32 %x, i32 %y) {
+; CHECK-NEXT:    %a.1 = mul i32 %x, %y
+; CHECK-NEXT:    %c.2 = icmp sgt i32 %a.1, 3999
+; CHECK-NEXT:    %c.4 = icmp slt i32 %a.1, 1001
+; CHECK-NEXT:    %c.5 = icmp eq i32 %a.1, 1500
+; CHECK-NEXT:    %c.6 = icmp slt i32 %a.1, 1500
+; CHECK-NEXT:    %res.1 = add i1 false, %c.2
+; CHECK-NEXT:    %res.2 = add i1 %res.1, false
+; CHECK-NEXT:    %res.3 = add i1 %res.2, %c.4
+; CHECK-NEXT:    %res.4 = add i1 %res.3, %c.5
+; CHECK-NEXT:    %res.5 = add i1 %res.4, %c.6
+; CHECK-NEXT:    ret i1 %res.5
+;
+  %a.1 = mul i32 %x, %y
+  %c.1 = icmp sgt i32 %a.1, 4000
+  %c.2 = icmp sgt i32 %a.1, 3999
+  %c.3 = icmp slt i32 %a.1, 1000
+  %c.4 = icmp slt i32 %a.1, 1001
+  %c.5 = icmp eq i32 %a.1, 1500
+  %c.6 = icmp slt i32 %a.1, 1500
+  %res.1 = add i1 %c.1, %c.2
+  %res.2 = add i1 %res.1, %c.3
+  %res.3 = add i1 %res.2, %c.4
+  %res.4 = add i1 %res.3, %c.5
+  %res.5 = add i1 %res.4, %c.6
+  ret i1 %res.5
+}
+
+define i1 @caller.mul() {
+; CHECK-LABEL:  define i1 @caller.mul() {
+; CHECK-NEXT:    %call.1 = tail call i1 @f.mul(i32 10, i32 100)
+; CHECK-NEXT:    %call.2 = tail call i1 @f.mul(i32 20, i32 200)
+; CHECK-NEXT:    %res = and i1 %call.1, %call.2
+; CHECK-NEXT:    ret i1 %res
+;
+  %call.1 = tail call i1 @f.mul(i32 10, i32 100)
+  %call.2 = tail call i1 @f.mul(i32 20, i32 200)
+  %res = and i1 %call.1, %call.2
+  ret i1 %res
+}
--- a/test/Transforms/SCCP/range-and.ll
+++ b/test/Transforms/SCCP/range-and.ll
@ -8,16 +8,13 @@ define void @and_range_limit(i64 %a) {
 ; CHECK-NEXT:    [[R:%.*]] = and i64 [[A:%.*]], 255
 ; CHECK-NEXT:    [[C_0:%.*]] = icmp slt i64 [[R]], 15
 ; CHECK-NEXT:    call void @use(i1 [[C_0]])
-; CHECK-NEXT:    [[C_1:%.*]] = icmp slt i64 [[R]], 256
-; CHECK-NEXT:    call void @use(i1 [[C_1]])
+; CHECK-NEXT:    call void @use(i1 true)
 ; CHECK-NEXT:    [[C_2:%.*]] = icmp eq i64 [[R]], 100
 ; CHECK-NEXT:    call void @use(i1 [[C_2]])
-; CHECK-NEXT:    [[C_3:%.*]] = icmp eq i64 [[R]], 300
-; CHECK-NEXT:    call void @use(i1 [[C_3]])
+; CHECK-NEXT:    call void @use(i1 false)
 ; CHECK-NEXT:    [[C_4:%.*]] = icmp ne i64 [[R]], 100
 ; CHECK-NEXT:    call void @use(i1 [[C_4]])
-; CHECK-NEXT:    [[C_5:%.*]] = icmp ne i64 [[R]], 300
-; CHECK-NEXT:    call void @use(i1 [[C_5]])
+; CHECK-NEXT:    call void @use(i1 true)
 ; CHECK-NEXT:    ret void
 ;
  %r = and i64 %a, 255
--- a/test/Transforms/SCCP/vector-bitcast.ll
+++ b/test/Transforms/SCCP/vector-bitcast.ll
@ -2,7 +2,8 @@

 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"

-; CHECK: store volatile <2 x i64> zeroinitializer, <2 x i64>* %p
+; FIXME: Add back support for handling special values of vector/fp types.
+; CHECK: store volatile <2 x i64> %and.i119.i, <2 x i64>* %p
 ; rdar://11324230

 define void @foo(<2 x i64>* %p) nounwind {