[SCCP] Use constant ranges for binary operators.

If one of the operands of a binary operator is a constant range, we can use ConstantRange::binaryOp to approximate the result.

We still handle single-element constant ranges as we did previously, with ConstantExpr::get(), because ConstantRange::binaryOp still gives worse results in a few cases for single-element ranges.

Also note that we bail out early if any of the operands is still unknown.

Reviewers: davide, efriedma, mssimpso

Reviewed By: efriedma

Differential Revision: https://reviews.llvm.org/D71936
2024-12-11 05:24:16 +00:00 · 2020-03-19 09:24:09 +00:00 · 2020-03-19 09:24:09 +00:00 · b3d85ce4e1
commit b3d85ce4e1
parent b875f44290
5 changed files with 165 additions and 56 deletions
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@ -977,9 +977,18 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) {
  LatticeVal V2State = getValueState(I.getOperand(1));
  LatticeVal &IV = ValueState[&I];
-  if (isOverdefined(IV))
+  if (IV.isOverdefined())
    return;
  // If something is undef, wait for it to resolve.
  if (V1State.isUnknownOrUndef() || V2State.isUnknownOrUndef())
    return;
  if (V1State.isOverdefined() && V2State.isOverdefined())
    return (void)markOverdefined(&I);
  // Both operands are non-integer constants or constant expressions.
  // TODO: Use information from notconstant better.
  if (isConstant(V1State) && isConstant(V2State)) {
    Constant *C = ConstantExpr::get(I.getOpcode(), getConstant(V1State),
                                    getConstant(V2State));
@ -989,50 +998,21 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) {
    return (void)markConstant(IV, &I, C);
  }
-  // If something is undef, wait for it to resolve.
+  // Operands are either constant ranges, notconstant, overdefined or one of the
-  if (V1State.isUnknownOrUndef() || V2State.isUnknownOrUndef())
+  // operands is a constant.
-    return;
+  ConstantRange A = ConstantRange::getFull(I.getType()->getScalarSizeInBits());
  ConstantRange B = ConstantRange::getFull(I.getType()->getScalarSizeInBits());
  if (V1State.isConstantRange())
    A = V1State.getConstantRange();
  if (V2State.isConstantRange())
    B = V2State.getConstantRange();
-  // Otherwise, one of our operands is overdefined.  Try to produce something
+  ConstantRange R = A.binaryOp(cast<BinaryOperator>(&I)->getOpcode(), B);
-  // better than overdefined with some tricks.
+  mergeInValue(&I, LatticeVal::getRange(R));
  // If this is 0 / Y, it doesn't matter that the second operand is
  // overdefined, and we can replace it with zero.
  if (I.getOpcode() == Instruction::UDiv || I.getOpcode() == Instruction::SDiv)
    if (isConstant(V1State) && getConstant(V1State)->isNullValue())
      return (void)markConstant(IV, &I, getConstant(V1State));
-  // If this is:
+  // TODO: Currently we do not exploit special values that produce something
-  // -> AND/MUL with 0
+  // better than overdefined with an overdefined operand for vector or floating
-  // -> OR with -1
+  // point types, like and <4 x i32> overdefined, zeroinitializer.
  // it doesn't matter that the other operand is overdefined.
  if (I.getOpcode() == Instruction::And || I.getOpcode() == Instruction::Mul ||
      I.getOpcode() == Instruction::Or) {
    LatticeVal *NonOverdefVal = nullptr;
    if (!isOverdefined(V1State))
      NonOverdefVal = &V1State;
    else if (!isOverdefined(V2State))
      NonOverdefVal = &V2State;
    if (NonOverdefVal) {
      if (!isConstant(*NonOverdefVal))
        return;
      if (I.getOpcode() == Instruction::And ||
          I.getOpcode() == Instruction::Mul) {
        // X and 0 = 0
        // X * 0 = 0
        if (getConstant(*NonOverdefVal)->isNullValue())
          return (void)markConstant(IV, &I, getConstant(*NonOverdefVal));
      } else {
        // X or -1 = -1
        if (ConstantInt *CI = getConstantInt(*NonOverdefVal))
          if (CI->isMinusOne())
            return (void)markConstant(IV, &I, CI);
      }
    }
  }
  markOverdefined(&I);
 }
 // Handle ICmpInst instruction.
--- a/test/Transforms/SCCP/binaryops-range-special-cases.ll
+++ b/test/Transforms/SCCP/binaryops-range-special-cases.ll
@ -7,16 +7,13 @@ define void @sdiv1_cmp_constants(i32 %x) {
 ; CHECK-NEXT:    [[D:%.*]] = sdiv i32 1, [[X:%.*]]
 ; CHECK-NEXT:    [[C_0:%.*]] = icmp slt i32 0, [[D]]
 ; CHECK-NEXT:    call void @use(i1 [[C_0]])
-; CHECK-NEXT:    [[C_1:%.*]] = icmp slt i32 1, [[D]]
+; CHECK-NEXT:    call void @use(i1 false)
-; CHECK-NEXT:    call void @use(i1 [[C_1]])
+; CHECK-NEXT:    call void @use(i1 false)
 ; CHECK-NEXT:    [[C_2:%.*]] = icmp slt i32 2, [[D]]
 ; CHECK-NEXT:    call void @use(i1 [[C_2]])
 ; CHECK-NEXT:    [[C_3:%.*]] = icmp eq i32 1, [[D]]
 ; CHECK-NEXT:    call void @use(i1 [[C_3]])
 ; CHECK-NEXT:    [[C_4:%.*]] = icmp eq i32 0, [[D]]
 ; CHECK-NEXT:    call void @use(i1 [[C_4]])
-; CHECK-NEXT:    [[C_5:%.*]] = icmp eq i32 2, [[D]]
+; CHECK-NEXT:    call void @use(i1 false)
 ; CHECK-NEXT:    call void @use(i1 [[C_5]])
 ; CHECK-NEXT:    ret void
 ;
  %d = sdiv i32 1, %x
--- a/test/Transforms/SCCP/ip-ranges-binaryops.ll
+++ b/test/Transforms/SCCP/ip-ranges-binaryops.ll
@ -0,0 +1,134 @@
 ; RUN: opt < %s -ipsccp -S | FileCheck %s
 ; Range test for `add`. @f.add is internal and is called from @caller.add
 ; with the argument pairs (10, 100) and (20, 200), which gives:
 ; x = [10, 21), y = [100, 201)
 ; x + y = [110, 221)
 ; The expected output drops %c.1 and %c.3 and uses `false` in their place;
 ; every other comparison is expected to remain.
 define internal i1 @f.add(i32 %x, i32 %y) {
 ; CHECK-LABEL: define internal i1 @f.add(i32 %x, i32 %y) {
 ; CHECK-NEXT:    %a.1 = add i32 %x, %y
 ; CHECK-NEXT:    %c.2 = icmp sgt i32 %a.1, 219
 ; CHECK-NEXT:    %c.4 = icmp slt i32 %a.1, 111
 ; CHECK-NEXT:    %c.5 = icmp eq i32 %a.1, 150
 ; CHECK-NEXT:    %c.6 = icmp slt i32 %a.1, 150
 ; CHECK-NEXT:    %res.1 = add i1 false, %c.2
 ; CHECK-NEXT:    %res.2 = add i1 %res.1, false
 ; CHECK-NEXT:    %res.3 = add i1 %res.2, %c.4
 ; CHECK-NEXT:    %res.4 = add i1 %res.3, %c.5
 ; CHECK-NEXT:    %res.5 = add i1 %res.4, %c.6
 ; CHECK-NEXT:    ret i1 %res.5
 ;
  %a.1 = add i32 %x, %y
  ; 220 is the largest value in [110, 221), so "> 220" is never true.
  %c.1 = icmp sgt i32 %a.1, 220
  ; 219 lies inside the range, so "> 219" cannot be decided.
  %c.2 = icmp sgt i32 %a.1, 219
  ; 110 is the smallest value in the range, so "< 110" is never true.
  %c.3 = icmp slt i32 %a.1, 110
  ; "< 111" holds only for 110, so it cannot be decided.
  %c.4 = icmp slt i32 %a.1, 111
  ; 150 lies inside the range; neither comparison against it can be decided.
  %c.5 = icmp eq i32 %a.1, 150
  %c.6 = icmp slt i32 %a.1, 150
  ; Chain all comparison results together so each one has a use.
  %res.1 = add i1 %c.1, %c.2
  %res.2 = add i1 %res.1, %c.3
  %res.3 = add i1 %res.2, %c.4
  %res.4 = add i1 %res.3, %c.5
  %res.5 = add i1 %res.4, %c.6
  ret i1 %res.5
 }
 ; Calls @f.add with the argument pairs (10, 100) and (20, 200) and returns the
 ; `and` of both results. The expected output is identical to the input: both
 ; calls and the `and` are kept as written.
 define i1 @caller.add() {
 ; CHECK-LABEL:  define i1 @caller.add() {
 ; CHECK-NEXT:    %call.1 = tail call i1 @f.add(i32 10, i32 100)
 ; CHECK-NEXT:    %call.2 = tail call i1 @f.add(i32 20, i32 200)
 ; CHECK-NEXT:    %res = and i1 %call.1, %call.2
 ; CHECK-NEXT:    ret i1 %res
 ;
  %call.1 = tail call i1 @f.add(i32 10, i32 100)
  %call.2 = tail call i1 @f.add(i32 20, i32 200)
  %res = and i1 %call.1, %call.2
  ret i1 %res
 }
 ; Range test for `sub`. @f.sub is internal and is called from @caller.sub
 ; with the argument pairs (10, 100) and (20, 200), which gives:
 ; x = [10, 21), y = [100, 201)
 ; x - y = [-190, -79)
 ; The expected output drops %c.1 and %c.3 and uses `false` in their place;
 ; every other comparison is expected to remain.
 define internal i1 @f.sub(i32 %x, i32 %y) {
 ; CHECK-LABEL: define internal i1 @f.sub(i32 %x, i32 %y) {
 ; CHECK-NEXT:    %a.1 = sub i32 %x, %y
 ; CHECK-NEXT:    %c.2 = icmp sgt i32 %a.1, -81
 ; CHECK-NEXT:    %c.4 = icmp slt i32 %a.1, -189
 ; CHECK-NEXT:    %c.5 = icmp eq i32 %a.1, -150
 ; CHECK-NEXT:    %c.6 = icmp slt i32 %a.1, -150
 ; CHECK-NEXT:    %res.1 = add i1 false, %c.2
 ; CHECK-NEXT:    %res.2 = add i1 %res.1, false
 ; CHECK-NEXT:    %res.3 = add i1 %res.2, %c.4
 ; CHECK-NEXT:    %res.4 = add i1 %res.3, %c.5
 ; CHECK-NEXT:    %res.5 = add i1 %res.4, %c.6
 ; CHECK-NEXT:    ret i1 %res.5
 ;
  %a.1 = sub i32 %x, %y
  ; -80 is the largest value in [-190, -79), so "> -80" is never true.
  %c.1 = icmp sgt i32 %a.1, -80
  ; -81 lies inside the range, so "> -81" cannot be decided.
  %c.2 = icmp sgt i32 %a.1, -81
  ; -190 is the smallest value in the range, so "< -190" is never true.
  %c.3 = icmp slt i32 %a.1, -190
  ; "< -189" holds only for -190, so it cannot be decided.
  %c.4 = icmp slt i32 %a.1, -189
  ; -150 lies inside the range; neither comparison against it can be decided.
  %c.5 = icmp eq i32 %a.1, -150
  %c.6 = icmp slt i32 %a.1, -150
  ; Chain all comparison results together so each one has a use.
  %res.1 = add i1 %c.1, %c.2
  %res.2 = add i1 %res.1, %c.3
  %res.3 = add i1 %res.2, %c.4
  %res.4 = add i1 %res.3, %c.5
  %res.5 = add i1 %res.4, %c.6
  ret i1 %res.5
 }
 ; Calls @f.sub with the argument pairs (10, 100) and (20, 200) and returns the
 ; `and` of both results. The expected output is identical to the input: both
 ; calls and the `and` are kept as written.
 define i1 @caller.sub() {
 ; CHECK-LABEL:  define i1 @caller.sub() {
 ; CHECK-NEXT:    %call.1 = tail call i1 @f.sub(i32 10, i32 100)
 ; CHECK-NEXT:    %call.2 = tail call i1 @f.sub(i32 20, i32 200)
 ; CHECK-NEXT:    %res = and i1 %call.1, %call.2
 ; CHECK-NEXT:    ret i1 %res
 ;
  %call.1 = tail call i1 @f.sub(i32 10, i32 100)
  %call.2 = tail call i1 @f.sub(i32 20, i32 200)
  %res = and i1 %call.1, %call.2
  ret i1 %res
 }
 ; Range test for `mul`. @f.mul is internal and is called from @caller.mul
 ; with the argument pairs (10, 100) and (20, 200), which gives:
 ; x = [10, 21), y = [100, 201)
 ; x * y = [1000, 4001)
 ; The expected output drops %c.1 and %c.3 and uses `false` in their place;
 ; every other comparison is expected to remain.
 define internal i1 @f.mul(i32 %x, i32 %y) {
 ; CHECK-LABEL: define internal i1 @f.mul(i32 %x, i32 %y) {
 ; CHECK-NEXT:    %a.1 = mul i32 %x, %y
 ; CHECK-NEXT:    %c.2 = icmp sgt i32 %a.1, 3999
 ; CHECK-NEXT:    %c.4 = icmp slt i32 %a.1, 1001
 ; CHECK-NEXT:    %c.5 = icmp eq i32 %a.1, 1500
 ; CHECK-NEXT:    %c.6 = icmp slt i32 %a.1, 1500
 ; CHECK-NEXT:    %res.1 = add i1 false, %c.2
 ; CHECK-NEXT:    %res.2 = add i1 %res.1, false
 ; CHECK-NEXT:    %res.3 = add i1 %res.2, %c.4
 ; CHECK-NEXT:    %res.4 = add i1 %res.3, %c.5
 ; CHECK-NEXT:    %res.5 = add i1 %res.4, %c.6
 ; CHECK-NEXT:    ret i1 %res.5
 ;
  %a.1 = mul i32 %x, %y
  ; 4000 is the largest value in [1000, 4001), so "> 4000" is never true.
  %c.1 = icmp sgt i32 %a.1, 4000
  ; 3999 lies inside the range, so "> 3999" cannot be decided.
  %c.2 = icmp sgt i32 %a.1, 3999
  ; 1000 is the smallest value in the range, so "< 1000" is never true.
  %c.3 = icmp slt i32 %a.1, 1000
  ; "< 1001" holds only for 1000, so it cannot be decided.
  %c.4 = icmp slt i32 %a.1, 1001
  ; 1500 lies inside the range; neither comparison against it can be decided.
  %c.5 = icmp eq i32 %a.1, 1500
  %c.6 = icmp slt i32 %a.1, 1500
  ; Chain all comparison results together so each one has a use.
  %res.1 = add i1 %c.1, %c.2
  %res.2 = add i1 %res.1, %c.3
  %res.3 = add i1 %res.2, %c.4
  %res.4 = add i1 %res.3, %c.5
  %res.5 = add i1 %res.4, %c.6
  ret i1 %res.5
 }
 ; Calls @f.mul with the argument pairs (10, 100) and (20, 200) and returns the
 ; `and` of both results. The expected output is identical to the input: both
 ; calls and the `and` are kept as written.
 define i1 @caller.mul() {
 ; CHECK-LABEL:  define i1 @caller.mul() {
 ; CHECK-NEXT:    %call.1 = tail call i1 @f.mul(i32 10, i32 100)
 ; CHECK-NEXT:    %call.2 = tail call i1 @f.mul(i32 20, i32 200)
 ; CHECK-NEXT:    %res = and i1 %call.1, %call.2
 ; CHECK-NEXT:    ret i1 %res
 ;
  %call.1 = tail call i1 @f.mul(i32 10, i32 100)
  %call.2 = tail call i1 @f.mul(i32 20, i32 200)
  %res = and i1 %call.1, %call.2
  ret i1 %res
 }
--- a/test/Transforms/SCCP/range-and.ll
+++ b/test/Transforms/SCCP/range-and.ll
@ -8,16 +8,13 @@ define void @and_range_limit(i64 %a) {
 ; CHECK-NEXT:    [[R:%.*]] = and i64 [[A:%.*]], 255
 ; CHECK-NEXT:    [[C_0:%.*]] = icmp slt i64 [[R]], 15
 ; CHECK-NEXT:    call void @use(i1 [[C_0]])
-; CHECK-NEXT:    [[C_1:%.*]] = icmp slt i64 [[R]], 256
+; CHECK-NEXT:    call void @use(i1 true)
 ; CHECK-NEXT:    call void @use(i1 [[C_1]])
 ; CHECK-NEXT:    [[C_2:%.*]] = icmp eq i64 [[R]], 100
 ; CHECK-NEXT:    call void @use(i1 [[C_2]])
-; CHECK-NEXT:    [[C_3:%.*]] = icmp eq i64 [[R]], 300
+; CHECK-NEXT:    call void @use(i1 false)
 ; CHECK-NEXT:    call void @use(i1 [[C_3]])
 ; CHECK-NEXT:    [[C_4:%.*]] = icmp ne i64 [[R]], 100
 ; CHECK-NEXT:    call void @use(i1 [[C_4]])
-; CHECK-NEXT:    [[C_5:%.*]] = icmp ne i64 [[R]], 300
+; CHECK-NEXT:    call void @use(i1 true)
 ; CHECK-NEXT:    call void @use(i1 [[C_5]])
 ; CHECK-NEXT:    ret void
 ;
  %r = and i64 %a, 255
--- a/test/Transforms/SCCP/vector-bitcast.ll
+++ b/test/Transforms/SCCP/vector-bitcast.ll
@ -2,7 +2,8 @@
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
-; CHECK: store volatile <2 x i64> zeroinitializer, <2 x i64>* %p
+; FIXME: Add back support for handling special values of vector/fp types.
 ; CHECK: store volatile <2 x i64> %and.i119.i, <2 x i64>* %p
 ; rdar://11324230
 define void @foo(<2 x i64>* %p) nounwind {