[ValueTracking] Directly use KnownBits shift functions

Make ValueTracking directly call the KnownBits shift helpers, which provides more precise results. Unfortunately, ValueTracking has a special case where sometimes we determine non-zero shift amounts using isKnownNonZero(). I have my doubts about the usefulness of that special-case (it is only tested in a single unit test), but I've reproduced the special-case via an extra parameter to the KnownBits methods. Differential Revision: https://reviews.llvm.org/D151816
2024-12-04 12:15:46 +00:00 · 2023-05-16 10:55:44 +02:00 · 2023-05-16 10:55:44 +02:00 · dfb369399d
commit dfb369399d
parent 34cf67aef5
10 changed files with 48 additions and 106 deletions
--- a/compiler-rt/test/ubsan/TestCases/Misc/coverage-levels.cpp
+++ b/compiler-rt/test/ubsan/TestCases/Misc/coverage-levels.cpp
@ -28,7 +28,7 @@

 volatile int sink;
 int main(int argc, char **argv) {
-  int shift = argc * 32;
+  int shift = argc * 33;
 #if GOOD_SHIFT
  shift = 3;
 #endif
@ -37,7 +37,7 @@ int main(int argc, char **argv) {
  return 0;
 }

-// CHECK_WARN: shift exponent 32 is too large
+// CHECK_WARN: shift exponent 33 is too large
 // CHECK_NOWARN-NOT: ERROR
 // FIXME: Currently, coverage instrumentation kicks in after ubsan, so we get
 // more than the minimal number of instrumented blocks.
--- a/llvm/include/llvm/Support/KnownBits.h
+++ b/llvm/include/llvm/Support/KnownBits.h
@ -383,15 +383,18 @@ public:
  /// Compute known bits for shl(LHS, RHS).
  /// NOTE: RHS (shift amount) bitwidth doesn't need to be the same as LHS.
  static KnownBits shl(const KnownBits &LHS, const KnownBits &RHS,
-                       bool NUW = false, bool NSW = false);
+                       bool NUW = false, bool NSW = false,
+                       bool ShAmtNonZero = false);

  /// Compute known bits for lshr(LHS, RHS).
  /// NOTE: RHS (shift amount) bitwidth doesn't need to be the same as LHS.
-  static KnownBits lshr(const KnownBits &LHS, const KnownBits &RHS);
+  static KnownBits lshr(const KnownBits &LHS, const KnownBits &RHS,
+                        bool ShAmtNonZero = false);

  /// Compute known bits for ashr(LHS, RHS).
  /// NOTE: RHS (shift amount) bitwidth doesn't need to be the same as LHS.
-  static KnownBits ashr(const KnownBits &LHS, const KnownBits &RHS);
+  static KnownBits ashr(const KnownBits &LHS, const KnownBits &RHS,
+                        bool ShAmtNonZero = false);

  /// Determine if these known bits always give the same ICMP_EQ result.
  static std::optional<bool> eq(const KnownBits &LHS, const KnownBits &RHS);
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@ -984,79 +984,16 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known,
 static void computeKnownBitsFromShiftOperator(
    const Operator *I, const APInt &DemandedElts, KnownBits &Known,
    KnownBits &Known2, unsigned Depth, const Query &Q,
-    function_ref<KnownBits(const KnownBits &, const KnownBits &)> KF) {
-  unsigned BitWidth = Known.getBitWidth();
+    function_ref<KnownBits(const KnownBits &, const KnownBits &, bool)> KF) {
  computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
  computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
-
-  // Note: We cannot use Known.Zero.getLimitedValue() here, because if
-  // BitWidth > 64 and any upper bits are known, we'll end up returning the
-  // limit value (which implies all bits are known).
-  uint64_t ShiftAmtKZ = Known.Zero.zextOrTrunc(64).getZExtValue();
-  uint64_t ShiftAmtKO = Known.One.zextOrTrunc(64).getZExtValue();
-  bool ShiftAmtIsConstant = Known.isConstant();
-  bool MaxShiftAmtIsOutOfRange = Known.getMaxValue().uge(BitWidth);
-
-  if (ShiftAmtIsConstant) {
-    Known = KF(Known2, Known);
-    return;
-  }
-
-  // If the shift amount could be greater than or equal to the bit-width of the
-  // LHS, the value could be poison, but bail out because the check below is
-  // expensive.
-  // TODO: Should we just carry on?
-  if (MaxShiftAmtIsOutOfRange) {
-    Known.resetAll();
-    return;
-  }
-
-  // It would be more-clearly correct to use the two temporaries for this
-  // calculation. Reusing the APInts here to prevent unnecessary allocations.
-  Known.resetAll();
-
-  // If we know the shifter operand is nonzero, we can sometimes infer more
-  // known bits. However this is expensive to compute, so be lazy about it and
-  // only compute it when absolutely necessary.
-  std::optional<bool> ShifterOperandIsNonZero;
-
-  // Early exit if we can't constrain any well-defined shift amount.
-  if (!(ShiftAmtKZ & (PowerOf2Ceil(BitWidth) - 1)) &&
-      !(ShiftAmtKO & (PowerOf2Ceil(BitWidth) - 1))) {
-    ShifterOperandIsNonZero =
-        isKnownNonZero(I->getOperand(1), DemandedElts, Depth + 1, Q);
-    if (!*ShifterOperandIsNonZero)
-      return;
-  }
-
-  Known.Zero.setAllBits();
-  Known.One.setAllBits();
-  for (unsigned ShiftAmt = 0; ShiftAmt < BitWidth; ++ShiftAmt) {
-    // Combine the shifted known input bits only for those shift amounts
-    // compatible with its known constraints.
-    if ((ShiftAmt & ~ShiftAmtKZ) != ShiftAmt)
-      continue;
-    if ((ShiftAmt | ShiftAmtKO) != ShiftAmt)
-      continue;
-    // If we know the shifter is nonzero, we may be able to infer more known
-    // bits. This check is sunk down as far as possible to avoid the expensive
-    // call to isKnownNonZero if the cheaper checks above fail.
-    if (ShiftAmt == 0) {
-      if (!ShifterOperandIsNonZero)
-        ShifterOperandIsNonZero =
-            isKnownNonZero(I->getOperand(1), DemandedElts, Depth + 1, Q);
-      if (*ShifterOperandIsNonZero)
-        continue;
-    }
-
-    Known = Known.intersectWith(
-        KF(Known2, KnownBits::makeConstant(APInt(32, ShiftAmt))));
-  }
-
-  // If the known bits conflict, the result is poison. Return a 0 and hope the
-  // caller can further optimize that.
-  if (Known.hasConflict())
-    Known.setAllZero();
+  // To limit compile-time impact, only query isKnownNonZero() if we know at
+  // least something about the shift amount.
+  bool ShAmtNonZero =
+      Known.isNonZero() ||
+      (Known.getMaxValue().ult(Known.getBitWidth()) &&
+       isKnownNonZero(I->getOperand(1), DemandedElts, Depth + 1, Q));
+  Known = KF(Known2, Known, ShAmtNonZero);
 }

 static KnownBits getKnownBitsFromAndXorOr(const Operator *I,
@ -1355,8 +1292,9 @@ static void computeKnownBitsFromOperator(const Operator *I,
  case Instruction::Shl: {
    bool NUW = Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(I));
    bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
-    auto KF = [NUW, NSW](const KnownBits &KnownVal, const KnownBits &KnownAmt) {
-      return KnownBits::shl(KnownVal, KnownAmt, NUW, NSW);
+    auto KF = [NUW, NSW](const KnownBits &KnownVal, const KnownBits &KnownAmt,
+                         bool ShAmtNonZero) {
+      return KnownBits::shl(KnownVal, KnownAmt, NUW, NSW, ShAmtNonZero);
    };
    computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q,
                                      KF);
@ -1367,8 +1305,9 @@ static void computeKnownBitsFromOperator(const Operator *I,
    break;
  }
  case Instruction::LShr: {
-    auto KF = [](const KnownBits &KnownVal, const KnownBits &KnownAmt) {
-      return KnownBits::lshr(KnownVal, KnownAmt);
+    auto KF = [](const KnownBits &KnownVal, const KnownBits &KnownAmt,
+                 bool ShAmtNonZero) {
+      return KnownBits::lshr(KnownVal, KnownAmt, ShAmtNonZero);
    };
    computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q,
                                      KF);
@ -1379,8 +1318,9 @@ static void computeKnownBitsFromOperator(const Operator *I,
    break;
  }
  case Instruction::AShr: {
-    auto KF = [](const KnownBits &KnownVal, const KnownBits &KnownAmt) {
-      return KnownBits::ashr(KnownVal, KnownAmt);
+    auto KF = [](const KnownBits &KnownVal, const KnownBits &KnownAmt,
+                 bool ShAmtNonZero) {
+      return KnownBits::ashr(KnownVal, KnownAmt, ShAmtNonZero);
    };
    computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q,
                                      KF);
--- a/llvm/lib/Support/KnownBits.cpp
+++ b/llvm/lib/Support/KnownBits.cpp
@ -172,7 +172,7 @@ static unsigned getMaxShiftAmount(const APInt &MaxValue, unsigned BitWidth) {
 }

 KnownBits KnownBits::shl(const KnownBits &LHS, const KnownBits &RHS, bool NUW,
-                         bool NSW) {
+                         bool NSW, bool ShAmtNonZero) {
  unsigned BitWidth = LHS.getBitWidth();
  auto ShiftByConst = [&](const KnownBits &LHS, unsigned ShiftAmt) {
    KnownBits Known;
@ -198,6 +198,8 @@ KnownBits KnownBits::shl(const KnownBits &LHS, const KnownBits &RHS, bool NUW,
  // Fast path for a common case when LHS is completely unknown.
  KnownBits Known(BitWidth);
  unsigned MinShiftAmount = RHS.getMinValue().getLimitedValue(BitWidth);
+  if (MinShiftAmount == 0 && ShAmtNonZero)
+    MinShiftAmount = 1;
  if (LHS.isUnknown()) {
    Known.Zero.setLowBits(MinShiftAmount);
    if (NUW && NSW && MinShiftAmount != 0)
@ -254,7 +256,8 @@ KnownBits KnownBits::shl(const KnownBits &LHS, const KnownBits &RHS, bool NUW,
  return Known;
 }

-KnownBits KnownBits::lshr(const KnownBits &LHS, const KnownBits &RHS) {
+KnownBits KnownBits::lshr(const KnownBits &LHS, const KnownBits &RHS,
+                          bool ShAmtNonZero) {
  unsigned BitWidth = LHS.getBitWidth();
  auto ShiftByConst = [&](const KnownBits &LHS, unsigned ShiftAmt) {
    KnownBits Known = LHS;
@ -268,6 +271,8 @@ KnownBits KnownBits::lshr(const KnownBits &LHS, const KnownBits &RHS) {
  // Fast path for a common case when LHS is completely unknown.
  KnownBits Known(BitWidth);
  unsigned MinShiftAmount = RHS.getMinValue().getLimitedValue(BitWidth);
+  if (MinShiftAmount == 0 && ShAmtNonZero)
+    MinShiftAmount = 1;
  if (LHS.isUnknown()) {
    Known.Zero.setHighBits(MinShiftAmount);
    return Known;
@ -297,7 +302,8 @@ KnownBits KnownBits::lshr(const KnownBits &LHS, const KnownBits &RHS) {
  return Known;
 }

-KnownBits KnownBits::ashr(const KnownBits &LHS, const KnownBits &RHS) {
+KnownBits KnownBits::ashr(const KnownBits &LHS, const KnownBits &RHS,
+                          bool ShAmtNonZero) {
  unsigned BitWidth = LHS.getBitWidth();
  auto ShiftByConst = [&](const KnownBits &LHS, unsigned ShiftAmt) {
    KnownBits Known = LHS;
@ -309,6 +315,8 @@ KnownBits KnownBits::ashr(const KnownBits &LHS, const KnownBits &RHS) {
  // Fast path for a common case when LHS is completely unknown.
  KnownBits Known(BitWidth);
  unsigned MinShiftAmount = RHS.getMinValue().getLimitedValue(BitWidth);
+  if (MinShiftAmount == 0 && ShAmtNonZero)
+    MinShiftAmount = 1;
  if (LHS.isUnknown()) {
    if (MinShiftAmount == BitWidth) {
      // Always poison. Return zero because we don't like returning conflict.
--- a/llvm/test/Analysis/ScalarEvolution/extract-highbits-variablemask.ll
+++ b/llvm/test/Analysis/ScalarEvolution/extract-highbits-variablemask.ll
@ -40,7 +40,7 @@ define i32 @mask_b(i32 %val, i32 %numlowbits) nounwind {
 ; CHECK-LABEL: 'mask_b'
 ; CHECK-NEXT:  Classifying expressions for: @mask_b
 ; CHECK-NEXT:    %mask = shl i32 -1, %numlowbits
-; CHECK-NEXT:    --> %mask U: full-set S: full-set
+; CHECK-NEXT:    --> %mask U: [-2147483648,0) S: [-2147483648,0)
 ; CHECK-NEXT:    %masked = and i32 %mask, %val
 ; CHECK-NEXT:    --> %masked U: full-set S: full-set
 ; CHECK-NEXT:  Determining loop execution counts for: @mask_b
--- a/llvm/test/Analysis/ScalarEvolution/extract-lowbits-variablemask.ll
+++ b/llvm/test/Analysis/ScalarEvolution/extract-lowbits-variablemask.ll
@ -45,11 +45,11 @@ define i32 @mask_b(i32 %val, i32 %numlowbits) nounwind {
 ; CHECK-LABEL: 'mask_b'
 ; CHECK-NEXT:  Classifying expressions for: @mask_b
 ; CHECK-NEXT:    %notmask = shl i32 -1, %numlowbits
-; CHECK-NEXT:    --> %notmask U: full-set S: full-set
+; CHECK-NEXT:    --> %notmask U: [-2147483648,0) S: [-2147483648,0)
 ; CHECK-NEXT:    %mask = xor i32 %notmask, -1
-; CHECK-NEXT:    --> (-1 + (-1 * %notmask)) U: full-set S: full-set
+; CHECK-NEXT:    --> (-1 + (-1 * %notmask)) U: [0,-2147483648) S: [0,-2147483648)
 ; CHECK-NEXT:    %masked = and i32 %mask, %val
-; CHECK-NEXT:    --> %masked U: full-set S: full-set
+; CHECK-NEXT:    --> %masked U: [0,-2147483648) S: [0,-2147483648)
 ; CHECK-NEXT:  Determining loop execution counts for: @mask_b
 ;
  %notmask = shl i32 -1, %numlowbits
@ -64,7 +64,7 @@ define i32 @mask_c(i32 %val, i32 %numlowbits) nounwind {
 ; CHECK-NEXT:    %numhighbits = sub i32 32, %numlowbits
 ; CHECK-NEXT:    --> (32 + (-1 * %numlowbits)) U: full-set S: full-set
 ; CHECK-NEXT:    %mask = lshr i32 -1, %numhighbits
-; CHECK-NEXT:    --> %mask U: full-set S: full-set
+; CHECK-NEXT:    --> %mask U: [1,0) S: [1,0)
 ; CHECK-NEXT:    %masked = and i32 %mask, %val
 ; CHECK-NEXT:    --> %masked U: full-set S: full-set
 ; CHECK-NEXT:  Determining loop execution counts for: @mask_c
--- a/llvm/test/Analysis/ScalarEvolution/shift-recurrences.ll
+++ b/llvm/test/Analysis/ScalarEvolution/shift-recurrences.ll
@ -152,7 +152,7 @@ define void @test_ashr_wrong_op(i64 %start) {
 ; CHECK-NEXT:    %iv.ashr = phi i64 [ %start, %entry ], [ %iv.ashr.next, %loop ]
 ; CHECK-NEXT:    --> %iv.ashr U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Variant }
 ; CHECK-NEXT:    %iv.ashr.next = ashr i64 1, %iv.ashr
-; CHECK-NEXT:    --> %iv.ashr.next U: [-2,2) S: [-2,2) Exits: <<Unknown>> LoopDispositions: { %loop: Variant }
+; CHECK-NEXT:    --> %iv.ashr.next U: [0,2) S: [0,2) Exits: <<Unknown>> LoopDispositions: { %loop: Variant }
 ; CHECK-NEXT:  Determining loop execution counts for: @test_ashr_wrong_op
 ; CHECK-NEXT:  Loop %loop: Unpredictable backedge-taken count.
 ; CHECK-NEXT:  Loop %loop: Unpredictable constant max backedge-taken count.
@ -375,7 +375,7 @@ define void @test_shl7(i1 %c, i64 %shiftamt) {
 ; CHECK-NEXT:    %iv.next = add i64 %iv, 1
 ; CHECK-NEXT:    --> {1,+,1}<nuw><nsw><%loop> U: [1,6) S: [1,6) Exits: 5 LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %iv.shl.next = shl i64 %iv.shl, %shiftamt
-; CHECK-NEXT:    --> %iv.shl.next U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Variant }
+; CHECK-NEXT:    --> %iv.shl.next U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: <<Unknown>> LoopDispositions: { %loop: Variant }
 ; CHECK-NEXT:  Determining loop execution counts for: @test_shl7
 ; CHECK-NEXT:  Loop %loop: backedge-taken count is 4
 ; CHECK-NEXT:  Loop %loop: constant max backedge-taken count is 4
--- a/llvm/test/Analysis/ValueTracking/known-power-of-two-urem.ll
+++ b/llvm/test/Analysis/ValueTracking/known-power-of-two-urem.ll
@ -294,15 +294,9 @@ define i64 @known_power_of_two_urem_loop_ashr_negative(i64 %size, i64 %a) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[PHI:%.*]] = phi i64 [ -9223372036854775808, [[ENTRY:%.*]] ], [ [[I:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[SUM:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[UREM:%.*]] = urem i64 [[SIZE:%.*]], [[PHI]]
-; CHECK-NEXT:    [[ADD]] = add nsw i64 [[SUM]], [[UREM]]
-; CHECK-NEXT:    [[I]] = ashr i64 [[PHI]], [[A:%.*]]
-; CHECK-NEXT:    [[ICMP:%.*]] = icmp ugt i64 [[I]], 1
-; CHECK-NEXT:    br i1 [[ICMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
+; CHECK-NEXT:    br i1 true, label [[FOR_BODY]], label [[FOR_END:%.*]]
 ; CHECK:       for.end:
-; CHECK-NEXT:    ret i64 [[SUM]]
+; CHECK-NEXT:    ret i64 poison
 ;
 entry:
  br label %for.body
--- a/llvm/test/Transforms/IndVarSimplify/shift-range-checks.ll
+++ b/llvm/test/Transforms/IndVarSimplify/shift-range-checks.ll
@ -12,7 +12,7 @@ define void @test_01(ptr %p, i32 %shift) {
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
-; CHECK-NEXT:    [[LESS_THAN_SHIFTED:%.*]] = icmp slt i32 [[IV]], [[X_SHIFTED]]
+; CHECK-NEXT:    [[LESS_THAN_SHIFTED:%.*]] = icmp ult i32 [[IV]], [[X_SHIFTED]]
 ; CHECK-NEXT:    br i1 [[LESS_THAN_SHIFTED]], label [[GUARDED:%.*]], label [[FAILURE:%.*]]
 ; CHECK:       guarded:
 ; CHECK-NEXT:    br i1 true, label [[BACKEDGE]], label [[NEVER_HAPPENS:%.*]]
@ -68,7 +68,7 @@ define void @test_02(ptr %p, i32 %shift) {
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
-; CHECK-NEXT:    [[LESS_THAN_SHIFTED:%.*]] = icmp sgt i32 [[X_SHIFTED]], [[IV]]
+; CHECK-NEXT:    [[LESS_THAN_SHIFTED:%.*]] = icmp ugt i32 [[X_SHIFTED]], [[IV]]
 ; CHECK-NEXT:    br i1 [[LESS_THAN_SHIFTED]], label [[GUARDED:%.*]], label [[FAILURE:%.*]]
 ; CHECK:       guarded:
 ; CHECK-NEXT:    br i1 true, label [[BACKEDGE]], label [[NEVER_HAPPENS:%.*]]
--- a/llvm/test/Transforms/InstCombine/zext-or-icmp.ll
+++ b/llvm/test/Transforms/InstCombine/zext-or-icmp.ll
@ -100,10 +100,7 @@ define i1 @knownbits_out_of_range_shift(i32 %x) {
 ; CHECK:       block1:
 ; CHECK-NEXT:    br label [[BLOCK2]]
 ; CHECK:       block2:
-; CHECK-NEXT:    [[P:%.*]] = phi i32 [ 63, [[ENTRY:%.*]] ], [ 31, [[BLOCK1:%.*]] ]
-; CHECK-NEXT:    [[L:%.*]] = lshr i32 [[X:%.*]], [[P]]
-; CHECK-NEXT:    [[R:%.*]] = icmp eq i32 [[L]], 2
-; CHECK-NEXT:    ret i1 [[R]]
+; CHECK-NEXT:    ret i1 false
 ;
 entry:
  br label %block2