[InstCombine] allow (X != C1 && X != C2) and similar patterns to match splat vector constants

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@300402 91177308-0d34-0410-b5e6-96231b3b80d8
2024-12-23 12:40:38 +00:00 · 2017-04-15 17:55:06 +00:00 · 2017-04-15 17:55:06 +00:00 · c4cce50c34
commit c4cce50c34
parent 55d1225376
2 changed files with 26 additions and 28 deletions
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@ -728,13 +728,13 @@ static Value *
 foldAndOrOfEqualityCmpsWithConstants(ICmpInst *LHS, ICmpInst *RHS,
                                     bool JoinedByAnd,
                                     InstCombiner::BuilderTy *Builder) {
-  Value *X = LHS->getOperand(0);  if (X != RHS->getOperand(0))
+  Value *X = LHS->getOperand(0);
+  if (X != RHS->getOperand(0))
    return nullptr;

-  // FIXME: This should use m_APInt and work with splat vector constants.
-  auto *LHSC = dyn_cast<ConstantInt>(LHS->getOperand(1));
-  auto *RHSC = dyn_cast<ConstantInt>(RHS->getOperand(1));
-  if (!LHSC || !RHSC)
+  const APInt *C1, *C2;
+  if (!match(LHS->getOperand(1), m_APInt(C1)) ||
+      !match(RHS->getOperand(1), m_APInt(C2)))
    return nullptr;

  // We only handle (X != C1 && X != C2) and (X == C1 || X == C2).
@ -747,10 +747,10 @@ foldAndOrOfEqualityCmpsWithConstants(ICmpInst *LHS, ICmpInst *RHS,
    return nullptr;

  // The larger unsigned constant goes on the right.
-  if (LHSC->getValue().ugt(RHSC->getValue()))
-    std::swap(LHSC, RHSC);
+  if (C1->ugt(*C2))
+    std::swap(C1, C2);

-  APInt Xor = LHSC->getValue() ^ RHSC->getValue();
+  APInt Xor = *C1 ^ *C2;
  if (Xor.isPowerOf2()) {
    // If C1 and C2 differ by only one bit, then set that bit in X and
    // compare against the larger constant:
@ -759,19 +759,19 @@ foldAndOrOfEqualityCmpsWithConstants(ICmpInst *LHS, ICmpInst *RHS,
    // We choose an 'or' with a Pow2 constant rather than the inverse mask with
    // 'and' because that may lead to smaller codegen from a smaller constant.
    Value *Or = Builder->CreateOr(X, ConstantInt::get(X->getType(), Xor));
-    return Builder->CreateICmp(Pred, Or, RHSC);
+    return Builder->CreateICmp(Pred, Or, ConstantInt::get(X->getType(), *C2));
  }

  // Special case: get the ordering right when the values wrap around zero.
  // I.e., we assumed the constants were unsigned when swapping earlier.
-  if (LHSC->getValue() == 0 && RHSC->getValue().isAllOnesValue())
-    std::swap(LHSC, RHSC);
+  if (*C1 == 0 && C2->isAllOnesValue())
+    std::swap(C1, C2);

-  if (LHSC == SubOne(RHSC)) {
+  if (*C1 == *C2 - 1) {
    // (X == 13 || X == 14) --> X - 13 <=u 1
    // (X != 13 && X != 14) --> X - 13  >u 1
    // An 'add' is the canonical IR form, so favor that over a 'sub'.
-    Value *Add = Builder->CreateAdd(X, ConstantExpr::getNeg(LHSC));
+    Value *Add = Builder->CreateAdd(X, ConstantInt::get(X->getType(), -(*C1)));
    auto NewPred = JoinedByAnd ? ICmpInst::ICMP_UGT : ICmpInst::ICMP_ULE;
    return Builder->CreateICmp(NewPred, Add, ConstantInt::get(X->getType(), 1));
  }
@ -809,6 +809,9 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
  if (Value *V = simplifyRangeCheck(RHS, LHS, /*Inverted=*/false))
    return V;

+  if (Value *V = foldAndOrOfEqualityCmpsWithConstants(LHS, RHS, true, Builder))
+    return V;
+
  // This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2).
  Value *LHS0 = LHS->getOperand(0), *RHS0 = RHS->getOperand(0);
  ConstantInt *LHSC = dyn_cast<ConstantInt>(LHS->getOperand(1));
@ -878,9 +881,6 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
  if (!PredicatesFoldable(PredL, PredR))
    return nullptr;

-  if (Value *V = foldAndOrOfEqualityCmpsWithConstants(LHS, RHS, true, Builder))
-    return V;
-
  // Ensure that the larger constant is on the RHS.
  bool ShouldSwap;
  if (CmpInst::isSigned(PredL) ||
@ -1754,6 +1754,9 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
  if (Value *V = simplifyRangeCheck(RHS, LHS, /*Inverted=*/true))
    return V;

+  if (Value *V = foldAndOrOfEqualityCmpsWithConstants(LHS, RHS, false, Builder))
+    return V;
+
  // This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2).
  if (!LHSC || !RHSC)
    return nullptr;
@ -1791,9 +1794,6 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
  if (!PredicatesFoldable(PredL, PredR))
    return nullptr;

-  if (Value *V = foldAndOrOfEqualityCmpsWithConstants(LHS, RHS, false, Builder))
-    return V;
-
  // Ensure that the larger constant is on the RHS.
  bool ShouldSwap;
  if (CmpInst::isSigned(PredL) ||
--- a/test/Transforms/InstCombine/and-or-icmps.ll
+++ b/test/Transforms/InstCombine/and-or-icmps.ll
@ -177,14 +177,13 @@ define i1 @and_ne_with_diff_one_signed(i64 %x) {
  ret i1 %and
 }

-; FIXME: Vectors with splat constants get the same folds.
+; Vectors with splat constants get the same folds.

 define <2 x i1> @or_eq_with_one_bit_diff_constants2_splatvec(<2 x i32> %x) {
 ; CHECK-LABEL: @or_eq_with_one_bit_diff_constants2_splatvec(
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq <2 x i32> %x, <i32 97, i32 97>
-; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq <2 x i32> %x, <i32 65, i32 65>
-; CHECK-NEXT:    [[OR:%.*]] = or <2 x i1> [[CMP1]], [[CMP2]]
-; CHECK-NEXT:    ret <2 x i1> [[OR]]
+; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i32> %x, <i32 32, i32 32>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], <i32 97, i32 97>
+; CHECK-NEXT:    ret <2 x i1> [[TMP2]]
 ;
  %cmp1 = icmp eq <2 x i32> %x, <i32 97, i32 97>
  %cmp2 = icmp eq <2 x i32> %x, <i32 65, i32 65>
@ -194,10 +193,9 @@ define <2 x i1> @or_eq_with_one_bit_diff_constants2_splatvec(<2 x i32> %x) {

 define <2 x i1> @and_ne_with_diff_one_splatvec(<2 x i32> %x) {
 ; CHECK-LABEL: @and_ne_with_diff_one_splatvec(
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp ne <2 x i32> %x, <i32 40, i32 40>
-; CHECK-NEXT:    [[CMP2:%.*]] = icmp ne <2 x i32> %x, <i32 39, i32 39>
-; CHECK-NEXT:    [[AND:%.*]] = and <2 x i1> [[CMP1]], [[CMP2]]
-; CHECK-NEXT:    ret <2 x i1> [[AND]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> %x, <i32 -39, i32 -39>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt <2 x i32> [[TMP1]], <i32 1, i32 1>
+; CHECK-NEXT:    ret <2 x i1> [[TMP2]]
 ;
  %cmp1 = icmp ne <2 x i32> %x, <i32 40, i32 40>
  %cmp2 = icmp ne <2 x i32> %x, <i32 39, i32 39>