mirror of
https://github.com/RPCS3/llvm.git
synced 2025-02-26 05:36:36 +00:00
[InstCombine] Fold "x ?% y ==/!= 0" to "x & (y-1) ==/!= 0" iff y is power-of-two
Summary: I have stumbled into this by accident while preparing to extend backend `x s% C ==/!= 0` handling. While we did happen to handle this fold in most of the cases, the folding is indirect - we fold `x u% y` to `x & (y-1)` (iff `y` is power-of-two), or first turn `x s% -y` to `x u% y`; that does handle most of the cases. But we can't turn `x s% INT_MIN` to `x u% -INT_MIN`, and thus we end up being stuck with `(x s% INT_MIN) == 0`. There is no such restriction for the more general fold: https://rise4fun.com/Alive/IIeS To be noted, the fold does not enforce that `y` is a constant, so it may indeed increase instruction count. This is consistent with what `x u% y`->`x & (y-1)` already does. I think it makes sense, it's at most one (simple) extra instruction, while `rem`ainder is really much more un-simple (and likely **very** costly). Reviewers: spatel, RKSimon, nikic, xbolva00, craig.topper Reviewed By: RKSimon Subscribers: hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D65046 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@367322 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
6b285431fd
commit
64018f7f99
@ -1003,6 +1003,12 @@ struct is_idiv_op {
|
||||
}
|
||||
};
|
||||
|
||||
struct is_irem_op {
|
||||
bool isOpType(unsigned Opcode) {
|
||||
return Opcode == Instruction::SRem || Opcode == Instruction::URem;
|
||||
}
|
||||
};
|
||||
|
||||
/// Matches shift operations.
|
||||
template <typename LHS, typename RHS>
|
||||
inline BinOpPred_match<LHS, RHS, is_shift_op> m_Shift(const LHS &L,
|
||||
@ -1038,6 +1044,13 @@ inline BinOpPred_match<LHS, RHS, is_idiv_op> m_IDiv(const LHS &L,
|
||||
return BinOpPred_match<LHS, RHS, is_idiv_op>(L, R);
|
||||
}
|
||||
|
||||
/// Matches integer remainder operations.
|
||||
template <typename LHS, typename RHS>
|
||||
inline BinOpPred_match<LHS, RHS, is_irem_op> m_IRem(const LHS &L,
|
||||
const RHS &R) {
|
||||
return BinOpPred_match<LHS, RHS, is_irem_op>(L, R);
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Class that matches exact binary ops.
|
||||
//
|
||||
|
@ -1317,6 +1317,27 @@ static Instruction *processUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
|
||||
return ExtractValueInst::Create(Call, 1, "sadd.overflow");
|
||||
}
|
||||
|
||||
/// If we have:
|
||||
/// icmp eq/ne (urem/srem %x, %y), 0
|
||||
/// iff %y is a power-of-two, we can replace this with a bit test:
|
||||
/// icmp eq/ne (and %x, (add %y, -1)), 0
|
||||
Instruction *InstCombiner::foldIRemByPowerOfTwoToBitTest(ICmpInst &I) {
|
||||
// This fold is only valid for equality predicates.
|
||||
if (!I.isEquality())
|
||||
return nullptr;
|
||||
ICmpInst::Predicate Pred;
|
||||
Value *X, *Y, *Zero;
|
||||
if (!match(&I, m_ICmp(Pred, m_OneUse(m_IRem(m_Value(X), m_Value(Y))),
|
||||
m_CombineAnd(m_Zero(), m_Value(Zero)))))
|
||||
return nullptr;
|
||||
if (!isKnownToBeAPowerOfTwo(Y, /*OrZero*/ true, 0, &I))
|
||||
return nullptr;
|
||||
// This may increase instruction count, we don't enforce that Y is a constant.
|
||||
Value *Mask = Builder.CreateAdd(Y, Constant::getAllOnesValue(Y->getType()));
|
||||
Value *Masked = Builder.CreateAnd(X, Mask);
|
||||
return ICmpInst::Create(Instruction::ICmp, Pred, Masked, Zero);
|
||||
}
|
||||
|
||||
// Handle icmp pred X, 0
|
||||
Instruction *InstCombiner::foldICmpWithZero(ICmpInst &Cmp) {
|
||||
CmpInst::Predicate Pred = Cmp.getPredicate();
|
||||
@ -1335,6 +1356,9 @@ Instruction *InstCombiner::foldICmpWithZero(ICmpInst &Cmp) {
|
||||
}
|
||||
}
|
||||
|
||||
if (Instruction *New = foldIRemByPowerOfTwoToBitTest(Cmp))
|
||||
return New;
|
||||
|
||||
// Given:
|
||||
// icmp eq/ne (urem %x, %y), 0
|
||||
// Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
|
||||
|
@ -839,6 +839,7 @@ private:
|
||||
Instruction *foldICmpInstWithConstantNotInt(ICmpInst &Cmp);
|
||||
Instruction *foldICmpBinOp(ICmpInst &Cmp);
|
||||
Instruction *foldICmpEquality(ICmpInst &Cmp);
|
||||
Instruction *foldIRemByPowerOfTwoToBitTest(ICmpInst &I);
|
||||
Instruction *foldICmpWithZero(ICmpInst &Cmp);
|
||||
|
||||
Instruction *foldICmpSelectConstant(ICmpInst &Cmp, SelectInst *Select,
|
||||
|
@ -1319,6 +1319,8 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
|
||||
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
|
||||
Type *Ty = I.getType();
|
||||
if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/ true, 0, &I)) {
|
||||
// This may increase instruction count, we don't enforce that Y is a
|
||||
// constant.
|
||||
Constant *N1 = Constant::getAllOnesValue(Ty);
|
||||
Value *Add = Builder.CreateAdd(Op1, N1);
|
||||
return BinaryOperator::CreateAnd(Op0, Add);
|
||||
|
@ -201,8 +201,8 @@ define <2 x i19> @weird_vec_power_of_2_constant_splat_divisor(<2 x i19> %A) {
|
||||
|
||||
define i1 @test3a(i32 %A) {
|
||||
; CHECK-LABEL: @test3a(
|
||||
; CHECK-NEXT: [[B1:%.*]] = and i32 [[A:%.*]], 7
|
||||
; CHECK-NEXT: [[C:%.*]] = icmp ne i32 [[B1]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 7
|
||||
; CHECK-NEXT: [[C:%.*]] = icmp ne i32 [[TMP1]], 0
|
||||
; CHECK-NEXT: ret i1 [[C]]
|
||||
;
|
||||
%B = srem i32 %A, -8
|
||||
@ -212,8 +212,8 @@ define i1 @test3a(i32 %A) {
|
||||
|
||||
define <2 x i1> @test3a_vec(<2 x i32> %A) {
|
||||
; CHECK-LABEL: @test3a_vec(
|
||||
; CHECK-NEXT: [[B1:%.*]] = and <2 x i32> [[A:%.*]], <i32 7, i32 7>
|
||||
; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i32> [[B1]], zeroinitializer
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], <i32 7, i32 7>
|
||||
; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer
|
||||
; CHECK-NEXT: ret <2 x i1> [[C]]
|
||||
;
|
||||
%B = srem <2 x i32> %A, <i32 -8, i32 -8>
|
||||
@ -681,8 +681,8 @@ define <2 x i1> @test24_vec(<2 x i32> %A) {
|
||||
|
||||
define i1 @test25(i32 %A) {
|
||||
; CHECK-LABEL: @test25(
|
||||
; CHECK-NEXT: [[B:%.*]] = srem i32 [[A:%.*]], -2147483648
|
||||
; CHECK-NEXT: [[C:%.*]] = icmp ne i32 [[B]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 2147483647
|
||||
; CHECK-NEXT: [[C:%.*]] = icmp ne i32 [[TMP1]], 0
|
||||
; CHECK-NEXT: ret i1 [[C]]
|
||||
;
|
||||
%B = srem i32 %A, 2147483648 ; signbit
|
||||
@ -692,8 +692,8 @@ define i1 @test25(i32 %A) {
|
||||
|
||||
define <2 x i1> @test25_vec(<2 x i32> %A) {
|
||||
; CHECK-LABEL: @test25_vec(
|
||||
; CHECK-NEXT: [[B:%.*]] = srem <2 x i32> [[A:%.*]], <i32 -2147483648, i32 -2147483648>
|
||||
; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i32> [[B]], zeroinitializer
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], <i32 2147483647, i32 2147483647>
|
||||
; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer
|
||||
; CHECK-NEXT: ret <2 x i1> [[C]]
|
||||
;
|
||||
%B = srem <2 x i32> %A, <i32 2147483648, i32 2147483648>
|
||||
@ -703,9 +703,10 @@ define <2 x i1> @test25_vec(<2 x i32> %A) {
|
||||
|
||||
define i1 @test26(i32 %A, i32 %B) {
|
||||
; CHECK-LABEL: @test26(
|
||||
; CHECK-NEXT: [[C:%.*]] = shl nuw i32 1, [[B:%.*]]
|
||||
; CHECK-NEXT: [[D:%.*]] = srem i32 [[A:%.*]], [[C]]
|
||||
; CHECK-NEXT: [[E:%.*]] = icmp ne i32 [[D]], 0
|
||||
; CHECK-NEXT: [[NOTMASK:%.*]] = shl nsw i32 -1, [[B:%.*]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[NOTMASK]], -1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[A:%.*]]
|
||||
; CHECK-NEXT: [[E:%.*]] = icmp ne i32 [[TMP2]], 0
|
||||
; CHECK-NEXT: ret i1 [[E]]
|
||||
;
|
||||
%C = shl i32 1, %B ; not a constant
|
||||
@ -729,8 +730,8 @@ define i1 @test27(i32 %A, i32* %remdst) {
|
||||
|
||||
define i1 @test28(i32 %A) {
|
||||
; CHECK-LABEL: @test28(
|
||||
; CHECK-NEXT: [[B:%.*]] = srem i32 [[A:%.*]], -2147483648
|
||||
; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[B]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 2147483647
|
||||
; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP1]], 0
|
||||
; CHECK-NEXT: ret i1 [[C]]
|
||||
;
|
||||
%B = srem i32 %A, 2147483648 ; signbit
|
||||
|
Loading…
x
Reference in New Issue
Block a user