[InstCombine] fold lshr(mul X, SplatC), C2

This is a special-case multiply that replicates bits of
the source operand. We need this fold to avoid a regression
if we make canonicalization to `mul` more aggressive for
shl+or patterns.
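To see the arithmetic behind the fold: with no unsigned overflow, multiplying
by 65537 (2^16 + 1) copies the low 16 bits of X into the high half, so a right
shift by 16 returns just those low bits. A small standalone sketch of that
identity (not part of the patch; plain C++ modeling the i32 case):

  #include <cassert>
  #include <cstdint>

  int main() {
    // With 'nuw', x * 65537 cannot wrap i32, which forces x <= 65535, i.e. the
    // high half of x is already zero. The multiply then just replicates the
    // low 16 bits into the high 16 bits: x * 0x10001 == (x << 16) | x.
    uint32_t x = 0xABCD;               // any value with the high half clear
    uint32_t m = x * 0x10001u;         // models: %m = mul nuw i32 %x, 65537
    assert((m >> 16) == (x & 0xFFFF)); // models: lshr i32 %m, 16 == and i32 %x, 65535
    return 0;
  }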

I did not see a way to make Alive generalize the bit-width
condition (even bit widths only), but an example of the
proof is:
  Name: i32
  Pre: isPowerOf2(C1 - 1) && log2(C1) == C2 && (C2 * 2 == width(C2))
  %m = mul nuw i32 %x, C1
  %t = lshr i32 %m, C2
  =>
  %t = and i32 %x, C1 - 2

  Name: i14
  %m = mul nuw i14 %x, 129
  %t = lshr i14 %m, 7
  =>
  %t = and i14 %x, 127

https://rise4fun.com/Alive/e52
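
As a quick cross-check outside of Alive (my own brute-force sketch, not part of
the commit), the i14 instance can be verified exhaustively by modeling i14 with
masked 64-bit arithmetic:

  #include <cassert>
  #include <cstdint>

  int main() {
    const uint64_t Mask14 = (1u << 14) - 1; // model i14 values by masking to 14 bits
    for (uint64_t x = 0; x <= Mask14; ++x) {
      uint64_t Wide = x * 129;
      if (Wide > Mask14)
        continue;                    // 'mul nuw' excludes inputs that would wrap i14
      uint64_t M = Wide & Mask14;    // %m = mul nuw i14 %x, 129
      assert((M >> 7) == (x & 127)); // %t = lshr i14 %m, 7  ==  and i14 %x, 127
    }
    return 0;
  }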
Sanjay Patel 2021-02-10 14:57:31 -05:00
parent 6bcc1fd461
commit 6e2053983e
2 changed files with 20 additions and 3 deletions

llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp

@@ -1145,6 +1145,16 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) {
         return BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, AmtSum));
     }
 
+    // Look for a "splat" mul pattern - it replicates bits across each half of
+    // a value, so a right shift is just a mask of the low bits:
+    // lshr i32 (mul nuw X, Pow2+1), 16 --> and X, Pow2-1
+    // TODO: Generalize to allow more than just half-width shifts?
+    const APInt *MulC;
+    if (match(Op0, m_NUWMul(m_Value(X), m_APInt(MulC))) &&
+        ShAmt * 2 == BitWidth && (*MulC - 1).isPowerOf2() &&
+        MulC->logBase2() == ShAmt)
+      return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, *MulC - 2));
+
     // If the shifted-out value is known-zero, then this is an exact shift.
     if (!I.isExact() &&
         MaskedValueIsZero(Op0, APInt::getLowBitsSet(BitWidth, ShAmt), 0, &I)) {
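
One point worth noting about the new code above (my commentary, not part of the
patch): the m_NUWMul requirement is what makes the fold sound. If the multiply
may wrap, the carry out of the low half clobbers the replicated copy in the
high half, which is what the mul_splat_fold_no_nuw test below guards against.
An i8-sized sketch of that failure mode:

  #include <cstdint>
  #include <cstdio>

  int main() {
    // i8 analogue of the pattern: MulC = 17 = 2^4 + 1, shift = 4 (half of 8 bits).
    // With x = 255 the multiply wraps i8, so the replicated high half is corrupted:
    uint8_t x = 255;
    uint8_t m = (uint8_t)(x * 17); // mul i8 %x, 17 without nuw: 4335 wraps to 0xEF
    unsigned shifted = m >> 4;     // lshr i8 %m, 4 --> 14
    unsigned masked = x & 15;      // and i8 %x, 15 --> 15
    std::printf("lshr = %u, and = %u\n", shifted, masked); // differ, so nuw is required
    return 0;
  }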

llvm/test/Transforms/InstCombine/lshr.ll

@@ -262,8 +262,7 @@ define <2 x i32> @narrow_lshr_constant(<2 x i8> %x, <2 x i8> %y) {
 
 define i32 @mul_splat_fold(i32 %x) {
 ; CHECK-LABEL: @mul_splat_fold(
-; CHECK-NEXT:    [[M:%.*]] = mul nuw i32 [[X:%.*]], 65537
-; CHECK-NEXT:    [[T:%.*]] = lshr i32 [[M]], 16
+; CHECK-NEXT:    [[T:%.*]] = and i32 [[X:%.*]], 65535
 ; CHECK-NEXT:    ret i32 [[T]]
 ;
   %m = mul nuw i32 %x, 65537
@@ -271,13 +270,15 @@ define i32 @mul_splat_fold(i32 %x) {
   ret i32 %t
 }
 
+; Vector type, extra use, weird types are all ok.
+
 declare void @usevec(<3 x i14>)
 
 define <3 x i14> @mul_splat_fold_vec(<3 x i14> %x) {
 ; CHECK-LABEL: @mul_splat_fold_vec(
 ; CHECK-NEXT:    [[M:%.*]] = mul nuw <3 x i14> [[X:%.*]], <i14 129, i14 129, i14 129>
 ; CHECK-NEXT:    call void @usevec(<3 x i14> [[M]])
-; CHECK-NEXT:    [[T:%.*]] = lshr <3 x i14> [[M]], <i14 7, i14 7, i14 7>
+; CHECK-NEXT:    [[T:%.*]] = and <3 x i14> [[X]], <i14 127, i14 127, i14 127>
 ; CHECK-NEXT:    ret <3 x i14> [[T]]
 ;
   %m = mul nuw <3 x i14> %x, <i14 129, i14 129, i14 129>
@@ -286,6 +287,8 @@ define <3 x i14> @mul_splat_fold_vec(<3 x i14> %x) {
   ret <3 x i14> %t
 }
 
+; Negative test
+
 define i32 @mul_splat_fold_wrong_mul_const(i32 %x) {
 ; CHECK-LABEL: @mul_splat_fold_wrong_mul_const(
 ; CHECK-NEXT:    [[M:%.*]] = mul nuw i32 [[X:%.*]], 65538
@@ -297,6 +300,8 @@ define i32 @mul_splat_fold_wrong_mul_const(i32 %x) {
   ret i32 %t
 }
 
+; Negative test
+
 define i32 @mul_splat_fold_wrong_lshr_const(i32 %x) {
 ; CHECK-LABEL: @mul_splat_fold_wrong_lshr_const(
 ; CHECK-NEXT:    [[M:%.*]] = mul nuw i32 [[X:%.*]], 65537
@@ -308,6 +313,8 @@ define i32 @mul_splat_fold_wrong_lshr_const(i32 %x) {
   ret i32 %t
 }
 
+; Negative test
+
 define i32 @mul_splat_fold_no_nuw(i32 %x) {
 ; CHECK-LABEL: @mul_splat_fold_no_nuw(
 ; CHECK-NEXT:    [[M:%.*]] = mul nsw i32 [[X:%.*]], 65537