Strength-reduce overflow intrinsics into regular arithmetic operations when possible.

Some intrinsics, such as s/uadd.with.overflow and umul.with.overflow, are already strength-reduced. This change adds the remaining arithmetic intrinsics: s/usub.with.overflow and smul.with.overflow. It completes the work on PR20194.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@224417 91177308-0d34-0410-b5e6-96231b3b80d8
parent: 80c5783c88
commit: 96bd465d6c
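To make the intent concrete before the diff, here is a minimal source-level sketch of the PR20194 pattern. It is an illustration, not code from the commit: OvResult32, saddWithOverflow, and saddKnownSafe are hypothetical names, and the GCC/Clang builtin __builtin_add_overflow stands in for llvm.sadd.with.overflow.i32.

#include <cstdint>

struct OvResult32 { int32_t val; bool ov; };  // models %ov.result.32 = { i32, i1 }

// Before the transformation: a genuine checked add.
OvResult32 saddWithOverflow(int32_t a, int32_t b) {
  OvResult32 r;
  r.ov = __builtin_add_overflow(a, b, &r.val);  // models llvm.sadd.with.overflow.i32
  return r;
}

// After: when both operands are sign-extended from i8, the sum lies in
// [-256, 254] and can never overflow i32, so InstCombine may emit a plain
// `add nsw` and fold the overflow flag to the constant false.
OvResult32 saddKnownSafe(int8_t a8, int8_t b8) {
  int32_t a = a8, b = b8;  // sext i8 -> i32
  return {a + b, false};   // add nsw %A, %B ; flag known false
}

The saddtest_nsw test near the end of this diff checks exactly this rewrite at the IR level.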
@@ -285,6 +285,7 @@ private:
   bool WillNotOverflowUnsignedAdd(Value *LHS, Value *RHS, Instruction *CxtI);
   bool WillNotOverflowSignedSub(Value *LHS, Value *RHS, Instruction *CxtI);
   bool WillNotOverflowUnsignedSub(Value *LHS, Value *RHS, Instruction *CxtI);
+  bool WillNotOverflowSignedMul(Value *LHS, Value *RHS, Instruction *CxtI);
   Value *EmitGEPOffset(User *GEP);
   Instruction *scalarizePHI(ExtractElementInst &EI, PHINode *PN);
   Value *EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask);
@@ -1008,6 +1008,51 @@ bool InstCombiner::WillNotOverflowUnsignedSub(Value *LHS, Value *RHS,
   return false;
 }
 
+/// \brief Return true if we can prove that:
+///    (mul LHS, RHS) === (mul nsw LHS, RHS)
+bool InstCombiner::WillNotOverflowSignedMul(Value *LHS, Value *RHS,
+                                            Instruction *CxtI) {
+  if (IntegerType *IT = dyn_cast<IntegerType>(LHS->getType())) {
+
+    // Multiplying n * m significant bits yields a result of n + m significant
+    // bits. If the total number of significant bits does not exceed the
+    // result bit width (minus 1), there is no overflow.
+    // This means if we have enough leading sign bits in the operands
+    // we can guarantee that the result does not overflow.
+    // Ref: "Hacker's Delight" by Henry Warren
+    unsigned BitWidth = IT->getBitWidth();
+
+    // Note that underestimating the number of sign bits gives a more
+    // conservative answer.
+    unsigned SignBits = ComputeNumSignBits(LHS, 0, CxtI) +
+                        ComputeNumSignBits(RHS, 0, CxtI);
+
+    // First handle the easy case: if we have enough sign bits there's
+    // definitely no overflow.
+    if (SignBits > BitWidth + 1)
+      return true;
+
+    // There are two ambiguous cases where there can be no overflow:
+    //   SignBits == BitWidth + 1 and
+    //   SignBits == BitWidth
+    // The second case is difficult to check, therefore we only handle the
+    // first case.
+    if (SignBits == BitWidth + 1) {
+      // It overflows only when both arguments are negative and the true
+      // product is exactly the minimum negative number.
+      // E.g. mul i16 with 17 sign bits: 0xff00 * 0xff80 = 0x8000
+      // For simplicity we just check if at least one side is not negative.
+      bool LHSNonNegative, LHSNegative;
+      bool RHSNonNegative, RHSNegative;
+      ComputeSignBit(LHS, LHSNonNegative, LHSNegative, DL, 0, AT, CxtI, DT);
+      ComputeSignBit(RHS, RHSNonNegative, RHSNegative, DL, 0, AT, CxtI, DT);
+      if (LHSNonNegative || RHSNonNegative)
+        return true;
+    }
+  }
+  return false;
+}
+
 // Checks if any operand is negative and we can convert add to sub.
 // This function checks for following negative patterns
 //   ADD(XOR(OR(Z, NOT(C)), C)), 1) == NEG(AND(Z, C))
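The sign-bit rule in the hunk above is easy to sanity-check outside the compiler. The sketch below is not LLVM code: it assumes a 32-bit int and uses the GCC/Clang builtin __builtin_clrsb in place of ComputeNumSignBits, scaling the comment's i16 example (0xff00 * 0xff80 = 0x8000) up to i32.

#include <cstdint>
#include <cstdio>

// Leading sign bits of v, counting the sign bit itself. __builtin_clrsb
// returns the number of *redundant* leading sign bits, hence the +1.
static unsigned numSignBits(int32_t v) {
  return 1u + static_cast<unsigned>(__builtin_clrsb(v));
}

static bool willNotOverflowSignedMul(int32_t lhs, int32_t rhs) {
  const unsigned BitWidth = 32;
  unsigned SignBits = numSignBits(lhs) + numSignBits(rhs);
  if (SignBits > BitWidth + 1)
    return true;                  // easy case: definitely no overflow
  if (SignBits == BitWidth + 1)   // boundary case: only both-negative
    return lhs >= 0 || rhs >= 0;  // operands can hit the overflow corner
  return false;
}

int main() {
  // -65536 has 16 sign bits and -32768 has 17: the total of 33 equals
  // BitWidth + 1, both operands are negative, and the true product is
  // exactly 2^31, one past INT32_MAX, so this pair must be rejected.
  printf("%d\n", willNotOverflowSignedMul(-65536, -32768)); // 0 (may overflow)
  // Same 33-bit total, but one operand is non-negative: provably safe.
  printf("%d\n", willNotOverflowSignedMul(65535, -32768));  // 1 (mul nsw ok)
}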
@@ -427,6 +427,15 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
         return CreateOverflowTuple(II, LHS, false, /*ReUseName*/false);
       }
     }
+    if (II->getIntrinsicID() == Intrinsic::ssub_with_overflow) {
+      if (WillNotOverflowSignedSub(LHS, RHS, II)) {
+        return CreateOverflowTuple(II, Builder->CreateNSWSub(LHS, RHS), false);
+      }
+    } else {
+      if (WillNotOverflowUnsignedSub(LHS, RHS, II)) {
+        return CreateOverflowTuple(II, Builder->CreateNUWSub(LHS, RHS), false);
+      }
+    }
     break;
   }
   case Intrinsic::umul_with_overflow: {
@@ -477,6 +486,12 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
                               /*ReUseName*/false);
       }
     }
+    if (II->getIntrinsicID() == Intrinsic::smul_with_overflow) {
+      Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
+      if (WillNotOverflowSignedMul(LHS, RHS, II)) {
+        return CreateOverflowTuple(II, Builder->CreateNSWMul(LHS, RHS), false);
+      }
+    }
     break;
   case Intrinsic::minnum:
   case Intrinsic::maxnum: {
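Both hunks above funnel the strength-reduced value through CreateOverflowTuple, a helper this diff does not show. Its shape can be read off the CHECK lines in the test changes below: the arithmetic result is inserted into a struct constant whose overflow slot is already false. A hedged IRBuilder sketch follows; the name buildOverflowTuple and its signature are guesses, not the actual helper.

#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Sketch only: the real CreateOverflowTuple also takes the intrinsic call so
// it can reuse its result type and name; that bookkeeping is omitted here.
static Value *buildOverflowTuple(IRBuilder<> &Builder, StructType *ResultTy,
                                 Value *Arith) {
  // Build the constant { i32 undef, i1 false }: the overflow slot is
  // pre-folded to false, because the rewrite only fires once overflow has
  // been proven impossible.
  Constant *Proto = ConstantStruct::get(
      ResultTy, {UndefValue::get(ResultTy->getElementType(0)),
                 ConstantInt::getFalse(Builder.getContext())});
  // Emits: %1 = insertvalue %ov.result.32 { i32 undef, i1 false }, i32 %x, 0
  return Builder.CreateInsertValue(Proto, Arith, 0);
}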
@@ -1,10 +1,17 @@
 ; RUN: opt -instcombine -S < %s | FileCheck %s
 
 %overflow.result = type {i8, i1}
+%ov.result.32 = type { i32, i1 }
 
-declare %overflow.result @llvm.uadd.with.overflow.i8(i8, i8)
-declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32)
-declare %overflow.result @llvm.umul.with.overflow.i8(i8, i8)
+declare %overflow.result @llvm.uadd.with.overflow.i8(i8, i8) nounwind readnone
+declare %overflow.result @llvm.umul.with.overflow.i8(i8, i8) nounwind readnone
+declare %ov.result.32 @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone
+declare %ov.result.32 @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone
+declare %ov.result.32 @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone
+declare %ov.result.32 @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone
+declare %ov.result.32 @llvm.smul.with.overflow.i32(i32, i32) nounwind readnone
+declare %ov.result.32 @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone
+
 declare double @llvm.powi.f64(double, i32) nounwind readonly
 declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
 declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
@@ -91,17 +98,92 @@ define i8 @uaddtest7(i8 %A, i8 %B) {
 }
 
 ; PR20194
-define { i32, i1 } @saddtest1(i8 %a, i8 %b) {
+define %ov.result.32 @saddtest_nsw(i8 %a, i8 %b) {
   %A = sext i8 %a to i32
   %B = sext i8 %b to i32
-  %x = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %A, i32 %B)
-  ret { i32, i1 } %x
-; CHECK-LABEL: @saddtest1
+  %x = call %ov.result.32 @llvm.sadd.with.overflow.i32(i32 %A, i32 %B)
+  ret %ov.result.32 %x
+; CHECK-LABEL: @saddtest_nsw
 ; CHECK: %x = add nsw i32 %A, %B
-; CHECK-NEXT: %1 = insertvalue { i32, i1 } { i32 undef, i1 false }, i32 %x, 0
-; CHECK-NEXT: ret { i32, i1 } %1
+; CHECK-NEXT: %1 = insertvalue %ov.result.32 { i32 undef, i1 false }, i32 %x, 0
+; CHECK-NEXT: ret %ov.result.32 %1
+}
+
+define %ov.result.32 @uaddtest_nuw(i32 %a, i32 %b) {
+  %A = and i32 %a, 2147483647
+  %B = and i32 %b, 2147483647
+  %x = call %ov.result.32 @llvm.uadd.with.overflow.i32(i32 %A, i32 %B)
+  ret %ov.result.32 %x
+; CHECK-LABEL: @uaddtest_nuw
+; CHECK: %x = add nuw i32 %A, %B
+; CHECK-NEXT: %1 = insertvalue %ov.result.32 { i32 undef, i1 false }, i32 %x, 0
+; CHECK-NEXT: ret %ov.result.32 %1
+}
+
+define %ov.result.32 @ssubtest_nsw(i8 %a, i8 %b) {
+  %A = sext i8 %a to i32
+  %B = sext i8 %b to i32
+  %x = call %ov.result.32 @llvm.ssub.with.overflow.i32(i32 %A, i32 %B)
+  ret %ov.result.32 %x
+; CHECK-LABEL: @ssubtest_nsw
+; CHECK: %x = sub nsw i32 %A, %B
+; CHECK-NEXT: %1 = insertvalue %ov.result.32 { i32 undef, i1 false }, i32 %x, 0
+; CHECK-NEXT: ret %ov.result.32 %1
+}
+
+define %ov.result.32 @usubtest_nuw(i32 %a, i32 %b) {
+  %A = or i32 %a, 2147483648
+  %B = and i32 %b, 2147483647
+  %x = call %ov.result.32 @llvm.usub.with.overflow.i32(i32 %A, i32 %B)
+  ret %ov.result.32 %x
+; CHECK-LABEL: @usubtest_nuw
+; CHECK: %x = sub nuw i32 %A, %B
+; CHECK-NEXT: %1 = insertvalue %ov.result.32 { i32 undef, i1 false }, i32 %x, 0
+; CHECK-NEXT: ret %ov.result.32 %1
+}
+
+define %ov.result.32 @smultest1_nsw(i32 %a, i32 %b) {
+  %A = and i32 %a, 4095   ; 0xfff
+  %B = and i32 %b, 524287 ; 0x7ffff
+  %x = call %ov.result.32 @llvm.smul.with.overflow.i32(i32 %A, i32 %B)
+  ret %ov.result.32 %x
+; CHECK-LABEL: @smultest1_nsw
+; CHECK: %x = mul nsw i32 %A, %B
+; CHECK-NEXT: %1 = insertvalue %ov.result.32 { i32 undef, i1 false }, i32 %x, 0
+; CHECK-NEXT: ret %ov.result.32 %1
+}
+
+define %ov.result.32 @smultest2_nsw(i32 %a, i32 %b) {
+  %A = ashr i32 %a, 16
+  %B = ashr i32 %b, 16
+  %x = call %ov.result.32 @llvm.smul.with.overflow.i32(i32 %A, i32 %B)
+  ret %ov.result.32 %x
+; CHECK-LABEL: @smultest2_nsw
+; CHECK: %x = mul nsw i32 %A, %B
+; CHECK-NEXT: %1 = insertvalue %ov.result.32 { i32 undef, i1 false }, i32 %x, 0
+; CHECK-NEXT: ret %ov.result.32 %1
+}
+
+define %ov.result.32 @smultest3_sw(i32 %a, i32 %b) {
+  %A = ashr i32 %a, 16
+  %B = ashr i32 %b, 15
+  %x = call %ov.result.32 @llvm.smul.with.overflow.i32(i32 %A, i32 %B)
+  ret %ov.result.32 %x
+; CHECK-LABEL: @smultest3_sw
+; CHECK: %x = call %ov.result.32 @llvm.smul.with.overflow.i32(i32 %A, i32 %B)
+; CHECK-NEXT: ret %ov.result.32 %x
+}
+
+define %ov.result.32 @umultest_nuw(i32 %a, i32 %b) {
+  %A = and i32 %a, 65535 ; 0xffff
+  %B = and i32 %b, 65535 ; 0xffff
+  %x = call %ov.result.32 @llvm.umul.with.overflow.i32(i32 %A, i32 %B)
+  ret %ov.result.32 %x
+; CHECK-LABEL: @umultest_nuw
+; CHECK: %x = mul nuw i32 %A, %B
+; CHECK-NEXT: %1 = insertvalue %ov.result.32 { i32 undef, i1 false }, i32 %x, 0
+; CHECK-NEXT: ret %ov.result.32 %1
 }
 
 define i8 @umultest1(i8 %A, i1* %overflowPtr) {
   %x = call %overflow.result @llvm.umul.with.overflow.i8(i8 0, i8 %A)
@@ -125,9 +207,6 @@ define i8 @umultest2(i8 %A, i1* %overflowPtr) {
 ; CHECK-NEXT: ret i8 %A
 }
 
-%ov.result.32 = type { i32, i1 }
-declare %ov.result.32 @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone
-
 define i32 @umultest3(i32 %n) nounwind {
   %shr = lshr i32 %n, 2
   %mul = call %ov.result.32 @llvm.umul.with.overflow.i32(i32 %shr, i32 3)