mirror of https://github.com/RPCS3/llvm-mirror.git, synced 2024-12-14 07:09:08 +00:00
[InstCombine] Optimize overflow check based on uadd.with.overflow result
Fix for https://bugs.llvm.org/show_bug.cgi?id=40846.

This adds a combine for cases where an (a + b) < a style overflow check is performed, but with a + b being the result of uadd.with.overflow, so the overflow result is already available and we can simply use it. GVN/CSE will subsequently deduplicate the extracts.

We can run into this situation when a function contains both a uadd.with.overflow call and a manual add + overflow check on the same operands: GVN rewrites the add to the with.overflow result and leaves this pattern behind.

The implementation is a bit ugly because I'm handling the various canonicalization edge cases. This does not yet handle the negated version of this pattern.

Differential Revision: https://reviews.llvm.org/D58644
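A minimal IR sketch of the fold (value names are illustrative; the shape mirrors the uadd_res_ult_x test updated below): the manual check

  %a   = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
  %sum = extractvalue { i32, i1 } %a, 0
  %cmp = icmp ult i32 %sum, %x            ; (x + y) < x overflow check

becomes a direct use of the overflow bit the intrinsic already produces:

  %a   = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
  %cmp = extractvalue { i32, i1 } %a, 1   ; overflow flag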
parent cc8987f5b5 · commit a01112ce3c
@@ -5386,6 +5386,36 @@ static Instruction *foldVectorCmp(CmpInst &Cmp,
   return nullptr;
 }
 
+// extract(uadd.with.overflow(A, B), 0) ult A
+// -> extract(uadd.with.overflow(A, B), 1)
+static Instruction *foldICmpOfUAddOv(ICmpInst &I) {
+  CmpInst::Predicate Pred = I.getPredicate();
+  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+  Value *UAddOv;
+  Value *A, *B;
+  auto UAddOvResultPat = m_ExtractValue<0>(
+      m_Intrinsic<Intrinsic::uadd_with_overflow>(m_Value(A), m_Value(B)));
+  if (match(Op0, UAddOvResultPat) &&
+      ((Pred == ICmpInst::ICMP_ULT && (Op1 == A || Op1 == B)) ||
+       (Pred == ICmpInst::ICMP_EQ && match(Op1, m_ZeroInt()) &&
+        (match(A, m_One()) || match(B, m_One()))) ||
+       (Pred == ICmpInst::ICMP_NE && match(Op1, m_AllOnes()) &&
+        (match(A, m_AllOnes()) || match(B, m_AllOnes())))))
+    // extract(uadd.with.overflow(A, B), 0) < A
+    // extract(uadd.with.overflow(A, 1), 0) == 0
+    // extract(uadd.with.overflow(A, -1), 0) != -1
+    UAddOv = cast<ExtractValueInst>(Op0)->getAggregateOperand();
+  else if (match(Op1, UAddOvResultPat) &&
+           Pred == ICmpInst::ICMP_UGT && (Op0 == A || Op0 == B))
+    // A > extract(uadd.with.overflow(A, B), 0)
+    UAddOv = cast<ExtractValueInst>(Op1)->getAggregateOperand();
+  else
+    return nullptr;
+
+  return ExtractValueInst::Create(UAddOv, 1);
+}
+
 Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
   bool Changed = false;
   const SimplifyQuery Q = SQ.getWithInstruction(&I);
@@ -5574,6 +5604,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
   if (Instruction *Res = foldICmpEquality(I))
     return Res;
 
+  if (Instruction *Res = foldICmpOfUAddOv(I))
+    return Res;
+
   // The 'cmpxchg' instruction returns an aggregate containing the old value and
   // an i1 which indicates whether or not we successfully did the swap.
   //
@@ -356,8 +356,7 @@ define i1 @uadd_res_ult_x(i32 %x, i32 %y, i1* %p) nounwind {
 ; CHECK-NEXT: [[A:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[X:%.*]], i32 [[Y:%.*]])
 ; CHECK-NEXT: [[B:%.*]] = extractvalue { i32, i1 } [[A]], 1
 ; CHECK-NEXT: store i1 [[B]], i1* [[P:%.*]], align 1
-; CHECK-NEXT: [[C:%.*]] = extractvalue { i32, i1 } [[A]], 0
-; CHECK-NEXT: [[D:%.*]] = icmp ult i32 [[C]], [[X]]
+; CHECK-NEXT: [[D:%.*]] = extractvalue { i32, i1 } [[A]], 1
 ; CHECK-NEXT: ret i1 [[D]]
 ;
 %a = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
@@ -373,8 +372,7 @@ define i1 @uadd_res_ult_y(i32 %x, i32 %y, i1* %p) nounwind {
 ; CHECK-NEXT: [[A:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[X:%.*]], i32 [[Y:%.*]])
 ; CHECK-NEXT: [[B:%.*]] = extractvalue { i32, i1 } [[A]], 1
 ; CHECK-NEXT: store i1 [[B]], i1* [[P:%.*]], align 1
-; CHECK-NEXT: [[C:%.*]] = extractvalue { i32, i1 } [[A]], 0
-; CHECK-NEXT: [[D:%.*]] = icmp ult i32 [[C]], [[Y]]
+; CHECK-NEXT: [[D:%.*]] = extractvalue { i32, i1 } [[A]], 1
 ; CHECK-NEXT: ret i1 [[D]]
 ;
 %a = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
@@ -391,8 +389,7 @@ define i1 @uadd_res_ugt_x(i32 %xx, i32 %y, i1* %p) nounwind {
 ; CHECK-NEXT: [[A:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[X]], i32 [[Y:%.*]])
 ; CHECK-NEXT: [[B:%.*]] = extractvalue { i32, i1 } [[A]], 1
 ; CHECK-NEXT: store i1 [[B]], i1* [[P:%.*]], align 1
-; CHECK-NEXT: [[C:%.*]] = extractvalue { i32, i1 } [[A]], 0
-; CHECK-NEXT: [[D:%.*]] = icmp ugt i32 [[X]], [[C]]
+; CHECK-NEXT: [[D:%.*]] = extractvalue { i32, i1 } [[A]], 1
 ; CHECK-NEXT: ret i1 [[D]]
 ;
 %x = urem i32 42, %xx ; Thwart complexity-based canonicalization
@@ -410,8 +407,7 @@ define i1 @uadd_res_ugt_y(i32 %x, i32 %yy, i1* %p) nounwind {
 ; CHECK-NEXT: [[A:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[X:%.*]], i32 [[Y]])
 ; CHECK-NEXT: [[B:%.*]] = extractvalue { i32, i1 } [[A]], 1
 ; CHECK-NEXT: store i1 [[B]], i1* [[P:%.*]], align 1
-; CHECK-NEXT: [[C:%.*]] = extractvalue { i32, i1 } [[A]], 0
-; CHECK-NEXT: [[D:%.*]] = icmp ugt i32 [[Y]], [[C]]
+; CHECK-NEXT: [[D:%.*]] = extractvalue { i32, i1 } [[A]], 1
 ; CHECK-NEXT: ret i1 [[D]]
 ;
 %y = urem i32 42, %yy ; Thwart complexity-based canonicalization
@@ -428,8 +424,7 @@ define i1 @uadd_res_ult_const(i32 %x, i1* %p) nounwind {
 ; CHECK-NEXT: [[A:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[X:%.*]], i32 42)
 ; CHECK-NEXT: [[B:%.*]] = extractvalue { i32, i1 } [[A]], 1
 ; CHECK-NEXT: store i1 [[B]], i1* [[P:%.*]], align 1
-; CHECK-NEXT: [[C:%.*]] = extractvalue { i32, i1 } [[A]], 0
-; CHECK-NEXT: [[D:%.*]] = icmp ult i32 [[C]], 42
+; CHECK-NEXT: [[D:%.*]] = extractvalue { i32, i1 } [[A]], 1
 ; CHECK-NEXT: ret i1 [[D]]
 ;
 %a = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 42)
@@ -445,8 +440,7 @@ define i1 @uadd_res_ult_const_one(i32 %x, i1* %p) nounwind {
 ; CHECK-NEXT: [[A:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[X:%.*]], i32 1)
 ; CHECK-NEXT: [[B:%.*]] = extractvalue { i32, i1 } [[A]], 1
 ; CHECK-NEXT: store i1 [[B]], i1* [[P:%.*]], align 1
-; CHECK-NEXT: [[C:%.*]] = extractvalue { i32, i1 } [[A]], 0
-; CHECK-NEXT: [[D:%.*]] = icmp eq i32 [[C]], 0
+; CHECK-NEXT: [[D:%.*]] = extractvalue { i32, i1 } [[A]], 1
 ; CHECK-NEXT: ret i1 [[D]]
 ;
 %a = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 1)
@@ -462,8 +456,7 @@ define i1 @uadd_res_ult_const_minus_one(i32 %x, i1* %p) nounwind {
 ; CHECK-NEXT: [[A:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[X:%.*]], i32 -1)
 ; CHECK-NEXT: [[B:%.*]] = extractvalue { i32, i1 } [[A]], 1
 ; CHECK-NEXT: store i1 [[B]], i1* [[P:%.*]], align 1
-; CHECK-NEXT: [[C:%.*]] = extractvalue { i32, i1 } [[A]], 0
-; CHECK-NEXT: [[D:%.*]] = icmp ne i32 [[C]], -1
+; CHECK-NEXT: [[D:%.*]] = extractvalue { i32, i1 } [[A]], 1
 ; CHECK-NEXT: ret i1 [[D]]
 ;
 %a = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 -1)