mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-12-06 19:10:00 +00:00
[InstSimplify] fold extracting from std::pair (1/2)
This patch intends to enable jump threading when a method whose return type is std::pair<int, bool> or std::pair<bool, int> is inlined. For example, jump threading does not happen for the if statement in func. std::pair<int, bool> callee(int v) { int a = dummy(v); if (a) return std::make_pair(dummy(v), true); else return std::make_pair(v, v < 0); } int func(int v) { std::pair<int, bool> rc = callee(v); if (rc.second) { // do something } } SROA executed before the method inlining replaces std::pair by i64 without splitting in both callee and func since at this point no access to the individual fields is seen by SROA. After inlining, jump threading fails to identify that the incoming value is a constant due to additional instructions (like or, and, trunc). This series of patches adds patterns in InstructionSimplify to fold extraction of members of std::pair. To help jump threading, we actually need to optimize a code sequence spanning multiple BBs. These patches do not handle phi by themselves, but these additional patterns help the NewGVN pass, which calls instsimplify to check opportunities for simplifying instructions over phi, apply the phi-of-ops optimization, resulting in successful jump threading. SimplifyDemandedBits in InstCombine can do more general optimization, but this patch aims to provide opportunities for other optimizers by supporting a simple but common case in InstSimplify. This first patch in the series handles code sequences that merge two values using shl and or and then extract one value using lshr. Differential Revision: https://reviews.llvm.org/D48828 llvm-svn: 338485
This commit is contained in:
parent
851132b80e
commit
6675af6a42
@ -1325,6 +1325,23 @@ static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
|
||||
if (match(Op0, m_NUWShl(m_Value(X), m_Specific(Op1))))
|
||||
return X;
|
||||
|
||||
// ((X << A) | Y) >> A -> X if effective width of Y is not larger than A.
|
||||
// We can return X as we do in the above case since OR alters no bits in X.
|
||||
// SimplifyDemandedBits in InstCombine can do more general optimization for
|
||||
// bit manipulation. This pattern aims to provide opportunities for other
|
||||
// optimizers by supporting a simple but common case in InstSimplify.
|
||||
Value *Y;
|
||||
const APInt *ShRAmt, *ShLAmt;
|
||||
if (match(Op1, m_APInt(ShRAmt)) &&
|
||||
match(Op0, m_c_Or(m_NUWShl(m_Value(X), m_APInt(ShLAmt)), m_Value(Y))) &&
|
||||
*ShRAmt == *ShLAmt) {
|
||||
const KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
|
||||
const unsigned Width = Op0->getType()->getScalarSizeInBits();
|
||||
const unsigned EffWidthY = Width - YKnown.countMinLeadingZeros();
|
||||
if (EffWidthY <= ShRAmt->getZExtValue())
|
||||
return X;
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
@ -178,11 +178,7 @@ define <2 x i8> @shl_by_sext_bool_vec(<2 x i1> %x, <2 x i8> %y) {
|
||||
define i64 @shl_or_shr(i32 %a, i32 %b) {
|
||||
; CHECK-LABEL: @shl_or_shr(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[A:%.*]] to i64
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[B:%.*]] to i64
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP1]], 32
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP2]], [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP4]], 32
|
||||
; CHECK-NEXT: ret i64 [[TMP5]]
|
||||
; CHECK-NEXT: ret i64 [[TMP1]]
|
||||
;
|
||||
%tmp1 = zext i32 %a to i64
|
||||
%tmp2 = zext i32 %b to i64
|
||||
@ -214,11 +210,7 @@ define i64 @shl_or_shr2(i32 %a, i32 %b) {
|
||||
define <2 x i64> @shl_or_shr1v(<2 x i32> %a, <2 x i32> %b) {
|
||||
; CHECK-LABEL: @shl_or_shr1v(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[B:%.*]] to <2 x i64>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw <2 x i64> [[TMP1]], <i64 32, i64 32>
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i64> [[TMP3]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = lshr <2 x i64> [[TMP4]], <i64 32, i64 32>
|
||||
; CHECK-NEXT: ret <2 x i64> [[TMP5]]
|
||||
; CHECK-NEXT: ret <2 x i64> [[TMP1]]
|
||||
;
|
||||
%tmp1 = zext <2 x i32> %a to <2 x i64>
|
||||
%tmp2 = zext <2 x i32> %b to <2 x i64>
|
||||
|
Loading…
Reference in New Issue
Block a user