Recommit "[InlineCost] Use TTI to check if GEP is free."

This recommits r292526, which was reverted in r292529, after fixing the test case.

The original summary:

Currently, a GEP is considered free only if its indices are all constant.
TTI::getGEPCost() can give a more accurate, target-specific analysis. TTI is
already used for the cost of many other instructions.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@292570 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Haicheng Wu 2017-01-20 03:09:11 +00:00
parent 5067929193
commit 9f9ec3cd24
2 changed files with 48 additions and 2 deletions

View File

@ -134,6 +134,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt, void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
int InstructionCost); int InstructionCost);
bool isGEPOffsetConstant(GetElementPtrInst &GEP); bool isGEPOffsetConstant(GetElementPtrInst &GEP);
bool isGEPFree(GetElementPtrInst &GEP);
bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset); bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
bool simplifyCallSite(Function *F, CallSite CS); bool simplifyCallSite(Function *F, CallSite CS);
ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V); ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
@ -331,6 +332,21 @@ bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
return true; return true;
} }
/// \brief Ask the target (via TTI) whether a GEP costs nothing.
///
/// Every index that already has a simplified constant recorded during the
/// analysis of this callsite is replaced by that constant before querying TTI.
bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) {
  SmallVector<Value *, 4> Indices;
  for (auto IdxIt = GEP.idx_begin(), IdxEnd = GEP.idx_end(); IdxIt != IdxEnd;
       ++IdxIt) {
    Value *Idx = *IdxIt;
    // Prefer the simplified constant when one is known for this index.
    Constant *Simplified = SimplifiedValues.lookup(Idx);
    Indices.push_back(Simplified ? Simplified : Idx);
  }
  const auto Cost = TTI.getGEPCost(GEP.getSourceElementType(),
                                   GEP.getPointerOperand(), Indices);
  return Cost == TargetTransformInfo::TCC_Free;
}
bool CallAnalyzer::visitAlloca(AllocaInst &I) { bool CallAnalyzer::visitAlloca(AllocaInst &I) {
// Check whether inlining will turn a dynamic alloca into a static // Check whether inlining will turn a dynamic alloca into a static
// alloca and handle that case. // alloca and handle that case.
@ -396,7 +412,7 @@ bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
// Non-constant GEPs aren't folded, and disable SROA. // Non-constant GEPs aren't folded, and disable SROA.
if (SROACandidate) if (SROACandidate)
disableSROA(CostIt); disableSROA(CostIt);
return false; return isGEPFree(I);
} }
// Add the result as a new mapping to Base + Offset. // Add the result as a new mapping to Base + Offset.
@ -422,7 +438,7 @@ bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
// Variable GEPs will require math and will disable SROA. // Variable GEPs will require math and will disable SROA.
if (SROACandidate) if (SROACandidate)
disableSROA(CostIt); disableSROA(CostIt);
return false; return isGEPFree(I);
} }
bool CallAnalyzer::visitBitCast(BitCastInst &I) { bool CallAnalyzer::visitBitCast(BitCastInst &I) {

View File

@ -0,0 +1,30 @@
; REQUIRES: asserts
; RUN: opt -inline < %s -mtriple=aarch64--linux-gnu -mcpu=kryo -S -debug-only=inline-cost 2>&1 | FileCheck %s
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnu"
; Caller holding the two call sites whose inline-cost debug output is verified
; in this test; both callees receive the same array pointer and index.
define void @outer([4 x i32]* %ptr, i32 %i) {
call void @inner1([4 x i32]* %ptr, i32 %i)
call void @inner2([4 x i32]* %ptr, i32 %i)
ret void
}
; The gep in inner1() is reg+reg, which is a legal addressing mode for AArch64.
; Thus, both the gep and ret can be simplified.
; CHECK: Analyzing call of inner1
; CHECK: NumInstructionsSimplified: 2
; CHECK: NumInstructions: 2
define void @inner1([4 x i32]* %ptr, i32 %i) {
; The leading index is 0, so only the variable index %i contributes to the
; address. The result %G is never used.
%G = getelementptr inbounds [4 x i32], [4 x i32]* %ptr, i32 0, i32 %i
ret void
}
; The gep in inner2() is reg+imm+reg, which is not a legal addressing mode for
; AArch64. Thus, only the ret can be simplified and not the gep.
; CHECK: Analyzing call of inner2
; CHECK: NumInstructionsSimplified: 1
; CHECK: NumInstructions: 2
define void @inner2([4 x i32]* %ptr, i32 %i) {
; The leading index is the constant 1, which adds a constant offset on top of
; the variable index %i. The result %G is never used.
%G = getelementptr inbounds [4 x i32], [4 x i32]* %ptr, i32 1, i32 %i
ret void
}