mirror of
https://github.com/RPCSX/llvm.git
synced 2025-01-23 12:45:47 +00:00
Swap loop invariant GEP with loop variant GEP to allow more LICM.
This patch changes the order of GEPs generated by Splitting GEPs pass, specially when one of the GEPs has constant and the base is loop invariant, then we will generate the GEP with constant first when beneficial, to expose more cases for LICM. If originally Splitting GEP generate the following: do.body.i: %idxprom.i = sext i32 %shr.i to i64 %2 = bitcast %typeD* %s to i8* %3 = shl i64 %idxprom.i, 2 %uglygep = getelementptr i8, i8* %2, i64 %3 %uglygep7 = getelementptr i8, i8* %uglygep, i64 1032 ... Now it genereates: do.body.i: %idxprom.i = sext i32 %shr.i to i64 %2 = bitcast %typeD* %s to i8* %3 = shl i64 %idxprom.i, 2 %uglygep = getelementptr i8, i8* %2, i64 1032 %uglygep7 = getelementptr i8, i8* %uglygep, i64 %3 ... For no-loop cases, the original way of generating GEPs seems to expose more CSE cases, so we don't change the logic for no-loop cases, and only limit our change to the specific case we are interested in. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@248420 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
84e146173d
commit
ff25c07a59
@ -157,6 +157,9 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/Analysis/ScalarEvolution.h"
|
||||
#include "llvm/Analysis/LoopInfo.h"
|
||||
#include "llvm/Analysis/MemoryBuiltins.h"
|
||||
#include "llvm/Analysis/TargetLibraryInfo.h"
|
||||
#include "llvm/Analysis/TargetTransformInfo.h"
|
||||
#include "llvm/Analysis/ValueTracking.h"
|
||||
#include "llvm/IR/Constants.h"
|
||||
@ -324,7 +327,9 @@ public:
|
||||
AU.addRequired<DominatorTreeWrapperPass>();
|
||||
AU.addRequired<ScalarEvolutionWrapperPass>();
|
||||
AU.addRequired<TargetTransformInfoWrapperPass>();
|
||||
AU.addRequired<LoopInfoWrapperPass>();
|
||||
AU.setPreservesCFG();
|
||||
AU.addRequired<TargetLibraryInfoWrapperPass>();
|
||||
}
|
||||
|
||||
bool doInitialization(Module &M) override {
|
||||
@ -395,10 +400,20 @@ private:
|
||||
/// Verify F is free of dead code.
|
||||
void verifyNoDeadCode(Function &F);
|
||||
|
||||
bool hasMoreThanOneUseInLoop(Value *v, Loop *L);
|
||||
// Swap the index operand of two GEP.
|
||||
void swapGEPOperand(GetElementPtrInst *First, GetElementPtrInst *Second);
|
||||
// Check if it is safe to swap operand of two GEP.
|
||||
bool isLegalToSwapOperand(GetElementPtrInst *First, GetElementPtrInst *Second,
|
||||
Loop *CurLoop);
|
||||
|
||||
const DataLayout *DL;
|
||||
DominatorTree *DT;
|
||||
ScalarEvolution *SE;
|
||||
const TargetMachine *TM;
|
||||
|
||||
LoopInfo *LI;
|
||||
TargetLibraryInfo *TLI;
|
||||
/// Whether to lower a GEP with multiple indices into arithmetic operations or
|
||||
/// multiple GEPs with a single index.
|
||||
bool LowerGEP;
|
||||
@ -414,6 +429,8 @@ INITIALIZE_PASS_BEGIN(
|
||||
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
|
||||
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
|
||||
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
|
||||
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
|
||||
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
|
||||
INITIALIZE_PASS_END(
|
||||
SeparateConstOffsetFromGEP, "separate-const-offset-from-gep",
|
||||
"Split GEPs to a variadic base and a constant offset for better CSE", false,
|
||||
@ -756,6 +773,13 @@ void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
|
||||
Type *I8PtrTy =
|
||||
Builder.getInt8PtrTy(Variadic->getType()->getPointerAddressSpace());
|
||||
Value *ResultPtr = Variadic->getOperand(0);
|
||||
Loop *L = LI->getLoopFor(Variadic->getParent());
|
||||
// Check if the base is not loop invariant or used more than once.
|
||||
bool isSwapCandidate =
|
||||
L && L->isLoopInvariant(ResultPtr) &&
|
||||
!hasMoreThanOneUseInLoop(ResultPtr, L);
|
||||
Value *FirstResult = nullptr;
|
||||
|
||||
if (ResultPtr->getType() != I8PtrTy)
|
||||
ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
|
||||
|
||||
@ -784,6 +808,8 @@ void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
|
||||
// Create an ugly GEP with a single index for each index.
|
||||
ResultPtr =
|
||||
Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Idx, "uglygep");
|
||||
if (FirstResult == nullptr)
|
||||
FirstResult = ResultPtr;
|
||||
}
|
||||
}
|
||||
|
||||
@ -792,7 +818,17 @@ void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
|
||||
Value *Offset = ConstantInt::get(IntPtrTy, AccumulativeByteOffset);
|
||||
ResultPtr =
|
||||
Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Offset, "uglygep");
|
||||
}
|
||||
} else
|
||||
isSwapCandidate = false;
|
||||
|
||||
// If we created a GEP with constant index, and the base is loop invariant,
|
||||
// then we swap the first one with it, so LICM can move constant GEP out
|
||||
// later.
|
||||
GetElementPtrInst *FirstGEP = dyn_cast<GetElementPtrInst>(FirstResult);
|
||||
GetElementPtrInst *SecondGEP = dyn_cast<GetElementPtrInst>(ResultPtr);
|
||||
if (isSwapCandidate && isLegalToSwapOperand(FirstGEP, SecondGEP, L))
|
||||
swapGEPOperand(FirstGEP, SecondGEP);
|
||||
|
||||
if (ResultPtr->getType() != Variadic->getType())
|
||||
ResultPtr = Builder.CreateBitCast(ResultPtr, Variadic->getType());
|
||||
|
||||
@ -1036,16 +1072,15 @@ bool SeparateConstOffsetFromGEP::runOnFunction(Function &F) {
|
||||
|
||||
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
|
||||
SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
|
||||
|
||||
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
|
||||
TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
|
||||
bool Changed = false;
|
||||
for (Function::iterator B = F.begin(), BE = F.end(); B != BE; ++B) {
|
||||
for (BasicBlock::iterator I = B->begin(), IE = B->end(); I != IE; ) {
|
||||
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I++)) {
|
||||
for (BasicBlock::iterator I = B->begin(), IE = B->end(); I != IE;)
|
||||
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I++))
|
||||
Changed |= splitGEP(GEP);
|
||||
}
|
||||
// No need to split GEP ConstantExprs because all its indices are constant
|
||||
// already.
|
||||
}
|
||||
// No need to split GEP ConstantExprs because all its indices are constant
|
||||
// already.
|
||||
}
|
||||
|
||||
Changed |= reuniteExts(F);
|
||||
@ -1138,3 +1173,93 @@ void SeparateConstOffsetFromGEP::verifyNoDeadCode(Function &F) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool SeparateConstOffsetFromGEP::isLegalToSwapOperand(
|
||||
GetElementPtrInst *FirstGEP, GetElementPtrInst *SecondGEP, Loop *CurLoop) {
|
||||
if (!FirstGEP || !FirstGEP->hasOneUse())
|
||||
return false;
|
||||
|
||||
if (!SecondGEP || FirstGEP->getParent() != SecondGEP->getParent())
|
||||
return false;
|
||||
|
||||
if (FirstGEP == SecondGEP)
|
||||
return false;
|
||||
|
||||
unsigned FirstNum = FirstGEP->getNumOperands();
|
||||
unsigned SecondNum = SecondGEP->getNumOperands();
|
||||
// Give up if the number of operands are not 2.
|
||||
if (FirstNum != SecondNum || FirstNum != 2)
|
||||
return false;
|
||||
|
||||
Value *FirstBase = FirstGEP->getOperand(0);
|
||||
Value *SecondBase = SecondGEP->getOperand(0);
|
||||
Value *FirstOffset = FirstGEP->getOperand(1);
|
||||
// Give up if the index of the first GEP is loop invariant.
|
||||
if (CurLoop->isLoopInvariant(FirstOffset))
|
||||
return false;
|
||||
|
||||
// Give up if base doesn't have same type.
|
||||
if (FirstBase->getType() != SecondBase->getType())
|
||||
return false;
|
||||
|
||||
Instruction *FirstOffsetDef = dyn_cast<Instruction>(FirstOffset);
|
||||
|
||||
// Check if the second operand of first GEP has constant coefficient.
|
||||
// For an example, for the following code, we won't gain anything by
|
||||
// hoisting the second GEP out because the second GEP can be folded away.
|
||||
// %scevgep.sum.ur159 = add i64 %idxprom48.ur, 256
|
||||
// %67 = shl i64 %scevgep.sum.ur159, 2
|
||||
// %uglygep160 = getelementptr i8* %65, i64 %67
|
||||
// %uglygep161 = getelementptr i8* %uglygep160, i64 -1024
|
||||
|
||||
// Skip constant shift instruction which may be generated by Splitting GEPs.
|
||||
if (FirstOffsetDef && FirstOffsetDef->isShift() &&
|
||||
dyn_cast<ConstantInt>(FirstOffsetDef->getOperand(1)))
|
||||
FirstOffsetDef = dyn_cast<Instruction>(FirstOffsetDef->getOperand(0));
|
||||
|
||||
// Give up if FirstOffsetDef is an Add or Sub with constant.
|
||||
// Because it may not profitable at all due to constant folding.
|
||||
if (FirstOffsetDef)
|
||||
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FirstOffsetDef)) {
|
||||
unsigned opc = BO->getOpcode();
|
||||
if ((opc == Instruction::Add || opc == Instruction::Sub) &&
|
||||
(dyn_cast<ConstantInt>(BO->getOperand(0)) ||
|
||||
dyn_cast<ConstantInt>(BO->getOperand(1))))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool SeparateConstOffsetFromGEP::hasMoreThanOneUseInLoop(Value *V, Loop *L) {
|
||||
int UsesInLoop = 0;
|
||||
for (User *U : V->users()) {
|
||||
if (Instruction *User = dyn_cast<Instruction>(U))
|
||||
if (L->contains(User))
|
||||
if (++UsesInLoop > 1)
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void SeparateConstOffsetFromGEP::swapGEPOperand(GetElementPtrInst *First,
|
||||
GetElementPtrInst *Second) {
|
||||
Value *Offset1 = First->getOperand(1);
|
||||
Value *Offset2 = Second->getOperand(1);
|
||||
First->setOperand(1, Offset2);
|
||||
Second->setOperand(1, Offset1);
|
||||
|
||||
// We changed p+o+c to p+c+o, p+c may not be inbound anymore.
|
||||
const DataLayout &DAL = First->getModule()->getDataLayout();
|
||||
APInt Offset(DAL.getPointerSizeInBits(
|
||||
cast<PointerType>(First->getType())->getAddressSpace()),
|
||||
0);
|
||||
Value *NewBase =
|
||||
First->stripAndAccumulateInBoundsConstantOffsets(DAL, Offset);
|
||||
uint64_t ObjectSize;
|
||||
if (!getObjectSize(NewBase, ObjectSize, DAL, TLI) ||
|
||||
Offset.ugt(ObjectSize)) {
|
||||
First->setIsInBounds(false);
|
||||
Second->setIsInBounds(false);
|
||||
} else
|
||||
First->setIsInBounds(true);
|
||||
}
|
||||
|
50
test/CodeGen/AArch64/aarch64-loop-gep-opt.ll
Normal file
50
test/CodeGen/AArch64/aarch64-loop-gep-opt.ll
Normal file
@ -0,0 +1,50 @@
|
||||
; RUN: llc -O3 -aarch64-gep-opt=true -print-after=codegenprepare -mcpu=cortex-a53 < %s >%t 2>&1 && FileCheck <%t %s
|
||||
; REQUIRES: asserts
|
||||
target triple = "aarch64--linux-android"
|
||||
|
||||
%typeD = type { i32, i32, [256 x i32], [257 x i32] }
|
||||
|
||||
; Function Attrs: noreturn nounwind uwtable
|
||||
define i32 @test1(%typeD* nocapture %s) {
|
||||
entry:
|
||||
; CHECK-LABEL: entry:
|
||||
; CHECK: %uglygep = getelementptr i8, i8* %0, i64 1032
|
||||
; CHECK: br label %do.body.i
|
||||
|
||||
|
||||
%tPos = getelementptr inbounds %typeD, %typeD* %s, i64 0, i32 0
|
||||
%k0 = getelementptr inbounds %typeD, %typeD* %s, i64 0, i32 1
|
||||
%.pre = load i32, i32* %tPos, align 4
|
||||
br label %do.body.i
|
||||
|
||||
do.body.i:
|
||||
; CHECK-LABEL: do.body.i:
|
||||
; CHECK: %uglygep2 = getelementptr i8, i8* %uglygep, i64 %3
|
||||
; CHECK-NEXT: %4 = bitcast i8* %uglygep2 to i32*
|
||||
; CHECK-NOT: %uglygep2 = getelementptr i8, i8* %uglygep, i64 1032
|
||||
|
||||
|
||||
%0 = phi i32 [ 256, %entry ], [ %.be, %do.body.i.backedge ]
|
||||
%1 = phi i32 [ 0, %entry ], [ %.be6, %do.body.i.backedge ]
|
||||
%add.i = add nsw i32 %1, %0
|
||||
%shr.i = ashr i32 %add.i, 1
|
||||
%idxprom.i = sext i32 %shr.i to i64
|
||||
%arrayidx.i = getelementptr inbounds %typeD, %typeD* %s, i64 0, i32 3, i64 %idxprom.i
|
||||
%2 = load i32, i32* %arrayidx.i, align 4
|
||||
%cmp.i = icmp sle i32 %2, %.pre
|
||||
%na.1.i = select i1 %cmp.i, i32 %0, i32 %shr.i
|
||||
%nb.1.i = select i1 %cmp.i, i32 %shr.i, i32 %1
|
||||
%sub.i = sub nsw i32 %na.1.i, %nb.1.i
|
||||
%cmp1.i = icmp eq i32 %sub.i, 1
|
||||
br i1 %cmp1.i, label %fooo.exit, label %do.body.i.backedge
|
||||
|
||||
do.body.i.backedge:
|
||||
%.be = phi i32 [ %na.1.i, %do.body.i ], [ 256, %fooo.exit ]
|
||||
%.be6 = phi i32 [ %nb.1.i, %do.body.i ], [ 0, %fooo.exit ]
|
||||
br label %do.body.i
|
||||
|
||||
fooo.exit: ; preds = %do.body.i
|
||||
store i32 %nb.1.i, i32* %k0, align 4
|
||||
br label %do.body.i.backedge
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user