mirror of
https://github.com/RPCSX/llvm.git
synced 2025-01-20 03:18:09 +00:00
[DSE]Enhance shorthening MemIntrinsic based on OverlapIntervals
Summary: This change use the overlap interval map built from partial overwrite tracking to perform shortening MemIntrinsics. Add test cases which was missing opportunities before. Reviewers: hfinkel, eeckstein, mcrosier Subscribers: mcrosier, llvm-commits Differential Revision: https://reviews.llvm.org/D21909 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@275571 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
733cec8f05
commit
2ca934ea31
@ -290,8 +290,8 @@ enum OverwriteResult {
|
||||
};
|
||||
}
|
||||
|
||||
typedef DenseMap<Instruction *,
|
||||
std::map<int64_t, int64_t>> InstOverlapIntervalsTy;
|
||||
typedef std::map<int64_t, int64_t> OverlapIntervalsTy;
|
||||
typedef DenseMap<Instruction *, OverlapIntervalsTy> InstOverlapIntervalsTy;
|
||||
|
||||
/// Return 'OverwriteComplete' if a store to the 'Later' location completely
|
||||
/// overwrites a store to the 'Earlier' location, 'OverwriteEnd' if the end of
|
||||
@ -438,9 +438,9 @@ static OverwriteResult isOverwrite(const MemoryLocation &Later,
|
||||
//
|
||||
// In this case we may want to trim the size of earlier to avoid generating
|
||||
// writes to addresses which will definitely be overwritten later
|
||||
if (LaterOff > EarlierOff &&
|
||||
LaterOff < int64_t(EarlierOff + Earlier.Size) &&
|
||||
int64_t(LaterOff + Later.Size) >= int64_t(EarlierOff + Earlier.Size))
|
||||
if (!EnablePartialOverwriteTracking &&
|
||||
(LaterOff > EarlierOff && LaterOff < int64_t(EarlierOff + Earlier.Size) &&
|
||||
int64_t(LaterOff + Later.Size) >= int64_t(EarlierOff + Earlier.Size)))
|
||||
return OverwriteEnd;
|
||||
|
||||
// Finally, we also need to check if the later store overwrites the beginning
|
||||
@ -452,9 +452,11 @@ static OverwriteResult isOverwrite(const MemoryLocation &Later,
|
||||
// In this case we may want to move the destination address and trim the size
|
||||
// of earlier to avoid generating writes to addresses which will definitely
|
||||
// be overwritten later.
|
||||
if (LaterOff <= EarlierOff && int64_t(LaterOff + Later.Size) > EarlierOff) {
|
||||
assert (int64_t(LaterOff + Later.Size) < int64_t(EarlierOff + Earlier.Size)
|
||||
&& "Expect to be handled as OverwriteComplete" );
|
||||
if (!EnablePartialOverwriteTracking &&
|
||||
(LaterOff <= EarlierOff && int64_t(LaterOff + Later.Size) > EarlierOff)) {
|
||||
assert(int64_t(LaterOff + Later.Size) <
|
||||
int64_t(EarlierOff + Earlier.Size) &&
|
||||
"Expect to be handled as OverwriteComplete");
|
||||
return OverwriteBegin;
|
||||
}
|
||||
// Otherwise, they don't completely overlap.
|
||||
@ -819,6 +821,119 @@ static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA,
|
||||
return MadeChange;
|
||||
}
|
||||
|
||||
static bool tryToShorten(Instruction *EarlierWrite, int64_t &EarlierOffset,
|
||||
int64_t &EarlierSize, int64_t LaterOffset,
|
||||
int64_t LaterSize, bool IsOverwriteEnd) {
|
||||
// TODO: base this on the target vector size so that if the earlier
|
||||
// store was too small to get vector writes anyway then its likely
|
||||
// a good idea to shorten it
|
||||
// Power of 2 vector writes are probably always a bad idea to optimize
|
||||
// as any store/memset/memcpy is likely using vector instructions so
|
||||
// shortening it to not vector size is likely to be slower
|
||||
MemIntrinsic *EarlierIntrinsic = cast<MemIntrinsic>(EarlierWrite);
|
||||
unsigned EarlierWriteAlign = EarlierIntrinsic->getAlignment();
|
||||
if (!IsOverwriteEnd)
|
||||
LaterOffset = int64_t(LaterOffset + LaterSize);
|
||||
|
||||
if (!(llvm::isPowerOf2_64(LaterOffset) && EarlierWriteAlign <= LaterOffset) &&
|
||||
!((EarlierWriteAlign != 0) && LaterOffset % EarlierWriteAlign == 0))
|
||||
return false;
|
||||
|
||||
DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW "
|
||||
<< (IsOverwriteEnd ? "END" : "BEGIN") << ": " << *EarlierWrite
|
||||
<< "\n KILLER (offset " << LaterOffset << ", " << EarlierSize
|
||||
<< ")\n");
|
||||
|
||||
int64_t NewLength = IsOverwriteEnd
|
||||
? LaterOffset - EarlierOffset
|
||||
: EarlierSize - (LaterOffset - EarlierOffset);
|
||||
|
||||
Value *EarlierWriteLength = EarlierIntrinsic->getLength();
|
||||
Value *TrimmedLength =
|
||||
ConstantInt::get(EarlierWriteLength->getType(), NewLength);
|
||||
EarlierIntrinsic->setLength(TrimmedLength);
|
||||
|
||||
EarlierSize = NewLength;
|
||||
if (!IsOverwriteEnd) {
|
||||
int64_t OffsetMoved = (LaterOffset - EarlierOffset);
|
||||
Value *Indices[1] = {
|
||||
ConstantInt::get(EarlierWriteLength->getType(), OffsetMoved)};
|
||||
GetElementPtrInst *NewDestGEP = GetElementPtrInst::CreateInBounds(
|
||||
EarlierIntrinsic->getRawDest(), Indices, "", EarlierWrite);
|
||||
EarlierIntrinsic->setDest(NewDestGEP);
|
||||
EarlierOffset = EarlierOffset + OffsetMoved;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool tryToShortenEnd(Instruction *EarlierWrite,
|
||||
OverlapIntervalsTy &IntervalMap,
|
||||
int64_t &EarlierStart, int64_t &EarlierSize) {
|
||||
if (IntervalMap.empty() || !isShortenableAtTheEnd(EarlierWrite))
|
||||
return false;
|
||||
|
||||
OverlapIntervalsTy::iterator OII = --IntervalMap.end();
|
||||
int64_t LaterStart = OII->second;
|
||||
int64_t LaterSize = OII->first - LaterStart;
|
||||
|
||||
if (LaterStart > EarlierStart && LaterStart < EarlierStart + EarlierSize &&
|
||||
LaterStart + LaterSize >= EarlierStart + EarlierSize) {
|
||||
if (tryToShorten(EarlierWrite, EarlierStart, EarlierSize, LaterStart,
|
||||
LaterSize, true)) {
|
||||
IntervalMap.erase(OII);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool tryToShortenBegin(Instruction *EarlierWrite,
|
||||
OverlapIntervalsTy &IntervalMap,
|
||||
int64_t &EarlierStart, int64_t &EarlierSize) {
|
||||
if (IntervalMap.empty() || !isShortenableAtTheBeginning(EarlierWrite))
|
||||
return false;
|
||||
|
||||
OverlapIntervalsTy::iterator OII = IntervalMap.begin();
|
||||
int64_t LaterStart = OII->second;
|
||||
int64_t LaterSize = OII->first - LaterStart;
|
||||
|
||||
if (LaterStart <= EarlierStart && LaterStart + LaterSize > EarlierStart) {
|
||||
assert(LaterStart + LaterSize < EarlierStart + EarlierSize &&
|
||||
"Should have been handled as OverwriteComplete");
|
||||
if (tryToShorten(EarlierWrite, EarlierStart, EarlierSize, LaterStart,
|
||||
LaterSize, false)) {
|
||||
IntervalMap.erase(OII);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool removePartiallyOverlappedStores(AliasAnalysis *AA,
|
||||
const DataLayout &DL,
|
||||
InstOverlapIntervalsTy &IOL) {
|
||||
bool Changed = false;
|
||||
for (auto OI : IOL) {
|
||||
Instruction *EarlierWrite = OI.first;
|
||||
MemoryLocation Loc = getLocForWrite(EarlierWrite, *AA);
|
||||
assert(isRemovable(EarlierWrite) && "Expect only removable instruction");
|
||||
assert(Loc.Size != MemoryLocation::UnknownSize && "Unexpected mem loc");
|
||||
|
||||
const Value *Ptr = Loc.Ptr->stripPointerCasts();
|
||||
int64_t EarlierStart = 0;
|
||||
int64_t EarlierSize = int64_t(Loc.Size);
|
||||
GetPointerBaseWithConstantOffset(Ptr, EarlierStart, DL);
|
||||
OverlapIntervalsTy &IntervalMap = OI.second;
|
||||
Changed =
|
||||
tryToShortenEnd(EarlierWrite, IntervalMap, EarlierStart, EarlierSize);
|
||||
if (IntervalMap.empty())
|
||||
continue;
|
||||
Changed |=
|
||||
tryToShortenBegin(EarlierWrite, IntervalMap, EarlierStart, EarlierSize);
|
||||
}
|
||||
return Changed;
|
||||
}
|
||||
|
||||
static bool eliminateNoopStore(Instruction *Inst, BasicBlock::iterator &BBI,
|
||||
AliasAnalysis *AA, MemoryDependenceResults *MD,
|
||||
const DataLayout &DL,
|
||||
@ -936,7 +1051,7 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
|
||||
if (OR == OverwriteComplete) {
|
||||
DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: "
|
||||
<< *DepWrite << "\n KILLER: " << *Inst << '\n');
|
||||
|
||||
IOL.erase(DepWrite);
|
||||
// Delete the store and now-dead instructions that feed it.
|
||||
deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI);
|
||||
++NumFastStores;
|
||||
@ -948,48 +1063,14 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
|
||||
} else if ((OR == OverwriteEnd && isShortenableAtTheEnd(DepWrite)) ||
|
||||
((OR == OverwriteBegin &&
|
||||
isShortenableAtTheBeginning(DepWrite)))) {
|
||||
// TODO: base this on the target vector size so that if the earlier
|
||||
// store was too small to get vector writes anyway then its likely
|
||||
// a good idea to shorten it
|
||||
// Power of 2 vector writes are probably always a bad idea to optimize
|
||||
// as any store/memset/memcpy is likely using vector instructions so
|
||||
// shortening it to not vector size is likely to be slower
|
||||
MemIntrinsic *DepIntrinsic = cast<MemIntrinsic>(DepWrite);
|
||||
unsigned DepWriteAlign = DepIntrinsic->getAlignment();
|
||||
assert(!EnablePartialOverwriteTracking && "Do not expect to perform "
|
||||
"when partial-overwrite "
|
||||
"tracking is enabled");
|
||||
int64_t EarlierSize = DepLoc.Size;
|
||||
int64_t LaterSize = Loc.Size;
|
||||
bool IsOverwriteEnd = (OR == OverwriteEnd);
|
||||
if (!IsOverwriteEnd)
|
||||
InstWriteOffset = int64_t(InstWriteOffset + Loc.Size);
|
||||
|
||||
if ((llvm::isPowerOf2_64(InstWriteOffset) &&
|
||||
DepWriteAlign <= InstWriteOffset) ||
|
||||
((DepWriteAlign != 0) && InstWriteOffset % DepWriteAlign == 0)) {
|
||||
|
||||
DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW "
|
||||
<< (IsOverwriteEnd ? "END" : "BEGIN") << ": "
|
||||
<< *DepWrite << "\n KILLER (offset "
|
||||
<< InstWriteOffset << ", " << DepLoc.Size << ")"
|
||||
<< *Inst << '\n');
|
||||
|
||||
int64_t NewLength =
|
||||
IsOverwriteEnd
|
||||
? InstWriteOffset - DepWriteOffset
|
||||
: DepLoc.Size - (InstWriteOffset - DepWriteOffset);
|
||||
|
||||
Value *DepWriteLength = DepIntrinsic->getLength();
|
||||
Value *TrimmedLength =
|
||||
ConstantInt::get(DepWriteLength->getType(), NewLength);
|
||||
DepIntrinsic->setLength(TrimmedLength);
|
||||
|
||||
if (!IsOverwriteEnd) {
|
||||
int64_t OffsetMoved = (InstWriteOffset - DepWriteOffset);
|
||||
Value *Indices[1] = {
|
||||
ConstantInt::get(DepWriteLength->getType(), OffsetMoved)};
|
||||
GetElementPtrInst *NewDestGEP = GetElementPtrInst::CreateInBounds(
|
||||
DepIntrinsic->getRawDest(), Indices, "", DepWrite);
|
||||
DepIntrinsic->setDest(NewDestGEP);
|
||||
}
|
||||
MadeChange = true;
|
||||
}
|
||||
MadeChange = tryToShorten(DepWrite, DepWriteOffset, EarlierSize,
|
||||
InstWriteOffset, LaterSize, IsOverwriteEnd);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1012,6 +1093,9 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
|
||||
}
|
||||
}
|
||||
|
||||
if (EnablePartialOverwriteTracking)
|
||||
MadeChange |= removePartiallyOverlappedStores(AA, DL, IOL);
|
||||
|
||||
// If this block ends in a return, unwind, or unreachable, all allocas are
|
||||
// dead at its end, which means stores to them are also dead.
|
||||
if (BB.getTerminator()->getNumSuccessors() == 0)
|
||||
|
@ -86,5 +86,23 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @write8To15AndThen0To7(i64* nocapture %P) {
|
||||
entry:
|
||||
; CHECK-LABEL: @write8To15AndThen0To7(
|
||||
; CHECK: [[GEP:%[0-9]+]] = getelementptr inbounds i8, i8* %mybase0, i64 16
|
||||
; CHECK: tail call void @llvm.memset.p0i8.i64(i8* [[GEP]], i8 0, i64 16, i32 8, i1 false)
|
||||
|
||||
%base0 = bitcast i64* %P to i8*
|
||||
%mybase0 = getelementptr inbounds i8, i8* %base0, i64 0
|
||||
tail call void @llvm.memset.p0i8.i64(i8* %mybase0, i8 0, i64 32, i32 8, i1 false)
|
||||
|
||||
%base64_0 = getelementptr inbounds i64, i64* %P, i64 0
|
||||
%base64_1 = getelementptr inbounds i64, i64* %P, i64 1
|
||||
|
||||
store i64 1, i64* %base64_1
|
||||
store i64 2, i64* %base64_0
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
|
||||
|
||||
|
@ -93,3 +93,20 @@ entry:
|
||||
store i64 3, i64* %tf_trapno, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @write16To23AndThen24To31(i64* nocapture %P, i64 %n64, i32 %n32, i16 %n16, i8 %n8) {
|
||||
entry:
|
||||
; CHECK-LABEL: @write16To23AndThen24To31(
|
||||
; CHECK: tail call void @llvm.memset.p0i8.i64(i8* %mybase0, i8 0, i64 16, i32 8, i1 false)
|
||||
|
||||
%base0 = bitcast i64* %P to i8*
|
||||
%mybase0 = getelementptr inbounds i8, i8* %base0, i64 0
|
||||
tail call void @llvm.memset.p0i8.i64(i8* %mybase0, i8 0, i64 32, i32 8, i1 false)
|
||||
|
||||
%base64_2 = getelementptr inbounds i64, i64* %P, i64 2
|
||||
%base64_3 = getelementptr inbounds i64, i64* %P, i64 3
|
||||
|
||||
store i64 3, i64* %base64_2
|
||||
store i64 3, i64* %base64_3
|
||||
ret void
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user