mirror of
https://github.com/RPCS3/llvm.git
synced 2024-12-12 06:06:32 +00:00
DSE: Shorten memset when a later store overwrites the start of it
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@151620 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
601c094734
commit
a116623e06
@ -259,6 +259,13 @@ static bool isShortenable(Instruction *I) {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// isMemset - Returns true if this instruction is an intrinsic memset
|
||||
static bool isMemset(Instruction *I) {
|
||||
IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
|
||||
return II && II->getIntrinsicID() == Intrinsic::memset;
|
||||
}
|
||||
|
||||
/// getStoredPointerOperand - Return the pointer that is being written to.
|
||||
static Value *getStoredPointerOperand(Instruction *I) {
|
||||
if (StoreInst *SI = dyn_cast<StoreInst>(I))
|
||||
@ -310,14 +317,17 @@ namespace {
|
||||
{
|
||||
OverwriteComplete,
|
||||
OverwriteEnd,
|
||||
OverwriteStart,
|
||||
OverwriteUnknown
|
||||
};
|
||||
}
|
||||
|
||||
/// isOverwrite - Return 'OverwriteComplete' if a store to the 'Later' location
|
||||
/// completely overwrites a store to the 'Earlier' location.
|
||||
/// 'OverwriteEnd' if the end of the 'Earlier' location is completely
|
||||
/// overwritten by 'Later', or 'OverwriteUnknown' if nothing can be determined
|
||||
/// 'OverwriteEnd' if the end of the 'Earlier' location is completely
|
||||
/// overwritten by 'Later', 'OverwriteStart' if the start of 'Earlier'
|
||||
/// is completely overwritten by 'Later' or 'OverwriteUnknown' if nothing
|
||||
/// can be determined
|
||||
static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
|
||||
const AliasAnalysis::Location &Earlier,
|
||||
AliasAnalysis &AA,
|
||||
@ -418,6 +428,21 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
|
||||
LaterOff < int64_t(EarlierOff + Earlier.Size) &&
|
||||
int64_t(LaterOff + Later.Size) >= int64_t(EarlierOff + Earlier.Size))
|
||||
return OverwriteEnd;
|
||||
|
||||
// The other interesting case is if the later store overwrites the start of
|
||||
// the earlier store
|
||||
//
|
||||
//       |--earlier--|
|
||||
// |-- later --|
|
||||
//
|
||||
// In this case we may want to trim the size of earlier to avoid generating
|
||||
// writes to addresses which will definitely be overwritten later
|
||||
if (EarlierOff >= LaterOff &&
|
||||
EarlierOff < int64_t(LaterOff + Later.Size) &&
|
||||
int64_t(EarlierOff + Earlier.Size) >= int64_t(LaterOff + Later.Size)) {
|
||||
LaterOff = LaterOff + Later.Size;
|
||||
return OverwriteStart;
|
||||
}
|
||||
|
||||
// Otherwise, they don't completely overlap.
|
||||
return OverwriteUnknown;
|
||||
@ -589,6 +614,45 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
|
||||
DepIntrinsic->setLength(TrimmedLength);
|
||||
MadeChange = true;
|
||||
}
|
||||
} else if (OR == OverwriteStart && isMemset(DepWrite)) {
|
||||
// TODO: base this on the target vector size so that if the earlier
|
||||
// store was too small to get vector writes anyway then it's likely
|
||||
// a good idea to shorten it
|
||||
// Power of 2 vector writes are probably always a bad idea to optimize
|
||||
// as any store/memset/memcpy is likely using vector instructions so
|
||||
// shortening it to not vector size is likely to be slower
|
||||
// TODO: shorten memcpy and memmove by offsetting the source address.
|
||||
MemIntrinsic* DepIntrinsic = cast<MemIntrinsic>(DepWrite);
|
||||
unsigned DepWriteAlign = DepIntrinsic->getAlignment();
|
||||
if (llvm::isPowerOf2_64(InstWriteOffset) ||
|
||||
((DepWriteAlign != 0) && InstWriteOffset % DepWriteAlign == 0)) {
|
||||
|
||||
DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW START: "
|
||||
<< *DepWrite << "\n KILLER (offset "
|
||||
<< InstWriteOffset << ", "
|
||||
<< DepWriteOffset << ", "
|
||||
<< DepLoc.Size << ")"
|
||||
<< *Inst << '\n');
|
||||
|
||||
Value* DepWriteLength = DepIntrinsic->getLength();
|
||||
Value* TrimmedLength = ConstantInt::get(DepWriteLength->getType(),
|
||||
DepLoc.Size -
|
||||
(InstWriteOffset -
|
||||
DepWriteOffset));
|
||||
DepIntrinsic->setLength(TrimmedLength);
|
||||
const TargetData *TD = AA->getTargetData();
|
||||
Type *IntPtrTy = TD->getIntPtrType(BB.getContext());
|
||||
Value* Offset = ConstantInt::get(IntPtrTy,
|
||||
InstWriteOffset - DepWriteOffset);
|
||||
// Offset the start of the memset with a GEP. As the memset type is
|
||||
// i8* a GEP will do this without needing to use ptrtoint, etc.
|
||||
Value *Dest = GetElementPtrInst::Create(DepIntrinsic->getRawDest(),
|
||||
Offset,
|
||||
"",
|
||||
DepWrite);
|
||||
DepIntrinsic->setDest(Dest);
|
||||
MadeChange = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
71
test/Transforms/DeadStoreElimination/OverwriteStoreStart.ll
Normal file
71
test/Transforms/DeadStoreElimination/OverwriteStoreStart.ll
Normal file
@ -0,0 +1,71 @@
|
||||
; RUN: opt < %s -basicaa -dse -S | FileCheck %s
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
|
||||
%struct.vec2 = type { <4 x i32>, <4 x i32> }
|
||||
%struct.vec2plusi = type { <4 x i32>, <4 x i32>, i32 }
|
||||
|
||||
@glob1 = global %struct.vec2 zeroinitializer, align 16
|
||||
@glob2 = global %struct.vec2plusi zeroinitializer, align 16
|
||||
|
||||
; The i32 store targets the same address the memset starts at (%p plus one
; i32 element), so it rewrites the first 4 of the memset's 28 bytes. The
; FileCheck pattern below expects a 24-byte memset whose destination is an
; unnamed (numbered) i8* value rather than %p3.
define void @write4to8(i32* nocapture %p) nounwind uwtable ssp {
|
||||
; CHECK: @write4to8
|
||||
entry:
|
||||
%arrayidx0 = getelementptr inbounds i32* %p, i64 1
|
||||
%p3 = bitcast i32* %arrayidx0 to i8*
|
||||
; CHECK: call void @llvm.memset.p0i8.i64(i8* %{{[0-9]+}}, i8 0, i64 24, i32 4, i1 false)
|
||||
call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false)
|
||||
%arrayidx1 = getelementptr inbounds i32* %p, i64 1
|
||||
store i32 1, i32* %arrayidx1, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; The i64 store targets the same address the memset starts at (%p plus one
; i32 element), so it rewrites the first 8 of the memset's 28 bytes. The
; FileCheck pattern below expects a 20-byte memset whose destination is an
; unnamed (numbered) i8* value rather than %p3.
define void @write4to12(i32* nocapture %p) nounwind uwtable ssp {
|
||||
; CHECK: @write4to12
|
||||
entry:
|
||||
%arrayidx0 = getelementptr inbounds i32* %p, i64 1
|
||||
%p3 = bitcast i32* %arrayidx0 to i8*
|
||||
; CHECK: call void @llvm.memset.p0i8.i64(i8* %{{[0-9]+}}, i8 0, i64 20, i32 4, i1 false)
|
||||
call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false)
|
||||
%arrayidx1 = bitcast i32* %arrayidx0 to i64*
|
||||
store i64 1, i64* %arrayidx1, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Here the i64 store starts at %p itself, one i32 element before the memset
; destination, so only its second half overlaps the memset (the memset's
; first 4 bytes). The FileCheck pattern below expects a 24-byte memset whose
; destination is an unnamed (numbered) i8* value rather than %p3.
define void @write4to8_2(i32* nocapture %p) nounwind uwtable ssp {
|
||||
; CHECK: @write4to8_2
|
||||
entry:
|
||||
%arrayidx0 = getelementptr inbounds i32* %p, i64 1
|
||||
%p3 = bitcast i32* %arrayidx0 to i8*
|
||||
; CHECK: call void @llvm.memset.p0i8.i64(i8* %{{[0-9]+}}, i8 0, i64 24, i32 4, i1 false)
|
||||
call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false)
|
||||
%arrayidx1 = bitcast i32* %p to i64*
|
||||
store i64 1, i64* %arrayidx1, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Negative test: the i16 store rewrites only the first 2 bytes of the memset.
; The FileCheck pattern below expects the memset to be left untouched, still
; writing 28 bytes through %p3.
define void @dontwrite4to6(i32* nocapture %p) nounwind uwtable ssp {
|
||||
; CHECK: @dontwrite4to6
|
||||
entry:
|
||||
%arrayidx0 = getelementptr inbounds i32* %p, i64 1
|
||||
%p3 = bitcast i32* %arrayidx0 to i8*
|
||||
; CHECK: call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false)
|
||||
call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false)
|
||||
%arrayidx1 = bitcast i32* %arrayidx0 to i16*
|
||||
store i16 1, i16* %arrayidx1, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Same shape as the partially overlapping case, but with negative GEP
; indices: the memset starts one i32 element before %p and the i64 store
; starts two elements before %p, so the store's second half covers the
; memset's first 4 bytes. The FileCheck pattern below expects a 24-byte
; memset whose destination is an unnamed (numbered) i8* value.
define void @write4to8_neg_gep(i32* nocapture %p) nounwind uwtable ssp {
|
||||
; CHECK: @write4to8_neg_gep
|
||||
entry:
|
||||
%arrayidx0 = getelementptr inbounds i32* %p, i64 -1
|
||||
%p3 = bitcast i32* %arrayidx0 to i8*
|
||||
; CHECK: call void @llvm.memset.p0i8.i64(i8* %{{[0-9]+}}, i8 0, i64 24, i32 4, i1 false)
|
||||
call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false)
|
||||
%neg2 = getelementptr inbounds i32* %p, i64 -2
|
||||
%arrayidx1 = bitcast i32* %neg2 to i64*
|
||||
store i64 1, i64* %arrayidx1, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
|
Loading…
Reference in New Issue
Block a user