Generalize MemCpyOpt's handling of call slot forwarding to function properly when
the call slot forwarding is implemented with a load/store pair rather than a memcpy.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@116637 91177308-0d34-0410-b5e6-96231b3b80d8
parent 00ed59a968
commit 6549121c66
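For context, the shape this change teaches MemCpyOpt to recognize is sketched below, using the typed-pointer IR syntax of the era. The sketch is illustrative only: @caller, @callee and the single-field struct %T are hypothetical names, not part of the commit (the new test at the end of the diff exercises the same shape). The callee fills a local temporary through its sret argument, and a load/store pair then copies the whole temporary into the real return slot; after -memcpyopt the call writes into the destination directly and the pair is deleted.

; Minimal sketch (hypothetical names @caller/@callee, struct %T) of the
; load/store form of call slot forwarding handled by this commit.
%T = type { i32* }

declare void @callee(%T* sret)

define void @caller(%T* noalias sret %dst) {
entry:
  %tmp = alloca %T, align 8
  ; the callee fills a temporary through its sret argument...
  call void @callee(%T* sret %tmp)
  ; ...and a load/store pair copies the whole temporary into the real slot.
  %src.f = getelementptr inbounds %T* %tmp, i64 0, i32 0
  %v = load i32** %src.f, align 8
  %dst.f = getelementptr inbounds %T* %dst, i64 0, i32 0
  store i32* %v, i32** %dst.f, align 8
  ; after -memcpyopt the call becomes "call void @callee(%T* sret %dst)" and
  ; the load/store pair is removed.
  ret void
}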
@@ -321,7 +321,8 @@ namespace {
     bool processStore(StoreInst *SI, BasicBlock::iterator &BBI);
     bool processMemCpy(MemCpyInst *M);
     bool processMemMove(MemMoveInst *M);
-    bool performCallSlotOptzn(MemCpyInst *cpy, CallInst *C);
+    bool performCallSlotOptzn(Instruction *cpy, Value *cpyDst, Value *cpySrc,
+                              uint64_t cpyLen, CallInst *C);
     bool iterateOnFunction(Function &F);
   };
 
@@ -339,7 +340,6 @@ INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
 INITIALIZE_PASS_END(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
                     false, false)
 
-
 /// processStore - When GVN is scanning forward over instructions, we look for
 /// some other patterns to fold away.  In particular, this looks for stores to
 /// neighboring locations of memory.  If it sees enough consequtive ones
@@ -347,6 +347,37 @@ INITIALIZE_PASS_END(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
 bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
   if (SI->isVolatile()) return false;
 
+  TargetData *TD = getAnalysisIfAvailable<TargetData>();
+  if (!TD) return false;
+
+  // Detect cases where we're performing call slot forwarding, but
+  // happen to be using a load-store pair to implement it, rather than
+  // a memcpy.
+  if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) {
+    if (!LI->isVolatile() && LI->hasOneUse()) {
+      MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();
+
+      MemDepResult dep = MD.getDependency(LI);
+      CallInst *C = 0;
+      if (dep.isClobber() && !isa<MemCpyInst>(dep.getInst()))
+        C = dyn_cast<CallInst>(dep.getInst());
+
+      if (C) {
+        bool changed = performCallSlotOptzn(LI,
+                        SI->getPointerOperand()->stripPointerCasts(),
+                        LI->getPointerOperand()->stripPointerCasts(),
+                        TD->getTypeStoreSize(SI->getOperand(0)->getType()), C);
+        if (changed) {
+          MD.removeInstruction(SI);
+          SI->eraseFromParent();
+          LI->eraseFromParent();
+          ++NumMemCpyInstr;
+          return true;
+        }
+      }
+    }
+  }
+
   LLVMContext &Context = SI->getContext();
 
   // There are two cases that are interesting for this code to handle: memcpy
@@ -359,8 +390,6 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
   if (!ByteVal)
     return false;
 
-  TargetData *TD = getAnalysisIfAvailable<TargetData>();
-  if (!TD) return false;
   AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
   Module *M = SI->getParent()->getParent()->getParent();
 
@@ -494,7 +523,9 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
 /// performCallSlotOptzn - takes a memcpy and a call that it depends on,
 /// and checks for the possibility of a call slot optimization by having
 /// the call write its result directly into the destination of the memcpy.
-bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
+bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
+                                     Value *cpyDest, Value *cpySrc,
+                                     uint64_t cpyLen, CallInst *C) {
   // The general transformation to keep in mind is
   //
   //   call @func(..., src, ...)
@@ -511,16 +542,8 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
 
   // Deliberately get the source and destination with bitcasts stripped away,
   // because we'll need to do type comparisons based on the underlying type.
-  Value *cpyDest = cpy->getDest();
-  Value *cpySrc = cpy->getSource();
   CallSite CS(C);
 
-  // We need to be able to reason about the size of the memcpy, so we require
-  // that it be a constant.
-  ConstantInt *cpyLength = dyn_cast<ConstantInt>(cpy->getLength());
-  if (!cpyLength)
-    return false;
-
   // Require that src be an alloca.  This simplifies the reasoning considerably.
   AllocaInst *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
   if (!srcAlloca)
@@ -537,7 +560,7 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
   uint64_t srcSize = TD->getTypeAllocSize(srcAlloca->getAllocatedType()) *
     srcArraySize->getZExtValue();
 
-  if (cpyLength->getZExtValue() < srcSize)
+  if (cpyLen < srcSize)
     return false;
 
   // Check that accessing the first srcSize bytes of dest will not cause a
@@ -606,7 +629,7 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
   // the use analysis, we also need to know that it does not sneakily
   // access dest.  We rely on AA to figure this out for us.
   AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
-  if (AA.getModRefInfo(C, cpy->getRawDest(), srcSize) !=
+  if (AA.getModRefInfo(C, cpyDest, srcSize) !=
       AliasAnalysis::NoModRef)
     return false;
 
@@ -635,7 +658,6 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
 
   // Remove the memcpy
   MD.removeInstruction(cpy);
-  cpy->eraseFromParent();
   ++NumMemCpyInstr;
 
   return true;
@@ -649,6 +671,10 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
 bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
   MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();
 
+  // We can only optimize statically-sized memcpy's.
+  ConstantInt *cpyLen = dyn_cast<ConstantInt>(M->getLength());
+  if (!cpyLen) return false;
+
   // The are two possible optimizations we can do for memcpy:
   //   a) memcpy-memcpy xform which exposes redundance for DSE.
   //   b) call-memcpy xform for return slot optimization.
@@ -656,8 +682,12 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
   if (!dep.isClobber())
     return false;
   if (!isa<MemCpyInst>(dep.getInst())) {
-    if (CallInst *C = dyn_cast<CallInst>(dep.getInst()))
-      return performCallSlotOptzn(M, C);
+    if (CallInst *C = dyn_cast<CallInst>(dep.getInst())) {
+      bool changed = performCallSlotOptzn(M, M->getDest(), M->getSource(),
+                                          cpyLen->getZExtValue(), C);
+      if (changed) M->eraseFromParent();
+      return changed;
+    }
     return false;
   }
 
test/Transforms/MemCpyOpt/loadstore-sret.ll (new file, 25 lines)
@@ -0,0 +1,25 @@
+; RUN: opt -S < %s -memcpyopt | FileCheck %s
+; <rdar://problem/8536696>
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+%"class.std::auto_ptr" = type { i32* }
+
+; CHECK: @_Z3foov
+define void @_Z3foov(%"class.std::auto_ptr"* noalias nocapture sret %agg.result) ssp {
+_ZNSt8auto_ptrIiED1Ev.exit:
+  %temp.lvalue = alloca %"class.std::auto_ptr", align 8
+; CHECK: call void @_Z3barv(%"class.std::auto_ptr"* sret %agg.result)
+  call void @_Z3barv(%"class.std::auto_ptr"* sret %temp.lvalue)
+  %tmp.i.i = getelementptr inbounds %"class.std::auto_ptr"* %temp.lvalue, i64 0, i32 0
+; CHECK-NOT: load
+  %tmp2.i.i = load i32** %tmp.i.i, align 8
+  %tmp.i.i4 = getelementptr inbounds %"class.std::auto_ptr"* %agg.result, i64 0, i32 0
+; CHECK-NOT: store
+  store i32* %tmp2.i.i, i32** %tmp.i.i4, align 8
+; CHECK: ret void
+  ret void
+}
+
+declare void @_Z3barv(%"class.std::auto_ptr"* sret)