Re-apply the patch to improve the optimizations of memcpy's, with several

bugs fixed.  This now passes PPC bootstrap.

llvm-svn: 47026
This commit is contained in:
Owen Anderson 2008-02-12 21:15:18 +00:00
parent 1122ee4ded
commit 274aa2846e
4 changed files with 125 additions and 3 deletions

View File

@ -100,6 +100,11 @@ class MemoryDependenceAnalysis : public FunctionPass {
/// updating the dependence of instructions that previously depended on it.
void removeInstruction(Instruction* rem);
/// dropInstruction - Remove an instruction from the analysis, making
/// absolutely conservative assumptions when updating the cache. This is
/// useful, for example when an instruction is changed rather than removed.
void dropInstruction(Instruction* drop);
private:
Instruction* getCallSiteDependency(CallSite C, Instruction* start,
BasicBlock* block);

View File

@ -457,6 +457,46 @@ Instruction* MemoryDependenceAnalysis::getDependency(Instruction* query,
return NonLocal;
}
/// dropInstruction - Remove an instruction from the analysis, making
/// absolutely conservative assumptions when updating the cache. This is
/// useful, for example when an instruction is changed rather than removed.
void MemoryDependenceAnalysis::dropInstruction(Instruction* drop) {
depMapType::iterator depGraphEntry = depGraphLocal.find(drop);
if (depGraphEntry != depGraphLocal.end())
reverseDep[depGraphEntry->second.first].erase(drop);
// Drop dependency information for things that depended on this instr
SmallPtrSet<Instruction*, 4>& set = reverseDep[drop];
for (SmallPtrSet<Instruction*, 4>::iterator I = set.begin(), E = set.end();
I != E; ++I)
depGraphLocal.erase(*I);
depGraphLocal.erase(drop);
reverseDep.erase(drop);
for (DenseMap<BasicBlock*, Value*>::iterator DI =
depGraphNonLocal[drop].begin(), DE = depGraphNonLocal[drop].end();
DI != DE; ++DI)
if (DI->second != None)
reverseDepNonLocal[DI->second].erase(drop);
if (reverseDepNonLocal.count(drop)) {
SmallPtrSet<Instruction*, 4>& set = reverseDepNonLocal[drop];
for (SmallPtrSet<Instruction*, 4>::iterator I = set.begin(), E = set.end();
I != E; ++I)
for (DenseMap<BasicBlock*, Value*>::iterator DI =
depGraphNonLocal[*I].begin(), DE = depGraphNonLocal[*I].end();
DI != DE; ++DI)
if (DI->second == drop)
DI->second = Dirty;
}
reverseDepNonLocal.erase(drop);
nonLocalDepMapType::iterator I = depGraphNonLocal.find(drop);
if (I != depGraphNonLocal.end())
depGraphNonLocal.erase(I);
}
/// removeInstruction - Remove an instruction from the dependence analysis,
/// updating the dependence of instructions that previously depended on it.
/// This method attempts to keep the cache coherent using the reverse map.
@ -473,7 +513,7 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction* rem) {
depMapType::iterator depGraphEntry = depGraphLocal.find(rem);
if (depGraphEntry != depGraphLocal.end()) {
reverseDep[depGraphLocal[rem].first].erase(rem);
reverseDep[depGraphEntry->second.first].erase(rem);
if (depGraphEntry->second.first != NonLocal &&
depGraphEntry->second.first != None &&

View File

@ -19,6 +19,7 @@
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Instructions.h"
#include "llvm/Value.h"
#include "llvm/ADT/BitVector.h"
@ -736,6 +737,7 @@ namespace {
SmallVector<Instruction*, 4>& toErase);
bool processNonLocalLoad(LoadInst* L,
SmallVector<Instruction*, 4>& toErase);
bool processMemCpy(MemCpyInst* M, SmallVector<Instruction*, 4>& toErase);
Value *GetValueForBlock(BasicBlock *BB, LoadInst* orig,
DenseMap<BasicBlock*, Value*> &Phis,
bool top_level = false);
@ -1044,6 +1046,79 @@ bool GVN::processLoad(LoadInst* L,
return deletedLoad;
}
/// processMemCpy - perform simplication of memcpy's. If we have memcpy A which
/// copies X to Y, and memcpy B which copies Y to Z, then we can rewrite B to be
/// a memcpy from X to Z (or potentially a memmove, depending on circumstances).
/// This allows later passes to remove the first memcpy altogether.
bool GVN::processMemCpy(MemCpyInst* M,
SmallVector<Instruction*, 4>& toErase) {
MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
// First, we have to check that the dependency is another memcpy
Instruction* dep = MD.getDependency(M);
if (dep == MemoryDependenceAnalysis::None ||
dep == MemoryDependenceAnalysis::NonLocal ||
!isa<MemCpyInst>(dep))
return false;
// We can only transforms memcpy's where the dest of one is the source of the
// other
MemCpyInst* MDep = cast<MemCpyInst>(dep);
if (M->getSource() != MDep->getDest())
return false;
// Second, the length of the memcpy's must be the same, or the preceeding one
// must be larger than the following one.
ConstantInt* C1 = dyn_cast<ConstantInt>(MDep->getLength());
ConstantInt* C2 = dyn_cast<ConstantInt>(M->getLength());
if (!C1 || !C2)
return false;
uint64_t CpySize = C1->getValue().getZExtValue();
uint64_t DepSize = C2->getValue().getZExtValue();
if (DepSize < CpySize)
return false;
// Finally, we have to make sure that the dest of the second does not
// alias the source of the first
AliasAnalysis& AA = getAnalysis<AliasAnalysis>();
if (AA.alias(M->getRawDest(), CpySize, MDep->getRawSource(), DepSize) !=
AliasAnalysis::NoAlias)
return false;
else if (AA.alias(M->getRawDest(), CpySize, M->getRawSource(), CpySize) !=
AliasAnalysis::NoAlias)
return false;
else if (AA.alias(MDep->getRawDest(), DepSize, MDep->getRawSource(), DepSize)
!= AliasAnalysis::NoAlias)
return false;
// If all checks passed, then we can transform these memcpy's
bool is32bit = M->getIntrinsicID() == Intrinsic::memcpy_i32;
Function* MemMoveFun = Intrinsic::getDeclaration(
M->getParent()->getParent()->getParent(),
is32bit ? Intrinsic::memcpy_i32 :
Intrinsic::memcpy_i64);
std::vector<Value*> args;
args.push_back(M->getRawDest());
args.push_back(MDep->getRawSource());
args.push_back(M->getLength());
args.push_back(M->getAlignment());
CallInst* C = new CallInst(MemMoveFun, args.begin(), args.end(), "", M);
if (MD.getDependency(C) == MDep) {
MD.dropInstruction(M);
toErase.push_back(M);
return true;
} else {
MD.removeInstruction(C);
toErase.push_back(C);
return false;
}
}
/// processInstruction - When calculating availability, handle an instruction
/// by inserting it into the appropriate sets
bool GVN::processInstruction(Instruction* I,
@ -1052,6 +1127,8 @@ bool GVN::processInstruction(Instruction* I,
SmallVector<Instruction*, 4>& toErase) {
if (LoadInst* L = dyn_cast<LoadInst>(I)) {
return processLoad(L, lastSeenLoad, toErase);
} else if (MemCpyInst* M = dyn_cast<MemCpyInst>(I)) {
return processMemCpy(M, toErase);
}
unsigned num = VN.lookup_or_add(I);
@ -1161,8 +1238,9 @@ bool GVN::iterateOnFunction(Function &F) {
++BI;
for (SmallVector<Instruction*, 4>::iterator I = toErase.begin(),
E = toErase.end(); I != E; ++I)
E = toErase.end(); I != E; ++I) {
(*I)->eraseFromParent();
}
toErase.clear();
}

View File

@ -1,5 +1,4 @@
; RUN: llvm-as < %s | opt -gvn -dse | llvm-dis | grep memcpy | count 2
; XFAIL: *
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i686-apple-darwin9"