Move CoerceAvailableValueToLoadType earlier in GVN.cpp. Hook it up

so that nonlocal and partially redundant loads can use it as well.
The testcase shows examples of craziness this can handle.  This triggers
*many* times in 176.gcc.

llvm-svn: 82403
This commit is contained in:
Chris Lattner 2009-09-20 20:09:34 +00:00
parent 279ec561c6
commit b7d34131d1
2 changed files with 203 additions and 108 deletions

View File

@ -937,6 +937,115 @@ SpeculationFailure:
return false; return false;
} }
/// CoerceAvailableValueToLoadType - If we saw a store of a value to memory, and
/// then a load from a must-aliased pointer of a different type, try to coerce
/// the stored value. LoadedTy is the type of the load we want to replace and
/// InsertPt is the place to insert new instructions.
///
/// If we can't do it, return null.
static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
const Type *LoadedTy,
Instruction *InsertPt,
const TargetData &TD) {
const Type *StoredValTy = StoredVal->getType();
uint64_t StoreSize = TD.getTypeSizeInBits(StoredValTy);
uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy);
// If the store and reload are the same size, we can always reuse it.
if (StoreSize == LoadSize) {
if (isa<PointerType>(StoredValTy) && isa<PointerType>(LoadedTy)) {
// Pointer to Pointer -> use bitcast.
return new BitCastInst(StoredVal, LoadedTy, "", InsertPt);
}
// Convert source pointers to integers, which can be bitcast.
if (isa<PointerType>(StoredValTy)) {
StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
}
const Type *TypeToCastTo = LoadedTy;
if (isa<PointerType>(TypeToCastTo))
TypeToCastTo = TD.getIntPtrType(StoredValTy->getContext());
if (StoredValTy != TypeToCastTo)
StoredVal = new BitCastInst(StoredVal, TypeToCastTo, "", InsertPt);
// Cast to pointer if the load needs a pointer type.
if (isa<PointerType>(LoadedTy))
StoredVal = new IntToPtrInst(StoredVal, LoadedTy, "", InsertPt);
return StoredVal;
}
// If the loaded value is smaller than the available value, then we can
// extract out a piece from it. If the available value is too small, then we
// can't do anything.
if (StoreSize < LoadSize)
return 0;
// Convert source pointers to integers, which can be manipulated.
if (isa<PointerType>(StoredValTy)) {
StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
}
// Convert vectors and fp to integer, which can be manipulated.
if (!isa<IntegerType>(StoredValTy)) {
StoredValTy = IntegerType::get(StoredValTy->getContext(), StoreSize);
StoredVal = new BitCastInst(StoredVal, StoredValTy, "", InsertPt);
}
// If this is a big-endian system, we need to shift the value down to the low
// bits so that a truncate will work.
if (TD.isBigEndian()) {
Constant *Val = ConstantInt::get(StoredVal->getType(), StoreSize-LoadSize);
StoredVal = BinaryOperator::CreateLShr(StoredVal, Val, "tmp", InsertPt);
}
// Truncate the integer to the right size now.
const Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadSize);
StoredVal = new TruncInst(StoredVal, NewIntTy, "trunc", InsertPt);
if (LoadedTy == NewIntTy)
return StoredVal;
// If the result is a pointer, inttoptr.
if (isa<PointerType>(LoadedTy))
return new IntToPtrInst(StoredVal, LoadedTy, "inttoptr", InsertPt);
// Otherwise, bitcast.
return new BitCastInst(StoredVal, LoadedTy, "bitcast", InsertPt);
}
static void
GetAvailableBlockValues(DenseMap<BasicBlock*, Value*> &BlockReplValues,
SmallVector<std::pair<BasicBlock*,
Value*>, 16> &ValuesPerBlock,
const Type *LoadTy,
const TargetData *TD) {
for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
BasicBlock *BB = ValuesPerBlock[i].first;
Value *AvailableVal = ValuesPerBlock[i].second;
Value *&BlockEntry = BlockReplValues[BB];
if (BlockEntry) continue;
if (AvailableVal->getType() != LoadTy) {
assert(TD && "Need target data to handle type mismatch case");
AvailableVal = CoerceAvailableValueToLoadType(AvailableVal, LoadTy,
BB->getTerminator(), *TD);
DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\n"
<< *ValuesPerBlock[i].second << '\n'
<< *AvailableVal << '\n' << "\n\n\n");
}
BlockEntry = AvailableVal;
}
}
/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are /// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
/// non-local by performing PHI construction. /// non-local by performing PHI construction.
bool GVN::processNonLocalLoad(LoadInst *LI, bool GVN::processNonLocalLoad(LoadInst *LI,
@ -972,6 +1081,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
SmallVector<std::pair<BasicBlock*, Value*>, 16> ValuesPerBlock; SmallVector<std::pair<BasicBlock*, Value*>, 16> ValuesPerBlock;
SmallVector<BasicBlock*, 16> UnavailableBlocks; SmallVector<BasicBlock*, 16> UnavailableBlocks;
const TargetData *TD = 0;
for (unsigned i = 0, e = Deps.size(); i != e; ++i) { for (unsigned i = 0, e = Deps.size(); i != e; ++i) {
BasicBlock *DepBB = Deps[i].first; BasicBlock *DepBB = Deps[i].first;
MemDepResult DepInfo = Deps[i].second; MemDepResult DepInfo = Deps[i].second;
@ -992,24 +1103,41 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
if (StoreInst* S = dyn_cast<StoreInst>(DepInst)) { if (StoreInst* S = dyn_cast<StoreInst>(DepInst)) {
// Reject loads and stores that are to the same address but are of // Reject loads and stores that are to the same address but are of
// different types. // different types if we have to.
// NOTE: 403.gcc does have this case (e.g. in readonly_fields_p) because
// of bitfield access, it would be interesting to optimize for it at some
// point.
if (S->getOperand(0)->getType() != LI->getType()) { if (S->getOperand(0)->getType() != LI->getType()) {
UnavailableBlocks.push_back(DepBB); if (TD == 0)
continue; TD = getAnalysisIfAvailable<TargetData>();
// If the stored value is larger or equal to the loaded value, we can
// reuse it.
if (TD == 0 ||
TD->getTypeSizeInBits(S->getOperand(0)->getType()) <
TD->getTypeSizeInBits(LI->getType())) {
UnavailableBlocks.push_back(DepBB);
continue;
}
} }
ValuesPerBlock.push_back(std::make_pair(DepBB, S->getOperand(0))); ValuesPerBlock.push_back(std::make_pair(DepBB, S->getOperand(0)));
} else if (LoadInst* LD = dyn_cast<LoadInst>(DepInst)) { } else if (LoadInst* LD = dyn_cast<LoadInst>(DepInst)) {
// If the types mismatch and we can't handle it, reject reuse of the load.
if (LD->getType() != LI->getType()) { if (LD->getType() != LI->getType()) {
UnavailableBlocks.push_back(DepBB); if (TD == 0)
continue; TD = getAnalysisIfAvailable<TargetData>();
// If the stored value is larger or equal to the loaded value, we can
// reuse it.
if (TD == 0 ||
TD->getTypeSizeInBits(LD->getType()) <
TD->getTypeSizeInBits(LI->getType())) {
UnavailableBlocks.push_back(DepBB);
continue;
}
} }
ValuesPerBlock.push_back(std::make_pair(DepBB, LD)); ValuesPerBlock.push_back(std::make_pair(DepBB, LD));
} else { } else {
// FIXME: Handle memset/memcpy.
UnavailableBlocks.push_back(DepBB); UnavailableBlocks.push_back(DepBB);
continue; continue;
} }
@ -1043,16 +1171,18 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
DEBUG(errs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n'); DEBUG(errs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');
// Convert the block information to a map, and insert coersions as needed.
DenseMap<BasicBlock*, Value*> BlockReplValues; DenseMap<BasicBlock*, Value*> BlockReplValues;
BlockReplValues.insert(ValuesPerBlock.begin(), ValuesPerBlock.end()); GetAvailableBlockValues(BlockReplValues, ValuesPerBlock, LI->getType(), TD);
// Perform PHI construction. // Perform PHI construction.
Value* v = GetValueForBlock(LI->getParent(), LI, BlockReplValues, true); Value *V = GetValueForBlock(LI->getParent(), LI, BlockReplValues, true);
LI->replaceAllUsesWith(v); LI->replaceAllUsesWith(V);
if (isa<PHINode>(v)) if (isa<PHINode>(V))
v->takeName(LI); V->takeName(LI);
if (isa<PointerType>(v->getType())) if (isa<PointerType>(V->getType()))
MD->invalidateCachedPointerInfo(v); MD->invalidateCachedPointerInfo(V);
toErase.push_back(LI); toErase.push_back(LI);
NumGVNLoad++; NumGVNLoad++;
return true; return true;
@ -1196,104 +1326,21 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
ValuesPerBlock.push_back(std::make_pair((*I)->getParent(), *I)); ValuesPerBlock.push_back(std::make_pair((*I)->getParent(), *I));
DenseMap<BasicBlock*, Value*> BlockReplValues; DenseMap<BasicBlock*, Value*> BlockReplValues;
BlockReplValues.insert(ValuesPerBlock.begin(), ValuesPerBlock.end()); GetAvailableBlockValues(BlockReplValues, ValuesPerBlock, LI->getType(), TD);
BlockReplValues[UnavailablePred] = NewLoad; BlockReplValues[UnavailablePred] = NewLoad;
// Perform PHI construction. // Perform PHI construction.
Value* v = GetValueForBlock(LI->getParent(), LI, BlockReplValues, true); Value *V = GetValueForBlock(LI->getParent(), LI, BlockReplValues, true);
LI->replaceAllUsesWith(v); LI->replaceAllUsesWith(V);
if (isa<PHINode>(v)) if (isa<PHINode>(V))
v->takeName(LI); V->takeName(LI);
if (isa<PointerType>(v->getType())) if (isa<PointerType>(V->getType()))
MD->invalidateCachedPointerInfo(v); MD->invalidateCachedPointerInfo(V);
toErase.push_back(LI); toErase.push_back(LI);
NumPRELoad++; NumPRELoad++;
return true; return true;
} }
/// CoerceAvailableValueToLoadType - If we saw a store of a value to memory, and
/// then a load from a must-aliased pointer of a different type, try to coerce
/// the stored value. LoadedTy is the type of the load we want to replace and
/// InsertPt is the place to insert new instructions.
///
/// If we can't do it, return null.
static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
const Type *LoadedTy,
Instruction *InsertPt,
const TargetData &TD) {
const Type *StoredValTy = StoredVal->getType();
uint64_t StoreSize = TD.getTypeSizeInBits(StoredValTy);
uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy);
// If the store and reload are the same size, we can always reuse it.
if (StoreSize == LoadSize) {
if (isa<PointerType>(StoredValTy) && isa<PointerType>(LoadedTy)) {
// Pointer to Pointer -> use bitcast.
return new BitCastInst(StoredVal, LoadedTy, "", InsertPt);
}
// Convert source pointers to integers, which can be bitcast.
if (isa<PointerType>(StoredValTy)) {
StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
}
const Type *TypeToCastTo = LoadedTy;
if (isa<PointerType>(TypeToCastTo))
TypeToCastTo = TD.getIntPtrType(StoredValTy->getContext());
if (StoredValTy != TypeToCastTo)
StoredVal = new BitCastInst(StoredVal, TypeToCastTo, "", InsertPt);
// Cast to pointer if the load needs a pointer type.
if (isa<PointerType>(LoadedTy))
StoredVal = new IntToPtrInst(StoredVal, LoadedTy, "", InsertPt);
return StoredVal;
}
// If the loaded value is smaller than the available value, then we can
// extract out a piece from it. If the available value is too small, then we
// can't do anything.
if (StoreSize < LoadSize)
return 0;
// Convert source pointers to integers, which can be manipulated.
if (isa<PointerType>(StoredValTy)) {
StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
}
// Convert vectors and fp to integer, which can be manipulated.
if (!isa<IntegerType>(StoredValTy)) {
StoredValTy = IntegerType::get(StoredValTy->getContext(), StoreSize);
StoredVal = new BitCastInst(StoredVal, StoredValTy, "", InsertPt);
}
// If this is a big-endian system, we need to shift the value down to the low
// bits so that a truncate will work.
if (TD.isBigEndian()) {
Constant *Val = ConstantInt::get(StoredVal->getType(), StoreSize-LoadSize);
StoredVal = BinaryOperator::CreateLShr(StoredVal, Val, "tmp", InsertPt);
}
// Truncate the integer to the right size now.
const Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadSize);
StoredVal = new TruncInst(StoredVal, NewIntTy, "trunc", InsertPt);
if (LoadedTy == NewIntTy)
return StoredVal;
// If the result is a pointer, inttoptr.
if (isa<PointerType>(LoadedTy))
return new IntToPtrInst(StoredVal, LoadedTy, "inttoptr", InsertPt);
// Otherwise, bitcast.
return new BitCastInst(StoredVal, LoadedTy, "bitcast", InsertPt);
}
/// processLoad - Attempt to eliminate a load, first by eliminating it /// processLoad - Attempt to eliminate a load, first by eliminating it
/// locally, and then attempting non-local elimination if that fails. /// locally, and then attempting non-local elimination if that fails.
bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {

View File

@ -60,11 +60,11 @@ define i32* @coerce_mustalias3(float %V, float* %P) {
;; i32 -> f32 load forwarding. ;; i32 -> f32 load forwarding.
define float @coerce_mustalias4(i32* %P, i1 %cond) { define float @coerce_mustalias4(i32* %P, i1 %cond) {
%A = load i32* %P %A = load i32* %P
br i1 %cond, label %T, label %T
T:
%P2 = bitcast i32* %P to float* %P2 = bitcast i32* %P to float*
%B = load float* %P2 %B = load float* %P2
br i1 %cond, label %T, label %F
T:
ret float %B ret float %B
F: F:
@ -117,3 +117,51 @@ define i8* @coerce_mustalias7(i64 %V, i64* %P) {
; CHECK: ret i8* ; CHECK: ret i8*
} }
;; non-local i32/float -> i8 load forwarding.
define i8 @coerce_mustalias_nonlocal0(i32* %P, i1 %cond) {
%P2 = bitcast i32* %P to float*
%P3 = bitcast i32* %P to i8*
br i1 %cond, label %T, label %F
T:
store i32 42, i32* %P
br label %Cont
F:
store float 1.0, float* %P2
br label %Cont
Cont:
%A = load i8* %P3
ret i8 %A
; CHECK: @coerce_mustalias_nonlocal0
; CHECK: Cont:
; CHECK: %A = phi i8 [
; CHECK-NOT: load
; CHECK: ret i8 %A
}
;; non-local i32 -> i8 partial redundancy load forwarding.
define i8 @coerce_mustalias_pre0(i32* %P, i1 %cond) {
%P3 = bitcast i32* %P to i8*
br i1 %cond, label %T, label %F
T:
store i32 42, i32* %P
br label %Cont
F:
br label %Cont
Cont:
%A = load i8* %P3
ret i8 %A
; CHECK: @coerce_mustalias_pre0
; CHECK: F:
; CHECK: load i8* %P3
; CHECK: Cont:
; CHECK: %A = phi i8 [
; CHECK-NOT: load
; CHECK: ret i8 %A
}