mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-26 20:57:15 +00:00
add support for forwarding mem intrinsic values to non-local loads.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@90697 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
0fce29b9d1
commit
cb9cbc4949
@ -1192,19 +1192,47 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
|
||||
struct AvailableValueInBlock {
|
||||
/// BB - The basic block in question.
|
||||
BasicBlock *BB;
|
||||
enum ValType {
|
||||
SimpleVal, // A simple offsetted value that is accessed.
|
||||
MemIntrin // A memory intrinsic which is loaded from.
|
||||
};
|
||||
|
||||
/// V - The value that is live out of the block.
|
||||
Value *V;
|
||||
/// Offset - The byte offset in V that is interesting for the load query.
|
||||
PointerIntPair<Value *, 1, ValType> Val;
|
||||
|
||||
/// Offset - The byte offset in Val that is interesting for the load query.
|
||||
unsigned Offset;
|
||||
|
||||
static AvailableValueInBlock get(BasicBlock *BB, Value *V,
|
||||
unsigned Offset = 0) {
|
||||
AvailableValueInBlock Res;
|
||||
Res.BB = BB;
|
||||
Res.V = V;
|
||||
Res.Val.setPointer(V);
|
||||
Res.Val.setInt(SimpleVal);
|
||||
Res.Offset = Offset;
|
||||
return Res;
|
||||
}
|
||||
|
||||
static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI,
|
||||
unsigned Offset = 0) {
|
||||
AvailableValueInBlock Res;
|
||||
Res.BB = BB;
|
||||
Res.Val.setPointer(MI);
|
||||
Res.Val.setInt(MemIntrin);
|
||||
Res.Offset = Offset;
|
||||
return Res;
|
||||
}
|
||||
|
||||
bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
|
||||
Value *getSimpleValue() const {
|
||||
assert(isSimpleValue() && "Wrong accessor");
|
||||
return Val.getPointer();
|
||||
}
|
||||
|
||||
MemIntrinsic *getMemIntrinValue() const {
|
||||
assert(!isSimpleValue() && "Wrong accessor");
|
||||
return cast<MemIntrinsic>(Val.getPointer());
|
||||
}
|
||||
};
|
||||
|
||||
/// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock,
|
||||
@ -1221,30 +1249,33 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
|
||||
const Type *LoadTy = LI->getType();
|
||||
|
||||
for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
|
||||
BasicBlock *BB = ValuesPerBlock[i].BB;
|
||||
Value *AvailableVal = ValuesPerBlock[i].V;
|
||||
unsigned Offset = ValuesPerBlock[i].Offset;
|
||||
const AvailableValueInBlock &AV = ValuesPerBlock[i];
|
||||
BasicBlock *BB = AV.BB;
|
||||
|
||||
if (SSAUpdate.HasValueForBlock(BB))
|
||||
continue;
|
||||
|
||||
if (AvailableVal->getType() != LoadTy) {
|
||||
assert(TD && "Need target data to handle type mismatch case");
|
||||
AvailableVal = GetStoreValueForLoad(AvailableVal, Offset, LoadTy,
|
||||
BB->getTerminator(), *TD);
|
||||
|
||||
if (Offset) {
|
||||
DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\n"
|
||||
<< *ValuesPerBlock[i].V << '\n'
|
||||
|
||||
unsigned Offset = AV.Offset;
|
||||
|
||||
Value *AvailableVal;
|
||||
if (AV.isSimpleValue()) {
|
||||
AvailableVal = AV.getSimpleValue();
|
||||
if (AvailableVal->getType() != LoadTy) {
|
||||
assert(TD && "Need target data to handle type mismatch case");
|
||||
AvailableVal = GetStoreValueForLoad(AvailableVal, Offset, LoadTy,
|
||||
BB->getTerminator(), *TD);
|
||||
|
||||
DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " "
|
||||
<< *AV.getSimpleValue() << '\n'
|
||||
<< *AvailableVal << '\n' << "\n\n\n");
|
||||
}
|
||||
|
||||
|
||||
DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\n"
|
||||
<< *ValuesPerBlock[i].V << '\n'
|
||||
} else {
|
||||
AvailableVal = GetMemInstValueForLoad(AV.getMemIntrinValue(), Offset,
|
||||
LoadTy, BB->getTerminator(), *TD);
|
||||
DEBUG(errs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
|
||||
<< " " << *AV.getMemIntrinValue() << '\n'
|
||||
<< *AvailableVal << '\n' << "\n\n\n");
|
||||
}
|
||||
|
||||
SSAUpdate.AddAvailableValue(BB, AvailableVal);
|
||||
}
|
||||
|
||||
@ -1324,19 +1355,20 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
// If the clobbering value is a memset/memcpy/memmove, see if we can
|
||||
// forward a value on from it.
|
||||
if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) {
|
||||
if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) {
|
||||
if (TD == 0)
|
||||
TD = getAnalysisIfAvailable<TargetData>();
|
||||
if (TD) {
|
||||
int Offset = AnalyzeLoadFromClobberingMemInst(L, DepMI, *TD);
|
||||
if (Offset != -1)
|
||||
AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L,*TD);
|
||||
int Offset = AnalyzeLoadFromClobberingMemInst(LI, DepMI, *TD);
|
||||
if (Offset != -1) {
|
||||
ValuesPerBlock.push_back(AvailableValueInBlock::getMI(DepBB, DepMI,
|
||||
Offset));
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
UnavailableBlocks.push_back(DepBB);
|
||||
continue;
|
||||
@ -1462,19 +1494,25 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
|
||||
// to eliminate LI even if we insert uses in the other predecessors, we will
|
||||
// end up increasing code size. Reject this by scanning for LI.
|
||||
for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
|
||||
if (ValuesPerBlock[i].V == LI)
|
||||
if (ValuesPerBlock[i].isSimpleValue() &&
|
||||
ValuesPerBlock[i].getSimpleValue() == LI)
|
||||
return false;
|
||||
|
||||
// FIXME: It is extremely unclear what this loop is doing, other than
|
||||
// artificially restricting loadpre.
|
||||
if (isSinglePred) {
|
||||
bool isHot = false;
|
||||
for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
|
||||
if (Instruction *I = dyn_cast<Instruction>(ValuesPerBlock[i].V))
|
||||
for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
|
||||
const AvailableValueInBlock &AV = ValuesPerBlock[i];
|
||||
if (AV.isSimpleValue())
|
||||
// "Hot" Instruction is in some loop (because it dominates its dep.
|
||||
// instruction).
|
||||
if (DT->dominates(LI, I)) {
|
||||
isHot = true;
|
||||
break;
|
||||
}
|
||||
if (Instruction *I = dyn_cast<Instruction>(AV.getSimpleValue()))
|
||||
if (DT->dominates(LI, I)) {
|
||||
isHot = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// We are interested only in "hot" instructions. We don't want to do any
|
||||
// mis-optimizations here.
|
||||
|
@ -163,6 +163,31 @@ entry:
|
||||
; CHECK-NEXT: ret float
|
||||
}
|
||||
|
||||
;; non-local memset -> i16 load forwarding.
|
||||
define i16 @memset_to_i16_nonlocal0(i16* %P, i1 %cond) {
|
||||
%P3 = bitcast i16* %P to i8*
|
||||
br i1 %cond, label %T, label %F
|
||||
T:
|
||||
tail call void @llvm.memset.i64(i8* %P3, i8 1, i64 400, i32 1)
|
||||
br label %Cont
|
||||
|
||||
F:
|
||||
tail call void @llvm.memset.i64(i8* %P3, i8 2, i64 400, i32 1)
|
||||
br label %Cont
|
||||
|
||||
Cont:
|
||||
%P2 = getelementptr i16* %P, i32 4
|
||||
%A = load i16* %P2
|
||||
ret i16 %A
|
||||
|
||||
; CHECK: @memset_to_i16_nonlocal0
|
||||
; CHECK: Cont:
|
||||
; CHECK-NEXT: %A = phi i16 [ 514, %F ], [ 257, %T ]
|
||||
; CHECK-NOT: load
|
||||
; CHECK: ret i16 %A
|
||||
}
|
||||
|
||||
|
||||
declare void @llvm.memset.i64(i8* nocapture, i8, i64, i32) nounwind
|
||||
|
||||
|
||||
@ -192,6 +217,7 @@ Cont:
|
||||
; CHECK: ret i8 %A
|
||||
}
|
||||
|
||||
|
||||
;; non-local i32/float -> i8 load forwarding. This also tests that the "P3"
|
||||
;; bitcast equivalence can be properly phi translated.
|
||||
define i8 @coerce_mustalias_nonlocal1(i32* %P, i1 %cond) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user