Rework the rewriting of loads and stores for vector and integer allocas
to properly handle the combinations of these with split integer loads
and stores.

This essentially replaces Evan's r168227 by refactoring the code in a
different way, and trying to mirror that refactoring in both the load
and store sides of the rewriting.

Generally speaking there was some really problematic duplicated code
here that led to poorly founded assumptions and then subtle bugs. Now
much of the code actually flows through and follows a more consistent
style and logical path. There is still a tiny bit of duplication on the
store side of things, but it is much less bad.

This also changes the logic to never re-use a load or store instruction
as that was simply too error prone in practice.

I've added a few tests (one a reduction of the one in Evan's original
patch, which happened to be the same as the report in PR14349). I'm
going to look at adding a few more tests for things I found and fixed in
passing (such as the volatile tests in the vectorizable predicate).

This patch has survived bootstrap, and modulo one bugfix survived
Duncan's test suite, but let me know if anything else explodes.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@168346 91177308-0d34-0410-b5e6-96231b3b80d8
commit f5837aacd4 (parent 1f9f73a4c6)
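A note on the terminology used throughout this patch: a load or store is "split" when the alloca partition it lands in covers fewer bytes than the type being loaded or stored, so only a slice of the value belongs to that partition. A minimal standalone sketch of that predicate, with hypothetical types standing in for the pass's DataLayout queries (illustration only, not pass code):

#include <cstdint>

// Hypothetical stand-in for one partition of an alloca's byte range.
struct Partition {
  uint64_t BeginOffset; // first byte of the partition within the alloca
  uint64_t EndOffset;   // one past the last byte of the partition
};

// Mirrors the spirit of the new IsSplitIntLoad test in visitLoadInst: the
// access is split when the partition holds fewer bytes than the accessed
// type, and the rewriter must then extract or insert an integer of
// Size * 8 bits rather than reusing the original instruction.
static bool isSplitAccess(const Partition &P, uint64_t AccessStoreSize) {
  uint64_t Size = P.EndOffset - P.BeginOffset;
  return Size < AccessStoreSize;
}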
@@ -1382,11 +1382,7 @@ class SROA : public FunctionPass {
   /// \brief A collection of instructions to delete.
   /// We try to batch deletions to simplify code and make things a bit more
   /// efficient.
-  SmallVector<Instruction *, 8> DeadInsts;
+  SetVector<Instruction *, SmallVector<Instruction *, 8> > DeadInsts;
 
-  /// \brief A set to prevent repeatedly marking an instruction split into many
-  /// uses as dead. Only used to guard insertion into DeadInsts.
-  SmallPtrSet<Instruction *, 4> DeadSplitInsts;
-
   /// \brief Post-promotion worklist.
   ///
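The container swap above is what lets the DeadSplitInsts guard set disappear: a SetVector keeps the vector-style ordering that deleteDeadInstructions pops from, but silently rejects duplicate insertions, so an instruction reached through many split uses is queued exactly once. A small plain-C++ simulation of the contract being relied on (not the real llvm/ADT/SetVector.h):

#include <set>
#include <vector>

// Toy stand-in for llvm::SetVector<T *>: insertion order plus uniqueness.
template <typename T> class OrderedUniqueQueue {
  std::vector<T *> Vector;
  std::set<T *> Set;

public:
  // Like SetVector::insert: returns false and does nothing on a duplicate.
  bool insert(T *V) {
    if (!Set.insert(V).second)
      return false;
    Vector.push_back(V);
    return true;
  }

  bool empty() const { return Vector.empty(); }

  // Like SetVector::pop_back_val: removes and returns the most recent entry.
  T *pop_back_val() {
    T *V = Vector.back();
    Vector.pop_back();
    Set.erase(V);
    return V;
  }
};

With a plain SmallVector, every push_back of a split instruction had to be guarded by a separate membership test, and a missed guard meant the deletion loop could visit the same instruction twice.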
@@ -1573,7 +1569,7 @@ private:
     do {
       LoadInst *LI = Loads.pop_back_val();
       LI->replaceAllUsesWith(NewPN);
-      Pass.DeadInsts.push_back(LI);
+      Pass.DeadInsts.insert(LI);
     } while (!Loads.empty());
 
     // Inject loads into all of the pred blocks.
@@ -1717,7 +1713,7 @@ private:
 
       DEBUG(dbgs() << " speculated to: " << *V << "\n");
       LI->replaceAllUsesWith(V);
-      Pass.DeadInsts.push_back(LI);
+      Pass.DeadInsts.insert(LI);
     }
   }
 };
@@ -2134,8 +2130,13 @@ static bool isVectorPromotionViable(const DataLayout &TD,
     } else if (I->U->get()->getType()->getPointerElementType()->isStructTy()) {
       // Disable vector promotion when there are loads or stores of an FCA.
       return false;
-    } else if (!isa<LoadInst>(I->U->getUser()) &&
-               !isa<StoreInst>(I->U->getUser())) {
+    } else if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) {
+      if (LI->isVolatile())
+        return false;
+    } else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) {
+      if (SI->isVolatile())
+        return false;
+    } else {
       return false;
     }
   }
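The new arms above encode two facts that the old `!isa<LoadInst> && !isa<StoreInst>` test conflated: only loads and stores are eligible at all, and even those are ruled out when volatile (the commit message's "volatile tests in the vectorizable predicate"). A standalone sketch of the same shape, with dynamic_cast playing the role of dyn_cast over hypothetical stand-in classes:

// Stand-ins for the instruction hierarchy; not LLVM's classes.
struct Inst { virtual ~Inst() = default; };
struct Load : Inst { bool Volatile = false; };
struct Store : Inst { bool Volatile = false; };

// Mirrors the rewritten predicate arm: volatile loads and stores, and
// anything that is neither a load nor a store, block vector promotion.
static bool allowsVectorPromotion(const Inst *I) {
  if (const Load *LI = dynamic_cast<const Load *>(I))
    return !LI->Volatile;
  if (const Store *SI = dynamic_cast<const Store *>(I))
    return !SI->Volatile;
  return false;
}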
@@ -2241,18 +2242,23 @@ static bool isIntegerWideningViable(const DataLayout &TD,
 static Value *extractInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *V,
                              IntegerType *Ty, uint64_t Offset,
                              const Twine &Name) {
+  DEBUG(dbgs() << " start: " << *V << "\n");
   IntegerType *IntTy = cast<IntegerType>(V->getType());
   assert(DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize(IntTy) &&
          "Element extends past full value");
   uint64_t ShAmt = 8*Offset;
   if (DL.isBigEndian())
     ShAmt = 8*(DL.getTypeStoreSize(IntTy) - DL.getTypeStoreSize(Ty) - Offset);
-  if (ShAmt)
+  if (ShAmt) {
     V = IRB.CreateLShr(V, ShAmt, Name + ".shift");
+    DEBUG(dbgs() << " shifted: " << *V << "\n");
+  }
   assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
          "Cannot extract to a larger integer!");
-  if (Ty != IntTy)
+  if (Ty != IntTy) {
     V = IRB.CreateTrunc(V, Ty, Name + ".trunc");
+    DEBUG(dbgs() << " trunced: " << *V << "\n");
+  }
   return V;
 }
 
@@ -2262,20 +2268,27 @@ static Value *insertInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *Old,
   IntegerType *Ty = cast<IntegerType>(V->getType());
   assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
          "Cannot insert a larger integer!");
-  if (Ty != IntTy)
+  DEBUG(dbgs() << " start: " << *V << "\n");
+  if (Ty != IntTy) {
     V = IRB.CreateZExt(V, IntTy, Name + ".ext");
+    DEBUG(dbgs() << " extended: " << *V << "\n");
+  }
   assert(DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize(IntTy) &&
          "Element store outside of alloca store");
   uint64_t ShAmt = 8*Offset;
   if (DL.isBigEndian())
     ShAmt = 8*(DL.getTypeStoreSize(IntTy) - DL.getTypeStoreSize(Ty) - Offset);
-  if (ShAmt)
+  if (ShAmt) {
     V = IRB.CreateShl(V, ShAmt, Name + ".shift");
+    DEBUG(dbgs() << " shifted: " << *V << "\n");
+  }
 
   if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) {
     APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt);
     Old = IRB.CreateAnd(Old, Mask, Name + ".mask");
+    DEBUG(dbgs() << " masked: " << *Old << "\n");
     V = IRB.CreateOr(Old, V, Name + ".insert");
+    DEBUG(dbgs() << " inserted: " << *V << "\n");
   }
   return V;
 }
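The shift amounts in extractInteger and insertInteger are the subtle part: a little-endian target reaches the field's bytes by shifting 8*Offset, while a big-endian target has to count the offset from the most significant end. A host-side simulation with fixed widths (an i64 container and an i16 field are assumed here purely for illustration; this is arithmetic, not pass code):

#include <cassert>
#include <cstdint>

// Mirrors extractInteger for an i16 field inside an i64: lshr then trunc.
static uint16_t extractField(uint64_t V, uint64_t Offset, bool BigEndian) {
  const uint64_t WideBytes = 8, FieldBytes = 2;
  uint64_t ShAmt = 8 * Offset;
  if (BigEndian) // count from the most significant end instead
    ShAmt = 8 * (WideBytes - FieldBytes - Offset);
  return static_cast<uint16_t>(V >> ShAmt);
}

// Mirrors insertInteger: zext, shift into place, clear the hole, then or.
static uint64_t insertField(uint64_t Old, uint16_t Field, uint64_t Offset,
                            bool BigEndian) {
  const uint64_t WideBytes = 8, FieldBytes = 2;
  uint64_t ShAmt = 8 * Offset;
  if (BigEndian)
    ShAmt = 8 * (WideBytes - FieldBytes - Offset);
  uint64_t Mask = ~(uint64_t(0xFFFF) << ShAmt);
  return (Old & Mask) | (uint64_t(Field) << ShAmt);
}

int main() {
  uint64_t V = 0x1122334455667788;
  assert(extractField(V, 2, /*BigEndian=*/false) == 0x5566); // bytes 2-3, LE
  assert(extractField(V, 2, /*BigEndian=*/true) == 0x3344);  // bytes 2-3, BE
  assert(insertField(0, 0xABCD, 2, /*BigEndian=*/false) == 0xABCD0000);
  return 0;
}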
@@ -2442,30 +2455,21 @@ private:
   void deleteIfTriviallyDead(Value *V) {
     Instruction *I = cast<Instruction>(V);
     if (isInstructionTriviallyDead(I))
-      Pass.DeadInsts.push_back(I);
+      Pass.DeadInsts.insert(I);
   }
 
-  bool rewriteVectorizedLoadInst(IRBuilder<> &IRB, LoadInst &LI, Value *OldOp) {
-    Value *Result;
+  Value *rewriteVectorizedLoadInst(IRBuilder<> &IRB, LoadInst &LI, Value *OldOp) {
+    Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+                                     getName(".load"));
     if (LI.getType() == VecTy->getElementType() ||
         BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset) {
-      Result = IRB.CreateExtractElement(
-        IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".load")),
-        getIndex(IRB, BeginOffset), getName(".extract"));
-    } else {
-      Result = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
-                                     getName(".load"));
+      V = IRB.CreateExtractElement(V, getIndex(IRB, BeginOffset),
+                                   getName(".extract"));
     }
-    if (Result->getType() != LI.getType())
-      Result = convertValue(TD, IRB, Result, LI.getType());
-    LI.replaceAllUsesWith(Result);
-    Pass.DeadInsts.push_back(&LI);
-
-    DEBUG(dbgs() << " to: " << *Result << "\n");
-    return true;
+    return V;
   }
 
-  bool rewriteIntegerLoad(IRBuilder<> &IRB, LoadInst &LI) {
+  Value *rewriteIntegerLoad(IRBuilder<> &IRB, LoadInst &LI) {
     assert(IntTy && "We cannot insert an integer to the alloca");
     assert(!LI.isVolatile());
     Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
@@ -2473,12 +2477,10 @@ private:
     V = convertValue(TD, IRB, V, IntTy);
     assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
     uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
-    V = extractInteger(TD, IRB, V, cast<IntegerType>(LI.getType()), Offset,
-                       getName(".extract"));
-    LI.replaceAllUsesWith(V);
-    Pass.DeadInsts.push_back(&LI);
-    DEBUG(dbgs() << " to: " << *V << "\n");
-    return true;
+    if (Offset > 0 || EndOffset < NewAllocaEndOffset)
+      V = extractInteger(TD, IRB, V, cast<IntegerType>(LI.getType()), Offset,
+                         getName(".extract"));
+    return V;
   }
 
   bool visitLoadInst(LoadInst &LI) {
@@ -2488,7 +2490,29 @@ private:
     IRBuilder<> IRB(&LI);
 
     uint64_t Size = EndOffset - BeginOffset;
-    if (Size < TD.getTypeStoreSize(LI.getType())) {
+    bool IsSplitIntLoad = Size < TD.getTypeStoreSize(LI.getType());
+    Type *TargetTy = IsSplitIntLoad ? Type::getIntNTy(LI.getContext(), Size * 8)
+                                    : LI.getType();
+    bool IsPtrAdjusted = false;
+    Value *V;
+    if (VecTy) {
+      V = rewriteVectorizedLoadInst(IRB, LI, OldOp);
+    } else if (IntTy && LI.getType()->isIntegerTy()) {
+      V = rewriteIntegerLoad(IRB, LI);
+    } else if (BeginOffset == NewAllocaBeginOffset &&
+               canConvertValue(TD, NewAllocaTy, LI.getType())) {
+      V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+                                LI.isVolatile(), getName(".load"));
+    } else {
+      Type *LTy = TargetTy->getPointerTo();
+      V = IRB.CreateAlignedLoad(getAdjustedAllocaPtr(IRB, LTy),
+                                getPartitionTypeAlign(TargetTy),
+                                LI.isVolatile(), getName(".load"));
+      IsPtrAdjusted = true;
+    }
+    V = convertValue(TD, IRB, V, TargetTy);
+
+    if (IsSplitIntLoad) {
       assert(!LI.isVolatile());
       assert(LI.getType()->isIntegerTy() &&
              "Only integer type loads and stores are split");
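Before the diff continues into the split-load tail below, it is worth condensing the dispatch the hunk above introduces: every load now produces a value V through one of four strategies, and only then is the split-load recomposition applied on top. A simplified sketch of that decision, with hypothetical booleans standing in for the rewriter's state (not the pass itself):

// The four ways visitLoadInst now materializes the loaded value.
enum class LoadRewriteKind {
  VectorElement,   // alloca is vector-promoted: load whole vector, extract
  IntegerSubfield, // alloca is integer-widened: load wide int, lshr+trunc
  WholeAlloca,     // partition spans the alloca and the types inter-convert
  AdjustedPointer  // fallback: load through a rewritten, offset pointer
};

static LoadRewriteKind classifyLoad(bool VecTy, bool IntTyAndIntLoad,
                                    bool CoversAllocaAndConvertible) {
  if (VecTy)
    return LoadRewriteKind::VectorElement;
  if (IntTyAndIntLoad)
    return LoadRewriteKind::IntegerSubfield;
  if (CoversAllocaAndConvertible)
    return LoadRewriteKind::WholeAlloca;
  return LoadRewriteKind::AdjustedPointer; // sets IsPtrAdjusted in the pass
}

Only the first three results leave the rewritten access on the new alloca itself, which is why the function's promotability signal becomes `!LI.isVolatile() && !IsPtrAdjusted`.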
@@ -2498,21 +2522,8 @@ private:
       assert(LI.getType()->getIntegerBitWidth() ==
              TD.getTypeAllocSizeInBits(OldAI.getAllocatedType()) &&
              "Only alloca-wide loads can be split and recomposed");
-      IntegerType *NarrowTy = Type::getIntNTy(LI.getContext(), Size * 8);
-      bool IsConvertable = (BeginOffset - NewAllocaBeginOffset == 0) &&
-                           canConvertValue(TD, NewAllocaTy, NarrowTy);
-      Value *V;
       // Move the insertion point just past the load so that we can refer to it.
       IRB.SetInsertPoint(llvm::next(BasicBlock::iterator(&LI)));
-      if (IsConvertable)
-        V = convertValue(TD, IRB,
-                         IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
-                                               getName(".load")),
-                         NarrowTy);
-      else
-        V = IRB.CreateAlignedLoad(
-          getAdjustedAllocaPtr(IRB, NarrowTy->getPointerTo()),
-          getPartitionTypeAlign(NarrowTy), getName(".load"));
       // Create a placeholder value with the same type as LI to use as the
       // basis for the new value. This allows us to replace the uses of LI with
       // the computed value, and then replace the placeholder with LI, leaving
@@ -2524,104 +2535,40 @@ private:
       LI.replaceAllUsesWith(V);
       Placeholder->replaceAllUsesWith(&LI);
       delete Placeholder;
-      if (Pass.DeadSplitInsts.insert(&LI))
-        Pass.DeadInsts.push_back(&LI);
-      DEBUG(dbgs() << " to: " << *V << "\n");
-      return IsConvertable;
+    } else {
+      LI.replaceAllUsesWith(V);
     }
 
-    if (VecTy)
-      return rewriteVectorizedLoadInst(IRB, LI, OldOp);
-    if (IntTy && LI.getType()->isIntegerTy())
-      return rewriteIntegerLoad(IRB, LI);
-
-    if (BeginOffset == NewAllocaBeginOffset &&
-        canConvertValue(TD, NewAllocaTy, LI.getType())) {
-      Value *NewLI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
-                                           LI.isVolatile(), getName(".load"));
-      Value *NewV = convertValue(TD, IRB, NewLI, LI.getType());
-      LI.replaceAllUsesWith(NewV);
-      Pass.DeadInsts.push_back(&LI);
-
-      DEBUG(dbgs() << " to: " << *NewLI << "\n");
-      return !LI.isVolatile();
-    }
-
-    assert(!IntTy && "Invalid load found with int-op widening enabled");
-
-    Value *NewPtr = getAdjustedAllocaPtr(IRB,
-                                         LI.getPointerOperand()->getType());
-    LI.setOperand(0, NewPtr);
-    LI.setAlignment(getPartitionTypeAlign(LI.getType()));
-    DEBUG(dbgs() << " to: " << LI << "\n");
-
+    Pass.DeadInsts.insert(&LI);
     deleteIfTriviallyDead(OldOp);
-    return NewPtr == &NewAI && !LI.isVolatile();
+    DEBUG(dbgs() << " to: " << *V << "\n");
+    return !LI.isVolatile() && !IsPtrAdjusted;
   }
 
-  bool rewriteWideStoreInst(IRBuilder<> &IRB, StoreInst &SI, Type *ValueTy,
-                            unsigned Size) {
-    assert(!SI.isVolatile());
-    assert(ValueTy->isIntegerTy() &&
-           "Only integer type loads and stores are split");
-    assert(ValueTy->getIntegerBitWidth() ==
-           TD.getTypeStoreSizeInBits(ValueTy) &&
-           "Non-byte-multiple bit width");
-    assert(ValueTy->getIntegerBitWidth() ==
-           TD.getTypeSizeInBits(OldAI.getAllocatedType()) &&
-           "Only alloca-wide stores can be split and recomposed");
-    IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), Size * 8);
-    Value *V = extractInteger(TD, IRB, SI.getValueOperand(), NarrowTy,
-                              BeginOffset, getName(".extract"));
-    StoreInst *NewSI;
-    bool IsConvertable = (BeginOffset - NewAllocaBeginOffset == 0) &&
-                         canConvertValue(TD, NarrowTy, NewAllocaTy);
-    if (IsConvertable)
-      NewSI = IRB.CreateAlignedStore(convertValue(TD, IRB, V, NewAllocaTy),
-                                     &NewAI, NewAI.getAlignment());
-    else
-      NewSI = IRB.CreateAlignedStore(
-        V, getAdjustedAllocaPtr(IRB, NarrowTy->getPointerTo()),
-        getPartitionTypeAlign(NarrowTy));
-    (void)NewSI;
-    if (Pass.DeadSplitInsts.insert(&SI))
-      Pass.DeadInsts.push_back(&SI);
-
-    DEBUG(dbgs() << " to: " << *NewSI << "\n");
-    return IsConvertable;
-  }
-
-  bool rewriteVectorizedStoreInst(IRBuilder<> &IRB, StoreInst &SI,
-                                  Value *OldOp) {
-    Value *V = SI.getValueOperand();
-    Type *ValueTy = V->getType();
-    if (ValueTy == ElementTy ||
+  bool rewriteVectorizedStoreInst(IRBuilder<> &IRB, Value *V,
+                                  StoreInst &SI, Value *OldOp) {
+    if (V->getType() == ElementTy ||
         BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset) {
-      if (ValueTy != ElementTy)
+      if (V->getType() != ElementTy)
         V = convertValue(TD, IRB, V, ElementTy);
       LoadInst *LI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
                                            getName(".load"));
       V = IRB.CreateInsertElement(LI, V, getIndex(IRB, BeginOffset),
                                   getName(".insert"));
-    } else if (ValueTy != VecTy) {
-      uint64_t Size = EndOffset - BeginOffset;
-      if (Size < TD.getTypeStoreSize(ValueTy))
-        return rewriteWideStoreInst(IRB, SI, ValueTy, Size);
-
+    } else if (V->getType() != VecTy) {
       V = convertValue(TD, IRB, V, VecTy);
     }
     StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
-    Pass.DeadInsts.push_back(&SI);
+    Pass.DeadInsts.insert(&SI);
 
     (void)Store;
     DEBUG(dbgs() << " to: " << *Store << "\n");
     return true;
   }
 
-  bool rewriteIntegerStore(IRBuilder<> &IRB, StoreInst &SI) {
+  bool rewriteIntegerStore(IRBuilder<> &IRB, Value *V, StoreInst &SI) {
     assert(IntTy && "We cannot extract an integer from the alloca");
     assert(!SI.isVolatile());
-    Value *V = SI.getValueOperand();
     if (TD.getTypeSizeInBits(V->getType()) != IntTy->getBitWidth()) {
       Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
                                          getName(".oldload"));
@@ -2633,7 +2580,7 @@ private:
     }
     V = convertValue(TD, IRB, V, NewAllocaTy);
     StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
-    Pass.DeadInsts.push_back(&SI);
+    Pass.DeadInsts.insert(&SI);
     (void)Store;
     DEBUG(dbgs() << " to: " << *Store << "\n");
     return true;
@@ -2645,46 +2592,53 @@ private:
     assert(OldOp == OldPtr);
     IRBuilder<> IRB(&SI);
 
-    if (VecTy)
-      return rewriteVectorizedStoreInst(IRB, SI, OldOp);
-    Type *ValueTy = SI.getValueOperand()->getType();
-
-    uint64_t Size = EndOffset - BeginOffset;
-    if (Size < TD.getTypeStoreSize(ValueTy))
-      return rewriteWideStoreInst(IRB, SI, ValueTy, Size);
-
-    if (IntTy && ValueTy->isIntegerTy())
-      return rewriteIntegerStore(IRB, SI);
+    Value *V = SI.getValueOperand();
 
     // Strip all inbounds GEPs and pointer casts to try to dig out any root
     // alloca that should be re-examined after promoting this alloca.
-    if (ValueTy->isPointerTy())
-      if (AllocaInst *AI = dyn_cast<AllocaInst>(SI.getValueOperand()
-                                                  ->stripInBoundsOffsets()))
+    if (V->getType()->isPointerTy())
+      if (AllocaInst *AI = dyn_cast<AllocaInst>(V->stripInBoundsOffsets()))
         Pass.PostPromotionWorklist.insert(AI);
 
-    if (BeginOffset == NewAllocaBeginOffset &&
-        canConvertValue(TD, ValueTy, NewAllocaTy)) {
-      Value *NewV = convertValue(TD, IRB, SI.getValueOperand(), NewAllocaTy);
-      StoreInst *NewSI = IRB.CreateAlignedStore(NewV, &NewAI, NewAI.getAlignment(),
-                                                SI.isVolatile());
-      (void)NewSI;
-      Pass.DeadInsts.push_back(&SI);
-
-      DEBUG(dbgs() << " to: " << *NewSI << "\n");
-      return !SI.isVolatile();
+    uint64_t Size = EndOffset - BeginOffset;
+    if (Size < TD.getTypeStoreSize(V->getType())) {
+      assert(!SI.isVolatile());
+      assert(V->getType()->isIntegerTy() &&
+             "Only integer type loads and stores are split");
+      assert(V->getType()->getIntegerBitWidth() ==
+             TD.getTypeStoreSizeInBits(V->getType()) &&
+             "Non-byte-multiple bit width");
+      assert(V->getType()->getIntegerBitWidth() ==
+             TD.getTypeSizeInBits(OldAI.getAllocatedType()) &&
+             "Only alloca-wide stores can be split and recomposed");
+      IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), Size * 8);
+      V = extractInteger(TD, IRB, V, NarrowTy, BeginOffset,
+                         getName(".extract"));
     }
 
-    assert(!IntTy && "Invalid store found with int-op widening enabled");
-
-    Value *NewPtr = getAdjustedAllocaPtr(IRB,
-                                         SI.getPointerOperand()->getType());
-    SI.setOperand(1, NewPtr);
-    SI.setAlignment(getPartitionTypeAlign(SI.getValueOperand()->getType()));
-    DEBUG(dbgs() << " to: " << SI << "\n");
-
+    if (VecTy)
+      return rewriteVectorizedStoreInst(IRB, V, SI, OldOp);
+    if (IntTy && V->getType()->isIntegerTy())
+      return rewriteIntegerStore(IRB, V, SI);
+
+    StoreInst *NewSI;
+    if (BeginOffset == NewAllocaBeginOffset &&
+        canConvertValue(TD, V->getType(), NewAllocaTy)) {
+      V = convertValue(TD, IRB, V, NewAllocaTy);
+      NewSI = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
+                                     SI.isVolatile());
+    } else {
+      Value *NewPtr = getAdjustedAllocaPtr(IRB, V->getType()->getPointerTo());
+      NewSI = IRB.CreateAlignedStore(V, NewPtr,
+                                     getPartitionTypeAlign(V->getType()),
+                                     SI.isVolatile());
+    }
+    (void)NewSI;
+    Pass.DeadInsts.insert(&SI);
     deleteIfTriviallyDead(OldOp);
-    return NewPtr == &NewAI && !SI.isVolatile();
+
+    DEBUG(dbgs() << " to: " << *NewSI << "\n");
+    return NewSI->getPointerOperand() == &NewAI && !SI.isVolatile();
   }
 
   bool visitMemSetInst(MemSetInst &II) {
@@ -2704,8 +2658,7 @@ private:
     }
 
     // Record this instruction for deletion.
-    if (Pass.DeadSplitInsts.insert(&II))
-      Pass.DeadInsts.push_back(&II);
+    Pass.DeadInsts.insert(&II);
 
     Type *AllocaTy = NewAI.getAllocatedType();
     Type *ScalarTy = AllocaTy->getScalarType();
@@ -2861,8 +2814,7 @@ private:
       return false;
     }
     // Record this instruction for deletion.
-    if (Pass.DeadSplitInsts.insert(&II))
-      Pass.DeadInsts.push_back(&II);
+    Pass.DeadInsts.insert(&II);
 
     bool IsWholeAlloca = BeginOffset == NewAllocaBeginOffset &&
                          EndOffset == NewAllocaEndOffset;
@@ -2972,8 +2924,7 @@ private:
     assert(II.getArgOperand(1) == OldPtr);
 
     // Record this instruction for deletion.
-    if (Pass.DeadSplitInsts.insert(&II))
-      Pass.DeadInsts.push_back(&II);
+    Pass.DeadInsts.insert(&II);
 
     ConstantInt *Size
       = ConstantInt::get(cast<IntegerType>(II.getArgOperand(0)->getType()),
@@ -3542,7 +3493,7 @@ bool SROA::runOnAlloca(AllocaInst &AI) {
          DI != DE; ++DI) {
       Changed = true;
       (*DI)->replaceAllUsesWith(UndefValue::get((*DI)->getType()));
-      DeadInsts.push_back(*DI);
+      DeadInsts.insert(*DI);
     }
     for (AllocaPartitioning::dead_op_iterator DO = P.dead_op_begin(),
                                               DE = P.dead_op_end();
@@ -3553,7 +3504,7 @@ bool SROA::runOnAlloca(AllocaInst &AI) {
     if (Instruction *OldI = dyn_cast<Instruction>(OldV))
       if (isInstructionTriviallyDead(OldI)) {
         Changed = true;
-        DeadInsts.push_back(OldI);
+        DeadInsts.insert(OldI);
       }
   }
 
@@ -3574,7 +3525,6 @@ bool SROA::runOnAlloca(AllocaInst &AI) {
 /// We also record the alloca instructions deleted here so that they aren't
 /// subsequently handed to mem2reg to promote.
 void SROA::deleteDeadInstructions(SmallPtrSet<AllocaInst*, 4> &DeletedAllocas) {
-  DeadSplitInsts.clear();
   while (!DeadInsts.empty()) {
     Instruction *I = DeadInsts.pop_back_val();
     DEBUG(dbgs() << "Deleting dead instruction: " << *I << "\n");
@@ -3586,7 +3536,7 @@ void SROA::deleteDeadInstructions(SmallPtrSet<AllocaInst*, 4> &DeletedAllocas) {
       // Zero out the operand and see if it becomes trivially dead.
       *OI = 0;
       if (isInstructionTriviallyDead(U))
-        DeadInsts.push_back(U);
+        DeadInsts.insert(U);
     }
 
   if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
@@ -1100,12 +1100,12 @@ entry:
   %imag = getelementptr inbounds { float, float }* %retval, i32 0, i32 1
   store float %phi.real, float* %real
   store float %phi.imag, float* %imag
+; CHECK-NEXT: %[[real_convert:.*]] = bitcast float %[[real]] to i32
 ; CHECK-NEXT: %[[imag_convert:.*]] = bitcast float %[[imag]] to i32
 ; CHECK-NEXT: %[[imag_ext:.*]] = zext i32 %[[imag_convert]] to i64
 ; CHECK-NEXT: %[[imag_shift:.*]] = shl i64 %[[imag_ext]], 32
 ; CHECK-NEXT: %[[imag_mask:.*]] = and i64 undef, 4294967295
 ; CHECK-NEXT: %[[imag_insert:.*]] = or i64 %[[imag_mask]], %[[imag_shift]]
-; CHECK-NEXT: %[[real_convert:.*]] = bitcast float %[[real]] to i32
 ; CHECK-NEXT: %[[real_ext:.*]] = zext i32 %[[real_convert]] to i64
 ; CHECK-NEXT: %[[real_mask:.*]] = and i64 %[[imag_insert]], -4294967296
 ; CHECK-NEXT: %[[real_insert:.*]] = or i64 %[[real_mask]], %[[real_ext]]
@@ -1,25 +0,0 @@
-; RUN: opt < %s -sroa -S | FileCheck %s
-; rdar://12713675
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-
-define <2 x i16> @test1(i64 %x) nounwind ssp {
-; CHECK: @test1
-entry:
-  %tmp = alloca i64, align 8
-  br i1 undef, label %bb1, label %bb2
-; CHECK-NOT: alloca
-
-bb1:
-  store i64 %x, i64* %tmp, align 8
-; CHECK-NOT: store
-  %0 = bitcast i64* %tmp to <2 x i16>*
-  %1 = load <2 x i16>* %0, align 8
-; CHECK-NOT: load
-; CHECK: trunc i64 %x to i32
-; CHECK: bitcast i32
-  ret <2 x i16> %1
-
-bb2:
-  ret <2 x i16> < i16 0, i16 0 >
-}
@@ -220,3 +220,48 @@ entry:
   ret i32 %load
 ; CHECK: ret i32
 }
+
+define <2 x i8> @PR14349.1(i32 %x) {
+; CEHCK: @PR14349.1
+; The first testcase for broken SROA rewriting of split integer loads and
+; stores due to smaller vector loads and stores. This particular test ensures
+; that we can rewrite a split store of an integer to a store of a vector.
+entry:
+  %a = alloca i32
+; CHECK-NOT: alloca
+
+  store i32 %x, i32* %a
+; CHECK-NOT: store
+
+  %cast = bitcast i32* %a to <2 x i8>*
+  %vec = load <2 x i8>* %cast
+; CHECK-NOT: load
+
+  ret <2 x i8> %vec
+; CHECK: %[[trunc:.*]] = trunc i32 %x to i16
+; CHECK: %[[cast:.*]] = bitcast i16 %[[trunc]] to <2 x i8>
+; CHECK: ret <2 x i8> %[[cast]]
+}
+
+define i32 @PR14349.2(<2 x i8> %x) {
+; CEHCK: @PR14349.2
+; The first testcase for broken SROA rewriting of split integer loads and
+; stores due to smaller vector loads and stores. This particular test ensures
+; that we can rewrite a split load of an integer to a load of a vector.
+entry:
+  %a = alloca i32
+; CHECK-NOT: alloca
+
+  %cast = bitcast i32* %a to <2 x i8>*
+  store <2 x i8> %x, <2 x i8>* %cast
+; CHECK-NOT: store
+
+  %int = load i32* %a
+; CHECK-NOT: load
+
+  ret i32 %int
+; CHECK: %[[cast:.*]] = bitcast <2 x i8> %x to i16
+; CHECK: %[[trunc:.*]] = zext i16 %[[cast]] to i32
+; CHECK: %[[insert:.*]] = or i32 %{{.*}}, %[[trunc]]
+; CHECK: ret i32 %[[insert]]
+}