mirror of
https://github.com/RPCSX/llvm.git
synced 2024-11-27 21:50:40 +00:00
[CodeGenPrep] move aarch64-type-promotion to CGP
Summary: Move the aarch64-type-promotion pass within the existing type promotion framework in CGP. This change also supports forking sexts when a new sext is required for promotion. Note that this change is based on D27853 and I am submitting this out early to provide a better idea on D27853. Reviewers: jmolloy, mcrosier, javed.absar, qcolombet Reviewed By: qcolombet Subscribers: llvm-commits, aemerson, rengolin, mcrosier Differential Revision: https://reviews.llvm.org/D28680 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@299379 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
445d3cfd6b
commit
78c95b332b
@ -526,6 +526,12 @@ public:
|
||||
/// \return The width of the largest scalar or vector register type.
|
||||
unsigned getRegisterBitWidth(bool Vector) const;
|
||||
|
||||
/// \return True if it should be considered for address type promotion.
|
||||
/// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is
|
||||
/// profitable without finding other extensions fed by the same input.
|
||||
bool shouldConsiderAddressTypePromotion(
|
||||
const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;
|
||||
|
||||
/// \return The size of a cache line in bytes.
|
||||
unsigned getCacheLineSize() const;
|
||||
|
||||
@ -800,6 +806,8 @@ public:
|
||||
Type *Ty) = 0;
|
||||
virtual unsigned getNumberOfRegisters(bool Vector) = 0;
|
||||
virtual unsigned getRegisterBitWidth(bool Vector) = 0;
|
||||
virtual bool shouldConsiderAddressTypePromotion(
|
||||
const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
|
||||
virtual unsigned getCacheLineSize() = 0;
|
||||
virtual unsigned getPrefetchDistance() = 0;
|
||||
virtual unsigned getMinPrefetchStride() = 0;
|
||||
@ -1026,7 +1034,11 @@ public:
|
||||
unsigned getRegisterBitWidth(bool Vector) override {
|
||||
return Impl.getRegisterBitWidth(Vector);
|
||||
}
|
||||
|
||||
/// Forwards the address-type-promotion query to the wrapped implementation
/// \c Impl. Both \p I and the \p AllowPromotionWithoutCommonHeader out-flag
/// are passed through unchanged, and Impl's result is returned as-is.
bool shouldConsiderAddressTypePromotion(
|
||||
const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
|
||||
return Impl.shouldConsiderAddressTypePromotion(
|
||||
I, AllowPromotionWithoutCommonHeader);
|
||||
}
|
||||
unsigned getCacheLineSize() override {
|
||||
return Impl.getCacheLineSize();
|
||||
}
|
||||
|
@ -303,6 +303,13 @@ public:
|
||||
|
||||
unsigned getRegisterBitWidth(bool Vector) { return 32; }
|
||||
|
||||
/// Default answer for the address-type-promotion query: \p I is never
/// considered. \p AllowPromotionWithoutCommonHeader is always cleared so the
/// caller never reads a stale value.
/// \return Always false.
bool
|
||||
shouldConsiderAddressTypePromotion(const Instruction &I,
|
||||
bool &AllowPromotionWithoutCommonHeader) {
|
||||
AllowPromotionWithoutCommonHeader = false;
|
||||
return false;
|
||||
}
|
||||
|
||||
unsigned getCacheLineSize() { return 0; }
|
||||
|
||||
unsigned getPrefetchDistance() { return 0; }
|
||||
|
@ -269,6 +269,12 @@ unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const {
|
||||
return TTIImpl->getRegisterBitWidth(Vector);
|
||||
}
|
||||
|
||||
/// Delegates the address-type-promotion query to \c TTIImpl, forwarding \p I
/// and the \p AllowPromotionWithoutCommonHeader out-flag unchanged and
/// returning whatever the implementation reports.
bool TargetTransformInfo::shouldConsiderAddressTypePromotion(
|
||||
const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
|
||||
return TTIImpl->shouldConsiderAddressTypePromotion(
|
||||
I, AllowPromotionWithoutCommonHeader);
|
||||
}
|
||||
|
||||
unsigned TargetTransformInfo::getCacheLineSize() const {
|
||||
return TTIImpl->getCacheLineSize();
|
||||
}
|
||||
|
@ -138,10 +138,17 @@ static cl::opt<bool> ForceSplitStore(
|
||||
"force-split-store", cl::Hidden, cl::init(false),
|
||||
cl::desc("Force store splitting no matter what the target query says."));
|
||||
|
||||
static cl::opt<bool>
|
||||
EnableTypePromotionMerge("cgp-type-promotion-merge", cl::Hidden,
|
||||
cl::desc("Enable merging of redundant sexts when one is dominating"
|
||||
" the other."), cl::init(true));
|
||||
|
||||
namespace {
|
||||
typedef SmallPtrSet<Instruction *, 16> SetOfInstrs;
|
||||
typedef PointerIntPair<Type *, 1, bool> TypeIsSExt;
|
||||
typedef DenseMap<Instruction *, TypeIsSExt> InstrToOrigTy;
|
||||
typedef SmallVector<Instruction *, 16> SExts;
|
||||
typedef DenseMap<Value *, SExts> ValueToSExts;
|
||||
class TypePromotionTransaction;
|
||||
|
||||
class CodeGenPrepare : public FunctionPass {
|
||||
@ -170,6 +177,15 @@ class TypePromotionTransaction;
|
||||
/// promotion for the current function.
|
||||
InstrToOrigTy PromotedInsts;
|
||||
|
||||
/// Keep track of instructions removed during promotion.
|
||||
SetOfInstrs RemovedInsts;
|
||||
|
||||
/// Keep track of sext chains based on their initial value.
|
||||
DenseMap<Value *, Instruction *> SeenChainsForSExt;
|
||||
|
||||
/// Keep track of SExt promoted.
|
||||
ValueToSExts ValToSExtendedUses;
|
||||
|
||||
/// True if CFG is modified in any way.
|
||||
bool ModifiedDT;
|
||||
|
||||
@ -211,7 +227,7 @@ class TypePromotionTransaction;
|
||||
Type *AccessTy, unsigned AS);
|
||||
bool optimizeInlineAsmInst(CallInst *CS);
|
||||
bool optimizeCallInst(CallInst *CI, bool& ModifiedDT);
|
||||
bool moveExtToFormExtLoad(Instruction *&I);
|
||||
bool optimizeExt(Instruction *&I);
|
||||
bool optimizeExtUses(Instruction *I);
|
||||
bool optimizeLoadExt(LoadInst *I);
|
||||
bool optimizeSelectInst(SelectInst *SI);
|
||||
@ -226,6 +242,12 @@ class TypePromotionTransaction;
|
||||
const SmallVectorImpl<Instruction *> &Exts,
|
||||
SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
|
||||
unsigned CreatedInstsCost = 0);
|
||||
bool mergeSExts(Function &F);
|
||||
bool performAddressTypePromotion(
|
||||
Instruction *&Inst,
|
||||
bool AllowPromotionWithoutCommonHeader,
|
||||
bool HasPromoted, TypePromotionTransaction &TPT,
|
||||
SmallVectorImpl<Instruction *> &SpeculativelyMovedExts);
|
||||
bool splitBranchCondition(Function &F);
|
||||
bool simplifyOffsetableRelocate(Instruction &I);
|
||||
bool splitIndirectCriticalEdges(Function &F);
|
||||
@ -310,6 +332,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
|
||||
bool MadeChange = true;
|
||||
while (MadeChange) {
|
||||
MadeChange = false;
|
||||
SeenChainsForSExt.clear();
|
||||
ValToSExtendedUses.clear();
|
||||
RemovedInsts.clear();
|
||||
for (Function::iterator I = F.begin(); I != F.end(); ) {
|
||||
BasicBlock *BB = &*I++;
|
||||
bool ModifiedDTOnIteration = false;
|
||||
@ -319,6 +344,13 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
|
||||
if (ModifiedDTOnIteration)
|
||||
break;
|
||||
}
|
||||
if (EnableTypePromotionMerge && !ValToSExtendedUses.empty())
|
||||
MadeChange |= mergeSExts(F);
|
||||
|
||||
// Really free removed instructions during promotion.
|
||||
for (Instruction *I : RemovedInsts)
|
||||
delete I;
|
||||
|
||||
EverMadeChange |= MadeChange;
|
||||
}
|
||||
|
||||
@ -2793,25 +2825,30 @@ class TypePromotionTransaction {
|
||||
OperandsHider Hider;
|
||||
/// Keep track of the uses replaced, if any.
|
||||
UsesReplacer *Replacer;
|
||||
/// Keep track of instructions removed.
|
||||
SetOfInstrs &RemovedInsts;
|
||||
|
||||
public:
|
||||
/// \brief Remove all references to \p Inst and optionally replace all its
|
||||
/// uses with New.
|
||||
/// \p RemovedInsts Keep track of the instructions removed by this Action.
|
||||
/// \pre If !Inst->use_empty(), then New != nullptr
|
||||
InstructionRemover(Instruction *Inst, Value *New = nullptr)
|
||||
InstructionRemover(Instruction *Inst, SetOfInstrs &RemovedInsts,
|
||||
Value *New = nullptr)
|
||||
: TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
|
||||
Replacer(nullptr) {
|
||||
Replacer(nullptr), RemovedInsts(RemovedInsts) {
|
||||
if (New)
|
||||
Replacer = new UsesReplacer(Inst, New);
|
||||
DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
|
||||
RemovedInsts.insert(Inst);
|
||||
/// The instructions removed here will be freed after completing
|
||||
/// optimizeBlock() for all blocks as we need to keep track of the
|
||||
/// removed instructions during promotion.
|
||||
Inst->removeFromParent();
|
||||
}
|
||||
|
||||
~InstructionRemover() override { delete Replacer; }
|
||||
|
||||
/// \brief Really remove the instruction.
|
||||
void commit() override { delete Inst; }
|
||||
|
||||
/// \brief Resurrect the instruction and reassign it to the proper uses if
|
||||
/// new value was provided when building this action.
|
||||
void undo() override {
|
||||
@ -2820,6 +2857,7 @@ class TypePromotionTransaction {
|
||||
if (Replacer)
|
||||
Replacer->undo();
|
||||
Hider.undo();
|
||||
RemovedInsts.erase(Inst);
|
||||
}
|
||||
};
|
||||
|
||||
@ -2828,6 +2866,10 @@ public:
|
||||
/// The restoration point is a pointer to an action instead of an iterator
|
||||
/// because the iterator may be invalidated but not the pointer.
|
||||
typedef const TypePromotionAction *ConstRestorationPt;
|
||||
|
||||
TypePromotionTransaction(SetOfInstrs &RemovedInsts)
|
||||
: RemovedInsts(RemovedInsts) {}
|
||||
|
||||
/// Advocate every change made in that transaction.
|
||||
void commit();
|
||||
/// Undo all the changes made after the given point.
|
||||
@ -2859,6 +2901,7 @@ private:
|
||||
/// The ordered list of actions made so far.
|
||||
SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions;
|
||||
typedef SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator CommitPt;
|
||||
SetOfInstrs &RemovedInsts;
|
||||
};
|
||||
|
||||
void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
|
||||
@ -2870,7 +2913,8 @@ void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
|
||||
void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
|
||||
Value *NewVal) {
|
||||
Actions.push_back(
|
||||
make_unique<TypePromotionTransaction::InstructionRemover>(Inst, NewVal));
|
||||
make_unique<TypePromotionTransaction::InstructionRemover>(Inst,
|
||||
RemovedInsts, NewVal));
|
||||
}
|
||||
|
||||
void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
|
||||
@ -4097,7 +4141,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
|
||||
bool IsNumUsesConsensusValid = false;
|
||||
SmallVector<Instruction*, 16> AddrModeInsts;
|
||||
ExtAddrMode AddrMode;
|
||||
TypePromotionTransaction TPT;
|
||||
TypePromotionTransaction TPT(RemovedInsts);
|
||||
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
|
||||
TPT.getRestorationPoint();
|
||||
while (!worklist.empty()) {
|
||||
@ -4492,20 +4536,6 @@ static bool hasSameExtUse(Value *Val, const TargetLowering &TLI) {
|
||||
/// them.
|
||||
///
|
||||
/// \return true if some promotion happened, false otherwise.
|
||||
///
|
||||
/// Example:
|
||||
/// \code
|
||||
/// %ld = load i32* %addr
|
||||
/// %add = add nuw i32 %ld, 4
|
||||
/// %zext = zext i32 %add to i64
|
||||
/// \endcode
|
||||
/// =>
|
||||
/// \code
|
||||
/// %ld = load i32* %addr
|
||||
/// %zext = zext i32 %ld to i64
|
||||
/// %add = add nuw i64 %zext, 4
|
||||
/// \endcode
|
||||
/// Thanks to the promotion, we can match zext(load i32*) to i64.
|
||||
bool CodeGenPrepare::tryToPromoteExts(
|
||||
TypePromotionTransaction &TPT, const SmallVectorImpl<Instruction *> &Exts,
|
||||
SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
|
||||
@ -4601,6 +4631,46 @@ bool CodeGenPrepare::tryToPromoteExts(
|
||||
return Promoted;
|
||||
}
|
||||
|
||||
/// Merging redundant sexts when one is dominating the other.
|
||||
// Walks every entry of ValToSExtendedUses and, among the recorded sexts that
// still extend the entry's key value, folds each sext into another one that
// dominates it. Instructions merged away are only unlinked from their parent
// and recorded in RemovedInsts; they are not deleted in this function.
// Returns true if at least one sext was merged.
bool CodeGenPrepare::mergeSExts(Function &F) {
|
||||
// Dominance queries below use a dominator tree built for F at this point.
DominatorTree DT(F);
|
||||
bool Changed = false;
|
||||
for (auto &Entry : ValToSExtendedUses) {
|
||||
SExts &Insts = Entry.second;
|
||||
// Sexts kept so far for this entry; each later candidate is compared
// against these.
SExts CurPts;
|
||||
for (Instruction *Inst : Insts) {
|
||||
// Skip candidates that were already removed, are not sexts, or no longer
// take the entry's key value as operand 0.
if (RemovedInsts.count(Inst) || !isa<SExtInst>(Inst) ||
|
||||
Inst->getOperand(0) != Entry.first)
|
||||
continue;
|
||||
bool inserted = false;
|
||||
for (auto &Pt : CurPts) {
|
||||
// Inst dominates the kept sext: redirect Pt's uses to Inst, unlink Pt, and
// let Inst take over Pt's slot in CurPts (Pt is a reference into CurPts).
if (DT.dominates(Inst, Pt)) {
|
||||
Pt->replaceAllUsesWith(Inst);
|
||||
RemovedInsts.insert(Pt);
|
||||
Pt->removeFromParent();
|
||||
Pt = Inst;
|
||||
inserted = true;
|
||||
Changed = true;
|
||||
break;
|
||||
}
|
||||
if (!DT.dominates(Pt, Inst))
|
||||
// Give up if we need to merge in a common dominator as the
|
||||
// experiments show it is not profitable.
|
||||
continue;
|
||||
// Reaching here means Pt dominates Inst: redirect Inst's uses to Pt and
// unlink Inst.
Inst->replaceAllUsesWith(Pt);
|
||||
RemovedInsts.insert(Inst);
|
||||
Inst->removeFromParent();
|
||||
inserted = true;
|
||||
Changed = true;
|
||||
break;
|
||||
}
|
||||
// No dominance relation with any kept sext: keep Inst as a new merge point.
if (!inserted)
|
||||
CurPts.push_back(Inst);
|
||||
}
|
||||
}
|
||||
return Changed;
|
||||
}
|
||||
|
||||
/// Return true, if an ext(load) can be formed from an extension in
|
||||
/// \p MovedExts.
|
||||
bool CodeGenPrepare::canFormExtLd(
|
||||
@ -4646,49 +4716,163 @@ bool CodeGenPrepare::canFormExtLd(
|
||||
/// Move a zext or sext fed by a load into the same basic block as the load,
|
||||
/// unless conditions are unfavorable. This allows SelectionDAG to fold the
|
||||
/// extend into the load.
|
||||
/// \p I[in/out] the extension may be modified during the process if some
|
||||
/// promotions apply.
|
||||
///
|
||||
bool CodeGenPrepare::moveExtToFormExtLoad(Instruction *&I) {
|
||||
// ExtLoad formation infrastructure requires TLI to be effective.
|
||||
/// E.g.,
|
||||
/// \code
|
||||
/// %ld = load i32* %addr
|
||||
/// %add = add nuw i32 %ld, 4
|
||||
/// %zext = zext i32 %add to i64
|
||||
/// \endcode
|
||||
/// =>
|
||||
/// \code
|
||||
/// %ld = load i32* %addr
|
||||
/// %zext = zext i32 %ld to i64
|
||||
/// %add = add nuw i64 %zext, 4
|
||||
/// \endcode
|
||||
/// Note that the promotion in %add to i64 is done in tryToPromoteExts(), which
|
||||
/// allows us to match zext(load i32*) to i64.
|
||||
///
|
||||
/// Also, try to promote the computations used to obtain a sign extended
|
||||
/// value used in memory accesses.
|
||||
/// E.g.,
|
||||
/// \code
|
||||
/// a = add nsw i32 b, 3
|
||||
/// d = sext i32 a to i64
|
||||
/// e = getelementptr ..., i64 d
|
||||
/// \endcode
|
||||
/// =>
|
||||
/// \code
|
||||
/// f = sext i32 b to i64
|
||||
/// a = add nsw i64 f, 3
|
||||
/// e = getelementptr ..., i64 a
|
||||
/// \endcode
|
||||
///
|
||||
/// \p Inst[in/out] the extension may be modified during the process if some
|
||||
/// promotions apply.
|
||||
bool CodeGenPrepare::optimizeExt(Instruction *&Inst) {
|
||||
// ExtLoad formation and address type promotion infrastructure requires TLI to
|
||||
// be effective.
|
||||
if (!TLI)
|
||||
return false;
|
||||
|
||||
// Try to promote a chain of computation if it allows to form
|
||||
// an extended load.
|
||||
TypePromotionTransaction TPT;
|
||||
bool AllowPromotionWithoutCommonHeader = false;
|
||||
/// See if it is an interesting sext operation for the address type
|
||||
/// promotion before trying to promote it, e.g., the ones with the right
|
||||
/// type and used in memory accesses.
|
||||
bool ATPConsiderable = TTI->shouldConsiderAddressTypePromotion(
|
||||
*Inst, AllowPromotionWithoutCommonHeader);
|
||||
TypePromotionTransaction TPT(RemovedInsts);
|
||||
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
|
||||
TPT.getRestorationPoint();
|
||||
SmallVector<Instruction *, 1> Exts;
|
||||
SmallVector<Instruction *, 2> LastMovedExts;
|
||||
Exts.push_back(I);
|
||||
SmallVector<Instruction *, 2> SpeculativelyMovedExts;
|
||||
Exts.push_back(Inst);
|
||||
|
||||
bool HasPromoted = tryToPromoteExts(TPT, Exts, LastMovedExts);
|
||||
bool HasPromoted = tryToPromoteExts(TPT, Exts, SpeculativelyMovedExts);
|
||||
|
||||
// Look for a load being extended.
|
||||
LoadInst *LI = nullptr;
|
||||
Instruction *OldExt = I;
|
||||
if (!canFormExtLd(LastMovedExts, LI, I, HasPromoted)) {
|
||||
I = OldExt;
|
||||
TPT.rollback(LastKnownGood);
|
||||
Instruction *ExtFedByLoad;
|
||||
|
||||
// Try to promote a chain of computation if it allows to form an extended
|
||||
// load.
|
||||
if (canFormExtLd(SpeculativelyMovedExts, LI, ExtFedByLoad, HasPromoted)) {
|
||||
assert(LI && ExtFedByLoad && "Expect a valid load and extension");
|
||||
TPT.commit();
|
||||
// Move the extend into the same block as the load
|
||||
ExtFedByLoad->removeFromParent();
|
||||
ExtFedByLoad->insertAfter(LI);
|
||||
// CGP does not check if the zext would be speculatively executed when moved
|
||||
// to the same basic block as the load. Preserving its original location
|
||||
// would pessimize the debugging experience, as well as negatively impact
|
||||
// the quality of sample pgo. We don't want to use "line 0" as that has a
|
||||
// size cost in the line-table section and logically the zext can be seen as
|
||||
// part of the load. Therefore we conservatively reuse the same debug
|
||||
// location for the load and the zext.
|
||||
ExtFedByLoad->setDebugLoc(LI->getDebugLoc());
|
||||
++NumExtsMoved;
|
||||
Inst = ExtFedByLoad;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Continue promoting SExts if known as considerable depending on targets.
|
||||
if (ATPConsiderable &&
|
||||
performAddressTypePromotion(Inst, AllowPromotionWithoutCommonHeader,
|
||||
HasPromoted, TPT, SpeculativelyMovedExts))
|
||||
return true;
|
||||
|
||||
TPT.rollback(LastKnownGood);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Perform address type promotion if doing so is profitable.
|
||||
// If AllowPromotionWithoutCommonHeader == false, we should find other sext
|
||||
// instructions that sign extended the same initial value. However, if
|
||||
// AllowPromotionWithoutCommonHeader == true, we expect promoting the
|
||||
// extension is just profitable.
|
||||
bool CodeGenPrepare::performAddressTypePromotion(
|
||||
Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
|
||||
bool HasPromoted, TypePromotionTransaction &TPT,
|
||||
SmallVectorImpl<Instruction *> &SpeculativelyMovedExts) {
|
||||
bool Promoted = false;
|
||||
SmallPtrSet<Instruction *, 1> UnhandledExts;
|
||||
bool AllSeenFirst = true;
|
||||
for (auto I : SpeculativelyMovedExts) {
|
||||
Value *HeadOfChain = I->getOperand(0);
|
||||
DenseMap<Value *, Instruction *>::iterator AlreadySeen =
|
||||
SeenChainsForSExt.find(HeadOfChain);
|
||||
// If there is an unhandled SExt which has the same header, try to promote
|
||||
// it as well.
|
||||
if (AlreadySeen != SeenChainsForSExt.end()) {
|
||||
if (AlreadySeen->second != nullptr)
|
||||
UnhandledExts.insert(AlreadySeen->second);
|
||||
AllSeenFirst = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (!AllSeenFirst || (AllowPromotionWithoutCommonHeader &&
|
||||
SpeculativelyMovedExts.size() == 1)) {
|
||||
TPT.commit();
|
||||
if (HasPromoted)
|
||||
Promoted = true;
|
||||
for (auto I : SpeculativelyMovedExts) {
|
||||
Value *HeadOfChain = I->getOperand(0);
|
||||
SeenChainsForSExt[HeadOfChain] = nullptr;
|
||||
ValToSExtendedUses[HeadOfChain].push_back(I);
|
||||
}
|
||||
// Update Inst as promotion happened.
|
||||
Inst = SpeculativelyMovedExts.pop_back_val();
|
||||
} else {
|
||||
// This is the first chain visited from the header, keep the current chain
|
||||
// as unhandled. Defer to promote this until we encounter another SExt
|
||||
// chain derived from the same header.
|
||||
for (auto I : SpeculativelyMovedExts) {
|
||||
Value *HeadOfChain = I->getOperand(0);
|
||||
SeenChainsForSExt[HeadOfChain] = Inst;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Move the extend into the same block as the load, so that SelectionDAG
|
||||
// can fold it.
|
||||
TPT.commit();
|
||||
I->removeFromParent();
|
||||
I->insertAfter(LI);
|
||||
// CGP does not check if the zext would be speculatively executed when moved
|
||||
// to the same basic block as the load. Preserving its original location would
|
||||
// pessimize the debugging experience, as well as negatively impact the
|
||||
// quality of sample pgo. We don't want to use "line 0" as that has a
|
||||
// size cost in the line-table section and logically the zext can be seen as
|
||||
// part of the load. Therefore we conservatively reuse the same debug location
|
||||
// for the load and the zext.
|
||||
I->setDebugLoc(LI->getDebugLoc());
|
||||
++NumExtsMoved;
|
||||
return true;
|
||||
if (!AllSeenFirst && !UnhandledExts.empty())
|
||||
for (auto VisitedSExt : UnhandledExts) {
|
||||
if (RemovedInsts.count(VisitedSExt))
|
||||
continue;
|
||||
TypePromotionTransaction TPT(RemovedInsts);
|
||||
SmallVector<Instruction *, 1> Exts;
|
||||
SmallVector<Instruction *, 2> Chains;
|
||||
Exts.push_back(VisitedSExt);
|
||||
bool HasPromoted = tryToPromoteExts(TPT, Exts, Chains);
|
||||
TPT.commit();
|
||||
if (HasPromoted)
|
||||
Promoted = true;
|
||||
for (auto I : Chains) {
|
||||
Value *HeadOfChain = I->getOperand(0);
|
||||
// Mark this as handled.
|
||||
SeenChainsForSExt[HeadOfChain] = nullptr;
|
||||
ValToSExtendedUses[HeadOfChain].push_back(I);
|
||||
}
|
||||
}
|
||||
return Promoted;
|
||||
}
|
||||
|
||||
bool CodeGenPrepare::optimizeExtUses(Instruction *I) {
|
||||
@ -5802,7 +5986,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) {
|
||||
TargetLowering::TypeExpandInteger) {
|
||||
return SinkCast(CI);
|
||||
} else {
|
||||
bool MadeChange = moveExtToFormExtLoad(I);
|
||||
bool MadeChange = optimizeExt(I);
|
||||
return MadeChange | optimizeExtUses(I);
|
||||
}
|
||||
}
|
||||
|
@ -118,7 +118,7 @@ EnableA53Fix835769("aarch64-fix-cortex-a53-835769", cl::Hidden,
|
||||
static cl::opt<bool>
|
||||
EnableAddressTypePromotion("aarch64-enable-type-promotion", cl::Hidden,
|
||||
cl::desc("Enable the type promotion pass"),
|
||||
cl::init(true));
|
||||
cl::init(false));
|
||||
|
||||
static cl::opt<bool>
|
||||
EnableGEPOpt("aarch64-enable-gep-opt", cl::Hidden,
|
||||
|
@ -626,6 +626,38 @@ bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
|
||||
return true;
|
||||
}
|
||||
|
||||
/// See if \p I should be considered for address type promotion. We check if \p
|
||||
/// I is a sext with the right type and used in memory accesses. If it is used in a
|
||||
/// "complex" getelementptr, we allow it to be promoted without finding other
|
||||
/// sext instructions that sign extended the same initial value. A getelementptr
|
||||
/// is considered as "complex" if it has more than 2 operands.
|
||||
// \p I is the instruction being queried. \p AllowPromotionWithoutCommonHeader
// is an out-parameter: it is cleared on entry and set to true only when \p I
// has a getelementptr user with more than 2 operands.
// Returns true when \p I is a sext producing i64 with at least one
// getelementptr user, false otherwise.
bool AArch64TTIImpl::shouldConsiderAddressTypePromotion(
|
||||
const Instruction &I, bool &AllowPromotionWithoutCommonHeader) {
|
||||
bool Considerable = false;
|
||||
// Clear the out-flag first so every early return below leaves it false.
AllowPromotionWithoutCommonHeader = false;
|
||||
// Only sign extensions are candidates.
if (!isa<SExtInst>(&I))
|
||||
return false;
|
||||
// Only sexts whose result type is i64 are considered.
Type *ConsideredSExtType =
|
||||
Type::getInt64Ty(I.getParent()->getParent()->getContext());
|
||||
if (I.getType() != ConsideredSExtType)
|
||||
return false;
|
||||
// See if the sext is the one with the right type and used in at least one
|
||||
// GetElementPtrInst.
|
||||
for (const User *U : I.users()) {
|
||||
if (const GetElementPtrInst *GEPInst = dyn_cast<GetElementPtrInst>(U)) {
|
||||
Considerable = true;
|
||||
// A getelementptr is considered as "complex" if it has more than 2
|
||||
// operands. We will promote a SExt used in such complex GEP as we
|
||||
// expect some computation to be merged if they are done on 64 bits.
|
||||
if (GEPInst->getNumOperands() > 2) {
|
||||
AllowPromotionWithoutCommonHeader = true;
|
||||
// Both results are now decided, so the remaining users need not be scanned.
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return Considerable;
|
||||
}
|
||||
|
||||
unsigned AArch64TTIImpl::getCacheLineSize() {
|
||||
return ST->getCacheLineSize();
|
||||
}
|
||||
|
@ -121,6 +121,10 @@ public:
|
||||
ArrayRef<unsigned> Indices, unsigned Alignment,
|
||||
unsigned AddressSpace);
|
||||
|
||||
bool
|
||||
shouldConsiderAddressTypePromotion(const Instruction &I,
|
||||
bool &AllowPromotionWithoutCommonHeader);
|
||||
|
||||
unsigned getCacheLineSize();
|
||||
|
||||
unsigned getPrefetchDistance();
|
||||
|
68
test/CodeGen/AArch64/aarch64-codegen-prepare-atp.ll
Normal file
68
test/CodeGen/AArch64/aarch64-codegen-prepare-atp.ll
Normal file
@ -0,0 +1,68 @@
|
||||
; RUN: opt -codegenprepare < %s -S | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
|
||||
target triple = "aarch64--linux-gnu"
|
||||
|
||||
%struct.match_state = type { i64, i64 }
|
||||
|
||||
; %add is also promoted by forking an extra sext.
|
||||
define void @promoteTwoOne(i32 %i, i32 %j, i64* %P1, i64* %P2 ) {
|
||||
; CHECK-LABEL: @promoteTwoOne
|
||||
; CHECK-LABEL: entry:
|
||||
; CHECK: %[[SEXT1:.*]] = sext i32 %i to i64
|
||||
; CHECK: %[[SEXT2:.*]] = sext i32 %j to i64
|
||||
; CHECK: %add = add nsw i64 %[[SEXT1]], %[[SEXT2]]
|
||||
entry:
|
||||
%add = add nsw i32 %i, %j
|
||||
%s = sext i32 %add to i64
|
||||
%addr1 = getelementptr inbounds i64, i64* %P1, i64 %s
|
||||
store i64 %s, i64* %addr1
|
||||
%s2 = sext i32 %i to i64
|
||||
%addr2 = getelementptr inbounds i64, i64* %P2, i64 %s2
|
||||
store i64 %s2, i64* %addr2
|
||||
ret void
|
||||
}
|
||||
|
||||
; Both %add1 and %add2 are promoted by forking extra sexts.
|
||||
define void @promoteTwoTwo(i32 %i, i32 %j, i32 %k, i64* %P1, i64* %P2) {
|
||||
; CHECK-LABEL: @promoteTwoTwo
|
||||
; CHECK-LABEL:entry:
|
||||
; CHECK: %[[SEXT1:.*]] = sext i32 %j to i64
|
||||
; CHECK: %[[SEXT2:.*]] = sext i32 %i to i64
|
||||
; CHECK: %add1 = add nsw i64 %[[SEXT1]], %[[SEXT2]]
|
||||
; CHECK: %[[SEXT3:.*]] = sext i32 %k to i64
|
||||
; CHECK: %add2 = add nsw i64 %[[SEXT1]], %[[SEXT3]]
|
||||
entry:
|
||||
%add1 = add nsw i32 %j, %i
|
||||
%s = sext i32 %add1 to i64
|
||||
%addr1 = getelementptr inbounds i64, i64* %P1, i64 %s
|
||||
store i64 %s, i64* %addr1
|
||||
%add2 = add nsw i32 %j, %k
|
||||
%s2 = sext i32 %add2 to i64
|
||||
%addr2 = getelementptr inbounds i64, i64* %P2, i64 %s2
|
||||
store i64 %s2, i64* %addr2
|
||||
ret void
|
||||
}
|
||||
|
||||
define i64 @promoteGEPSunk(i1 %cond, i64* %base, i32 %i) {
|
||||
; CHECK-LABEL: @promoteGEPSunk
|
||||
; CHECK-LABEL: entry:
|
||||
; CHECK: %[[SEXT:.*]] = sext i32 %i to i64
|
||||
; CHECK: %add = add nsw i64 %[[SEXT]], 1
|
||||
; CHECK: %add2 = add nsw i64 %[[SEXT]], 2
|
||||
entry:
|
||||
%add = add nsw i32 %i, 1
|
||||
%s = sext i32 %add to i64
|
||||
%addr = getelementptr inbounds i64, i64* %base, i64 %s
|
||||
%add2 = add nsw i32 %i, 2
|
||||
%s2 = sext i32 %add2 to i64
|
||||
%addr2 = getelementptr inbounds i64, i64* %base, i64 %s2
|
||||
br i1 %cond, label %if.then, label %if.then2
|
||||
if.then:
|
||||
%v = load i64, i64* %addr
|
||||
%v2 = load i64, i64* %addr2
|
||||
%r = add i64 %v, %v2
|
||||
ret i64 %r
|
||||
if.then2:
|
||||
ret i64 0;
|
||||
}
|
@ -10,14 +10,17 @@ define zeroext i8 @fullGtU(i32 %i1, i32 %i2) {
|
||||
; CHECK: fullGtU
|
||||
; CHECK: adrp [[PAGE:x[0-9]+]], _block@GOTPAGE
|
||||
; CHECK: ldr [[ADDR:x[0-9]+]], {{\[}}[[PAGE]], _block@GOTPAGEOFF]
|
||||
; CHECK: sxtw [[I1:x[0-9]+]], w0
|
||||
; CHECK: sxtw [[I2:x[0-9]+]], w1
|
||||
; CHECK-NEXT: ldr [[BLOCKBASE:x[0-9]+]], {{\[}}[[ADDR]]]
|
||||
; CHECK-NEXT: ldrb [[BLOCKVAL1:w[0-9]+]], {{\[}}[[BLOCKBASE]], w0, sxtw]
|
||||
; CHECK-NEXT: ldrb [[BLOCKVAL2:w[0-9]+]], {{\[}}[[BLOCKBASE]], w1, sxtw]
|
||||
; CHECK-NEXT: ldrb [[BLOCKVAL1:w[0-9]+]], {{\[}}[[BLOCKBASE]], [[I1]]]
|
||||
; CHECK-NEXT: ldrb [[BLOCKVAL2:w[0-9]+]], {{\[}}[[BLOCKBASE]], [[I2]]]
|
||||
|
||||
; CHECK-NEXT: cmp [[BLOCKVAL1]], [[BLOCKVAL2]]
|
||||
; CHECK-NEXT: b.ne
|
||||
; Next BB
|
||||
; CHECK: add [[BLOCKBASE2:x[0-9]+]], [[BLOCKBASE]], w1, sxtw
|
||||
; CHECK-NEXT: add [[BLOCKBASE1:x[0-9]+]], [[BLOCKBASE]], w0, sxtw
|
||||
; CHECK: add [[BLOCKBASE2:x[0-9]+]], [[BLOCKBASE]], [[I2]]
|
||||
; CHECK-NEXT: add [[BLOCKBASE1:x[0-9]+]], [[BLOCKBASE]], [[I1]]
|
||||
; CHECK-NEXT: ldrb [[LOADEDVAL1:w[0-9]+]], {{\[}}[[BLOCKBASE1]], #1]
|
||||
; CHECK-NEXT: ldrb [[LOADEDVAL2:w[0-9]+]], {{\[}}[[BLOCKBASE2]], #1]
|
||||
; CHECK-NEXT: cmp [[LOADEDVAL1]], [[LOADEDVAL2]]
|
||||
|
Loading…
Reference in New Issue
Block a user