[CodeGenPrep] move aarch64-type-promotion to CGP

Summary:
Move the aarch64-type-promotion pass within the existing type promotion framework in CGP.
This change also supports forking sexts when a new sext is required for promotion.
Note that this change is based on D27853; I am sending it out early to give a better idea of D27853.
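For a rough picture of the forking (a sketch in the spirit of the new test added below; names are illustrative): promoting %s requires a sext of %j that does not exist yet, so one is forked, while the existing sext of %i is reused.

  %add = add nsw i32 %i, %j
  %s   = sext i32 %add to i64    ; sext considered for promotion
  %s2  = sext i32 %i to i64      ; pre-existing sext of %i
=>
  %s2  = sext i32 %i to i64      ; reused
  %f   = sext i32 %j to i64      ; forked sext required for the promotion
  %add = add nsw i64 %s2, %f     ; the computation is now done on i64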

Reviewers: jmolloy, mcrosier, javed.absar, qcolombet

Reviewed By: qcolombet

Subscribers: llvm-commits, aemerson, rengolin, mcrosier

Differential Revision: https://reviews.llvm.org/D28680

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@299379 91177308-0d34-0410-b5e6-96231b3b80d8
Jun Bum Lim 2017-04-03 19:20:07 +00:00
parent 445d3cfd6b
commit 78c95b332b
9 changed files with 374 additions and 58 deletions

View File

@@ -526,6 +526,12 @@ public:
/// \return The width of the largest scalar or vector register type.
unsigned getRegisterBitWidth(bool Vector) const;
/// \return True if \p I should be considered for address type promotion.
/// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is
/// profitable without finding other extensions fed by the same input.
bool shouldConsiderAddressTypePromotion(
const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;
/// \return The size of a cache line in bytes.
unsigned getCacheLineSize() const;
@@ -800,6 +806,8 @@ public:
Type *Ty) = 0;
virtual unsigned getNumberOfRegisters(bool Vector) = 0;
virtual unsigned getRegisterBitWidth(bool Vector) = 0;
virtual bool shouldConsiderAddressTypePromotion(
const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
virtual unsigned getCacheLineSize() = 0;
virtual unsigned getPrefetchDistance() = 0;
virtual unsigned getMinPrefetchStride() = 0;
@@ -1026,7 +1034,11 @@ public:
unsigned getRegisterBitWidth(bool Vector) override {
return Impl.getRegisterBitWidth(Vector);
}
bool shouldConsiderAddressTypePromotion(
const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
return Impl.shouldConsiderAddressTypePromotion(
I, AllowPromotionWithoutCommonHeader);
}
unsigned getCacheLineSize() override {
return Impl.getCacheLineSize();
}

View File

@@ -303,6 +303,13 @@ public:
unsigned getRegisterBitWidth(bool Vector) { return 32; }
bool
shouldConsiderAddressTypePromotion(const Instruction &I,
bool &AllowPromotionWithoutCommonHeader) {
AllowPromotionWithoutCommonHeader = false;
return false;
}
unsigned getCacheLineSize() { return 0; }
unsigned getPrefetchDistance() { return 0; }

View File

@@ -269,6 +269,12 @@ unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const {
return TTIImpl->getRegisterBitWidth(Vector);
}
bool TargetTransformInfo::shouldConsiderAddressTypePromotion(
const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
return TTIImpl->shouldConsiderAddressTypePromotion(
I, AllowPromotionWithoutCommonHeader);
}
unsigned TargetTransformInfo::getCacheLineSize() const {
return TTIImpl->getCacheLineSize();
}

View File

@@ -138,10 +138,17 @@ static cl::opt<bool> ForceSplitStore(
"force-split-store", cl::Hidden, cl::init(false),
cl::desc("Force store splitting no matter what the target query says."));
static cl::opt<bool>
EnableTypePromotionMerge("cgp-type-promotion-merge", cl::Hidden,
cl::desc("Enable merging of redundant sexts when one is dominating"
" the other."), cl::init(true));
namespace {
typedef SmallPtrSet<Instruction *, 16> SetOfInstrs;
typedef PointerIntPair<Type *, 1, bool> TypeIsSExt;
typedef DenseMap<Instruction *, TypeIsSExt> InstrToOrigTy;
typedef SmallVector<Instruction *, 16> SExts;
typedef DenseMap<Value *, SExts> ValueToSExts;
class TypePromotionTransaction;
class CodeGenPrepare : public FunctionPass {
@@ -170,6 +177,15 @@ class TypePromotionTransaction;
/// promotion for the current function.
InstrToOrigTy PromotedInsts;
/// Keep track of instructions removed during promotion.
SetOfInstrs RemovedInsts;
/// Keep track of sext chains based on their initial value.
DenseMap<Value *, Instruction *> SeenChainsForSExt;
/// Keep track of promoted sexts.
ValueToSExts ValToSExtendedUses;
/// True if CFG is modified in any way.
bool ModifiedDT;
@@ -211,7 +227,7 @@ class TypePromotionTransaction;
Type *AccessTy, unsigned AS);
bool optimizeInlineAsmInst(CallInst *CS);
bool optimizeCallInst(CallInst *CI, bool& ModifiedDT);
bool moveExtToFormExtLoad(Instruction *&I);
bool optimizeExt(Instruction *&I);
bool optimizeExtUses(Instruction *I);
bool optimizeLoadExt(LoadInst *I);
bool optimizeSelectInst(SelectInst *SI);
@@ -226,6 +242,12 @@ class TypePromotionTransaction;
const SmallVectorImpl<Instruction *> &Exts,
SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
unsigned CreatedInstsCost = 0);
bool mergeSExts(Function &F);
bool performAddressTypePromotion(
Instruction *&Inst,
bool AllowPromotionWithoutCommonHeader,
bool HasPromoted, TypePromotionTransaction &TPT,
SmallVectorImpl<Instruction *> &SpeculativelyMovedExts);
bool splitBranchCondition(Function &F);
bool simplifyOffsetableRelocate(Instruction &I);
bool splitIndirectCriticalEdges(Function &F);
@@ -310,6 +332,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
bool MadeChange = true;
while (MadeChange) {
MadeChange = false;
SeenChainsForSExt.clear();
ValToSExtendedUses.clear();
RemovedInsts.clear();
for (Function::iterator I = F.begin(); I != F.end(); ) {
BasicBlock *BB = &*I++;
bool ModifiedDTOnIteration = false;
@@ -319,6 +344,13 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
if (ModifiedDTOnIteration)
break;
}
if (EnableTypePromotionMerge && !ValToSExtendedUses.empty())
MadeChange |= mergeSExts(F);
// Really free removed instructions during promotion.
for (Instruction *I : RemovedInsts)
delete I;
EverMadeChange |= MadeChange;
}
@@ -2793,25 +2825,30 @@ class TypePromotionTransaction {
OperandsHider Hider;
/// Keep track of the uses replaced, if any.
UsesReplacer *Replacer;
/// Keep track of instructions removed.
SetOfInstrs &RemovedInsts;
public:
/// \brief Remove all references to \p Inst and optionally replace all its
/// uses with New.
/// \p RemovedInsts Keep track of the instructions removed by this Action.
/// \pre If !Inst->use_empty(), then New != nullptr
InstructionRemover(Instruction *Inst, Value *New = nullptr)
InstructionRemover(Instruction *Inst, SetOfInstrs &RemovedInsts,
Value *New = nullptr)
: TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
Replacer(nullptr) {
Replacer(nullptr), RemovedInsts(RemovedInsts) {
if (New)
Replacer = new UsesReplacer(Inst, New);
DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
RemovedInsts.insert(Inst);
/// The instructions removed here will be freed after completing
/// optimizeBlock() for all blocks as we need to keep track of the
/// removed instructions during promotion.
Inst->removeFromParent();
}
~InstructionRemover() override { delete Replacer; }
/// \brief Really remove the instruction.
void commit() override { delete Inst; }
/// \brief Resurrect the instruction and reassign it to the proper uses if
/// a new value was provided when building this action.
void undo() override {
@@ -2820,6 +2857,7 @@ class TypePromotionTransaction {
if (Replacer)
Replacer->undo();
Hider.undo();
RemovedInsts.erase(Inst);
}
};
@@ -2828,6 +2866,10 @@ public:
/// The restoration point is a pointer to an action instead of an iterator
/// because the iterator may be invalidated but not the pointer.
typedef const TypePromotionAction *ConstRestorationPt;
TypePromotionTransaction(SetOfInstrs &RemovedInsts)
: RemovedInsts(RemovedInsts) {}
/// Commit every change made in that transaction.
void commit();
/// Undo all the changes made after the given point.
@@ -2859,6 +2901,7 @@ private:
/// The ordered list of actions made so far.
SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions;
typedef SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator CommitPt;
SetOfInstrs &RemovedInsts;
};
void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
@@ -2870,7 +2913,8 @@ void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
Value *NewVal) {
Actions.push_back(
make_unique<TypePromotionTransaction::InstructionRemover>(Inst, NewVal));
make_unique<TypePromotionTransaction::InstructionRemover>(Inst,
RemovedInsts, NewVal));
}
void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
@@ -4097,7 +4141,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
bool IsNumUsesConsensusValid = false;
SmallVector<Instruction*, 16> AddrModeInsts;
ExtAddrMode AddrMode;
TypePromotionTransaction TPT;
TypePromotionTransaction TPT(RemovedInsts);
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
while (!worklist.empty()) {
@@ -4492,20 +4536,6 @@ static bool hasSameExtUse(Value *Val, const TargetLowering &TLI) {
/// them.
///
/// \return true if some promotion happened, false otherwise.
///
/// Example:
/// \code
/// %ld = load i32* %addr
/// %add = add nuw i32 %ld, 4
/// %zext = zext i32 %add to i64
/// \endcode
/// =>
/// \code
/// %ld = load i32* %addr
/// %zext = zext i32 %ld to i64
/// %add = add nuw i64 %zext, 4
/// \endcode
/// Thanks to the promotion, we can match zext(load i32*) to i64.
bool CodeGenPrepare::tryToPromoteExts(
TypePromotionTransaction &TPT, const SmallVectorImpl<Instruction *> &Exts,
SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
@@ -4601,6 +4631,46 @@ bool CodeGenPrepare::tryToPromoteExts(
return Promoted;
}
/// Merge redundant sexts when one dominates the other.
bool CodeGenPrepare::mergeSExts(Function &F) {
DominatorTree DT(F);
bool Changed = false;
for (auto &Entry : ValToSExtendedUses) {
SExts &Insts = Entry.second;
SExts CurPts;
for (Instruction *Inst : Insts) {
if (RemovedInsts.count(Inst) || !isa<SExtInst>(Inst) ||
Inst->getOperand(0) != Entry.first)
continue;
bool inserted = false;
for (auto &Pt : CurPts) {
if (DT.dominates(Inst, Pt)) {
Pt->replaceAllUsesWith(Inst);
RemovedInsts.insert(Pt);
Pt->removeFromParent();
Pt = Inst;
inserted = true;
Changed = true;
break;
}
if (!DT.dominates(Pt, Inst))
// Give up if we need to merge in a common dominator as the
// experiments show it is not profitable.
continue;
Inst->replaceAllUsesWith(Pt);
RemovedInsts.insert(Inst);
Inst->removeFromParent();
inserted = true;
Changed = true;
break;
}
if (!inserted)
CurPts.push_back(Inst);
}
}
return Changed;
}
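In IR terms, the merge performed above amounts to the following sketch (hypothetical values; %a dominates %b and both extend %v):

  %a = sext i32 %v to i64
  ...
  %b = sext i32 %v to i64
  store i64 %b, i64* %p
=>
  %a = sext i32 %v to i64
  ...
  store i64 %a, i64* %p          ; %b's uses rewritten to %a, %b removed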
/// Return true if an ext(load) can be formed from an extension in
/// \p MovedExts.
bool CodeGenPrepare::canFormExtLd(
@@ -4646,49 +4716,163 @@ bool CodeGenPrepare::canFormExtLd(
/// Move a zext or sext fed by a load into the same basic block as the load,
/// unless conditions are unfavorable. This allows SelectionDAG to fold the
/// extend into the load.
/// \p I[in/out] the extension may be modified during the process if some
/// promotions apply.
///
bool CodeGenPrepare::moveExtToFormExtLoad(Instruction *&I) {
// ExtLoad formation infrastructure requires TLI to be effective.
/// E.g.,
/// \code
/// %ld = load i32* %addr
/// %add = add nuw i32 %ld, 4
/// %zext = zext i32 %add to i64
/// \endcode
/// =>
/// \code
/// %ld = load i32* %addr
/// %zext = zext i32 %ld to i64
/// %add = add nuw i64 %zext, 4
/// \endcode
/// Note that the promotion of %add to i64 is done in tryToPromoteExts(), which
/// allows us to match zext(load i32*) to i64.
///
/// Also, try to promote the computations used to obtain a sign-extended
/// value used in memory accesses.
/// E.g.,
/// \code
/// a = add nsw i32 b, 3
/// d = sext i32 a to i64
/// e = getelementptr ..., i64 d
/// \endcode
/// =>
/// \code
/// f = sext i32 b to i64
/// a = add nsw i64 f, 3
/// e = getelementptr ..., i64 a
/// \endcode
///
/// \p Inst[in/out] the extension may be modified during the process if some
/// promotions apply.
bool CodeGenPrepare::optimizeExt(Instruction *&Inst) {
// ExtLoad formation and address type promotion infrastructure requires TLI to
// be effective.
if (!TLI)
return false;
// Try to promote a chain of computation if it allows to form
// an extended load.
TypePromotionTransaction TPT;
bool AllowPromotionWithoutCommonHeader = false;
/// See if this is an interesting sext operation for address type promotion
/// before trying to promote it, e.g., one with the right type and used in
/// memory accesses.
bool ATPConsiderable = TTI->shouldConsiderAddressTypePromotion(
*Inst, AllowPromotionWithoutCommonHeader);
TypePromotionTransaction TPT(RemovedInsts);
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
SmallVector<Instruction *, 1> Exts;
SmallVector<Instruction *, 2> LastMovedExts;
Exts.push_back(I);
SmallVector<Instruction *, 2> SpeculativelyMovedExts;
Exts.push_back(Inst);
bool HasPromoted = tryToPromoteExts(TPT, Exts, LastMovedExts);
bool HasPromoted = tryToPromoteExts(TPT, Exts, SpeculativelyMovedExts);
// Look for a load being extended.
LoadInst *LI = nullptr;
Instruction *OldExt = I;
if (!canFormExtLd(LastMovedExts, LI, I, HasPromoted)) {
I = OldExt;
TPT.rollback(LastKnownGood);
Instruction *ExtFedByLoad;
// Try to promote a chain of computation if it allows forming an extended
// load.
if (canFormExtLd(SpeculativelyMovedExts, LI, ExtFedByLoad, HasPromoted)) {
assert(LI && ExtFedByLoad && "Expect a valid load and extension");
TPT.commit();
// Move the extend into the same block as the load
ExtFedByLoad->removeFromParent();
ExtFedByLoad->insertAfter(LI);
// CGP does not check if the zext would be speculatively executed when moved
// to the same basic block as the load. Preserving its original location
// would pessimize the debugging experience, as well as negatively impact
// the quality of sample pgo. We don't want to use "line 0" as that has a
// size cost in the line-table section and logically the zext can be seen as
// part of the load. Therefore we conservatively reuse the same debug
// location for the load and the zext.
ExtFedByLoad->setDebugLoc(LI->getDebugLoc());
++NumExtsMoved;
Inst = ExtFedByLoad;
return true;
}
// Continue promoting sexts when the target reports them as considerable for
// address type promotion.
if (ATPConsiderable &&
performAddressTypePromotion(Inst, AllowPromotionWithoutCommonHeader,
HasPromoted, TPT, SpeculativelyMovedExts))
return true;
TPT.rollback(LastKnownGood);
return false;
}
// Perform address type promotion if doing so is profitable.
// If AllowPromotionWithoutCommonHeader == false, we should find other sext
// instructions that sign extended the same initial value. However, if
// AllowPromotionWithoutCommonHeader == true, we expect promoting the
// extension to be profitable on its own.
bool CodeGenPrepare::performAddressTypePromotion(
Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
bool HasPromoted, TypePromotionTransaction &TPT,
SmallVectorImpl<Instruction *> &SpeculativelyMovedExts) {
bool Promoted = false;
SmallPtrSet<Instruction *, 1> UnhandledExts;
bool AllSeenFirst = true;
for (auto I : SpeculativelyMovedExts) {
Value *HeadOfChain = I->getOperand(0);
DenseMap<Value *, Instruction *>::iterator AlreadySeen =
SeenChainsForSExt.find(HeadOfChain);
// If there is an unhandled SExt which has the same header, try to promote
// it as well.
if (AlreadySeen != SeenChainsForSExt.end()) {
if (AlreadySeen->second != nullptr)
UnhandledExts.insert(AlreadySeen->second);
AllSeenFirst = false;
}
}
if (!AllSeenFirst || (AllowPromotionWithoutCommonHeader &&
SpeculativelyMovedExts.size() == 1)) {
TPT.commit();
if (HasPromoted)
Promoted = true;
for (auto I : SpeculativelyMovedExts) {
Value *HeadOfChain = I->getOperand(0);
SeenChainsForSExt[HeadOfChain] = nullptr;
ValToSExtendedUses[HeadOfChain].push_back(I);
}
// Update Inst as promotion happened.
Inst = SpeculativelyMovedExts.pop_back_val();
} else {
// This is the first chain visited from the header; keep the current chain
// as unhandled. Defer promoting it until we encounter another SExt
// chain derived from the same header.
for (auto I : SpeculativelyMovedExts) {
Value *HeadOfChain = I->getOperand(0);
SeenChainsForSExt[HeadOfChain] = Inst;
}
return false;
}
// Move the extend into the same block as the load, so that SelectionDAG
// can fold it.
TPT.commit();
I->removeFromParent();
I->insertAfter(LI);
// CGP does not check if the zext would be speculatively executed when moved
// to the same basic block as the load. Preserving its original location would
// pessimize the debugging experience, as well as negatively impact the
// quality of sample pgo. We don't want to use "line 0" as that has a
// size cost in the line-table section and logically the zext can be seen as
// part of the load. Therefore we conservatively reuse the same debug location
// for the load and the zext.
I->setDebugLoc(LI->getDebugLoc());
++NumExtsMoved;
return true;
if (!AllSeenFirst && !UnhandledExts.empty())
for (auto VisitedSExt : UnhandledExts) {
if (RemovedInsts.count(VisitedSExt))
continue;
TypePromotionTransaction TPT(RemovedInsts);
SmallVector<Instruction *, 1> Exts;
SmallVector<Instruction *, 2> Chains;
Exts.push_back(VisitedSExt);
bool HasPromoted = tryToPromoteExts(TPT, Exts, Chains);
TPT.commit();
if (HasPromoted)
Promoted = true;
for (auto I : Chains) {
Value *HeadOfChain = I->getOperand(0);
// Mark this as handled.
SeenChainsForSExt[HeadOfChain] = nullptr;
ValToSExtendedUses[HeadOfChain].push_back(I);
}
}
return Promoted;
}
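A hypothetical sequence showing the deferral (simplified from @promoteTwoOne in the new test): when the chain feeding %s1 is promoted speculatively, its header %i has not been seen before, so the transaction is rolled back and the chain is recorded in SeenChainsForSExt; once %s2 extends the same header, both chains are promoted and committed.

  %a  = add nsw i32 %i, %j
  %s1 = sext i32 %a to i64       ; first chain from header %i: deferred
  ...
  %s2 = sext i32 %i to i64       ; same header seen again: both are promoted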
bool CodeGenPrepare::optimizeExtUses(Instruction *I) {
@@ -5802,7 +5986,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) {
TargetLowering::TypeExpandInteger) {
return SinkCast(CI);
} else {
bool MadeChange = moveExtToFormExtLoad(I);
bool MadeChange = optimizeExt(I);
return MadeChange | optimizeExtUses(I);
}
}

View File

@@ -118,7 +118,7 @@ EnableA53Fix835769("aarch64-fix-cortex-a53-835769", cl::Hidden,
static cl::opt<bool>
EnableAddressTypePromotion("aarch64-enable-type-promotion", cl::Hidden,
cl::desc("Enable the type promotion pass"),
cl::init(true));
cl::init(false));
static cl::opt<bool>
EnableGEPOpt("aarch64-enable-gep-opt", cl::Hidden,

View File

@@ -626,6 +626,38 @@ bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
return true;
}
/// See if \p I should be considered for address type promotion. We check if \p
/// I is a sext with the right type and used in memory accesses. If it is used
/// in a "complex" getelementptr, we allow it to be promoted without finding
/// other sext instructions that sign extended the same initial value. A
/// getelementptr is considered "complex" if it has more than 2 operands.
bool AArch64TTIImpl::shouldConsiderAddressTypePromotion(
const Instruction &I, bool &AllowPromotionWithoutCommonHeader) {
bool Considerable = false;
AllowPromotionWithoutCommonHeader = false;
if (!isa<SExtInst>(&I))
return false;
Type *ConsideredSExtType =
Type::getInt64Ty(I.getParent()->getParent()->getContext());
if (I.getType() != ConsideredSExtType)
return false;
// See if the sext is the one with the right type and used in at least one
// GetElementPtrInst.
for (const User *U : I.users()) {
if (const GetElementPtrInst *GEPInst = dyn_cast<GetElementPtrInst>(U)) {
Considerable = true;
// A getelementptr is considered "complex" if it has more than 2
// operands. We will promote a SExt used in such a complex GEP, as we
// expect some computations to be merged if they are done on 64 bits.
if (GEPInst->getNumOperands() > 2) {
AllowPromotionWithoutCommonHeader = true;
break;
}
}
}
return Considerable;
}
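For instance (a sketch; the struct type is hypothetical), the sext below feeds a getelementptr with three operands (the pointer plus two indices), so it is considered with AllowPromotionWithoutCommonHeader set to true:

  %s = sext i32 %i to i64
  %p = getelementptr inbounds %struct.pair, %struct.pair* %base, i64 %s, i32 1

A simple two-operand GEP such as "getelementptr i64, i64* %base, i64 %s" still makes the sext considerable, but only when another sext extends the same initial value.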
unsigned AArch64TTIImpl::getCacheLineSize() {
return ST->getCacheLineSize();
}

View File

@@ -121,6 +121,10 @@ public:
ArrayRef<unsigned> Indices, unsigned Alignment,
unsigned AddressSpace);
bool
shouldConsiderAddressTypePromotion(const Instruction &I,
bool &AllowPromotionWithoutCommonHeader);
unsigned getCacheLineSize();
unsigned getPrefetchDistance();

View File

@@ -0,0 +1,68 @@
; RUN: opt -codegenprepare < %s -S | FileCheck %s
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnu"
%struct.match_state = type { i64, i64 }
; %add is also promoted by forking an extra sext.
define void @promoteTwoOne(i32 %i, i32 %j, i64* %P1, i64* %P2) {
; CHECK-LABEL: @promoteTwoOne
; CHECK-LABEL: entry:
; CHECK: %[[SEXT1:.*]] = sext i32 %i to i64
; CHECK: %[[SEXT2:.*]] = sext i32 %j to i64
; CHECK: %add = add nsw i64 %[[SEXT1]], %[[SEXT2]]
entry:
%add = add nsw i32 %i, %j
%s = sext i32 %add to i64
%addr1 = getelementptr inbounds i64, i64* %P1, i64 %s
store i64 %s, i64* %addr1
%s2 = sext i32 %i to i64
%addr2 = getelementptr inbounds i64, i64* %P2, i64 %s2
store i64 %s2, i64* %addr2
ret void
}
; Both %add1 and %add2 are promoted by forking extra sexts.
define void @promoteTwoTwo(i32 %i, i32 %j, i32 %k, i64* %P1, i64* %P2) {
; CHECK-LABEL: @promoteTwoTwo
; CHECK-LABEL: entry:
; CHECK: %[[SEXT1:.*]] = sext i32 %j to i64
; CHECK: %[[SEXT2:.*]] = sext i32 %i to i64
; CHECK: %add1 = add nsw i64 %[[SEXT1]], %[[SEXT2]]
; CHECK: %[[SEXT3:.*]] = sext i32 %k to i64
; CHECK: %add2 = add nsw i64 %[[SEXT1]], %[[SEXT3]]
entry:
%add1 = add nsw i32 %j, %i
%s = sext i32 %add1 to i64
%addr1 = getelementptr inbounds i64, i64* %P1, i64 %s
store i64 %s, i64* %addr1
%add2 = add nsw i32 %j, %k
%s2 = sext i32 %add2 to i64
%addr2 = getelementptr inbounds i64, i64* %P2, i64 %s2
store i64 %s2, i64* %addr2
ret void
}
define i64 @promoteGEPSunk(i1 %cond, i64* %base, i32 %i) {
; CHECK-LABEL: @promoteGEPSunk
; CHECK-LABEL: entry:
; CHECK: %[[SEXT:.*]] = sext i32 %i to i64
; CHECK: %add = add nsw i64 %[[SEXT]], 1
; CHECK: %add2 = add nsw i64 %[[SEXT]], 2
entry:
%add = add nsw i32 %i, 1
%s = sext i32 %add to i64
%addr = getelementptr inbounds i64, i64* %base, i64 %s
%add2 = add nsw i32 %i, 2
%s2 = sext i32 %add2 to i64
%addr2 = getelementptr inbounds i64, i64* %base, i64 %s2
br i1 %cond, label %if.then, label %if.then2
if.then:
%v = load i64, i64* %addr
%v2 = load i64, i64* %addr2
%r = add i64 %v, %v2
ret i64 %r
if.then2:
ret i64 0
}

View File

@@ -10,14 +10,17 @@ define zeroext i8 @fullGtU(i32 %i1, i32 %i2) {
; CHECK: fullGtU
; CHECK: adrp [[PAGE:x[0-9]+]], _block@GOTPAGE
; CHECK: ldr [[ADDR:x[0-9]+]], {{\[}}[[PAGE]], _block@GOTPAGEOFF]
; CHECK: sxtw [[I1:x[0-9]+]], w0
; CHECK: sxtw [[I2:x[0-9]+]], w1
; CHECK-NEXT: ldr [[BLOCKBASE:x[0-9]+]], {{\[}}[[ADDR]]]
; CHECK-NEXT: ldrb [[BLOCKVAL1:w[0-9]+]], {{\[}}[[BLOCKBASE]], w0, sxtw]
; CHECK-NEXT: ldrb [[BLOCKVAL2:w[0-9]+]], {{\[}}[[BLOCKBASE]], w1, sxtw]
; CHECK-NEXT: ldrb [[BLOCKVAL1:w[0-9]+]], {{\[}}[[BLOCKBASE]], [[I1]]]
; CHECK-NEXT: ldrb [[BLOCKVAL2:w[0-9]+]], {{\[}}[[BLOCKBASE]], [[I2]]]
; CHECK-NEXT: cmp [[BLOCKVAL1]], [[BLOCKVAL2]]
; CHECK-NEXT: b.ne
; Next BB
; CHECK: add [[BLOCKBASE2:x[0-9]+]], [[BLOCKBASE]], w1, sxtw
; CHECK-NEXT: add [[BLOCKBASE1:x[0-9]+]], [[BLOCKBASE]], w0, sxtw
; CHECK: add [[BLOCKBASE2:x[0-9]+]], [[BLOCKBASE]], [[I2]]
; CHECK-NEXT: add [[BLOCKBASE1:x[0-9]+]], [[BLOCKBASE]], [[I1]]
; CHECK-NEXT: ldrb [[LOADEDVAL1:w[0-9]+]], {{\[}}[[BLOCKBASE1]], #1]
; CHECK-NEXT: ldrb [[LOADEDVAL2:w[0-9]+]], {{\[}}[[BLOCKBASE2]], #1]
; CHECK-NEXT: cmp [[LOADEDVAL1]], [[LOADEDVAL2]]