[CodeGenPrep] move aarch64-type-promotion to CGP

Summary: Move the aarch64-type-promotion pass within the existing type promotion framework in CGP. This change also support forking sexts when a new sext is required for promotion. Note that change is based on D27853 and I am submitting this out early to provide a better idea on D27853. Reviewers: jmolloy, mcrosier, javed.absar, qcolombet Reviewed By: qcolombet Subscribers: llvm-commits, aemerson, rengolin, mcrosier Differential Revision: https://reviews.llvm.org/D28680 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@299379 91177308-0d34-0410-b5e6-96231b3b80d8
2024-11-27 21:50:40 +00:00 · 2017-04-03 19:20:07 +00:00 · 2017-04-03 19:20:07 +00:00 · 78c95b332b
commit 78c95b332b
parent 445d3cfd6b
9 changed files with 374 additions and 58 deletions
--- a/include/llvm/Analysis/TargetTransformInfo.h
+++ b/include/llvm/Analysis/TargetTransformInfo.h
@ -526,6 +526,12 @@ public:
  /// \return The width of the largest scalar or vector register type.
  unsigned getRegisterBitWidth(bool Vector) const;

+  /// \return True if it should be considered for address type promotion.
+  /// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is
+  /// profitable without finding other extensions fed by the same input.
+  bool shouldConsiderAddressTypePromotion(
+      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;
+
  /// \return The size of a cache line in bytes.
  unsigned getCacheLineSize() const;

@ -800,6 +806,8 @@ public:
                            Type *Ty) = 0;
  virtual unsigned getNumberOfRegisters(bool Vector) = 0;
  virtual unsigned getRegisterBitWidth(bool Vector) = 0;
+  virtual bool shouldConsiderAddressTypePromotion(
+      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
  virtual unsigned getCacheLineSize() = 0;
  virtual unsigned getPrefetchDistance() = 0;
  virtual unsigned getMinPrefetchStride() = 0;
@ -1026,7 +1034,11 @@ public:
  unsigned getRegisterBitWidth(bool Vector) override {
    return Impl.getRegisterBitWidth(Vector);
  }
-
+  bool shouldConsiderAddressTypePromotion(
+      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
+    return Impl.shouldConsiderAddressTypePromotion(
+        I, AllowPromotionWithoutCommonHeader);
+  }
  unsigned getCacheLineSize() override {
    return Impl.getCacheLineSize();
  }
--- a/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/include/llvm/Analysis/TargetTransformInfoImpl.h
@ -303,6 +303,13 @@ public:

  unsigned getRegisterBitWidth(bool Vector) { return 32; }

+  bool
+  shouldConsiderAddressTypePromotion(const Instruction &I,
+                                     bool &AllowPromotionWithoutCommonHeader) {
+    AllowPromotionWithoutCommonHeader = false;
+    return false;
+  }
+
  unsigned getCacheLineSize() { return 0; }

  unsigned getPrefetchDistance() { return 0; }
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@ -269,6 +269,12 @@ unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const {
  return TTIImpl->getRegisterBitWidth(Vector);
 }

+bool TargetTransformInfo::shouldConsiderAddressTypePromotion(
+    const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
+  return TTIImpl->shouldConsiderAddressTypePromotion(
+      I, AllowPromotionWithoutCommonHeader);
+}
+
 unsigned TargetTransformInfo::getCacheLineSize() const {
  return TTIImpl->getCacheLineSize();
 }
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@ -138,10 +138,17 @@ static cl::opt<bool> ForceSplitStore(
    "force-split-store", cl::Hidden, cl::init(false),
    cl::desc("Force store splitting no matter what the target query says."));

+static cl::opt<bool>
+EnableTypePromotionMerge("cgp-type-promotion-merge", cl::Hidden,
+    cl::desc("Enable merging of redundant sexts when one is dominating"
+    " the other."), cl::init(true));
+
 namespace {
 typedef SmallPtrSet<Instruction *, 16> SetOfInstrs;
 typedef PointerIntPair<Type *, 1, bool> TypeIsSExt;
 typedef DenseMap<Instruction *, TypeIsSExt> InstrToOrigTy;
+typedef SmallVector<Instruction *, 16> SExts;
+typedef DenseMap<Value *, SExts> ValueToSExts;
 class TypePromotionTransaction;

  class CodeGenPrepare : public FunctionPass {
@ -170,6 +177,15 @@ class TypePromotionTransaction;
    /// promotion for the current function.
    InstrToOrigTy PromotedInsts;

+    /// Keep track of instructions removed during promotion.
+    SetOfInstrs RemovedInsts;
+
+    /// Keep track of sext chains based on their initial value.
+    DenseMap<Value *, Instruction *> SeenChainsForSExt;
+
+    /// Keep track of SExt promoted.
+    ValueToSExts ValToSExtendedUses;
+
    /// True if CFG is modified in any way.
    bool ModifiedDT;

@ -211,7 +227,7 @@ class TypePromotionTransaction;
                            Type *AccessTy, unsigned AS);
    bool optimizeInlineAsmInst(CallInst *CS);
    bool optimizeCallInst(CallInst *CI, bool& ModifiedDT);
-    bool moveExtToFormExtLoad(Instruction *&I);
+    bool optimizeExt(Instruction *&I);
    bool optimizeExtUses(Instruction *I);
    bool optimizeLoadExt(LoadInst *I);
    bool optimizeSelectInst(SelectInst *SI);
@ -226,6 +242,12 @@ class TypePromotionTransaction;
                          const SmallVectorImpl<Instruction *> &Exts,
                          SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
                          unsigned CreatedInstsCost = 0);
+    bool mergeSExts(Function &F);
+    bool performAddressTypePromotion(
+        Instruction *&Inst,
+        bool AllowPromotionWithoutCommonHeader,
+        bool HasPromoted, TypePromotionTransaction &TPT,
+        SmallVectorImpl<Instruction *> &SpeculativelyMovedExts);
    bool splitBranchCondition(Function &F);
    bool simplifyOffsetableRelocate(Instruction &I);
    bool splitIndirectCriticalEdges(Function &F);
@ -310,6 +332,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
  bool MadeChange = true;
  while (MadeChange) {
    MadeChange = false;
+    SeenChainsForSExt.clear();
+    ValToSExtendedUses.clear();
+    RemovedInsts.clear();
    for (Function::iterator I = F.begin(); I != F.end(); ) {
      BasicBlock *BB = &*I++;
      bool ModifiedDTOnIteration = false;
@ -319,6 +344,13 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
      if (ModifiedDTOnIteration)
        break;
    }
+    if (EnableTypePromotionMerge && !ValToSExtendedUses.empty())
+      MadeChange |= mergeSExts(F);
+
+    // Really free removed instructions during promotion.
+    for (Instruction *I : RemovedInsts)
+      delete I;
+
    EverMadeChange |= MadeChange;
  }

@ -2793,25 +2825,30 @@ class TypePromotionTransaction {
    OperandsHider Hider;
    /// Keep track of the uses replaced, if any.
    UsesReplacer *Replacer;
+    /// Keep track of instructions removed.
+    SetOfInstrs &RemovedInsts;

  public:
    /// \brief Remove all reference of \p Inst and optinally replace all its
    /// uses with New.
+    /// \p RemovedInsts Keep track of the instructions removed by this Action.
    /// \pre If !Inst->use_empty(), then New != nullptr
-    InstructionRemover(Instruction *Inst, Value *New = nullptr)
+    InstructionRemover(Instruction *Inst, SetOfInstrs &RemovedInsts,
+                       Value *New = nullptr)
        : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
-          Replacer(nullptr) {
+          Replacer(nullptr), RemovedInsts(RemovedInsts) {
      if (New)
        Replacer = new UsesReplacer(Inst, New);
      DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
+      RemovedInsts.insert(Inst);
+      /// The instructions removed here will be freed after completing
+      /// optimizeBlock() for all blocks as we need to keep track of the
+      /// removed instructions during promotion.
      Inst->removeFromParent();
    }

    ~InstructionRemover() override { delete Replacer; }

-    /// \brief Really remove the instruction.
-    void commit() override { delete Inst; }
-
    /// \brief Resurrect the instruction and reassign it to the proper uses if
    /// new value was provided when build this action.
    void undo() override {
@ -2820,6 +2857,7 @@ class TypePromotionTransaction {
      if (Replacer)
        Replacer->undo();
      Hider.undo();
+      RemovedInsts.erase(Inst);
    }
  };

@ -2828,6 +2866,10 @@ public:
  /// The restoration point is a pointer to an action instead of an iterator
  /// because the iterator may be invalidated but not the pointer.
  typedef const TypePromotionAction *ConstRestorationPt;
+
+  TypePromotionTransaction(SetOfInstrs &RemovedInsts)
+      : RemovedInsts(RemovedInsts) {}
+
  /// Advocate every changes made in that transaction.
  void commit();
  /// Undo all the changes made after the given point.
@ -2859,6 +2901,7 @@ private:
  /// The ordered list of actions made so far.
  SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions;
  typedef SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator CommitPt;
+  SetOfInstrs &RemovedInsts;
 };

 void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
@ -2870,7 +2913,8 @@ void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
 void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
                                                Value *NewVal) {
  Actions.push_back(
-      make_unique<TypePromotionTransaction::InstructionRemover>(Inst, NewVal));
+      make_unique<TypePromotionTransaction::InstructionRemover>(Inst,
+                                                         RemovedInsts, NewVal));
 }

 void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
@ -4097,7 +4141,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
  bool IsNumUsesConsensusValid = false;
  SmallVector<Instruction*, 16> AddrModeInsts;
  ExtAddrMode AddrMode;
-  TypePromotionTransaction TPT;
+  TypePromotionTransaction TPT(RemovedInsts);
  TypePromotionTransaction::ConstRestorationPt LastKnownGood =
      TPT.getRestorationPoint();
  while (!worklist.empty()) {
@ -4492,20 +4536,6 @@ static bool hasSameExtUse(Value *Val, const TargetLowering &TLI) {
 /// them.
 ///
 /// \return true if some promotion happened, false otherwise.
-///
-/// Example:
-/// \code
-/// %ld = load i32* %addr
-/// %add = add nuw i32 %ld, 4
-/// %zext = zext i32 %add to i64
-/// \endcode
-/// =>
-/// \code
-/// %ld = load i32* %addr
-/// %zext = zext i32 %ld to i64
-/// %add = add nuw i64 %zext, 4
-/// \endcode
-/// Thanks to the promotion, we can match zext(load i32*) to i64.
 bool CodeGenPrepare::tryToPromoteExts(
    TypePromotionTransaction &TPT, const SmallVectorImpl<Instruction *> &Exts,
    SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
@ -4601,6 +4631,46 @@ bool CodeGenPrepare::tryToPromoteExts(
  return Promoted;
 }

+/// Merging redundant sexts when one is dominating the other.
+bool CodeGenPrepare::mergeSExts(Function &F) {
+  DominatorTree DT(F);
+  bool Changed = false;
+  for (auto &Entry : ValToSExtendedUses) {
+    SExts &Insts = Entry.second;
+    SExts CurPts;
+    for (Instruction *Inst : Insts) {
+      if (RemovedInsts.count(Inst) || !isa<SExtInst>(Inst) ||
+          Inst->getOperand(0) != Entry.first)
+        continue;
+      bool inserted = false;
+      for (auto &Pt : CurPts) {
+        if (DT.dominates(Inst, Pt)) {
+          Pt->replaceAllUsesWith(Inst);
+          RemovedInsts.insert(Pt);
+          Pt->removeFromParent();
+          Pt = Inst;
+          inserted = true;
+          Changed = true;
+          break;
+        }
+        if (!DT.dominates(Pt, Inst))
+          // Give up if we need to merge in a common dominator as the
+          // expermients show it is not profitable.
+          continue;
+        Inst->replaceAllUsesWith(Pt);
+        RemovedInsts.insert(Inst);
+        Inst->removeFromParent();
+        inserted = true;
+        Changed = true;
+        break;
+      }
+      if (!inserted)
+        CurPts.push_back(Inst);
+    }
+  }
+  return Changed;
+}
+
 /// Return true, if an ext(load) can be formed from an extension in
 /// \p MovedExts.
 bool CodeGenPrepare::canFormExtLd(
@ -4646,49 +4716,163 @@ bool CodeGenPrepare::canFormExtLd(
 /// Move a zext or sext fed by a load into the same basic block as the load,
 /// unless conditions are unfavorable. This allows SelectionDAG to fold the
 /// extend into the load.
-/// \p I[in/out] the extension may be modified during the process if some
-/// promotions apply.
 ///
-bool CodeGenPrepare::moveExtToFormExtLoad(Instruction *&I) {
-  // ExtLoad formation infrastructure requires TLI to be effective.
+/// E.g.,
+/// \code
+/// %ld = load i32* %addr
+/// %add = add nuw i32 %ld, 4
+/// %zext = zext i32 %add to i64
+// \endcode
+/// =>
+/// \code
+/// %ld = load i32* %addr
+/// %zext = zext i32 %ld to i64
+/// %add = add nuw i64 %zext, 4
+/// \encode
+/// Note that the promotion in %add to i64 is done in tryToPromoteExts(), which
+/// allow us to match zext(load i32*) to i64.
+///
+/// Also, try to promote the computations used to obtain a sign extended
+/// value used into memory accesses.
+/// E.g.,
+/// \code
+/// a = add nsw i32 b, 3
+/// d = sext i32 a to i64
+/// e = getelementptr ..., i64 d
+/// \endcode
+/// =>
+/// \code
+/// f = sext i32 b to i64
+/// a = add nsw i64 f, 3
+/// e = getelementptr ..., i64 a
+/// \endcode
+///
+/// \p Inst[in/out] the extension may be modified during the process if some
+/// promotions apply.
+bool CodeGenPrepare::optimizeExt(Instruction *&Inst) {
+  // ExtLoad formation and address type promotion infrastructure requires TLI to
+  // be effective.
  if (!TLI)
    return false;

-  // Try to promote a chain of computation if it allows to form
-  // an extended load.
-  TypePromotionTransaction TPT;
+  bool AllowPromotionWithoutCommonHeader = false;
+  /// See if it is an interesting sext operations for the address type
+  /// promotion before trying to promote it, e.g., the ones with the right
+  /// type and used in memory accesses.
+  bool ATPConsiderable = TTI->shouldConsiderAddressTypePromotion(
+      *Inst, AllowPromotionWithoutCommonHeader);
+  TypePromotionTransaction TPT(RemovedInsts);
  TypePromotionTransaction::ConstRestorationPt LastKnownGood =
      TPT.getRestorationPoint();
  SmallVector<Instruction *, 1> Exts;
-  SmallVector<Instruction *, 2> LastMovedExts;
-  Exts.push_back(I);
+  SmallVector<Instruction *, 2> SpeculativelyMovedExts;
+  Exts.push_back(Inst);

-  bool HasPromoted = tryToPromoteExts(TPT, Exts, LastMovedExts);
+  bool HasPromoted = tryToPromoteExts(TPT, Exts, SpeculativelyMovedExts);

  // Look for a load being extended.
  LoadInst *LI = nullptr;
-  Instruction *OldExt = I;
-  if (!canFormExtLd(LastMovedExts, LI, I, HasPromoted)) {
-    I = OldExt;
-    TPT.rollback(LastKnownGood);
+  Instruction *ExtFedByLoad;
+
+  // Try to promote a chain of computation if it allows to form an extended
+  // load.
+  if (canFormExtLd(SpeculativelyMovedExts, LI, ExtFedByLoad, HasPromoted)) {
+    assert(LI && ExtFedByLoad && "Expect a valid load and extension");
+    TPT.commit();
+    // Move the extend into the same block as the load
+    ExtFedByLoad->removeFromParent();
+    ExtFedByLoad->insertAfter(LI);
+    // CGP does not check if the zext would be speculatively executed when moved
+    // to the same basic block as the load. Preserving its original location
+    // would pessimize the debugging experience, as well as negatively impact
+    // the quality of sample pgo. We don't want to use "line 0" as that has a
+    // size cost in the line-table section and logically the zext can be seen as
+    // part of the load. Therefore we conservatively reuse the same debug
+    // location for the load and the zext.
+    ExtFedByLoad->setDebugLoc(LI->getDebugLoc());
+    ++NumExtsMoved;
+    Inst = ExtFedByLoad;
+    return true;
+  }
+
+  // Continue promoting SExts if known as considerable depending on targets.
+  if (ATPConsiderable &&
+      performAddressTypePromotion(Inst, AllowPromotionWithoutCommonHeader,
+                                  HasPromoted, TPT, SpeculativelyMovedExts))
+    return true;
+
+  TPT.rollback(LastKnownGood);
+  return false;
+}
+
+// Perform address type promotion if doing so is profitable.
+// If AllowPromotionWithoutCommonHeader == false, we should find other sext
+// instructions that sign extended the same initial value. However, if
+// AllowPromotionWithoutCommonHeader == true, we expect promoting the
+// extension is just profitable.
+bool CodeGenPrepare::performAddressTypePromotion(
+    Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
+    bool HasPromoted, TypePromotionTransaction &TPT,
+    SmallVectorImpl<Instruction *> &SpeculativelyMovedExts) {
+  bool Promoted = false;
+  SmallPtrSet<Instruction *, 1> UnhandledExts;
+  bool AllSeenFirst = true;
+  for (auto I : SpeculativelyMovedExts) {
+    Value *HeadOfChain = I->getOperand(0);
+    DenseMap<Value *, Instruction *>::iterator AlreadySeen =
+        SeenChainsForSExt.find(HeadOfChain);
+    // If there is an unhandled SExt which has the same header, try to promote
+    // it as well.
+    if (AlreadySeen != SeenChainsForSExt.end()) {
+      if (AlreadySeen->second != nullptr)
+        UnhandledExts.insert(AlreadySeen->second);
+      AllSeenFirst = false;
+    }
+  }
+
+  if (!AllSeenFirst || (AllowPromotionWithoutCommonHeader &&
+                        SpeculativelyMovedExts.size() == 1)) {
+    TPT.commit();
+    if (HasPromoted)
+      Promoted = true;
+    for (auto I : SpeculativelyMovedExts) {
+      Value *HeadOfChain = I->getOperand(0);
+      SeenChainsForSExt[HeadOfChain] = nullptr;
+      ValToSExtendedUses[HeadOfChain].push_back(I);
+    }
+    // Update Inst as promotion happen.
+    Inst = SpeculativelyMovedExts.pop_back_val();
+  } else {
+    // This is the first chain visited from the header, keep the current chain
+    // as unhandled. Defer to promote this until we encounter another SExt
+    // chain derived from the same header.
+    for (auto I : SpeculativelyMovedExts) {
+      Value *HeadOfChain = I->getOperand(0);
+      SeenChainsForSExt[HeadOfChain] = Inst;
+    }
    return false;
  }

-  // Move the extend into the same block as the load, so that SelectionDAG
-  // can fold it.
-  TPT.commit();
-  I->removeFromParent();
-  I->insertAfter(LI);
-  // CGP does not check if the zext would be speculatively executed when moved
-  // to the same basic block as the load. Preserving its original location would
-  // pessimize the debugging experience, as well as negatively impact the
-  // quality of sample pgo. We don't want to use "line 0" as that has a
-  // size cost in the line-table section and logically the zext can be seen as
-  // part of the load. Therefore we conservatively reuse the same debug location
-  // for the load and the zext.
-  I->setDebugLoc(LI->getDebugLoc());
-  ++NumExtsMoved;
-  return true;
+  if (!AllSeenFirst && !UnhandledExts.empty())
+    for (auto VisitedSExt : UnhandledExts) {
+      if (RemovedInsts.count(VisitedSExt))
+        continue;
+      TypePromotionTransaction TPT(RemovedInsts);
+      SmallVector<Instruction *, 1> Exts;
+      SmallVector<Instruction *, 2> Chains;
+      Exts.push_back(VisitedSExt);
+      bool HasPromoted = tryToPromoteExts(TPT, Exts, Chains);
+      TPT.commit();
+      if (HasPromoted)
+        Promoted = true;
+      for (auto I : Chains) {
+        Value *HeadOfChain = I->getOperand(0);
+        // Mark this as handled.
+        SeenChainsForSExt[HeadOfChain] = nullptr;
+        ValToSExtendedUses[HeadOfChain].push_back(I);
+      }
+    }
+  return Promoted;
 }

 bool CodeGenPrepare::optimizeExtUses(Instruction *I) {
@ -5802,7 +5986,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) {
              TargetLowering::TypeExpandInteger) {
        return SinkCast(CI);
      } else {
-        bool MadeChange = moveExtToFormExtLoad(I);
+        bool MadeChange = optimizeExt(I);
        return MadeChange | optimizeExtUses(I);
      }
    }
--- a/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/lib/Target/AArch64/AArch64TargetMachine.cpp
@ -118,7 +118,7 @@ EnableA53Fix835769("aarch64-fix-cortex-a53-835769", cl::Hidden,
 static cl::opt<bool>
    EnableAddressTypePromotion("aarch64-enable-type-promotion", cl::Hidden,
                               cl::desc("Enable the type promotion pass"),
-                               cl::init(true));
+                               cl::init(false));

 static cl::opt<bool>
    EnableGEPOpt("aarch64-enable-gep-opt", cl::Hidden,
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@ -626,6 +626,38 @@ bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
  return true;
 }

+/// See if \p I should be considered for address type promotion. We check if \p
+/// I is a sext with right type and used in memory accesses. If it used in a
+/// "complex" getelementptr, we allow it to be promoted without finding other
+/// sext instructions that sign extended the same initial value. A getelementptr
+/// is considered as "complex" if it has more than 2 operands.
+bool AArch64TTIImpl::shouldConsiderAddressTypePromotion(
+    const Instruction &I, bool &AllowPromotionWithoutCommonHeader) {
+  bool Considerable = false;
+  AllowPromotionWithoutCommonHeader = false;
+  if (!isa<SExtInst>(&I))
+    return false;
+  Type *ConsideredSExtType =
+      Type::getInt64Ty(I.getParent()->getParent()->getContext());
+  if (I.getType() != ConsideredSExtType)
+    return false;
+  // See if the sext is the one with the right type and used in at least one
+  // GetElementPtrInst.
+  for (const User *U : I.users()) {
+    if (const GetElementPtrInst *GEPInst = dyn_cast<GetElementPtrInst>(U)) {
+      Considerable = true;
+      // A getelementptr is considered as "complex" if it has more than 2
+      // operands. We will promote a SExt used in such complex GEP as we
+      // expect some computation to be merged if they are done on 64 bits.
+      if (GEPInst->getNumOperands() > 2) {
+        AllowPromotionWithoutCommonHeader = true;
+        break;
+      }
+    }
+  }
+  return Considerable;
+}
+
 unsigned AArch64TTIImpl::getCacheLineSize() {
  return ST->getCacheLineSize();
 }
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.h
@ -121,6 +121,10 @@ public:
                                 ArrayRef<unsigned> Indices, unsigned Alignment,
                                 unsigned AddressSpace);

+  bool
+  shouldConsiderAddressTypePromotion(const Instruction &I,
+                                     bool &AllowPromotionWithoutCommonHeader);
+
  unsigned getCacheLineSize();

  unsigned getPrefetchDistance();
--- a/test/CodeGen/AArch64/aarch64-codegen-prepare-atp.ll
+++ b/test/CodeGen/AArch64/aarch64-codegen-prepare-atp.ll
@ -0,0 +1,68 @@
+; RUN: opt -codegenprepare < %s -S | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+%struct.match_state = type { i64, i64  }
+
+; %add is also promoted by forking an extra sext.
+define void @promoteTwoOne(i32 %i, i32 %j, i64* %P1, i64* %P2 ) {
+; CHECK-LABEL: @promoteTwoOne
+; CHECK-LABEL: entry:
+; CHECK: %[[SEXT1:.*]] = sext i32 %i to i64
+; CHECK: %[[SEXT2:.*]] = sext i32 %j to i64
+; CHECK: %add = add nsw i64 %[[SEXT1]], %[[SEXT2]]
+entry:
+  %add = add nsw i32 %i, %j
+  %s = sext i32 %add to i64
+  %addr1 = getelementptr inbounds i64, i64* %P1, i64 %s
+  store i64 %s, i64* %addr1
+  %s2 = sext i32 %i to i64
+  %addr2 = getelementptr inbounds i64, i64* %P2, i64 %s2
+  store i64 %s2, i64* %addr2
+  ret void
+}
+
+; Both %add1 and %add2 are promoted by forking extra sexts.
+define void @promoteTwoTwo(i32 %i, i32 %j, i32 %k, i64* %P1, i64* %P2) {
+; CHECK-LABEL: @promoteTwoTwo
+; CHECK-LABEL:entry:
+; CHECK: %[[SEXT1:.*]] = sext i32 %j to i64
+; CHECK: %[[SEXT2:.*]]  = sext i32 %i to i64
+; CHECK: %add1 = add nsw i64 %[[SEXT1]], %[[SEXT2]]
+; CHECK: %[[SEXT3:.*]] = sext i32 %k to i64
+; CHECK: %add2 = add nsw i64 %[[SEXT1]], %[[SEXT3]]
+entry:
+  %add1 = add nsw i32 %j, %i
+  %s = sext i32 %add1 to i64
+  %addr1 = getelementptr inbounds i64, i64* %P1, i64 %s
+  store i64 %s, i64* %addr1
+  %add2 = add nsw i32 %j, %k
+  %s2 = sext i32 %add2 to i64
+  %addr2 = getelementptr inbounds i64, i64* %P2, i64 %s2
+  store i64 %s2, i64* %addr2
+  ret void
+}
+
+define i64 @promoteGEPSunk(i1 %cond, i64* %base, i32 %i) {
+; CHECK-LABEL: @promoteGEPSunk
+; CHECK-LABEL: entry:
+; CHECK:  %[[SEXT:.*]] = sext i32 %i to i64
+; CHECK:  %add = add nsw i64 %[[SEXT]], 1
+; CHECK:  %add2 = add nsw i64 %[[SEXT]], 2
+entry:
+  %add = add nsw i32 %i, 1
+  %s = sext i32 %add to i64
+  %addr = getelementptr inbounds i64, i64* %base, i64 %s
+  %add2 = add nsw i32 %i,  2
+  %s2 = sext i32 %add2 to i64
+  %addr2 = getelementptr inbounds i64, i64* %base, i64 %s2
+  br i1 %cond, label %if.then, label %if.then2
+if.then:
+  %v = load i64, i64* %addr
+  %v2 = load i64, i64* %addr2
+  %r = add i64 %v, %v2
+  ret i64 %r
+if.then2:
+  ret i64 0;
+}
--- a/test/CodeGen/AArch64/arm64-addr-type-promotion.ll
+++ b/test/CodeGen/AArch64/arm64-addr-type-promotion.ll
@ -10,14 +10,17 @@ define zeroext i8 @fullGtU(i32 %i1, i32 %i2) {
 ; CHECK: fullGtU
 ; CHECK: adrp [[PAGE:x[0-9]+]], _block@GOTPAGE
 ; CHECK: ldr [[ADDR:x[0-9]+]], {{\[}}[[PAGE]], _block@GOTPAGEOFF]
+; CHECK: sxtw [[I1:x[0-9]+]], w0
+; CHECK: sxtw [[I2:x[0-9]+]], w1
 ; CHECK-NEXT: ldr [[BLOCKBASE:x[0-9]+]], {{\[}}[[ADDR]]]
-; CHECK-NEXT: ldrb [[BLOCKVAL1:w[0-9]+]], {{\[}}[[BLOCKBASE]],  w0, sxtw]
-; CHECK-NEXT: ldrb [[BLOCKVAL2:w[0-9]+]], {{\[}}[[BLOCKBASE]], w1, sxtw]
+; CHECK-NEXT: ldrb [[BLOCKVAL1:w[0-9]+]], {{\[}}[[BLOCKBASE]], [[I1]]]
+; CHECK-NEXT: ldrb [[BLOCKVAL2:w[0-9]+]], {{\[}}[[BLOCKBASE]], [[I2]]]
+
 ; CHECK-NEXT: cmp [[BLOCKVAL1]], [[BLOCKVAL2]]
 ; CHECK-NEXT: b.ne
 ; Next BB
-; CHECK: add [[BLOCKBASE2:x[0-9]+]], [[BLOCKBASE]], w1, sxtw
-; CHECK-NEXT: add [[BLOCKBASE1:x[0-9]+]], [[BLOCKBASE]], w0, sxtw
+; CHECK: add [[BLOCKBASE2:x[0-9]+]], [[BLOCKBASE]], [[I2]]
+; CHECK-NEXT: add [[BLOCKBASE1:x[0-9]+]], [[BLOCKBASE]], [[I1]]
 ; CHECK-NEXT: ldrb [[LOADEDVAL1:w[0-9]+]], {{\[}}[[BLOCKBASE1]], #1]
 ; CHECK-NEXT: ldrb [[LOADEDVAL2:w[0-9]+]], {{\[}}[[BLOCKBASE2]], #1]
 ; CHECK-NEXT: cmp [[LOADEDVAL1]], [[LOADEDVAL2]]