Reapply "[llvm] Native size estimator for training -Oz inliner"

This reverts commit 9908a3b9f5. The fix was to exclude the content of TFUtils.h (automatically included in the LLVM_Analysis module, when LLVM_ENABLE_MODULES is enabled). Differential Revision: https://reviews.llvm.org/D82817
2024-12-20 14:28:47 +00:00 · 2020-07-13 14:12:32 -07:00 · 2020-07-13 14:12:32 -07:00 · caf395ee8c
commit caf395ee8c
parent 66550c36f4
14 changed files with 11466 additions and 10 deletions
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@ -981,6 +981,18 @@ if (NOT TENSORFLOW_AOT_PATH STREQUAL "")
    ${CMAKE_ARCHIVE_OUTPUT_DIRECTORY}/tf_runtime)
 endif()

+# Optional dependency on the full TensorFlow C API.
+# Similar to the above Tensorflow dependency, please refer to the same script.
+# In this case, the latest C API library is available for download from
+# https://www.tensorflow.org/install/lang_c
+set(TENSORFLOW_C_LIB_PATH "" CACHE PATH "Path to TensorFlow C library install")
+
+# Only probe for the library when a path was explicitly provided. With an
+# empty path the search would fall back to the default system locations, and
+# "/include" would be added to the include path below.
+if (NOT TENSORFLOW_C_LIB_PATH STREQUAL "")
+  find_library(tensorflow_c_api tensorflow
+    PATHS ${TENSORFLOW_C_LIB_PATH}/lib
+    NO_DEFAULT_PATH)
+  if (tensorflow_c_api)
+    set(LLVM_HAVE_TF_API "ON" CACHE BOOL "Full Tensorflow API available")
+    add_definitions("-DLLVM_HAVE_TF_API")
+    include_directories(${TENSORFLOW_C_LIB_PATH}/include)
+  else()
+    message(WARNING
+      "TENSORFLOW_C_LIB_PATH is set, but no tensorflow library was found in "
+      "${TENSORFLOW_C_LIB_PATH}/lib; LLVM_HAVE_TF_API will not be enabled.")
+  endif()
+endif()
+
 # Put this before tblgen. Else we have a circular dependence.
 add_subdirectory(lib/Demangle)
 add_subdirectory(lib/Support)
--- a/llvm/include/llvm/Analysis/InlineSizeEstimatorAnalysis.h
+++ b/llvm/include/llvm/Analysis/InlineSizeEstimatorAnalysis.h
@ -0,0 +1,35 @@
+//===- InlineSizeEstimatorAnalysis.h - ML size estimator --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+
+#ifndef LLVM_ANALYSIS_INLINESIZEESTIMATORANALYSIS_H
+#define LLVM_ANALYSIS_INLINESIZEESTIMATORANALYSIS_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+class Function;
+
+// Only forward-declared here, so this header itself pulls in no TensorFlow
+// headers. The full definition lives in llvm/Analysis/Utils/TFUtils.h when
+// LLVM_HAVE_TF_API is defined.
+class TFModelEvaluator;
+
+/// Function analysis that estimates the native size of a function by
+/// evaluating a TensorFlow saved model over features extracted from its IR.
+/// A model is only used when LLVM is built with LLVM_HAVE_TF_API and a model
+/// path was requested (see isEvaluatorRequested()); otherwise run() returns
+/// None.
+class InlineSizeEstimatorAnalysis
+    : public AnalysisInfoMixin<InlineSizeEstimatorAnalysis> {
+public:
+  /// Creates the analysis, loading the model if one was requested. If the
+  /// model cannot be loaded, the analysis is left without an evaluator.
+  InlineSizeEstimatorAnalysis();
+  /// Transfers the evaluator (if any) from the moved-from analysis.
+  InlineSizeEstimatorAnalysis(InlineSizeEstimatorAnalysis &&);
+  // Declared here and defined in the .cpp file - presumably because
+  // TFModelEvaluator is an incomplete type in this header.
+  ~InlineSizeEstimatorAnalysis();
+
+  static AnalysisKey Key;
+  /// The estimated size (negative model outputs are clamped to 0), or None
+  /// when no evaluator is available or the evaluation failed.
+  using Result = Optional<size_t>;
+  /// Computes the size estimate for \p F.
+  Result run(const Function &F, FunctionAnalysisManager &FAM);
+  /// Returns true if a model path was provided on the command line. Always
+  /// false when built without LLVM_HAVE_TF_API.
+  static bool isEvaluatorRequested();
+
+private:
+  /// The model evaluator; null when no usable model is available.
+  std::unique_ptr<TFModelEvaluator> Evaluator;
+};
+} // namespace llvm
+#endif // LLVM_ANALYSIS_INLINESIZEESTIMATORANALYSIS_H
--- a/llvm/include/llvm/Analysis/Utils/TFUtils.h
+++ b/llvm/include/llvm/Analysis/Utils/TFUtils.h
@ -0,0 +1,138 @@
+//===- TFUtils.h - utilities for tensorflow C API ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+#ifndef LLVM_ANALYSIS_UTILS_TFUTILS_H
+#define LLVM_ANALYSIS_UTILS_TFUTILS_H
+
+#ifdef LLVM_HAVE_TF_API
+#include "tensorflow/c/c_api.h"
+#include "llvm/IR/LLVMContext.h"
+
+#include <memory>
+#include <vector>
+
+namespace llvm {
+
+/// Load a SavedModel, find the given inputs and outputs, and setup storage
+/// for input tensors. The user is responsible for correctly dimensioning the
+/// input tensors and setting their values before calling evaluate().
+/// To initialize:
+/// - construct the object
+/// - initialize the input tensors using initInput. Indices must correspond to
+///   indices in the InputNames used at construction.
+/// To use:
+/// - set input values by using getInput to get each input tensor, and then
+///   setting internal scalars, for all dimensions (tensors are row-major:
+///   https://github.com/tensorflow/tensorflow/blob/r1.5/tensorflow/c/c_api.h#L205)
+/// - call evaluate. The input tensors' values are not consumed after this, and
+///   may still be read.
+/// - read the outputs from the returned EvaluationResult (getTensorValue).
+///   The EvaluationResult owns the output tensors and deletes them in its
+///   destructor, so callers must not call TF_DeleteTensor on them.
+class TFModelEvaluator final {
+public:
+  /// The result of a model evaluation. Handles the lifetime of the output
+  /// TF_Tensor objects, which means that their values need to be used before
+  /// the EvaluationResult's dtor is called.
+  class EvaluationResult {
+  public:
+    /// Deletes every output tensor still held by this result.
+    ~EvaluationResult() {
+      for (auto *P : Output)
+        if (P)
+          TF_DeleteTensor(P);
+    }
+
+    EvaluationResult(const EvaluationResult &) = delete;
+    /// Takes over the output tensors of \p Other. \p Other is left with no
+    /// tensors, so its destructor deletes nothing.
+    EvaluationResult(EvaluationResult &&Other)
+        : OutputSize(Other.OutputSize), Output(std::move(Other.Output)) {
+      Other.Output.clear();
+    }
+
+    /// Get a pointer to the first element of the tensor at Index. The caller
+    /// chooses T; no check is made that it matches the tensor's data type.
+    template <typename T> T *getTensorValue(size_t Index) {
+      return static_cast<T *>(TF_TensorData(Output[Index]));
+    }
+
+  private:
+    friend class TFModelEvaluator;
+    /// Creates a result with OutputSize null tensor slots, to be populated by
+    /// TFModelEvaluator::evaluate().
+    EvaluationResult(size_t OutputSize)
+        : OutputSize(OutputSize), Output(OutputSize) {}
+
+    const size_t OutputSize;
+    std::vector<TF_Tensor *> Output;
+  };
+
+  using TFGraphPtr = std::unique_ptr<TF_Graph, decltype(&TF_DeleteGraph)>;
+  using TFSessionOptionsPtr =
+      std::unique_ptr<TF_SessionOptions, decltype(&TF_DeleteSessionOptions)>;
+  using TFStatusPtr = std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)>;
+
+  /// Loads the SavedModel at SavedModelPath using the given Tags, and binds
+  /// each name in InputNames and OutputNames to a node of the loaded graph.
+  /// On any failure the object is left invalid (see isValid()).
+  TFModelEvaluator(StringRef SavedModelPath,
+                   const std::vector<std::string> &InputNames,
+                   const std::vector<std::string> &OutputNames,
+                   const char *Tags = "serve");
+  ~TFModelEvaluator();
+  TFModelEvaluator(const TFModelEvaluator &) = delete;
+  TFModelEvaluator(TFModelEvaluator &&) = delete;
+
+  /// Evaluate the model, assuming it is valid. Returns None if the evaluation
+  /// fails or the model is invalid, or an EvaluationResult otherwise. The
+  /// inputs are assumed to have been already provided via getInput(). When
+  /// the evaluation fails, it also marks the object invalid. The returned
+  /// EvaluationResult holds one output tensor per entry in OutputNames,
+  /// matching by index, and is responsible for deallocating those tensors.
+  Optional<EvaluationResult> evaluate();
+
+  /// Provides access to the input vector. It has one slot per entry in
+  /// InputNames; each tensor must be allocated with initInput() before its
+  /// values are set.
+  std::vector<TF_Tensor *> &getInput() { return Input; }
+
+  /// Returns true if the tensorflow model was loaded successfully, false
+  /// otherwise.
+  bool isValid() const { return !!Session; }
+
+  /// Initialize the input at Index as a tensor of the given type and
+  /// dimensions. The tensor's storage is zero-filled.
+  void initInput(int Index, TF_DataType Type,
+                 const std::vector<int64_t> &Dimensions);
+
+private:
+  /// The objects necessary for carrying out an evaluation of the SavedModel.
+  /// They are expensive to set up, and we maintain them across all the
+  /// evaluations of the model.
+  TF_Session *Session = nullptr;
+  TFGraphPtr Graph;
+  TFSessionOptionsPtr Options;
+
+  /// The specification of the input nodes.
+  std::vector<TF_Output> InputFeed;
+
+  /// The input tensors. They must match by index of the corresponding InputFeed
+  /// value. We set up the tensors once and just mutate their scalars before
+  /// each evaluation. The input tensors keep their value after an evaluation.
+  std::vector<TF_Tensor *> Input;
+
+  /// The specification of the output nodes. When evaluating, the tensors in the
+  /// output tensor vector must match by index the corresponding element in the
+  /// OutputFeed.
+  std::vector<TF_Output> OutputFeed;
+
+  /// Reusable utility for deleting the session.
+  void deleteSession();
+
+  /// Reusable utility for ensuring we can bind the requested Name to a node in
+  /// the SavedModel Graph.
+  bool checkReportAndReset(const TF_Output &Output, StringRef Name);
+};
+} // namespace llvm
+
+#endif // LLVM_HAVE_TF_API
+#endif // LLVM_ANALYSIS_UTILS_TFUTILS_H
--- a/llvm/lib/Analysis/CMakeLists.txt
+++ b/llvm/lib/Analysis/CMakeLists.txt
@ -1,17 +1,35 @@
 set(CommonMLSources MLInlineAdvisor.cpp)
 set(ReleaseModeMLSources ReleaseModeModelRunner.cpp)
+set(DevelopmentModeMLSources TFUtils.cpp)

-if (DEFINED LLVM_HAVE_TF_AOT)
-  include(TensorFlowCompile)
-  tfcompile(models/inliner serve action InlinerSizeModel llvm::InlinerSizeModel)
-  list(APPEND ReleaseModeMLSources
-    $<TARGET_OBJECTS:tf_xla_runtime_objects>
-    ${GENERATED_OBJS}
-  )
-  set(MLPolicySources ${CommonMLSources} ${ReleaseModeMLSources})
+if (DEFINED LLVM_HAVE_TF_AOT OR DEFINED LLVM_HAVE_TF_API)
+  set(MLPolicySources ${CommonMLSources})
+  if (DEFINED LLVM_HAVE_TF_AOT)
+    include(TensorFlowCompile)
+    tfcompile(models/inliner serve action InlinerSizeModel llvm::InlinerSizeModel)
+    list(APPEND ReleaseModeMLSources
+      $<TARGET_OBJECTS:tf_xla_runtime_objects>
+      ${GENERATED_OBJS}
+    )
+    LIST(APPEND MLPolicySources ${ReleaseModeMLSources})
+  else()
+    LIST(APPEND LLVM_OPTIONAL_SOURCES ${ReleaseModeMLSources})
+  endif()
+
+  if (DEFINED LLVM_HAVE_TF_API)
+    LIST(APPEND MLPolicySources ${DevelopmentModeMLSources})
+    LIST(APPEND MLLinkDeps ${tensorflow_c_api})
+  else()
+    LIST(APPEND LLVM_OPTIONAL_SOURCES ${DevelopmentModeMLSources})
+  endif()
 else()
-  set(LLVM_OPTIONAL_SOURCES ${CommonMLSources} ${ReleaseModeMLSources})
+  LIST(APPEND LLVM_OPTIONAL_SOURCES 
+    ${CommonMLSources}
+    ${DevelopmentModeMLSources}
+    ${ReleaseModeMLSources}
+    )
 endif()
+  

 add_llvm_component_library(LLVMAnalysis
  AliasAnalysis.cpp
@ -57,6 +75,7 @@ add_llvm_component_library(LLVMAnalysis
  InlineCost.cpp
  InlineAdvisor.cpp
  InlineFeaturesAnalysis.cpp
+  InlineSizeEstimatorAnalysis.cpp
  InstCount.cpp
  InstructionPrecedenceTracking.cpp
  InstructionSimplify.cpp
@ -124,4 +143,7 @@ add_llvm_component_library(LLVMAnalysis

  DEPENDS
  intrinsics_gen
+
+  LINK_LIBS
+  ${MLLinkDeps}
  )
--- a/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp
+++ b/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp
@ -0,0 +1,299 @@
+//===- InlineSizeEstimatorAnalysis.cpp - IR to native size from ML model --===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements feature and label extraction for offline supervised learning
+// of a IR to native size model.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"
+
+#ifdef LLVM_HAVE_TF_API
+#include "llvm/Analysis/Utils/TFUtils.h"
+#endif
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <algorithm>
+#include <deque>
+
+using namespace llvm;
+
+AnalysisKey InlineSizeEstimatorAnalysis::Key;
+
+#define DEBUG_TYPE "inline-size-estimator"
+
+#ifdef LLVM_HAVE_TF_API
+cl::opt<std::string> TFIR2NativeModelPath(
+    "ml-inliner-ir2native-model", cl::Hidden,
+    cl::desc("Path to saved model evaluating native size from IR."));
+
+namespace {
+// Returns the value passed to LAST_OTHER_INST by llvm/IR/Instruction.def.
+// Only LAST_OTHER_INST is defined before the include, so the .def file is
+// expected to expand to a single `return NR;` statement here (presumably NR is
+// the highest instruction opcode - the .def file is not part of this change).
+unsigned getMaxInstructionID() {
+#define LAST_OTHER_INST(NR) return NR;
+#include "llvm/IR/Instruction.def"
+}
+
+/// Feature extraction for the IR-to-native-size model.
+class IRToNativeSizeLearning {
+public:
+  /// Positions of the individually computed ("named") features at the start
+  /// of the feature vector. NumNamedFeatures is a count, not a feature.
+  enum class NamedFeatureIndex : size_t {
+    InitialSize,
+    Blocks,
+    Calls,
+    IsLocal,
+    IsLinkOnceODR,
+    IsLinkOnce,
+    Loops,
+    MaxLoopDepth,
+    MaxDomTreeLevel,
+
+    NumNamedFeatures
+  };
+  static const size_t NumNamedFeatures =
+      static_cast<size_t>(NamedFeatureIndex::NumNamedFeatures);
+
+  /// The features of one function: the named features, a histogram of
+  /// instruction opcodes, and a histogram of selected opcode successions.
+  struct FunctionFeatures {
+    /// The (previous opcode, opcode) pairs that are counted.
+    static std::vector<std::pair<size_t, size_t>>
+        ImportantInstructionSuccessions;
+    /// Total number of scalar features written by fillTensor.
+    static const size_t FeatureCount;
+
+    std::array<int32_t, NumNamedFeatures> NamedFeatures = {0};
+    std::vector<int32_t> InstructionHistogram;
+    std::vector<int32_t> InstructionPairHistogram;
+
+    /// Writes all the features, back to back, starting at Ptr.
+    void fillTensor(int32_t *Ptr) const;
+
+    /// Mutable access to the named feature identified by Pos.
+    int32_t &operator[](NamedFeatureIndex Pos) {
+      const size_t Slot = static_cast<size_t>(Pos);
+      return NamedFeatures[Slot];
+    }
+  };
+  IRToNativeSizeLearning() = default;
+
+  /// Computes the features of F, using analyses obtained from FAM.
+  static FunctionFeatures getFunctionFeatures(Function &F,
+                                              FunctionAnalysisManager &FAM);
+
+private:
+  /// Sorts the succession tuples when constructed, and records having done so.
+  struct SortFeatureTuples {
+    bool IsSorted = false;
+    SortFeatureTuples() {
+      auto &Tuples = FunctionFeatures::ImportantInstructionSuccessions;
+      std::sort(Tuples.begin(), Tuples.end());
+      IsSorted = true;
+    }
+  };
+
+  /// Constructed on first access, which is what triggers the sort.
+  static llvm::ManagedStatic<SortFeatureTuples> TupleSorter;
+
+  /// Accesses TupleSorter (constructing it if necessary) and reports whether
+  /// the tuples were sorted.
+  static bool ensureSortedTuples() { return TupleSorter->IsSorted; }
+};
+llvm::ManagedStatic<IRToNativeSizeLearning::SortFeatureTuples>
+    IRToNativeSizeLearning::TupleSorter;
+
+// This is a point in time - we determined including these pairs of
+// consecutive instructions (in the IR layout available at inline time) as
+// features improves the model performance. We want to move away from manual
+// feature selection.
+// The vector is given in opcode pairs rather than labels because 1) labels
+// weren't readily available, and 2) the successions were hand-extracted.
+// Note that SortFeatureTuples sorts this vector in place when it is
+// constructed, and that the position of a pair in this vector is used as its
+// index in FunctionFeatures::InstructionPairHistogram.
+std::vector<std::pair<size_t, size_t>>
+    IRToNativeSizeLearning::FunctionFeatures::ImportantInstructionSuccessions =
+        {{1, 34},  {15, 27}, {53, 53}, {53, 34}, {1, 11},  {32, 2},  {2, 48},
+         {28, 48}, {1, 45},  {49, 32}, {57, 56}, {55, 53}, {1, 28},  {57, 34},
+         {1, 1},   {32, 28}, {32, 15}, {49, 28}, {53, 1},  {2, 53},  {48, 34},
+         {28, 53}, {2, 32},  {1, 40},  {32, 48}, {29, 56}, {56, 32}, {55, 56},
+         {48, 56}, {1, 31},  {33, 34}, {2, 28},  {1, 12},  {55, 1},  {31, 31},
+         {65, 1},  {33, 56}, {32, 32}, {13, 13}, {1, 26},  {13, 26}, {2, 1},
+         {1, 33},  {47, 49}, {64, 1},  {2, 38},  {34, 53}, {48, 2},  {55, 34},
+         {34, 32}, {1, 5},   {56, 13}, {2, 2},   {2, 49},  {33, 2},  {49, 39},
+         {56, 49}, {33, 49}, {32, 39}, {39, 57}, {29, 33}, {31, 34}, {32, 29},
+         {47, 15}, {13, 34}, {2, 33},  {32, 49}, {49, 34}, {56, 33}, {1, 30},
+         {33, 33}, {31, 33}, {2, 29},  {56, 7},  {32, 13}, {2, 55},  {56, 56},
+         {2, 34},  {1, 42},  {34, 49}, {1, 20},  {32, 33}, {1, 25},  {53, 28},
+         {1, 14},  {31, 49}, {28, 2},  {2, 13},  {2, 56},  {1, 32},  {56, 53},
+         {65, 65}, {33, 53}, {64, 64}, {13, 2},  {34, 33}, {1, 4},   {49, 2},
+         {1, 9},   {56, 1},  {33, 1},  {53, 57}, {32, 53}, {13, 56}, {32, 56},
+         {55, 55}, {1, 18},  {49, 56}, {34, 34}, {1, 7},   {56, 64}, {32, 1},
+         {13, 33}, {55, 28}, {49, 33}, {57, 57}, {56, 34}, {34, 56}, {33, 32},
+         {32, 40}, {1, 29},  {53, 2},  {34, 1},  {32, 34}, {49, 49}, {1, 24},
+         {40, 34}, {1, 13},  {38, 34}, {29, 2},  {34, 2},  {1, 39},  {1, 22},
+         {1, 27},  {49, 1},  {1, 8},   {56, 2}};
+
+// The feature vector is laid out in three groups:
+//  - the 9 named (calculated) features;
+//  - one counter per instruction opcode; and
+//  - one counter per manually-identified succession of instructions.
+// The latter two are histograms: they count the occurrences of each
+// instruction opcode or succession of instructions, respectively.
+// Instruction opcodes start from 1. For convenience there is also an always 0
+// counter for the '0' opcode, which is where the extra 1 comes from.
+const size_t IRToNativeSizeLearning::FunctionFeatures::FeatureCount =
+    NumNamedFeatures + getMaxInstructionID() + 1 +
+    ImportantInstructionSuccessions.size();
+
+// Adds up the code-size cost that TTI reports for each instruction of F.
+size_t getSize(Function &F, TargetTransformInfo &TTI) {
+  size_t TotalCost = 0;
+  for (BasicBlock &Block : F) {
+    for (Instruction &Inst : Block) {
+      TotalCost +=
+          TTI.getInstructionCost(&Inst, TargetTransformInfo::TCK_CodeSize);
+    }
+  }
+  return TotalCost;
+}
+
+// Same as the overload above, with the TargetTransformInfo obtained from FAM.
+size_t getSize(Function &F, FunctionAnalysisManager &FAM) {
+  return getSize(F, FAM.getResult<TargetIRAnalysis>(F));
+}
+
+// Returns the largest dominator tree level among the basic blocks of F that
+// have a node in Tree, or 0 if there is none.
+unsigned getMaxDominatorTreeDepth(const Function &F,
+                                  const DominatorTree &Tree) {
+  unsigned Deepest = 0;
+  for (const BasicBlock &Block : F) {
+    const auto *Node = Tree.getNode(&Block);
+    if (!Node)
+      continue;
+    Deepest = std::max(Deepest, Node->getLevel());
+  }
+  return Deepest;
+}
+} // namespace
+
+/// Computes the features of \p F: the named features, the histogram of
+/// instruction opcodes, and the histogram of the opcode successions listed in
+/// FunctionFeatures::ImportantInstructionSuccessions. Debug instructions are
+/// ignored. The dominator tree, loop info and target info are obtained from
+/// \p FAM.
+IRToNativeSizeLearning::FunctionFeatures
+IRToNativeSizeLearning::getFunctionFeatures(Function &F,
+                                            FunctionAnalysisManager &FAM) {
+  // Trigger the one-time sort of the succession table outside of the assert.
+  // When NDEBUG is defined the assert's expression is not evaluated, so
+  // calling ensureSortedTuples() only from within it would leave the table
+  // unsorted - and the pair feature indices different - in release builds.
+  const bool TuplesSorted = ensureSortedTuples();
+  (void)TuplesSorted;
+  assert(TuplesSorted && "expected lazy initialization");
+
+  auto &DomTree = FAM.getResult<DominatorTreeAnalysis>(F);
+  FunctionFeatures FF;
+  size_t InstrCount = getMaxInstructionID() + 1;
+  FF.InstructionHistogram.resize(InstrCount);
+
+  FF.InstructionPairHistogram.resize(
+      FunctionFeatures::ImportantInstructionSuccessions.size());
+
+  // Returns the position of the (a, b) succession in the table, or -1 if it
+  // is not one of the tracked successions.
+  auto getPairIndex = [](size_t a, size_t b) {
+    auto I =
+        std::find(FunctionFeatures::ImportantInstructionSuccessions.begin(),
+                  FunctionFeatures::ImportantInstructionSuccessions.end(),
+                  std::make_pair(a, b));
+    if (I == FunctionFeatures::ImportantInstructionSuccessions.end())
+      return -1;
+    return static_cast<int>(std::distance(
+        FunctionFeatures::ImportantInstructionSuccessions.begin(), I));
+  };
+
+  // The opcode of the previously visited instruction, starting at 0. It is
+  // not reset between basic blocks, so the last instruction of a block pairs
+  // with the first instruction of the next block in function layout order.
+  unsigned LastID = 0;
+
+  // We don't want debug calls, because they'd just add noise.
+  for (auto &BB : F) {
+    for (const Instruction &I : BB.instructionsWithoutDebug()) {
+      const unsigned ID = I.getOpcode();
+
+      ++FF.InstructionHistogram[ID];
+      int PairIndex = getPairIndex(LastID, ID);
+      if (PairIndex >= 0)
+        ++FF.InstructionPairHistogram[PairIndex];
+      LastID = ID;
+      if (isa<CallBase>(I))
+        ++FF[NamedFeatureIndex::Calls];
+    }
+  }
+
+  FF[NamedFeatureIndex::InitialSize] = getSize(F, FAM);
+  FF[NamedFeatureIndex::IsLocal] = F.hasLocalLinkage();
+  FF[NamedFeatureIndex::IsLinkOnceODR] = F.hasLinkOnceODRLinkage();
+  FF[NamedFeatureIndex::IsLinkOnce] = F.hasLinkOnceLinkage();
+  FF[NamedFeatureIndex::Blocks] = F.size();
+  auto &LI = FAM.getResult<LoopAnalysis>(F);
+  // Iterating over LI visits the loops it exposes through begin()/end(); both
+  // loop features below are computed over that same set of loops.
+  FF[NamedFeatureIndex::Loops] = std::distance(LI.begin(), LI.end());
+  for (auto &L : LI)
+    FF[NamedFeatureIndex::MaxLoopDepth] =
+        std::max(FF[NamedFeatureIndex::MaxLoopDepth],
+                 static_cast<int32_t>(L->getLoopDepth()));
+  FF[NamedFeatureIndex::MaxDomTreeLevel] = getMaxDominatorTreeDepth(F, DomTree);
+  return FF;
+}
+
+/// Writes the named features, then the instruction histogram, then the
+/// instruction pair histogram, contiguously, starting at Ptr. The destination
+/// must have room for all of them.
+void IRToNativeSizeLearning::FunctionFeatures::fillTensor(int32_t *Ptr) const {
+  // std::copy returns the position just past the last element written, which
+  // is where the next group of features starts.
+  int32_t *Cursor = std::copy(NamedFeatures.begin(), NamedFeatures.end(), Ptr);
+  Cursor = std::copy(InstructionHistogram.begin(), InstructionHistogram.end(),
+                     Cursor);
+  std::copy(InstructionPairHistogram.begin(), InstructionPairHistogram.end(),
+            Cursor);
+}
+
+/// A model evaluator is requested by passing a non-empty model path through
+/// the ml-inliner-ir2native-model option.
+bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() {
+  const std::string &ModelPath = TFIR2NativeModelPath.getValue();
+  return !ModelPath.empty();
+}
+
+/// Loads the requested model, if any, and sets up its single input tensor as
+/// a (1, FeatureCount) tensor of 32-bit integers. If the model cannot be
+/// loaded, the analysis is left without an evaluator.
+InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() {
+  if (!isEvaluatorRequested())
+    return;
+
+  std::vector<std::string> ModelInputs{"serving_default_input_1"};
+  std::vector<std::string> ModelOutputs{"StatefulPartitionedCall"};
+  Evaluator = std::make_unique<TFModelEvaluator>(
+      TFIR2NativeModelPath.getValue().c_str(), ModelInputs, ModelOutputs);
+
+  if (Evaluator && Evaluator->isValid()) {
+    static const std::vector<int64_t> Dim{
+        1, static_cast<int64_t>(
+               IRToNativeSizeLearning::FunctionFeatures::FeatureCount)};
+    Evaluator->initInput(0, TF_INT32, Dim);
+    return;
+  }
+
+  // The model could not be loaded: drop the evaluator.
+  Evaluator.reset();
+}
+
+/// Extracts the features of F, feeds them to the model and returns its
+/// output as a size, with negative outputs clamped to 0. Returns None if
+/// there is no evaluator or the evaluation fails.
+InlineSizeEstimatorAnalysis::Result
+InlineSizeEstimatorAnalysis::run(const Function &F,
+                                 FunctionAnalysisManager &FAM) {
+  if (!Evaluator)
+    return None;
+
+  // getFunctionFeatures takes a non-const Function.
+  auto &MutableF = const_cast<Function &>(F);
+  const auto Features =
+      IRToNativeSizeLearning::getFunctionFeatures(MutableF, FAM);
+
+  // Write the features directly into the storage of the input tensor.
+  auto *InputData =
+      static_cast<int32_t *>(TF_TensorData(Evaluator->getInput()[0]));
+  Features.fillTensor(InputData);
+
+  auto ER = Evaluator->evaluate();
+  if (!ER)
+    return None;
+
+  const float Estimate = *ER->getTensorValue<float>(0);
+  return static_cast<size_t>(Estimate < 0.0 ? 0.0f : Estimate);
+}
+
+// Defined here, where TFModelEvaluator is a complete type.
+InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() = default;
+
+// Moving transfers the evaluator; the moved-from analysis keeps none.
+InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis(
+    InlineSizeEstimatorAnalysis &&RHS)
+    : Evaluator(std::move(RHS.Evaluator)) {}
+
+#else
+// Without the TensorFlow C API there is no evaluator: the analysis never
+// produces an estimate.
+namespace llvm {
+// Empty definition that completes the type forward-declared in the header.
+class TFModelEvaluator {};
+} // namespace llvm
+
+InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() = default;
+InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis(
+    InlineSizeEstimatorAnalysis &&) {}
+InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() = default;
+
+InlineSizeEstimatorAnalysis::Result
+InlineSizeEstimatorAnalysis::run(const Function &, FunctionAnalysisManager &) {
+  return None;
+}
+
+bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() { return false; }
+#endif
--- a/llvm/lib/Analysis/TFUtils.cpp
+++ b/llvm/lib/Analysis/TFUtils.cpp
@ -0,0 +1,143 @@
+//===- TFUtils.cpp - tensorflow evaluation utilities ----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements utilities for interfacing with tensorflow C APIs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Utils/TFUtils.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include "tensorflow/c/c_api_experimental.h"
+
+#include <cassert>
+#include <cstring>
+
+using namespace llvm;
+
+namespace {
+
+/// Initializes the TensorFlow library when constructed.
+struct TFInitializer {
+  TFInitializer() {
+    assert(!IsInitialized && "TFInitialized should be called only once");
+    // TF_InitMain takes a usage string and an argc/argv pair. Give it an
+    // empty usage string and a single, empty, argument.
+    const char *ProgramName = "";
+    const char **FakeArgv = &ProgramName;
+    int FakeArgc = 1;
+    TF_InitMain(ProgramName, &FakeArgc, const_cast<char ***>(&FakeArgv));
+    IsInitialized = true;
+  }
+  bool IsInitialized = false;
+};
+
+/// Constructed - and TensorFlow initialized - on first access.
+llvm::ManagedStatic<TFInitializer> TFLibInitializer;
+
+/// Accesses the initializer, constructing it if needed, and returns whether
+/// TensorFlow was initialized.
+bool ensureInitTF() { return TFLibInitializer->IsInitialized; }
+
+/// Creates a new TF_Graph, deleted with TF_DeleteGraph.
+TFModelEvaluator::TFGraphPtr createTFGraph() {
+  TFModelEvaluator::TFGraphPtr NewGraph(TF_NewGraph(), &TF_DeleteGraph);
+  return NewGraph;
+}
+
+/// Creates a new TF_Status, deleted with TF_DeleteStatus.
+TFModelEvaluator::TFStatusPtr createTFStatus() {
+  TFModelEvaluator::TFStatusPtr NewStatus(TF_NewStatus(), &TF_DeleteStatus);
+  return NewStatus;
+}
+
+/// Creates new TF_SessionOptions, deleted with TF_DeleteSessionOptions.
+TFModelEvaluator::TFSessionOptionsPtr createTFSessionOptions() {
+  TFModelEvaluator::TFSessionOptionsPtr NewOptions(TF_NewSessionOptions(),
+                                                   &TF_DeleteSessionOptions);
+  return NewOptions;
+}
+} // namespace
+
+/// Loads the SavedModel at \p SavedModelPath, tagged \p Tags, into a new
+/// session, then binds each of \p InputNames and \p OutputNames to the
+/// operation with that name in the loaded graph (output index 0). If any step
+/// fails, an error is printed to errs() and the evaluator is left invalid
+/// (isValid() returns false).
+TFModelEvaluator::TFModelEvaluator(StringRef SavedModelPath,
+                                   const std::vector<std::string> &InputNames,
+                                   const std::vector<std::string> &OutputNames,
+                                   const char *Tags)
+    : Graph(createTFGraph()), Options(createTFSessionOptions()),
+      InputFeed(InputNames.size()), Input(InputNames.size()),
+      OutputFeed(OutputNames.size()) {
+  if (!ensureInitTF()) {
+    errs() << "Tensorflow should have been initialized";
+    return;
+  }
+  auto Status = createTFStatus();
+
+  Session = TF_LoadSessionFromSavedModel(Options.get(), nullptr,
+                                         SavedModelPath.str().c_str(), &Tags, 1,
+                                         Graph.get(), nullptr, Status.get());
+  if (TF_GetCode(Status.get()) != TF_Code::TF_OK) {
+    errs() << TF_Message(Status.get());
+    deleteSession();
+    // The model did not load, so there is nothing to bind the names to.
+    // Stop here rather than also reporting every name as missing.
+    return;
+  }
+  for (size_t I = 0; I < InputNames.size(); ++I) {
+    InputFeed[I] = {
+        TF_GraphOperationByName(Graph.get(), (InputNames[I]).c_str()), 0};
+    if (!checkReportAndReset(InputFeed[I], InputNames[I]))
+      return;
+  }
+  for (size_t I = 0; I < OutputNames.size(); ++I) {
+    OutputFeed[I] = {
+        TF_GraphOperationByName(Graph.get(), (OutputNames[I]).c_str()), 0};
+    if (!checkReportAndReset(OutputFeed[I], OutputNames[I]))
+      return;
+  }
+}
+
+/// Deletes the input tensors, then the session.
+TFModelEvaluator::~TFModelEvaluator() {
+  for (size_t I = 0, E = Input.size(); I != E; ++I)
+    TF_DeleteTensor(Input[I]);
+  deleteSession();
+}
+
+/// Returns true if Output refers to an operation. Otherwise reports that
+/// Name could not be found, deletes the session (which invalidates the
+/// evaluator) and returns false.
+bool TFModelEvaluator::checkReportAndReset(const TF_Output &Output,
+                                           StringRef Name) {
+  const bool Found = Output.oper != nullptr;
+  if (!Found) {
+    errs() << "Could not find TF_Output named: " + Name;
+    deleteSession();
+  }
+  return Found;
+}
+
+/// Deletes the session, if there is one, and resets Session to null, so that
+/// calling this again is a no-op. A failure to delete is reported to errs().
+void TFModelEvaluator::deleteSession() {
+  if (!Session)
+    return;
+  TF_Session *Doomed = Session;
+  Session = nullptr;
+
+  auto Status = createTFStatus();
+  TF_DeleteSession(Doomed, Status.get());
+  if (TF_GetCode(Status.get()) != TF_Code::TF_OK)
+    errs() << "Could not delete TF session";
+}
+
+/// Runs the session on the current input tensors. Returns the outputs on
+/// success. Returns None if the evaluator is invalid, or if the run fails -
+/// in which case the error is printed and the session is deleted, which
+/// invalidates the evaluator.
+Optional<TFModelEvaluator::EvaluationResult> TFModelEvaluator::evaluate() {
+  if (!isValid())
+    return None;
+
+  // One (initially null) output tensor slot per output node.
+  EvaluationResult Outputs(OutputFeed.size());
+  auto Status = createTFStatus();
+  TF_SessionRun(Session, nullptr, InputFeed.data(), Input.data(), Input.size(),
+                OutputFeed.data(), Outputs.Output.data(),
+                Outputs.Output.size(), nullptr, 0, nullptr, Status.get());
+  if (TF_GetCode(Status.get()) == TF_Code::TF_OK)
+    return Outputs;
+
+  errs() << TF_Message(Status.get());
+  deleteSession();
+  return None;
+}
+
+/// Allocates the input tensor at \p Index with the given element \p Type and
+/// \p Dimensions, and zero-fills its storage. \p Index must be a valid index
+/// into the input vector, i.e. correspond to one of the InputNames given at
+/// construction. A tensor previously set up at \p Index is deleted first.
+void TFModelEvaluator::initInput(int Index, TF_DataType Type,
+                                 const std::vector<int64_t> &Dimensions) {
+  assert(Index >= 0 && static_cast<size_t>(Index) < Input.size() &&
+         "Input index out of range");
+
+  // Size in bytes: the element size times the extent of every dimension.
+  int64_t TotalSize = TF_DataTypeSize(Type);
+  for (auto &D : Dimensions)
+    TotalSize *= D;
+
+  // Don't leak the tensor of an input that is initialized more than once.
+  if (Input[Index])
+    TF_DeleteTensor(Input[Index]);
+
+  Input[Index] =
+      TF_AllocateTensor(Type, Dimensions.data(), Dimensions.size(), TotalSize);
+  // Defensive: only clear the storage if the allocation produced a tensor.
+  if (Input[Index])
+    std::memset(TF_TensorData(Input[Index]), 0, TotalSize);
+}
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@ -35,6 +35,7 @@
 #include "llvm/Analysis/IVUsers.h"
 #include "llvm/Analysis/InlineAdvisor.h"
 #include "llvm/Analysis/InlineFeaturesAnalysis.h"
+#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"
 #include "llvm/Analysis/LazyCallGraph.h"
 #include "llvm/Analysis/LazyValueInfo.h"
 #include "llvm/Analysis/LoopAccessAnalysis.h"
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@ -133,6 +133,7 @@ FUNCTION_ANALYSIS("loops", LoopAnalysis())
 FUNCTION_ANALYSIS("lazy-value-info", LazyValueAnalysis())
 FUNCTION_ANALYSIS("da", DependenceAnalysis())
 FUNCTION_ANALYSIS("inliner-features", InlineFeaturesAnalysis())
+FUNCTION_ANALYSIS("inliner-size-estimator", InlineSizeEstimatorAnalysis())
 FUNCTION_ANALYSIS("memdep", MemoryDependenceAnalysis())
 FUNCTION_ANALYSIS("memoryssa", MemorySSAAnalysis())
 FUNCTION_ANALYSIS("phi-values", PhiValuesAnalysis())
--- a/llvm/unittests/Analysis/CMakeLists.txt
+++ b/llvm/unittests/Analysis/CMakeLists.txt
@ -6,7 +6,13 @@ set(LLVM_LINK_COMPONENTS
  TransformUtils
  )

-add_llvm_unittest(AnalysisTests
+# TFUtilsTest.cpp is only compiled into AnalysisTests (through EXTRA_TESTS)
+# when LLVM_HAVE_TF_API is defined. Otherwise it is listed in
+# LLVM_OPTIONAL_SOURCES (presumably so that the check for source files that
+# are not part of any target accepts it).
+if (DEFINED LLVM_HAVE_TF_API)
+  LIST(APPEND EXTRA_TESTS TFUtilsTest.cpp)
+else()
+  LIST(APPEND LLVM_OPTIONAL_SOURCES TFUtilsTest.cpp)
+endif()
+
+add_llvm_unittest_with_input_files(AnalysisTests
  AliasAnalysisTest.cpp
  AliasSetTrackerTest.cpp
  AssumeBundleQueriesTest.cpp
@ -22,6 +28,7 @@ add_llvm_unittest(AnalysisTests
  DomTreeUpdaterTest.cpp
  GlobalsModRefTest.cpp
  InlineFeaturesAnalysisTest.cpp
+  InlineSizeEstimatorAnalysisTest.cpp
  IVDescriptorsTest.cpp
  LazyCallGraphTest.cpp
  LoadsTest.cpp
@ -40,4 +47,7 @@ add_llvm_unittest(AnalysisTests
  ValueLatticeTest.cpp
  ValueTrackingTest.cpp
  VectorUtilsTest.cpp
+  ${EXTRA_TESTS}
  )
+
+ target_link_libraries(AnalysisTests PRIVATE LLVMTestingSupport)
--- a/llvm/unittests/Analysis/InlineSizeEstimatorAnalysisTest.cpp
+++ b/llvm/unittests/Analysis/InlineSizeEstimatorAnalysisTest.cpp
@ -0,0 +1,101 @@
+//===- InlineSizeEstimatorAnalysisTest.cpp - test for ir2native -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/AsmParser/Parser.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Testing/Support/SupportHelpers.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+extern const char *TestMainArgv0;
+extern cl::opt<std::string> TFIR2NativeModelPath;
+
+#if LLVM_HAVE_TF_API
+// Returns the path of the ir2native_x86_64_model directory, located in the
+// input file directory of this unit test.
+static std::string getModelPath() {
+  SmallString<128> ModelDir = unittest::getInputFileDirectory(TestMainArgv0);
+  llvm::sys::path::append(ModelDir, "ir2native_x86_64_model");
+  return ModelDir.str().str();
+}
+#endif
+
+// Parses IR into a Module owned by C. On failure, prints the diagnostic to
+// errs() and returns null.
+static std::unique_ptr<Module> parseIR(LLVMContext &C, const char *IR) {
+  SMDiagnostic Diag;
+  auto Parsed = parseAssemblyString(IR, Diag, C);
+  if (Parsed == nullptr)
+    Diag.print("MLAnalysisTests", errs());
+  return Parsed;
+}
+
+// Builds a FunctionAnalysisManager with the dominator tree, pass
+// instrumentation, target IR and loop analyses registered.
+static FunctionAnalysisManager buildFAM() {
+  FunctionAnalysisManager Manager;
+  Manager.registerPass([] { return DominatorTreeAnalysis(); });
+  Manager.registerPass([] { return PassInstrumentationAnalysis(); });
+  Manager.registerPass([] { return TargetIRAnalysis(); });
+  Manager.registerPass([] { return LoopAnalysis(); });
+  return Manager;
+}
+
+// Test model loading and evaluation. With the TensorFlow API available, the
+// test model must produce a positive size estimate for @branches; without
+// it, the analysis must not produce an estimate at all.
+TEST(InlineSizeEstimatorAnalysis, SizeIsValidTest) {
+  LLVMContext C;
+  std::unique_ptr<Module> M = parseIR(C,
+                                      R"IR(
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+
+declare i32 @f1(i32)
+declare i32 @f2(i32)
+
+define i32 @branches(i32) {
+  %cond = icmp slt i32 %0, 3
+  br i1 %cond, label %then, label %else
+
+then:
+  %ret.1 = call i32 @f1(i32 %0)
+  br label %last.block
+
+else:
+  %ret.2 = call i32 @f2(i32 %0)
+  br label %last.block
+
+last.block:
+  %ret = phi i32 [%ret.1, %then], [%ret.2, %else]
+  ret i32 %ret
+}
+
+define internal i32 @top() {
+  %1 = call i32 @branches(i32 2)
+  %2 = call i32 @f1(i32 %1)
+  ret i32 %2
+}
+)IR");
+  // Stop here if the IR did not parse, instead of dereferencing null below.
+  ASSERT_TRUE(M != nullptr);
+
+  FunctionAnalysisManager FAM = buildFAM();
+#if LLVM_HAVE_TF_API
+  TFIR2NativeModelPath = getModelPath();
+#endif
+
+  InlineSizeEstimatorAnalysis FA;
+  auto SizeEstimate = FA.run(*M->getFunction("branches"), FAM);
+#if LLVM_HAVE_TF_API
+  // Fail cleanly, rather than dereferencing an empty Optional, if the model
+  // could not be loaded or evaluated.
+  ASSERT_TRUE(SizeEstimate.hasValue());
+  EXPECT_GT(*SizeEstimate, 0u);
+#else
+  EXPECT_FALSE(SizeEstimate.hasValue());
+#endif
+}
--- a/llvm/unittests/Analysis/Inputs/ir2native_x86_64_model/saved_model.pbtxt
+++ b/llvm/unittests/Analysis/Inputs/ir2native_x86_64_model/saved_model.pbtxt
--- a/llvm/unittests/Analysis/Inputs/ir2native_x86_64_model/variables/variables.data-00000-of-00001
+++ b/llvm/unittests/Analysis/Inputs/ir2native_x86_64_model/variables/variables.data-00000-of-00001
--- a/llvm/unittests/Analysis/Inputs/ir2native_x86_64_model/variables/variables.index
+++ b/llvm/unittests/Analysis/Inputs/ir2native_x86_64_model/variables/variables.index
--- a/llvm/unittests/Analysis/TFUtilsTest.cpp
+++ b/llvm/unittests/Analysis/TFUtilsTest.cpp
@ -0,0 +1,98 @@
+//===- TFUtilsTest.cpp - test for TFUtils ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Utils/TFUtils.h"
+#include "llvm/AsmParser/Parser.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Testing/Support/SupportHelpers.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+extern const char *TestMainArgv0;
+
+// Returns the path of the "ir2native_x86_64_model" entry inside the unit
+// test's input file directory, which is looked up from TestMainArgv0.
+static std::string getModelPath() {
+  SmallString<128> ModelDir = unittest::getInputFileDirectory(TestMainArgv0);
+  sys::path::append(ModelDir, "ir2native_x86_64_model");
+  return ModelDir.str().str();
+}
+
+// An evaluator constructed with an empty model path and no input or output
+// names must report itself as invalid.
+TEST(TFUtilsTest, NoModel) {
+  const std::vector<std::string> NoNames;
+  TFModelEvaluator EmptyEvaluator("", NoNames, NoNames);
+  EXPECT_FALSE(EmptyEvaluator.isValid());
+}
+
+// Test we can correctly load a savedmodel and evaluate it: the same input is
+// evaluated twice (the second time with one position zeroed) and both runs are
+// expected to produce 80.
+TEST(TFUtilsTest, LoadAndExecuteTest) {
+  // We use the ir2native model for test. We know it has one feature of
+  // dimension (1, 214)
+  std::vector<std::string> InputNames{"serving_default_input_1"};
+  std::vector<std::string> OutputName{"StatefulPartitionedCall"};
+  const static int64_t KnownSize = 214;
+
+  TFModelEvaluator Evaluator(getModelPath(), InputNames, OutputName);
+  static const std::vector<int64_t> Dim{1, KnownSize};
+
+  // Fatal check: everything below uses the evaluator's input tensor, so stop
+  // at once if the model did not load.
+  ASSERT_TRUE(Evaluator.isValid());
+  Evaluator.initInput(0, TF_INT32, Dim);
+
+  int32_t *V = static_cast<int32_t *>(TF_TensorData(Evaluator.getInput()[0]));
+  // Fill it up with 1's, we know the output.
+  for (int64_t I = 0; I < KnownSize; ++I) {
+    V[I] = 1;
+  }
+  {
+    auto ER = Evaluator.evaluate();
+    // Fatal check: ER is dereferenced on the next line.
+    ASSERT_TRUE(ER.hasValue());
+    float Ret = *ER->getTensorValue<float>(0);
+    // Compare as a signed integer so both operands have the same signedness.
+    EXPECT_EQ(static_cast<int64_t>(Ret), 80);
+  }
+  // The input vector should be unchanged
+  for (int64_t I = 0; I < KnownSize; ++I) {
+    EXPECT_EQ(V[I], 1);
+  }
+  // Zero-out the unused position '0' of the instruction histogram, which is
+  // after the first 9 calculated values. Should be the same result.
+  V[9] = 0;
+  {
+    auto ER = Evaluator.evaluate();
+    // Fatal check: ER is dereferenced on the next line.
+    ASSERT_TRUE(ER.hasValue());
+    float Ret = *ER->getTensorValue<float>(0);
+    EXPECT_EQ(static_cast<int64_t>(Ret), 80);
+  }
+}
+
+// Test incorrect input setup: evaluating with an input whose size does not
+// match the model must yield no result and leave the evaluator invalid.
+TEST(TFUtilsTest, EvalError) {
+  // We use the ir2native model for test. We know it has one feature of
+  // dimension (1, 214); the 213 used below does not match that, which is the
+  // incorrect setup under test.
+  std::vector<std::string> InputNames{"serving_default_input_1"};
+  std::vector<std::string> OutputName{"StatefulPartitionedCall"};
+  const static int64_t KnownSize = 213;
+
+  TFModelEvaluator Evaluator(getModelPath(), InputNames, OutputName);
+  static const std::vector<int64_t> Dim{1, KnownSize};
+
+  // Fatal check: the input tensor is set up and written below, so stop at
+  // once if the model did not load.
+  ASSERT_TRUE(Evaluator.isValid());
+  Evaluator.initInput(0, TF_INT32, Dim);
+
+  int32_t *V = static_cast<int32_t *>(TF_TensorData(Evaluator.getInput()[0]));
+  // Fill it up with 1's; evaluation is expected to fail, so no output value is
+  // checked.
+  for (int64_t I = 0; I < KnownSize; ++I) {
+    V[I] = 1;
+  }
+  auto ER = Evaluator.evaluate();
+  EXPECT_FALSE(ER.hasValue());
+  EXPECT_FALSE(Evaluator.isValid());
+}