Commit FLO with control flow graph.

Summary: llvm-flo disassembles, builds control flow graph, and re-writes simple functions. (cherry picked from FBD2524024)
2025-02-20 01:57:37 +00:00 · 2015-10-09 17:21:14 -07:00 · 2015-10-09 17:21:14 -07:00 · 9a2fe7ebe4
commit 9a2fe7ebe4
parent 7927c14ff5
8 changed files with 1768 additions and 6 deletions
--- a/bolt/BinaryBasicBlock.cpp
+++ b/bolt/BinaryBasicBlock.cpp
@ -0,0 +1,65 @@
+//===--- BinaryBasicBlock.cpp - Interface for assembly-level basic block --===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include <limits>
+#include <string>
+
+#include "BinaryBasicBlock.h"
+#include "BinaryFunction.h"
+
+#undef  DEBUG_TYPE
+#define DEBUG_TYPE "flo"
+
+namespace llvm {
+
+namespace flo {
+
+/// Order basic blocks by their original offset inside the function.
+bool operator<(const BinaryBasicBlock &LHS, const BinaryBasicBlock &RHS) {
+  const uint64_t LeftOffset = LHS.Offset;
+  const uint64_t RightOffset = RHS.Offset;
+  return LeftOffset < RightOffset;
+}
+
+/// Append \p Succ to the successor list of this block, record the branch
+/// profile of the new edge, and register this block as a predecessor of
+/// \p Succ.
+///
+/// \p Count and \p MispredictedCount are stored in BranchInfo at the same
+/// index that \p Succ occupies in Successors, so the two vectors stay
+/// parallel.
+void BinaryBasicBlock::addSuccessor(BinaryBasicBlock *Succ,
+                                    uint64_t Count,
+                                    uint64_t MispredictedCount) {
+  Successors.push_back(Succ);
+  BranchInfo.push_back({Count, MispredictedCount});
+  Succ->addPredecessor(this);
+}
+
+/// Remove the first edge leading to \p Succ together with its BranchInfo
+/// entry, and drop this block from the predecessor list of \p Succ.
+///
+/// Asserts that \p Succ is currently a successor of this block.
+void BinaryBasicBlock::removeSuccessor(BinaryBasicBlock *Succ) {
+  // Validate the edge before touching the predecessor list of Succ.
+  auto I = std::find(succ_begin(), succ_end(), Succ);
+  assert(I != succ_end() && "no such successor!");
+  assert(BranchInfo.size() == Successors.size() &&
+         "BranchInfo is out of sync with Successors");
+
+  Succ->removePredecessor(this);
+
+  // Erase the profile entry first: erasing from Successors invalidates I.
+  BranchInfo.erase(BranchInfo.begin() + (I - succ_begin()));
+  Successors.erase(I);
+}
+
+/// Record \p Pred as a predecessor of this block. The successor list of
+/// \p Pred is left untouched.
+void BinaryBasicBlock::addPredecessor(BinaryBasicBlock *Pred) {
+  Predecessors.insert(Predecessors.end(), Pred);
+}
+
+/// Drop the first occurrence of \p Pred from the predecessor list.
+/// Asserts that \p Pred is present.
+void BinaryBasicBlock::removePredecessor(BinaryBasicBlock *Pred) {
+  auto PredEnd = pred_end();
+  auto Pos = std::find(pred_begin(), PredEnd, Pred);
+  assert(Pos != PredEnd && "Pred is not a predecessor of this block!");
+  Predecessors.erase(Pos);
+}
+
+} // namespace flo
+
+} // namespace llvm
--- a/bolt/BinaryBasicBlock.h
+++ b/bolt/BinaryBasicBlock.h
@ -0,0 +1,212 @@
+//===--- BinaryBasicBlock.h - Interface for assembly-level basic block ----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: memory management for instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_FLO_BINARY_BASIC_BLOCK_H
+#define LLVM_TOOLS_LLVM_FLO_BINARY_BASIC_BLOCK_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/ilist.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <limits>
+
+namespace llvm {
+
+namespace flo {
+
+class BinaryFunction;
+
+/// The intention is to keep the structure similar to MachineBasicBlock as
+/// we might switch to it at some point.
+class BinaryBasicBlock {
+
+  /// Label associated with the block. Null unless a label was passed to the
+  /// constructor.
+  MCSymbol *Label{nullptr};
+
+  /// Original offset in the function. Stays at the maximum uint64_t value
+  /// when no offset was supplied at construction.
+  uint64_t Offset{std::numeric_limits<uint64_t>::max()};
+
+  /// Alignment requirements for the block.
+  uint64_t Alignment{1};
+
+  /// Vector of all instructions in the block.
+  std::vector<MCInst> Instructions;
+
+  /// CFG information.
+  std::vector<BinaryBasicBlock *> Predecessors;
+  std::vector<BinaryBasicBlock *> Successors;
+
+  /// Profile data attached to one outgoing edge.
+  struct BinaryBranchInfo {
+    uint64_t Count;
+    uint64_t MispredictedCount; ///< Number of branches mispredicted.
+  };
+
+  /// Each successor has a corresponding BranchInfo entry in the list.
+  std::vector<BinaryBranchInfo> BranchInfo;
+  typedef std::vector<BinaryBranchInfo>::iterator          branch_info_iterator;
+  typedef std::vector<BinaryBranchInfo>::const_iterator
+                                                     const_branch_info_iterator;
+
+  // Constructors are private: blocks are created by BinaryFunction only.
+  BinaryBasicBlock() {}
+
+  explicit BinaryBasicBlock(
+      MCSymbol *Label,
+      uint64_t Offset = std::numeric_limits<uint64_t>::max())
+    : Label(Label), Offset(Offset) {}
+
+  explicit BinaryBasicBlock(uint64_t Offset)
+    : Offset(Offset) {}
+
+  // Exclusively managed by BinaryFunction.
+  friend class BinaryFunction;
+  friend bool operator<(const BinaryBasicBlock &LHS,
+                        const BinaryBasicBlock &RHS);
+
+public:
+
+  // Instructions iterators.
+  typedef std::vector<MCInst>::iterator                                iterator;
+  typedef std::vector<MCInst>::const_iterator                    const_iterator;
+  typedef std::reverse_iterator<const_iterator>          const_reverse_iterator;
+  typedef std::reverse_iterator<iterator>                      reverse_iterator;
+
+  // front()/back() forward to std::vector and therefore require a non-empty
+  // instruction list.
+  MCInst       &front()                 { return Instructions.front();  }
+  MCInst       &back()                  { return Instructions.back();   }
+  const MCInst &front()           const { return Instructions.front();  }
+  const MCInst &back()            const { return Instructions.back();   }
+
+  iterator                begin()       { return Instructions.begin();  }
+  const_iterator          begin() const { return Instructions.begin();  }
+  iterator                end  ()       { return Instructions.end();    }
+  const_iterator          end  () const { return Instructions.end();    }
+  reverse_iterator       rbegin()       { return Instructions.rbegin(); }
+  const_reverse_iterator rbegin() const { return Instructions.rbegin(); }
+  reverse_iterator       rend  ()       { return Instructions.rend();   }
+  const_reverse_iterator rend  () const { return Instructions.rend();   }
+
+  // CFG iterators.
+  typedef std::vector<BinaryBasicBlock *>::iterator       pred_iterator;
+  typedef std::vector<BinaryBasicBlock *>::const_iterator const_pred_iterator;
+  typedef std::vector<BinaryBasicBlock *>::iterator       succ_iterator;
+  typedef std::vector<BinaryBasicBlock *>::const_iterator const_succ_iterator;
+  typedef std::vector<BinaryBasicBlock *>::reverse_iterator
+                                                         pred_reverse_iterator;
+  typedef std::vector<BinaryBasicBlock *>::const_reverse_iterator
+                                                   const_pred_reverse_iterator;
+  typedef std::vector<BinaryBasicBlock *>::reverse_iterator
+                                                         succ_reverse_iterator;
+  typedef std::vector<BinaryBasicBlock *>::const_reverse_iterator
+                                                   const_succ_reverse_iterator;
+  pred_iterator        pred_begin()       { return Predecessors.begin(); }
+  const_pred_iterator  pred_begin() const { return Predecessors.begin(); }
+  pred_iterator        pred_end()         { return Predecessors.end();   }
+  const_pred_iterator  pred_end()   const { return Predecessors.end();   }
+  pred_reverse_iterator        pred_rbegin()
+                                          { return Predecessors.rbegin();}
+  const_pred_reverse_iterator  pred_rbegin() const
+                                          { return Predecessors.rbegin();}
+  pred_reverse_iterator        pred_rend()
+                                          { return Predecessors.rend();  }
+  const_pred_reverse_iterator  pred_rend()   const
+                                          { return Predecessors.rend();  }
+  unsigned             pred_size()  const {
+    return static_cast<unsigned>(Predecessors.size());
+  }
+  bool                 pred_empty() const { return Predecessors.empty(); }
+
+  succ_iterator        succ_begin()       { return Successors.begin();   }
+  const_succ_iterator  succ_begin() const { return Successors.begin();   }
+  succ_iterator        succ_end()         { return Successors.end();     }
+  const_succ_iterator  succ_end()   const { return Successors.end();     }
+  succ_reverse_iterator        succ_rbegin()
+                                          { return Successors.rbegin();  }
+  const_succ_reverse_iterator  succ_rbegin() const
+                                          { return Successors.rbegin();  }
+  succ_reverse_iterator        succ_rend()
+                                          { return Successors.rend();    }
+  const_succ_reverse_iterator  succ_rend()   const
+                                          { return Successors.rend();    }
+  unsigned             succ_size()  const {
+    return static_cast<unsigned>(Successors.size());
+  }
+  bool                 succ_empty() const { return Successors.empty();   }
+
+  /// Range views over the CFG edges, usable in range-based for loops.
+  inline iterator_range<pred_iterator> predecessors() {
+    return iterator_range<pred_iterator>(pred_begin(), pred_end());
+  }
+  inline iterator_range<const_pred_iterator> predecessors() const {
+    return iterator_range<const_pred_iterator>(pred_begin(), pred_end());
+  }
+  inline iterator_range<succ_iterator> successors() {
+    return iterator_range<succ_iterator>(succ_begin(), succ_end());
+  }
+  inline iterator_range<const_succ_iterator> successors() const {
+    return iterator_range<const_succ_iterator>(succ_begin(), succ_end());
+  }
+
+  /// Return symbol marking the start of this basic block.
+  MCSymbol *getLabel() const {
+    return Label;
+  }
+
+  /// Return local name for the block.
+  /// Dereferences Label, so the block must have been created with a label.
+  StringRef getName() const {
+    return Label->getName();
+  }
+
+  /// Add a copy of \p Inst at the end of this basic block.
+  /// Taking a const reference lets callers pass const objects and
+  /// temporaries as well as mutable lvalues.
+  void addInstruction(const MCInst &Inst) {
+    Instructions.emplace_back(Inst);
+  }
+
+  /// Return required alignment for the block.
+  uint64_t getAlignment() const {
+    return Alignment;
+  }
+
+  /// Adds block to successor list, and also updates predecessor list for
+  /// successor block.
+  /// Set branch info for this path.
+  void addSuccessor(BinaryBasicBlock *Succ,
+                    uint64_t Count = 0,
+                    uint64_t MispredictedCount = 0);
+
+  /// Remove \p Succ basic block from the list of successors. Update the
+  /// list of predecessors of \p Succ and update branch info.
+  void removeSuccessor(BinaryBasicBlock *Succ);
+
+private:
+
+  /// Adds predecessor to the BB. Most likely you don't need to call this.
+  void addPredecessor(BinaryBasicBlock *Pred);
+
+  /// Remove predecessor of the basic block. Don't use directly, instead
+  /// use removeSuccessor() function.
+  void removePredecessor(BinaryBasicBlock *Pred);
+};
+
+bool operator<(const BinaryBasicBlock &LHS, const BinaryBasicBlock &RHS);
+
+
+} // namespace flo
+
+} // namespace llvm
+
+#endif
--- a/bolt/BinaryContext.h
+++ b/bolt/BinaryContext.h
@ -0,0 +1,114 @@
+//===--- BinaryContext.h  - Interface for machine-level context -----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_FLO_BINARY_CONTEXT_H
+#define LLVM_TOOLS_LLVM_FLO_BINARY_CONTEXT_H
+
+#include "llvm/ADT/Triple.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCInstrAnalysis.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#include <functional>
+#include <map>
+#include <string>
+#include <system_error>
+
+namespace llvm {
+
+namespace flo {
+
+/// Everything that's needed to process binaries lives here.
+class BinaryContext {
+
+  // A context can only be built from the full set of components below.
+  BinaryContext() = delete;
+
+public:
+
+  /// Map from a symbol name to its address.
+  using SymbolMapType = std::map<std::string, uint64_t>;
+  SymbolMapType GlobalSymbols;
+
+  /// Map from an address to every name registered at that address.
+  std::multimap<uint64_t, std::string> GlobalAddresses;
+
+  // Components held through std::unique_ptr are released together with the
+  // context. TheTarget, MCE and MAB are plain pointers that this class
+  // never frees.
+  std::unique_ptr<MCContext> Ctx;
+  std::unique_ptr<Triple> TheTriple;
+  const Target *TheTarget;
+  MCCodeEmitter *MCE;
+  std::unique_ptr<MCObjectFileInfo> MOFI;
+  std::unique_ptr<const MCAsmInfo> AsmInfo;
+  std::unique_ptr<const MCInstrInfo> MII;
+  std::unique_ptr<const MCSubtargetInfo> STI;
+  std::unique_ptr<MCInstPrinter> InstPrinter;
+  std::unique_ptr<const MCInstrAnalysis> MIA;
+  std::unique_ptr<const MCRegisterInfo> MRI;
+  std::unique_ptr<MCDisassembler> DisAsm;
+
+  /// Callback accepting a std::error_code. The constructor leaves it empty.
+  std::function<void(std::error_code)> ErrorCheck;
+
+  MCAsmBackend *MAB;
+
+  /// Take over every std::unique_ptr argument and store the plain pointers
+  /// as given. The symbol and address maps start out empty.
+  BinaryContext(std::unique_ptr<MCContext> Ctx,
+                std::unique_ptr<Triple> TheTriple,
+                const Target *TheTarget,
+                MCCodeEmitter *MCE,
+                std::unique_ptr<MCObjectFileInfo> MOFI,
+                std::unique_ptr<const MCAsmInfo> AsmInfo,
+                std::unique_ptr<const MCInstrInfo> MII,
+                std::unique_ptr<const MCSubtargetInfo> STI,
+                std::unique_ptr<MCInstPrinter> InstPrinter,
+                std::unique_ptr<const MCInstrAnalysis> MIA,
+                std::unique_ptr<const MCRegisterInfo> MRI,
+                std::unique_ptr<MCDisassembler> DisAsm,
+                MCAsmBackend *MAB)
+      : Ctx(std::move(Ctx)),
+        TheTriple(std::move(TheTriple)),
+        TheTarget(TheTarget),
+        MCE(MCE),
+        MOFI(std::move(MOFI)),
+        AsmInfo(std::move(AsmInfo)),
+        MII(std::move(MII)),
+        STI(std::move(STI)),
+        InstPrinter(std::move(InstPrinter)),
+        MIA(std::move(MIA)),
+        MRI(std::move(MRI)),
+        DisAsm(std::move(DisAsm)),
+        MAB(MAB) {
+  }
+
+  ~BinaryContext() = default;
+};
+
+} // namespace flo
+
+} // namespace llvm
+
+#endif
--- a/bolt/BinaryFunction.cpp
+++ b/bolt/BinaryFunction.cpp
@ -0,0 +1,381 @@
+//===--- BinaryFunction.cpp - Interface for machine-level function --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <limits>
+#include <string>
+
+#include "BinaryBasicBlock.h"
+#include "BinaryFunction.h"
+
+#undef  DEBUG_TYPE
+#define DEBUG_TYPE "flo"
+
+namespace llvm {
+
+namespace flo {
+
+/// Write a textual description of the function to \p OS.
+///
+/// A summary header is always printed. The instruction listing follows only
+/// when \p PrintInstructions is set and an instruction printer is available
+/// in the binary context. Before basic blocks exist the raw instruction map
+/// is listed; afterwards each basic block is listed with its predecessors
+/// and successors.
+void BinaryFunction::print(raw_ostream &OS, bool PrintInstructions) const {
+  StringRef SectionName;
+  Section.getName(SectionName);
+
+  // Summary header.
+  OS << "Binary Function \"" << getName() << "\" {";
+  OS << "\n  State       : "   << CurrentState;
+  OS << "\n  Address     : 0x" << Twine::utohexstr(Address);
+  OS << "\n  Size        : 0x" << Twine::utohexstr(Size);
+  OS << "\n  MaxSize     : 0x" << Twine::utohexstr(MaxSize);
+  OS << "\n  Offset      : 0x" << Twine::utohexstr(FileOffset);
+  OS << "\n  Section     : "   << SectionName;
+  OS << "\n  Orc Section : "   << getCodeSectionName();
+  OS << "\n  IsSimple    : "   << IsSimple;
+  OS << "\n  BB count    : "   << BasicBlocks.size();
+  OS << "\n  Image       : 0x" << Twine::utohexstr(ImageAddress);
+  OS << "\n}\n";
+
+  const bool CanPrintInstructions = PrintInstructions && BC.InstPrinter;
+  if (!CanPrintInstructions)
+    return;
+
+  // Print a titled, comma-separated list of block names; an empty list
+  // prints nothing at all.
+  auto PrintBlockList = [&OS](const char *Title,
+                              const std::vector<BinaryBasicBlock *> &Blocks) {
+    if (Blocks.empty())
+      return;
+    OS << Title;
+    const char *Separator = "";
+    for (BinaryBasicBlock *Block : Blocks) {
+      OS << Separator << Block->getName();
+      Separator = ", ";
+    }
+    OS << '\n';
+  };
+
+  // Listing used before the CFG was built: walk the instruction map and
+  // emit a label line wherever one is registered.
+  if (BasicBlocks.empty() && !Instructions.empty()) {
+    for (const auto &OffsetAndInst : Instructions) {
+      auto InstrOffset = OffsetAndInst.first;
+
+      auto LabelIt = Labels.find(InstrOffset);
+      if (LabelIt != Labels.end())
+        OS << LabelIt->second->getName() << ":\n";
+
+      OS << format("    %08" PRIx64 ": ", InstrOffset);
+      BC.InstPrinter->printInst(&OffsetAndInst.second, OS, "", *BC.STI);
+      OS << "\n";
+    }
+  }
+
+  // Running offset of the current instruction, recomputed from the encoded
+  // instruction sizes.
+  uint64_t Offset = 0;
+
+  for (const auto &BB : BasicBlocks) {
+    OS << BB.getName() << " ("
+       << BB.Instructions.size() << " instructions)\n";
+
+    PrintBlockList("  Predecessors: ", BB.Predecessors);
+
+    Offset = RoundUpToAlignment(Offset, BB.getAlignment());
+
+    for (auto &Instr : BB) {
+      OS << format("    %08" PRIx64 ": ", Offset);
+      BC.InstPrinter->printInst(&Instr, OS, "", *BC.STI);
+      OS << "\n";
+
+      // In case we need MCInst printer:
+      // Instr.dump_pretty(OS, InstructionPrinter.get());
+
+      // Encode the instruction to learn its size.
+      // Note: this is imprecise since happening prior to relaxation.
+      SmallString<256> Code;
+      SmallVector<MCFixup, 4> Fixups;
+      raw_svector_ostream VecOS(Code);
+      BC.MCE->encodeInstruction(Instr, VecOS, Fixups, *BC.STI);
+      Offset += Code.size();
+    }
+
+    PrintBlockList("  Successors: ", BB.Successors);
+
+    OS << '\n';
+  }
+
+  OS << "End of Function \"" << getName() << "\"\n";
+}
+
+/// Disassemble the raw bytes in \p FunctionData into the per-offset
+/// instruction list of this function.
+///
+/// While decoding, direct branches and calls get their operands replaced by
+/// a symbol reference, labels are registered for branch targets inside the
+/// function, and local (non-call) branches are recorded in LocalBranches.
+/// Decoding stops and the function is marked as not simple when an
+/// instruction cannot be decoded, on an indirect branch, on a branch or call
+/// whose target cannot be evaluated, on a call into the middle of this
+/// function, or on a non-branch instruction with a RIP operand.
+///
+/// The state is moved to State::Disassembled in every case and the function
+/// always returns true; callers need isSimple() to learn the outcome.
+bool BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
+  assert(FunctionData.size() == getSize() &&
+         "function size does not match raw data size");
+
+  auto &Ctx = BC.Ctx;
+  auto &MIA = BC.MIA;
+
+  // Insert a label at the beginning of the function. This will be our first
+  // basic block.
+  Labels[0] = Ctx->createTempSymbol("BB0", false);
+
+  // Local flag; it is published through setSimple() after the loop.
+  bool IsSimple = true;
+  for (uint64_t Offset = 0; IsSimple && (Offset < getSize()); ) {
+    MCInst Instruction;
+    uint64_t Size;
+    if (!BC.DisAsm->getInstruction(Instruction,
+                                   Size,
+                                   FunctionData.slice(Offset),
+                                   getAddress() + Offset,
+                                   nulls(),
+                                   nulls())) {
+      // Ignore this function. Skip to the next one.
+      IsSimple = false;
+      break;
+    }
+
+    if (MIA->isIndirectBranch(Instruction)) {
+      IsSimple = false;
+      break;
+    }
+
+    if (MIA->isBranch(Instruction) || MIA->isCall(Instruction)) {
+      uint64_t InstructionTarget = 0;
+      uint64_t AbsoluteInstrAddr = getAddress() + Offset;
+      if (MIA->evaluateBranch(Instruction,
+                              AbsoluteInstrAddr,
+                              Size,
+                              InstructionTarget)) {
+        // Check if the target is within the same function. Otherwise it's
+        // a call, possibly a tail call.
+        //
+        // If the target *is* the function address it could be either a branch
+        // or a recursive call.
+        bool IsCall = MIA->isCall(Instruction);
+        MCSymbol *TargetSymbol{nullptr};
+        uint64_t TargetOffset{0};
+
+        if (IsCall && containsAddress(InstructionTarget)) {
+          if (InstructionTarget == getAddress()) {
+            // Recursive call.
+            TargetSymbol = Ctx->getOrCreateSymbol(getName());
+          } else {
+            // Possibly an old-style PIC code
+            DEBUG(dbgs() << "FLO: internal call detected at 0x"
+                         << Twine::utohexstr(AbsoluteInstrAddr)
+                         << " in function " << getName() << "\n");
+            IsSimple = false;
+            break;
+          }
+        }
+
+        if (!TargetSymbol) {
+          // Create either local label or external symbol.
+          if (containsAddress(InstructionTarget)) {
+            // Check if there's already a registered label.
+            TargetOffset = InstructionTarget - getAddress();
+            auto LI = Labels.find(TargetOffset);
+            if (LI == Labels.end()) {
+              TargetSymbol = Ctx->createTempSymbol();
+              Labels[TargetOffset] = TargetSymbol;
+            } else {
+              TargetSymbol = LI->second;
+            }
+          } else {
+            // This is a call regardless of the opcode (e.g. tail call).
+            IsCall = true;
+            // Check if we already have a symbol at this address.
+            std::string Name;
+            auto NI = BC.GlobalAddresses.find(InstructionTarget);
+            if (NI != BC.GlobalAddresses.end()) {
+              // Any registered name will do.
+              Name = NI->second;
+            } else {
+              // Create a new symbol at the destination.
+              Name = (Twine("FUNCat0x") +
+                      Twine::utohexstr(InstructionTarget)).str();
+              BC.GlobalAddresses.emplace(std::make_pair(InstructionTarget,
+                                                        Name));
+            }
+            TargetSymbol =  Ctx->getOrCreateSymbol(Name);
+            // Executed for existing names as well; it rewrites the address
+            // stored under Name.
+            BC.GlobalSymbols[Name] = InstructionTarget;
+          }
+        }
+
+        // Replace the decoded operands with a single reference to the
+        // target symbol. NOTE(review): this relies on MCInst::clear()
+        // keeping the opcode - confirm.
+        Instruction.clear();
+        Instruction.addOperand(
+            MCOperand::createExpr(
+              MCSymbolRefExpr::create(TargetSymbol,
+                                      MCSymbolRefExpr::VK_None,
+                                      *Ctx)));
+        if (!IsCall) {
+          // Add local branch info.
+          LocalBranches.push_back({Offset, TargetOffset});
+        }
+
+      } else {
+        // evaluateBranch() could not compute the target, e.g. an indirect
+        // call.
+        IsSimple = false;
+        break;
+      }
+    } else {
+      if (MIA->hasRIPOperand(Instruction)) {
+        DEBUG(dbgs() << "FLO: rip-relative instruction found "
+                        "(not supported yet)\n");
+        IsSimple = false;
+        break;
+      }
+    }
+
+    // Only reached when the instruction was accepted; the instruction that
+    // ended decoding early is not recorded.
+    addInstruction(Offset, std::move(Instruction));
+
+    Offset += Size;
+  }
+
+  setSimple(IsSimple);
+
+  // TODO: clear memory if not simple function?
+
+  // Update state.
+  updateState(State::Disassembled);
+
+  // Print the function in the new state.
+  DEBUG(print(dbgs(), /* PrintInstructions = */ true));
+
+  return true;
+}
+
+/// Build the control flow graph from the disassembled instruction list.
+///
+/// Basic blocks are created in layout order, edges are added for every
+/// recorded local branch, and fall-through edges are added between adjacent
+/// blocks. On success the instruction list, labels and local branch records
+/// are cleared and the state becomes State::CFG.
+///
+/// \returns false, without modifying anything, when the function is not
+/// simple or is not in State::Disassembled; true otherwise.
+bool BinaryFunction::buildCFG() {
+
+  auto &MIA = BC.MIA;
+
+  if (!isSimple())
+    return false;
+
+  if (CurrentState != State::Disassembled)
+    return false;
+
+  assert(BasicBlocks.empty() && "basic block list should be empty");
+  assert((Labels.find(0) != Labels.end()) &&
+         "first instruction should always have a label");
+
+  // Create basic blocks in the original layout order:
+  //
+  //  * Every instruction with associated label marks
+  //    the beginning of a basic block.
+  //  * Conditional instruction marks the end of a basic block,
+  //    except when the following instruction is an
+  //    unconditional branch, and the unconditional branch is not
+  //    a destination of another branch. In the latter case, the
+  //    basic block will consist of a single unconditional branch
+  //    (missed optimization opportunity?).
+  //
+  // Created basic blocks are sorted in layout order since they are
+  // created in the same order as instructions, and instructions are
+  // sorted by offsets.
+  BinaryBasicBlock *InsertBB{nullptr};
+  BinaryBasicBlock *PrevBB{nullptr};
+  for (auto &InstrInfo : Instructions) {
+    auto LI = Labels.find(InstrInfo.first);
+    if (LI != Labels.end()) {
+      // Always create new BB at branch destination.
+      PrevBB = InsertBB;
+      InsertBB = addBasicBlock(LI->first, LI->second);
+    }
+    if (!InsertBB) {
+      // It must be a fallthrough. Create a new block unless we see an
+      // unconditional branch.
+      assert(PrevBB && "no previous basic block for a fall through");
+      if (MIA->isUnconditionalBranch(InstrInfo.second)) {
+        // Temporarily restore inserter basic block.
+        InsertBB = PrevBB;
+      } else {
+        InsertBB = addBasicBlock(InstrInfo.first,
+                                 BC.Ctx->createTempSymbol("FT", true));
+      }
+    }
+
+    InsertBB->addInstruction(InstrInfo.second);
+
+    // A terminator closes the current block; the next instruction has to
+    // pick or create its block again.
+    // How well do we detect tail calls here?
+    if (MIA->isTerminator(InstrInfo.second)) {
+      PrevBB = InsertBB;
+      InsertBB = nullptr;
+    }
+  }
+
+  // Intermediate dump.
+  DEBUG(print(dbgs(), /* PrintInstructions = */ true));
+
+  // TODO: handle properly calls to no-return functions,
+  // e.g. exit(3), etc. Otherwise we'll see a false fall-through
+  // blocks.
+
+  // Add an edge for every local branch recorded during disassembly.
+  for (auto &Branch : LocalBranches) {
+
+    DEBUG(dbgs() << "registering branch [0x" << Twine::utohexstr(Branch.first)
+                 << "] -> [0x" << Twine::utohexstr(Branch.second) << "]\n");
+    BinaryBasicBlock *FromBB = getBasicBlockContainingOffset(Branch.first);
+    assert(FromBB && "cannot find BB containing FROM branch");
+    BinaryBasicBlock *ToBB = getBasicBlockAtOffset(Branch.second);
+    assert(ToBB && "cannot find BB containing TO branch");
+
+    // TODO: add weights here.
+    //
+    FromBB->addSuccessor(ToBB);
+  }
+
+  // Add fall-through branches.
+  PrevBB = nullptr;
+  bool IsPrevFT = false; // Is previous block a fall-through.
+  for (auto &BB : BasicBlocks) {
+    if (IsPrevFT) {
+      PrevBB->addSuccessor(&BB);
+    }
+
+    const MCInst &LastInst = BB.back();
+    if (BB.succ_size() == 0) {
+      // No branch edge: falls through unless the block ends in a terminator.
+      IsPrevFT = !MIA->isTerminator(LastInst);
+    } else if (BB.succ_size() == 1) {
+      // One branch edge: only a conditional branch can also fall through.
+      IsPrevFT = MIA->isConditionalBranch(LastInst);
+    } else {
+      // Either ends with 2 branches, or with an indirect jump.
+      IsPrevFT = false;
+    }
+
+    PrevBB = &BB;
+  }
+
+  // After the loop IsPrevFT describes the last block. If it is set there is
+  // no following block to fall into.
+  if (IsPrevFT) {
+    // Possibly a call that does not return.
+    DEBUG(dbgs() << "last block was marked as a fall-through\n");
+  }
+
+  // Clean-up memory taken by instructions and labels.
+  clearInstructions();
+  clearLabels();
+  clearLocalBranches();
+
+  // Update the state.
+  CurrentState = State::CFG;
+
+  // Print the function in the new state.
+  DEBUG(print(dbgs(), /* PrintInstructions = */ true));
+
+  return true;
+}
+
+} // namespace flo
+
+} // namespace llvm
--- a/bolt/BinaryFunction.h
+++ b/bolt/BinaryFunction.h
@ -0,0 +1,399 @@
+//===--- BinaryFunction.h - Interface for machine-level function ----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Interface to function in binary (machine) form. This is assembly-level
+// code representation with the control flow.
+//
+// TODO: memory management for instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_FLO_BINARY_FUNCTION_H
+#define LLVM_TOOLS_LLVM_FLO_BINARY_FUNCTION_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/ilist.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrAnalysis.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <limits>
+
+#include "BinaryBasicBlock.h"
+#include "BinaryContext.h"
+
+using namespace llvm::object;
+
+namespace llvm {
+
+namespace flo {
+
+/// BinaryFunction is a representation of machine-level function.
+///
+/// We use the term "Binary" as "Machine" was already taken.
+///
+/// A function moves through the states listed in \c State: it starts Empty,
+/// disassemble() fills \c Instructions, \c Labels and \c LocalBranches, and
+/// buildCFG() turns those into \c BasicBlocks and releases the temporary
+/// containers.
+class BinaryFunction {
+public:
+  enum class State : char {
+    Empty = 0,        /// Function body is empty
+    Disassembled,     /// Function has been disassembled
+    CFG,              /// Control flow graph has been built
+    Assembled,        /// Function has been assembled in memory
+  };
+
+  /// Value of the execution count when no profile data is available.
+  static constexpr uint64_t COUNT_NO_PROFILE =
+    std::numeric_limits<uint64_t>::max();
+
+private:
+
+  /// Current state of the function.
+  State CurrentState{State::Empty};
+
+  /// Name of the function as we know it.
+  std::string Name;
+
+  /// Symbol associated with this function.
+  SymbolRef Symbol;
+
+  /// Containing section
+  SectionRef Section;
+
+  /// Address of the function in memory. Also could be an offset from
+  /// base address for position independent binaries.
+  uint64_t Address;
+
+  /// Original size of the function.
+  uint64_t Size;
+
+  /// Offset in the file.
+  uint64_t FileOffset{0};
+
+  /// Maximum size this function is allowed to have.
+  uint64_t MaxSize{std::numeric_limits<uint64_t>::max()};
+
+  /// Alignment requirements for the function.
+  uint64_t Alignment{1};
+
+  /// False if the function is too complex to reconstruct its control
+  /// flow graph and re-assemble.
+  bool IsSimple{true};
+
+  BinaryContext &BC;
+
+  /// The address for the code for this function in codegen memory.
+  uint64_t ImageAddress{0};
+
+  /// The size of the code in memory.
+  uint64_t ImageSize{0};
+
+  /// Name for the section this function code should reside in.
+  std::string CodeSectionName;
+
+  /// The profile data for the number of times the function was executed.
+  uint64_t ExecutionCount{COUNT_NO_PROFILE};
+
+  /// Release storage used by instructions.
+  /// Swapping with an empty map (rather than calling clear()) hands the
+  /// old contents to a temporary that is destroyed on return.
+  BinaryFunction &clearInstructions() {
+    std::map<uint64_t, MCInst> TempMap;
+    Instructions.swap(TempMap);
+    return *this;
+  }
+
+  /// Release storage used by labels.
+  BinaryFunction &clearLabels() {
+    std::map<uint64_t, MCSymbol *> TempMap;
+    Labels.swap(TempMap);
+    return *this;
+  }
+
+  /// Release memory taken by local branch info.
+  BinaryFunction &clearLocalBranches() {
+    std::vector<std::pair<uint64_t, uint64_t>> TempVector;
+    LocalBranches.swap(TempVector);
+    return *this;
+  }
+
+  /// Set the current state of the function to \p State.
+  BinaryFunction &updateState(BinaryFunction::State State) {
+    CurrentState = State;
+    return *this;
+  }
+
+public:
+  /// Local branches as [from offset, to offset] pairs.
+  std::vector<std::pair<uint64_t, uint64_t>> LocalBranches;
+
+  /// Labels keyed by offset from the function start.
+  std::map<uint64_t, MCSymbol *> Labels;
+
+  /// Temporary holder of instructions before CFG is constructed.
+  std::map<uint64_t, MCInst> Instructions;
+
+  // Blocks are kept sorted in the layout order. If we need to change the
+  // layout, the terminating instructions need to be modified.
+  typedef std::vector<BinaryBasicBlock> BasicBlockListType;
+  BasicBlockListType BasicBlocks;
+
+  typedef BasicBlockListType::iterator iterator;
+  typedef BasicBlockListType::const_iterator const_iterator;
+  typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
+  typedef std::reverse_iterator<iterator>             reverse_iterator;
+
+  // CFG iterators.
+  iterator                 begin()       { return BasicBlocks.begin(); }
+  const_iterator           begin() const { return BasicBlocks.begin(); }
+  iterator                 end  ()       { return BasicBlocks.end();   }
+  const_iterator           end  () const { return BasicBlocks.end();   }
+
+  reverse_iterator        rbegin()       { return BasicBlocks.rbegin(); }
+  const_reverse_iterator  rbegin() const { return BasicBlocks.rbegin(); }
+  reverse_iterator        rend  ()       { return BasicBlocks.rend();   }
+  const_reverse_iterator  rend  () const { return BasicBlocks.rend();   }
+
+  unsigned                  size() const { return (unsigned)BasicBlocks.size();}
+  bool                     empty() const { return BasicBlocks.empty(); }
+  const BinaryBasicBlock &front() const  { return BasicBlocks.front(); }
+        BinaryBasicBlock &front()        { return BasicBlocks.front(); }
+  const BinaryBasicBlock & back() const  { return BasicBlocks.back(); }
+        BinaryBasicBlock & back()        { return BasicBlocks.back(); }
+
+
+  /// Create a function named \p Name located at \p Address with the
+  /// original size \p Size. The code section name is derived from the
+  /// function name by prefixing it with ".text.".
+  BinaryFunction(StringRef Name, SymbolRef Symbol, SectionRef Section,
+                 uint64_t Address, uint64_t Size, BinaryContext &BC) :
+      Name(Name), Symbol(Symbol), Section(Section), Address(Address),
+      Size(Size), BC(BC), CodeSectionName((".text." + Name).str()) {}
+
+  /// Perform optimal code layout based on edge frequencies making necessary
+  /// adjustments to instructions at the end of basic blocks.
+  void optimizeLayout();
+
+  /// View CFG in graphviz program
+  void viewGraph();
+
+  /// Return the name of the function as extracted from the binary file.
+  StringRef getName() const {
+    return Name;
+  }
+
+  /// Return symbol associated with the function start.
+  SymbolRef getSymbol() const {
+    return Symbol;
+  }
+
+  /// Return containing file section.
+  SectionRef getSection() const {
+    return Section;
+  }
+
+  /// Return original address of the function (or offset from base for PIC).
+  uint64_t getAddress() const {
+    return Address;
+  }
+
+  /// Return offset of the function body in the binary file.
+  uint64_t getFileOffset() const {
+    return FileOffset;
+  }
+
+  /// Return (original) size of the function.
+  uint64_t getSize() const {
+    return Size;
+  }
+
+  /// Return the maximum size the body of the function could have.
+  uint64_t getMaxSize() const {
+    return MaxSize;
+  }
+
+  /// Return internal section name for this function.
+  StringRef getCodeSectionName() const {
+    assert(!CodeSectionName.empty() && "no section name for function");
+    return StringRef(CodeSectionName);
+  }
+
+  /// Return true if the function could be correctly processed.
+  bool isSimple() const {
+    return IsSimple;
+  }
+
+  /// Return true if the given address \p PC is inside the function body.
+  bool containsAddress(uint64_t PC) const {
+    return Address <= PC && PC < Address + Size;
+  }
+
+  /// Create a basic block at a given \p Offset in the
+  /// function and append it to the end of list of blocks.
+  /// A temporary label is created when \p Label is null.
+  ///
+  /// Asserts that no basic block already starts at \p Offset. Because the
+  /// block is appended, callers have to add blocks in increasing offset
+  /// order for the offset lookups below to work.
+  ///
+  /// NOTE: blocks are stored by value in a std::vector, so the returned
+  /// pointer may be invalidated by a later call that grows the vector.
+  BinaryBasicBlock *addBasicBlock(uint64_t Offset, MCSymbol *Label = nullptr) {
+    assert(!getBasicBlockAtOffset(Offset) && "basic block already exists");
+    if (!Label)
+      Label = BC.Ctx->createTempSymbol("BB", true);
+    BasicBlocks.emplace_back(BinaryBasicBlock(Label, Offset));
+
+    return &BasicBlocks.back();
+  }
+
+  /// Return the basic block starting at \p Offset, creating it (with the
+  /// optional \p Label) if there is none.
+  BinaryBasicBlock *getOrCreateBasicBlockAt(uint64_t Offset,
+                                            MCSymbol *Label = nullptr) {
+    BinaryBasicBlock *BB = getBasicBlockAtOffset(Offset);
+    if (!BB)
+      BB = addBasicBlock(Offset, Label);
+
+    return BB;
+  }
+
+  /// Return basic block that started at offset \p Offset, or nullptr if
+  /// no block starts exactly there.
+  BinaryBasicBlock *getBasicBlockAtOffset(uint64_t Offset) {
+    BinaryBasicBlock *BB = getBasicBlockContainingOffset(Offset);
+    if (BB && BB->Offset == Offset)
+      return BB;
+
+    return nullptr;
+  }
+
+  /// Return basic block that originally contained offset \p Offset
+  /// from the function start, i.e. the last block whose start offset is
+  /// not greater than \p Offset.
+  ///
+  /// Returns nullptr if \p Offset is past the function size, if there are
+  /// no blocks, or if every block starts after \p Offset.
+  BinaryBasicBlock *getBasicBlockContainingOffset(uint64_t Offset) {
+    if (Offset > Size)
+      return nullptr;
+
+    if (BasicBlocks.empty())
+      return nullptr;
+
+    // Find the first block that starts strictly after Offset; the block
+    // right before it is the one containing Offset. (lower_bound would
+    // return the *following* block for an offset in the middle of a block.)
+    auto I = std::upper_bound(BasicBlocks.begin(),
+                              BasicBlocks.end(),
+                              BinaryBasicBlock(Offset));
+
+    if (I == BasicBlocks.begin())
+      return nullptr;
+
+    --I;
+    return &(*I);
+  }
+
+  /// Dump function information to debug output. If \p PrintInstructions
+  /// is true - include instruction disassembly.
+  void dump(bool PrintInstructions = false) const {
+    print(dbgs(), PrintInstructions);
+  }
+
+  /// Print function information to the \p OS stream.
+  void print(raw_ostream &OS, bool PrintInstructions = false) const;
+
+  /// Record \p Instruction found at \p Offset from the function start.
+  /// An instruction already recorded at the same offset is kept.
+  void addInstruction(uint64_t Offset, MCInst &&Instruction) {
+    Instructions.emplace(Offset, std::move(Instruction));
+  }
+
+  BinaryFunction &setFileOffset(uint64_t Offset) {
+    FileOffset = Offset;
+    return *this;
+  }
+
+  BinaryFunction &setMaxSize(uint64_t Size) {
+    MaxSize = Size;
+    return *this;
+  }
+
+  BinaryFunction &setSimple(bool Simple) {
+    IsSimple = Simple;
+    return *this;
+  }
+
+  BinaryFunction &setAlignment(uint64_t Align) {
+    Alignment = Align;
+    return *this;
+  }
+
+  uint64_t getAlignment() const {
+    return Alignment;
+  }
+
+  BinaryFunction &setImageAddress(uint64_t Address) {
+    ImageAddress = Address;
+    return *this;
+  }
+
+  /// Return the address of this function's image in memory.
+  uint64_t getImageAddress() const {
+    return ImageAddress;
+  }
+
+  BinaryFunction &setImageSize(uint64_t Size) {
+    ImageSize = Size;
+    return *this;
+  }
+
+  /// Return the size of this function's image in memory.
+  uint64_t getImageSize() const {
+    return ImageSize;
+  }
+
+  /// Set the profile data for the number of times the function was called.
+  BinaryFunction &setExecutionCount(uint64_t Count) {
+    ExecutionCount = Count;
+    return *this;
+  }
+
+  /// Return the profile information about the number of times
+  /// the function was executed.
+  ///
+  /// Return COUNT_NO_PROFILE if there's no profile info.
+  uint64_t getExecutionCount() const {
+    return ExecutionCount;
+  }
+
+  /// Disassemble function from raw data \p FunctionData.
+  /// If successful, this function will populate the list of instructions
+  /// for this function together with offsets from the function start
+  /// in the input. It will also populate Labels with destinations for
+  /// local branches, and LocalBranches with [from, to] info.
+  ///
+  /// \p FunctionData is the set bytes representing the function body.
+  ///
+  /// The Function should be properly initialized before this function
+  /// is called. I.e. function address and size should be set.
+  ///
+  /// Returns true on successful disassembly, and updates the current
+  /// state to State:Disassembled.
+  ///
+  /// Returns false if disassembly failed.
+  bool disassemble(ArrayRef<uint8_t> FunctionData);
+
+  /// Builds a list of basic blocks with successor and predecessor info.
+  ///
+  /// The function should be in Disassembled state prior to call.
+  ///
+  /// Returns true on success and update the current function state to
+  /// State::CFG. Returns false if CFG cannot be built.
+  bool buildCFG();
+
+  virtual ~BinaryFunction() {}
+};
+
+/// Write a human-readable description of the function state \p State
+/// to \p OS and return the stream.
+inline raw_ostream &operator<<(raw_ostream &OS,
+                               const BinaryFunction::State State) {
+  // Pick the text first, then emit it with a single write.
+  const char *Description = "<unknown>";
+  switch (State) {
+  case BinaryFunction::State::Empty:
+    Description = "empty";
+    break;
+  case BinaryFunction::State::Disassembled:
+    Description = "disassembled";
+    break;
+  case BinaryFunction::State::CFG:
+    Description = "CFG constructed";
+    break;
+  case BinaryFunction::State::Assembled:
+    Description = "assembled";
+    break;
+  default:
+    break;
+  }
+  OS << Description;
+
+  return OS;
+}
+
+} // namespace flo
+
+} // namespace llvm
+
+#endif
--- a/bolt/CMakeLists.txt
+++ b/bolt/CMakeLists.txt
@ -2,7 +2,6 @@ set(LLVM_LINK_COMPONENTS
  ${LLVM_TARGETS_TO_BUILD}
  CodeGen
  Core
-  DebugInfoDWARF
  MC
  MCDisassembler
  MCParser
@ -13,4 +12,6 @@ set(LLVM_LINK_COMPONENTS

 add_llvm_tool(llvm-flo
  llvm-flo.cpp
+  BinaryBasicBlock.cpp
+  BinaryFunction.cpp
  )
--- a/bolt/LLVMBuild.txt
+++ b/bolt/LLVMBuild.txt
@ -19,4 +19,4 @@
 type = Tool
 name = llvm-flo
 parent = Tools
-required_libraries = DebugInfoDWARF MC MCDisassembler MCParser Object all-targets
+required_libraries = MC MCDisassembler MCParser Object all-targets
--- a/bolt/llvm-flo.cpp
+++ b/bolt/llvm-flo.cpp
@ -7,12 +7,17 @@
 //
 //===----------------------------------------------------------------------===//
 //
+// This is a binary optimizer that will take 'perf' output and change
+// basic block layout for better performance (a.k.a. branch straightening),
+// plus some other optimizations that are better performed on a binary.
+//
 //===----------------------------------------------------------------------===//

 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
 #include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
 #include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
+#include "llvm/MC/MCAsmBackend.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDisassembler.h"
@ -20,7 +25,10 @@
 #include "llvm/MC/MCInstrAnalysis.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCObjectStreamer.h"
 #include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/MCSymbol.h"
@ -38,13 +46,20 @@
 #include "llvm/Support/ToolOutputFile.h"
 #include "llvm/Target/TargetMachine.h"

+#include "BinaryBasicBlock.h"
+#include "BinaryContext.h"
+#include "BinaryFunction.h"

 #include <algorithm>
 #include <map>
 #include <system_error>

+#undef  DEBUG_TYPE
+#define DEBUG_TYPE "flo"
+
 using namespace llvm;
 using namespace object;
+using namespace flo;

 // Tool options.
 static cl::opt<std::string>
@ -57,11 +72,16 @@ static cl::opt<std::string>
 OutputFilename("o", cl::desc("<output file>"), cl::Required);

 static cl::list<std::string>
-FunctionNames("funcs", cl::desc("list of functions to optimzize"),
-              cl::Optional);
+FunctionNames("funcs",
+              cl::CommaSeparated,
+              cl::desc("list of functions to optimize"),
+              cl::value_desc("func1,func2,func3,..."));

+// When set, basic blocks without predecessors (other than the first block
+// of a function) are reported as dead. The blocks are only reported for now:
+// there is no interface yet to actually remove them.
+static cl::opt<bool>
+EliminateUnreachable("eliminate-unreachable",
+                     cl::desc("eliminate unreachable code"),
+                     cl::Optional);

-// Tool name used for reporting.
 static StringRef ToolName;

 static void report_error(StringRef Message, std::error_code EC) {
@ -70,6 +90,576 @@ static void report_error(StringRef Message, std::error_code EC) {
  exit(1);
 }

+/// Terminate the tool with exit code 1 if \p EC holds an error, after
+/// printing its message to stderr. Does nothing when \p EC is clear.
+static void error(std::error_code EC) {
+  if (EC) {
+    errs() << ToolName << ": error reading file: " << EC.message() << ".\n";
+    exit(1);
+  }
+}
+
+/// Return a vector holding exactly one element: \p t, moved into place.
+template <typename T>
+static std::vector<T> singletonSet(T t) {
+  std::vector<T> Single;
+  Single.emplace_back(std::move(t));
+  return Single;
+}
+
+/// Class responsible for allocating and managing code and data sections.
+///
+/// It forwards every request to SectionMemoryManager and additionally
+/// remembers where each code section was placed, so that the caller can
+/// later remap the sections to their addresses in the output binary.
+class ExecutableFileMemoryManager : public SectionMemoryManager {
+public:
+
+  // Keep [section name] -> [allocated address, size] map for later remapping.
+  std::map<std::string, std::pair<uint64_t,uint64_t>> SectionAddressInfo;
+
+  ExecutableFileMemoryManager() {}
+
+  /// Allocate a code section and record its address and size under
+  /// \p SectionName in SectionAddressInfo.
+  uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
+                               unsigned SectionID,
+                               StringRef SectionName) override {
+    auto ret =
+      SectionMemoryManager::allocateCodeSection(Size, Alignment, SectionID,
+                                                SectionName);
+    // Format the address explicitly: streaming the pointer itself goes
+    // through the 'const void *' overload, which prints its own "0x".
+    DEBUG(dbgs() << "FLO: allocating code section : " << SectionName
+                 << " with size " << Size << ", alignment " << Alignment
+                 << " at 0x"
+                 << Twine::utohexstr(reinterpret_cast<uint64_t>(ret))
+                 << "\n");
+
+    SectionAddressInfo[SectionName] = {reinterpret_cast<uint64_t>(ret), Size};
+
+    return ret;
+  }
+
+  /// Allocate a data section. Data sections are not expected here, hence
+  /// the warning; the allocation itself is delegated unchanged.
+  uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
+                               unsigned SectionID, StringRef SectionName,
+                               bool IsReadOnly) override {
+    DEBUG(dbgs() << "FLO: allocating data section : " << SectionName
+                 << " with size " << Size << ", alignment "
+                 << Alignment << "\n");
+    errs() << "FLO-WARNING: allocating data section.\n";
+    return SectionMemoryManager::allocateDataSection(Size, Alignment, SectionID,
+                                                     SectionName, IsReadOnly);
+  }
+
+  // Tell EE that we guarantee we don't need stubs.
+  bool allowStubAllocation() const override { return false; }
+
+  /// Delegate to SectionMemoryManager, logging the call in debug builds.
+  bool finalizeMemory(std::string *ErrMsg = nullptr) override {
+    DEBUG(dbgs() << "FLO: finalizeMemory()\n");
+    return SectionMemoryManager::finalizeMemory(ErrMsg);
+  }
+};
+
+/// Create BinaryContext for a given architecture \p ArchName and
+/// triple \p TripleName.
+///
+/// All MC-layer objects needed for disassembly and re-assembly (register
+/// info, asm info, subtarget info, instruction info, disassembler,
+/// instruction analysis, instruction printer, code emitter and assembler
+/// backend) are created here and handed over to the context.
+///
+/// Returns nullptr, after printing a message to stderr, if any of these
+/// objects cannot be created for the target.
+static std::unique_ptr<BinaryContext> CreateBinaryContext(
+    std::string ArchName,
+    std::string TripleName) {
+
+  std::string Error;
+
+  std::unique_ptr<Triple> TheTriple = llvm::make_unique<Triple>(TripleName);
+  const Target *TheTarget = TargetRegistry::lookupTarget(ArchName,
+                                                         *TheTriple,
+                                                         Error);
+  if (!TheTarget) {
+    errs() << ToolName << ": " << Error;
+    return nullptr;
+  }
+
+  std::unique_ptr<const MCRegisterInfo> MRI(
+      TheTarget->createMCRegInfo(TripleName));
+  if (!MRI) {
+    errs() << "error: no register info for target " << TripleName << "\n";
+    return nullptr;
+  }
+
+  // Set up disassembler.
+  std::unique_ptr<const MCAsmInfo> AsmInfo(
+      TheTarget->createMCAsmInfo(*MRI, TripleName));
+  if (!AsmInfo) {
+    errs() << "error: no assembly info for target " << TripleName << "\n";
+    return nullptr;
+  }
+
+  std::unique_ptr<const MCSubtargetInfo> STI(
+      TheTarget->createMCSubtargetInfo(TripleName, "", ""));
+  if (!STI) {
+    errs() << "error: no subtarget info for target " << TripleName << "\n";
+    return nullptr;
+  }
+
+  std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
+  if (!MII) {
+    errs() << "error: no instruction info for target " << TripleName << "\n";
+    return nullptr;
+  }
+
+  // The context keeps a pointer to MOFI, which in turn is initialized
+  // with the context, hence the two-step setup.
+  std::unique_ptr<MCObjectFileInfo> MOFI =
+    llvm::make_unique<MCObjectFileInfo>();
+  std::unique_ptr<MCContext> Ctx =
+    llvm::make_unique<MCContext>(AsmInfo.get(), MRI.get(), MOFI.get());
+  MOFI->InitMCObjectFileInfo(*TheTriple, Reloc::Default,
+                             CodeModel::Default, *Ctx);
+
+  std::unique_ptr<MCDisassembler> DisAsm(
+    TheTarget->createMCDisassembler(*STI, *Ctx));
+
+  if (!DisAsm) {
+    errs() << "error: no disassembler for target " << TripleName << "\n";
+    return nullptr;
+  }
+
+  std::unique_ptr<const MCInstrAnalysis> MIA(
+      TheTarget->createMCInstrAnalysis(MII.get()));
+  if (!MIA) {
+    errs() << "error: failed to create instruction analysis for target "
+           << TripleName << "\n";
+    return nullptr;
+  }
+
+  int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
+  std::unique_ptr<MCInstPrinter> InstructionPrinter(
+      TheTarget->createMCInstPrinter(*TheTriple, AsmPrinterVariant,
+                                     *AsmInfo, *MII, *MRI));
+  if (!InstructionPrinter) {
+    errs() << "error: no instruction printer for target " << TripleName
+           << '\n';
+    return nullptr;
+  }
+  InstructionPrinter->setPrintImmHex(true);
+
+  // The code emitter and the assembler backend are passed on as raw
+  // pointers, so make sure they exist before anybody dereferences them.
+  auto MCE = TheTarget->createMCCodeEmitter(*MII, *MRI, *Ctx);
+  if (!MCE) {
+    errs() << "error: no code emitter for target " << TripleName << "\n";
+    return nullptr;
+  }
+
+  auto MAB = TheTarget->createMCAsmBackend(*MRI, TripleName, "");
+  if (!MAB) {
+    errs() << "error: no assembler backend for target " << TripleName << "\n";
+    delete MCE;
+    return nullptr;
+  }
+
+  // Make sure we don't miss any output on core dumps.
+  outs().SetUnbuffered();
+  errs().SetUnbuffered();
+  dbgs().SetUnbuffered();
+
+  auto BC =
+      llvm::make_unique<BinaryContext>(std::move(Ctx),
+                                       std::move(TheTriple),
+                                       TheTarget,
+                                       MCE,
+                                       std::move(MOFI),
+                                       std::move(AsmInfo),
+                                       std::move(MII),
+                                       std::move(STI),
+                                       std::move(InstructionPrinter),
+                                       std::move(MIA),
+                                       std::move(MRI),
+                                       std::move(DisAsm),
+                                       MAB);
+
+  return BC;
+}
+
+/// Read functions from the ELF \p File, rebuild their control flow graphs,
+/// re-assemble them and overwrite their bodies in a copy of the input.
+///
+/// The steps are:
+///  1. collect function symbols from the symbol table;
+///  2. disassemble every function and build its CFG;
+///  3. run the (currently report-only) unreachable block detection;
+///  4. emit simple functions, each into its own section, to an in-memory
+///     object that is also written to "<output>.o";
+///  5. link that object with every section mapped to the original
+///     function address;
+///  6. copy the input file to the output and overwrite each function that
+///     fits into its original slot, padding the rest with nops.
+static void OptimizeFile(ELFObjectFileBase *File) {
+
+  // FIXME: there should be some way to extract arch and triple information
+  //        from the file.
+  std::unique_ptr<BinaryContext> BC =
+    CreateBinaryContext("x86-64", "x86_64-unknown-linux");
+  if (!BC) {
+    errs() << "failed to create a binary context\n";
+    return;
+  }
+
+  // Store all non-zero file symbols in this map for quick address lookup.
+  std::map<uint64_t, SymbolRef> FileSymRefs;
+
+  // Entry point to the binary.
+  //
+  // Note: this is ELF header entry point, but we could have more entry points
+  // from constructors etc.
+  BinaryFunction *EntryPointFunction{nullptr};
+
+  // Populate array of binary functions and file symbols
+  // from file symbol table.
+  //
+  // For local symbols we want to keep track of associated FILE symbol for
+  // disambiguation by name.
+  std::map<uint64_t, BinaryFunction> BinaryFunctions;
+  StringRef FileSymbolName;
+  for (const SymbolRef &Symbol : File->symbols()) {
+    // Keep undefined symbols for pretty printing?
+    if (Symbol.getFlags() & SymbolRef::SF_Undefined)
+      continue;
+
+    ErrorOr<StringRef> Name = Symbol.getName();
+    error(Name.getError());
+
+    if (Symbol.getType() == SymbolRef::ST_File) {
+      // Could be used for local symbol disambiguation.
+      FileSymbolName = *Name;
+      continue;
+    }
+
+    ErrorOr<uint64_t> AddressOrErr = Symbol.getAddress();
+    error(AddressOrErr.getError());
+    uint64_t Address = *AddressOrErr;
+    if (Address == 0) {
+      if (Symbol.getType() == SymbolRef::ST_Function)
+        errs() << "FLO-WARNING: function with 0 address seen\n";
+      continue;
+    }
+
+    FileSymRefs[Address] = Symbol;
+
+    // Only consider ST_Function symbols for functions. Although this
+    // assumption  could be broken by assembly functions for which the type
+    // could be wrong.
+    if (Symbol.getType() != SymbolRef::ST_Function) {
+      // FIXME: add it to the address map.
+      continue;
+    }
+
+    // TODO: populate address map with PLT entries for better readability.
+
+    // Ignore function with 0 size for now (possibly coming from assembly).
+    auto SymbolSize = ELFSymbolRef(Symbol).getSize();
+    if (SymbolSize == 0)
+      continue;
+
+    // There's nothing horribly wrong with anonymous symbols, but let's
+    // ignore them for now.
+    if (Name->empty())
+      continue;
+
+    ErrorOr<section_iterator> SectionOrErr = Symbol.getSection();
+    error(SectionOrErr.getError());
+    section_iterator Section = *SectionOrErr;
+    if (Section == File->section_end()) {
+      // Could be an absolute symbol. Could record for pretty printing.
+      continue;
+    }
+
+    // Disambiguate local function name. Since we don't know if we'll see
+    // a global with the same name, always modify the local function name.
+    std::string UniqueFunctionName;
+    if (!(Symbol.getFlags() & SymbolRef::SF_Global)) {
+      unsigned LocalCount = 1;
+      // Materialize the prefix right away: a Twine must not outlive the
+      // full expression that created it, as it refers to temporaries.
+      const std::string LocalName =
+        (*Name + "/" + FileSymbolName + "/").str();
+      while (BC->GlobalSymbols.find(
+                 (Twine(LocalName) + Twine(LocalCount)).str()) !=
+             BC->GlobalSymbols.end()) {
+        ++LocalCount;
+      }
+      UniqueFunctionName = (Twine(LocalName) + Twine(LocalCount)).str();
+    } else {
+      auto I = BC->GlobalSymbols.find(*Name);
+      assert(I == BC->GlobalSymbols.end() && "global name not unique");
+      (void)I; // Only used by the assertion above.
+      UniqueFunctionName = *Name;
+    }
+
+    // Create the function and add to the map.
+    BinaryFunctions.emplace(
+        Address,
+        BinaryFunction(UniqueFunctionName, Symbol, *Section, Address,
+                       SymbolSize, *BC)
+    );
+
+    // Add the name to global symbols map.
+    BC->GlobalSymbols[UniqueFunctionName] = Address;
+
+    // Add to the reverse map.
+    BC->GlobalAddresses.emplace(std::make_pair(Address, UniqueFunctionName));
+  }
+
+  // Disassemble every function and build its control flow graph.
+  for (auto &BFI : BinaryFunctions) {
+    BinaryFunction &Function = BFI.second;
+
+    SectionRef Section = Function.getSection();
+    assert(Section.containsSymbol(Function.getSymbol()) &&
+           "symbol not in section");
+
+    // When could it happen?
+    if (!Section.isText() || Section.isVirtual() || !Section.getSize()) {
+      DEBUG(dbgs() << "FLO: corresponding section non-executable or empty "
+                   << "for function " << Function.getName() << "\n");
+      continue;
+    }
+
+    // Set the proper maximum size value after the whole symbol table
+    // has been processed.
+    auto SymRefI = FileSymRefs.upper_bound(Function.getAddress());
+    if (SymRefI != FileSymRefs.end()) {
+      auto MaxSize = SymRefI->first - Function.getAddress();
+      assert(MaxSize >= Function.getSize() &&
+             "symbol seen in the middle of the function");
+      Function.setMaxSize(MaxSize);
+    } else {
+      // No symbol follows this function: bound it by the end of the
+      // containing section. Otherwise the maximum size would stay at its
+      // "unlimited" default and the nop padding written after the new
+      // code would have an unbounded length.
+      Function.setMaxSize(Section.getAddress() + Section.getSize() -
+                          Function.getAddress());
+    }
+
+    StringRef SectionContents;
+    error(Section.getContents(SectionContents));
+
+    assert(SectionContents.size() == Section.getSize() &&
+           "section size mismatch");
+
+    // Function offset from the section start.
+    auto FunctionOffset = Function.getAddress() - Section.getAddress();
+
+    // Offset of the function in the file.
+    Function.setFileOffset(
+        SectionContents.data() - File->getData().data() + FunctionOffset);
+
+    ArrayRef<uint8_t> FunctionData(
+        reinterpret_cast<const uint8_t *>
+          (SectionContents.data()) + FunctionOffset,
+        Function.getSize());
+
+    if (!Function.disassemble(FunctionData))
+      continue;
+
+    if (!Function.buildCFG())
+      continue;
+
+  } // Iterate over all functions
+
+
+  // Run optimization passes.
+  //
+  // FIXME: use real optimization passes.
+  for (auto &BFI : BinaryFunctions) {
+    auto &Function = BFI.second;
+    // Detect and eliminate unreachable basic blocks. We could have those
+    // filled with nops and they are used for alignment.
+    //
+    // FIXME: this wouldn't work with C++ exceptions until we implement
+    //        support for those as there will be "invisible" edges
+    //        in the graph.
+    if (EliminateUnreachable) {
+      bool IsFirst = true;
+      for (auto &BB : Function) {
+        if (!IsFirst && BB.pred_empty()) {
+          outs() << "FLO: basic block " << BB.getName() << " in function "
+                 << Function.getName() << " is dead\n";
+          // TODO: currently lacking interface to eliminate basic block.
+        }
+        IsFirst = false;
+      }
+      DEBUG(dbgs() << "*** After unreachable block elimination ***\n");
+      DEBUG(Function.print(dbgs(), /* PrintInstructions = */ true));
+    }
+  }
+
+  std::error_code EC;
+  std::unique_ptr<tool_output_file> Out =
+    llvm::make_unique<tool_output_file>(OutputFilename + ".o",
+                                        EC,sys::fs::F_None);
+
+  if (EC) {
+    // Do not pretend success when the output cannot be created.
+    report_error(OutputFilename + ".o", EC);
+    return;
+  }
+
+  std::unique_ptr<tool_output_file> RealOut =
+    llvm::make_unique<tool_output_file>(OutputFilename, EC, sys::fs::F_None,
+                                        0777);
+  if (EC) {
+    report_error(OutputFilename, EC);
+    return;
+  }
+
+  // Copy input file.
+  RealOut->os() << File->getData();
+
+  std::unique_ptr<buffer_ostream> BOS =
+      make_unique<buffer_ostream>(Out->os());
+  raw_pwrite_stream *OS = BOS.get();
+
+  // Implicitly MCObjectStreamer takes ownership of MCAsmBackend (MAB)
+  // and MCCodeEmitter (MCE). ~MCObjectStreamer() will delete these
+  // two instances.
+  std::unique_ptr<MCStreamer> Streamer(
+    BC->TheTarget->createMCObjectStreamer(*BC->TheTriple,
+                                          *BC->Ctx,
+                                          *BC->MAB,
+                                          *OS,
+                                          BC->MCE,
+                                          *BC->STI,
+                                          /* RelaxAll */ false,
+                                          /* DWARFMustBeAtTheEnd */ false));
+
+  Streamer->InitSections(false);
+
+  // Output functions one by one.
+  for (auto &BFI : BinaryFunctions) {
+    auto &Function = BFI.second;
+
+    if (!Function.isSimple())
+      continue;
+
+    // Only overwrite functions from the list if non-empty.
+    if (!FunctionNames.empty()) {
+      bool IsValid = false;
+      for (auto &Name : FunctionNames) {
+        if (Function.getName() == Name) {
+          IsValid = true;
+          break;
+        }
+      }
+      if (!IsValid)
+        continue;
+    }
+
+    DEBUG(dbgs() << "FLO: generating code for function \""
+                 << Function.getName() << "\"\n");
+
+    // No need for human readability?
+    // FIXME: what difference does it make in reality?
+    //Ctx.setUseNamesOnTempLabels(false);
+
+    // Emit function start
+
+    // Each function is emitted into its own section.
+    MCSectionELF *FunctionSection =
+      BC->Ctx->getELFSection(Function.getCodeSectionName(),
+                             ELF::SHT_PROGBITS,
+                             ELF::SHF_EXECINSTR | ELF::SHF_ALLOC);
+
+    MCSection *Section = FunctionSection;
+    Streamer->SwitchSection(Section);
+
+    Streamer->EmitCodeAlignment(Function.getAlignment());
+
+    MCSymbol *FunctionSymbol = BC->Ctx->getOrCreateSymbol(Function.getName());
+    Streamer->EmitSymbolAttribute(FunctionSymbol, MCSA_ELF_TypeFunction);
+    Streamer->EmitLabel(FunctionSymbol);
+
+    // Emit code.
+    for (const auto &BB : Function) {
+      Streamer->EmitLabel(BB.getLabel());
+      for (const auto &Instr : BB) {
+        Streamer->EmitInstruction(Instr, *BC->STI);
+      }
+    }
+
+    // TODO: is there any use in emitting end of function?
+    //       Perhaps once we have a support for C++ exceptions.
+    //auto FunctionEndLabel = Ctx.createTempSymbol("func_end");
+    //Streamer->EmitLabel(FunctionEndLabel);
+    //Streamer->emitELFSize(FunctionSymbol, MCExpr());
+  }
+
+  Streamer->Finish();
+
+  // Get output object as ObjectFile.
+  std::unique_ptr<MemoryBuffer> ObjectMemBuffer =
+      MemoryBuffer::getMemBuffer(BOS->str(), "in-memory object file", false);
+  ErrorOr<std::unique_ptr<object::ObjectFile>> ObjOrErr =
+    object::ObjectFile::createObjectFile(ObjectMemBuffer->getMemBufferRef());
+
+  if (std::error_code EC = ObjOrErr.getError()) {
+    report_error(InputFilename, EC);
+    return;
+  }
+
+  std::unique_ptr<ExecutableFileMemoryManager>
+    EFMM(new ExecutableFileMemoryManager());
+
+  // FIXME: use notifyObjectLoaded() to remap sections.
+
+  DEBUG(dbgs() << "Creating OLT\n");
+  // Run ObjectLinkingLayer() with custom memory manager and symbol resolver.
+  orc::ObjectLinkingLayer<> OLT;
+
+  // Symbols are resolved to their addresses in the original binary.
+  auto Resolver = orc::createLambdaResolver(
+          [&](const std::string &Name) {
+            DEBUG(dbgs() << "FLO: looking for " << Name << "\n");
+            auto I = BC->GlobalSymbols.find(Name);
+            if (I == BC->GlobalSymbols.end())
+              return RuntimeDyld::SymbolInfo(nullptr);
+            return RuntimeDyld::SymbolInfo(I->second,
+                                           JITSymbolFlags::None);
+          },
+          [](const std::string &S) {
+            DEBUG(dbgs() << "FLO: resolving " << S << "\n");
+            return nullptr;
+          }
+      );
+  // FIXME:
+  auto ObjectsHandle = OLT.addObjectSet(
+        singletonSet(std::move(ObjOrErr.get())),
+        EFMM.get(),
+        //std::move(EFMM),
+        std::move(Resolver));
+  //OLT.takeOwnershipOfBuffers(ObjectsHandle, );
+
+  // Map every function/section current address in memory to that in
+  // the output binary.
+  for (auto &BFI : BinaryFunctions) {
+    auto &Function = BFI.second;
+    if (!Function.isSimple())
+      continue;
+
+    auto SAI = EFMM->SectionAddressInfo.find(Function.getCodeSectionName());
+    if (SAI != EFMM->SectionAddressInfo.end()) {
+      DEBUG(dbgs() << "FLO: mapping 0x" << Twine::utohexstr(SAI->second.first)
+                   << " to 0x" << Twine::utohexstr(Function.getAddress())
+                   << '\n');
+      OLT.mapSectionAddress(ObjectsHandle,
+          reinterpret_cast<const void*>(SAI->second.first),
+          Function.getAddress());
+      Function.setImageAddress(SAI->second.first);
+      Function.setImageSize(SAI->second.second);
+    } else {
+      errs() << "FLO: cannot remap function " << Function.getName() << "\n";
+    }
+  }
+
+  OLT.emitAndFinalize(ObjectsHandle);
+
+  // FIXME: is there a less painful way to obtain assembler/writer?
+  auto &Writer =
+    static_cast<MCObjectStreamer*>(Streamer.get())->getAssembler().getWriter();
+  Writer.setStream(RealOut->os());
+
+  // Overwrite function in the output file.
+  for (auto &BFI : BinaryFunctions) {
+    auto &Function = BFI.second;
+
+    // Functions that were not emitted have no image.
+    if (Function.getImageAddress() == 0 || Function.getImageSize() == 0)
+      continue;
+
+    if (Function.getImageSize() > Function.getMaxSize()) {
+      errs() << "FLO-WARNING: new function size (0x"
+             << Twine::utohexstr(Function.getImageSize())
+             << ") is larger than maximum allowed size (0x"
+             << Twine::utohexstr(Function.getMaxSize())
+             << ") for function " << Function.getName() << '\n';
+      continue;
+    }
+
+    // Overwrite function in the output file.
+    outs() << "FLO: rewriting function \"" << Function.getName() << "\"\n";
+    RealOut->os().pwrite(
+        reinterpret_cast<char *>(Function.getImageAddress()),
+        Function.getImageSize(),
+        Function.getFileOffset());
+
+    // Write nops at the end of the function.
+    auto Pos = RealOut->os().tell();
+    RealOut->os().seek(Function.getFileOffset() + Function.getImageSize());
+    BC->MAB->writeNopData(Function.getMaxSize() - Function.getImageSize(),
+                          &Writer);
+    RealOut->os().seek(Pos);
+  }
+
+  if (EntryPointFunction) {
+    DEBUG(dbgs() << "FLO: entry point function is "
+                 << EntryPointFunction->getName() << '\n');
+  } else {
+    DEBUG(dbgs() << "FLO: no entry point function was set\n");
+  }
+
+  // TODO: we should find a way to mark the binary as optimized by us.
+
+  Out->keep();
+  RealOut->keep();
+}
+
 int main(int argc, char **argv) {
  // Print a stack trace if we signal out.
  sys::PrintStackTraceOnErrorSignal();
@ -104,7 +694,7 @@ int main(int argc, char **argv) {
  Binary &Binary = *BinaryOrErr.get().getBinary();

  if (ELFObjectFileBase *e = dyn_cast<ELFObjectFileBase>(&Binary)) {
-    outs() << "mind blown : " << e << "!\n";
+    OptimizeFile(e);
  } else {
    report_error(InputFilename, object_error::invalid_file_type);
  }