Commit FLO with control flow graph.

Summary:
llvm-flo disassembles, builds control flow graph, and re-writes
simple functions.

(cherry picked from FBD2524024)
This commit is contained in:
Maksim Panchenko 2015-10-09 17:21:14 -07:00
parent 7927c14ff5
commit 9a2fe7ebe4
8 changed files with 1768 additions and 6 deletions

65
bolt/BinaryBasicBlock.cpp Normal file
View File

@ -0,0 +1,65 @@
//===--- BinaryBasicBlock.cpp - Interface for assembly-level basic block --===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
#include <limits>
#include <string>
#include "BinaryBasicBlock.h"
#include "BinaryFunction.h"
#undef DEBUG_TYPE
#define DEBUG_TYPE "flo"
namespace llvm {
namespace flo {
/// Strict weak ordering of basic blocks by their offset in the function.
/// Only the Offset field takes part in the comparison.
bool operator<(const BinaryBasicBlock &LHS, const BinaryBasicBlock &RHS) {
  const uint64_t LeftOffset = LHS.Offset;
  const uint64_t RightOffset = RHS.Offset;
  return LeftOffset < RightOffset;
}
/// Append \p Succ to the list of successors of this block and register this
/// block as a predecessor of \p Succ.
///
/// \p Count and \p MispredictedCount are stored in BranchInfo at the same
/// index as the new successor, so that every successor has a matching
/// BranchInfo entry.
void BinaryBasicBlock::addSuccessor(BinaryBasicBlock *Succ,
                                    uint64_t Count,
                                    uint64_t MispredictedCount) {
  Successors.push_back(Succ);
  BranchInfo.push_back({Count, MispredictedCount});
  Succ->addPredecessor(this);
}

/// Remove the first occurrence of \p Succ from the list of successors,
/// together with its BranchInfo entry, and remove this block from the
/// predecessors of \p Succ.
///
/// \p Succ must be a successor of this block (checked by assertion).
void BinaryBasicBlock::removeSuccessor(BinaryBasicBlock *Succ) {
  assert(BranchInfo.size() == Successors.size() &&
         "branch info is out of sync with successors");

  auto I = std::find(succ_begin(), succ_end(), Succ);
  assert(I != succ_end() && "no such successor!");

  Succ->removePredecessor(this);

  // Successors and BranchInfo are parallel lists: drop the entry that sits
  // at the same position as the successor being removed.
  const auto Index = I - succ_begin();
  Successors.erase(I);
  BranchInfo.erase(BranchInfo.begin() + Index);
}
/// Append \p Pred to the end of the predecessor list of this block.
/// The successor list of \p Pred is not touched.
void BinaryBasicBlock::addPredecessor(BinaryBasicBlock *Pred) {
  Predecessors.emplace_back(Pred);
}
/// Erase the first occurrence of \p Pred from the predecessor list.
/// \p Pred is expected to be present; this is checked by assertion.
void BinaryBasicBlock::removePredecessor(BinaryBasicBlock *Pred) {
  const auto Last = pred_end();
  auto Position = std::find(pred_begin(), Last, Pred);
  assert(Position != Last && "Pred is not a predecessor of this block!");
  Predecessors.erase(Position);
}
} // namespace flo
} // namespace llvm

212
bolt/BinaryBasicBlock.h Normal file
View File

@ -0,0 +1,212 @@
//===--- BinaryBasicBlock.h - Interface for assembly-level basic block ----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// TODO: memory management for instructions.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TOOLS_LLVM_FLO_BINARY_BASIC_BLOCK_H
#define LLVM_TOOLS_LLVM_FLO_BINARY_BASIC_BLOCK_H
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/ilist.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <limits>
namespace llvm {
namespace flo {
class BinaryFunction;
/// The intention is to keep the structure similar to MachineBasicBlock as
/// we might switch to it at some point.
class BinaryBasicBlock {
/// Label associated with the block.
MCSymbol *Label{nullptr};
/// Original offset in the function.
uint64_t Offset{std::numeric_limits<uint64_t>::max()};
/// Alignment requirements for the block.
uint64_t Alignment{1};
/// Vector of all instructions in the block.
std::vector<MCInst> Instructions;
/// CFG information.
std::vector<BinaryBasicBlock *> Predecessors;
std::vector<BinaryBasicBlock *> Successors;
struct BinaryBranchInfo {
uint64_t Count;
uint64_t MispredictedCount; /// number of branches mispredicted
};
/// Each successor has a corresponding BranchInfo entry in the list.
std::vector<BinaryBranchInfo> BranchInfo;
typedef std::vector<BinaryBranchInfo>::iterator branch_info_iterator;
typedef std::vector<BinaryBranchInfo>::const_iterator
const_branch_info_iterator;
BinaryBasicBlock() {}
explicit BinaryBasicBlock(
MCSymbol *Label,
uint64_t Offset = std::numeric_limits<uint64_t>::max())
: Label(Label), Offset(Offset) {}
explicit BinaryBasicBlock(uint64_t Offset)
: Offset(Offset) {}
// Exclusively managed by BinaryFunction.
friend class BinaryFunction;
friend bool operator<(const BinaryBasicBlock &LHS,
const BinaryBasicBlock &RHS);
public:
// Instructions iterators.
typedef std::vector<MCInst>::iterator iterator;
typedef std::vector<MCInst>::const_iterator const_iterator;
typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
typedef std::reverse_iterator<iterator> reverse_iterator;
MCInst &front() { return Instructions.front(); }
MCInst &back() { return Instructions.back(); }
const MCInst &front() const { return Instructions.front(); }
const MCInst &back() const { return Instructions.back(); }
iterator begin() { return Instructions.begin(); }
const_iterator begin() const { return Instructions.begin(); }
iterator end () { return Instructions.end(); }
const_iterator end () const { return Instructions.end(); }
reverse_iterator rbegin() { return Instructions.rbegin(); }
const_reverse_iterator rbegin() const { return Instructions.rbegin(); }
reverse_iterator rend () { return Instructions.rend(); }
const_reverse_iterator rend () const { return Instructions.rend(); }
// CFG iterators.
typedef std::vector<BinaryBasicBlock *>::iterator pred_iterator;
typedef std::vector<BinaryBasicBlock *>::const_iterator const_pred_iterator;
typedef std::vector<BinaryBasicBlock *>::iterator succ_iterator;
typedef std::vector<BinaryBasicBlock *>::const_iterator const_succ_iterator;
typedef std::vector<BinaryBasicBlock *>::reverse_iterator
pred_reverse_iterator;
typedef std::vector<BinaryBasicBlock *>::const_reverse_iterator
const_pred_reverse_iterator;
typedef std::vector<BinaryBasicBlock *>::reverse_iterator
succ_reverse_iterator;
typedef std::vector<BinaryBasicBlock *>::const_reverse_iterator
const_succ_reverse_iterator;
pred_iterator pred_begin() { return Predecessors.begin(); }
const_pred_iterator pred_begin() const { return Predecessors.begin(); }
pred_iterator pred_end() { return Predecessors.end(); }
const_pred_iterator pred_end() const { return Predecessors.end(); }
pred_reverse_iterator pred_rbegin()
{ return Predecessors.rbegin();}
const_pred_reverse_iterator pred_rbegin() const
{ return Predecessors.rbegin();}
pred_reverse_iterator pred_rend()
{ return Predecessors.rend(); }
const_pred_reverse_iterator pred_rend() const
{ return Predecessors.rend(); }
unsigned pred_size() const {
return (unsigned)Predecessors.size();
}
bool pred_empty() const { return Predecessors.empty(); }
succ_iterator succ_begin() { return Successors.begin(); }
const_succ_iterator succ_begin() const { return Successors.begin(); }
succ_iterator succ_end() { return Successors.end(); }
const_succ_iterator succ_end() const { return Successors.end(); }
succ_reverse_iterator succ_rbegin()
{ return Successors.rbegin(); }
const_succ_reverse_iterator succ_rbegin() const
{ return Successors.rbegin(); }
succ_reverse_iterator succ_rend()
{ return Successors.rend(); }
const_succ_reverse_iterator succ_rend() const
{ return Successors.rend(); }
unsigned succ_size() const {
return (unsigned)Successors.size();
}
bool succ_empty() const { return Successors.empty(); }
inline iterator_range<pred_iterator> predecessors() {
return iterator_range<pred_iterator>(pred_begin(), pred_end());
}
inline iterator_range<const_pred_iterator> predecessors() const {
return iterator_range<const_pred_iterator>(pred_begin(), pred_end());
}
inline iterator_range<succ_iterator> successors() {
return iterator_range<succ_iterator>(succ_begin(), succ_end());
}
inline iterator_range<const_succ_iterator> successors() const {
return iterator_range<const_succ_iterator>(succ_begin(), succ_end());
}
/// Return symbol marking the start of this basic block.
MCSymbol *getLabel() const {
return Label;
}
/// Return local name for the block.
StringRef getName() const {
return Label->getName();
}
/// Add instruction at the end of this basic block.
void addInstruction(MCInst &Inst) {
Instructions.emplace_back(Inst);
}
/// Return required alignment for the block.
uint64_t getAlignment() const {
return Alignment;
}
/// Adds block to successor list, and also updates predecessor list for
/// successor block.
/// Set branch info for this path.
void addSuccessor(BinaryBasicBlock *Succ,
uint64_t Count = 0,
uint64_t MispredictedCount = 0);
/// Remove /p Succ basic block from the list of successors. Update the
/// list of predecessors of /p Succ and update branch info.
void removeSuccessor(BinaryBasicBlock *Succ);
private:
/// Adds predecessor to the BB. Most likely you don't need to call this.
void addPredecessor(BinaryBasicBlock *Pred);
/// Remove predecessor of the basic block. Don't use directly, instead
/// use removeSuccessor() funciton.
void removePredecessor(BinaryBasicBlock *Pred);
};
bool operator<(const BinaryBasicBlock &LHS, const BinaryBasicBlock &RHS);
} // namespace flo
} // namespace llvm
#endif

114
bolt/BinaryContext.h Normal file
View File

@ -0,0 +1,114 @@
//===--- BinaryContext.h - Interface for machine-level context -----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TOOLS_LLVM_FLO_BINARY_CONTEXT_H
#define LLVM_TOOLS_LLVM_FLO_BINARY_CONTEXT_H
#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/TargetRegistry.h"
#include <functional>
#include <map>
#include <string>
#include <system_error>
namespace llvm {
namespace flo {
/// Everything that's needed to process binaries lives here.
/// Bundle of the MC-layer objects and global symbol tables that are shared
/// while processing a binary.
class BinaryContext {
  // A context cannot exist without its MC components.
  BinaryContext() = delete;

public:
  /// Map: symbol name -> address.
  using SymbolMapType = std::map<std::string, uint64_t>;
  SymbolMapType GlobalSymbols;

  /// Map: address -> every name registered at that address.
  std::multimap<uint64_t, std::string> GlobalAddresses;

  // NOTE: members are initialized in declaration order and destroyed in
  // reverse order; keep the order below intact.
  std::unique_ptr<MCContext> Ctx;

  std::unique_ptr<Triple> TheTriple;

  const Target *TheTarget;

  MCCodeEmitter *MCE;

  std::unique_ptr<MCObjectFileInfo> MOFI;

  std::unique_ptr<const MCAsmInfo> AsmInfo;

  std::unique_ptr<const MCInstrInfo> MII;

  std::unique_ptr<const MCSubtargetInfo> STI;

  std::unique_ptr<MCInstPrinter> InstPrinter;

  std::unique_ptr<const MCInstrAnalysis> MIA;

  std::unique_ptr<const MCRegisterInfo> MRI;

  std::unique_ptr<MCDisassembler> DisAsm;

  /// Not set by the constructor; left default-constructed (empty).
  std::function<void(std::error_code)> ErrorCheck;

  MCAsmBackend *MAB;

  /// Take ownership of every std::unique_ptr argument; \p TheTarget, \p MCE
  /// and \p MAB are stored as plain pointers.
  BinaryContext(std::unique_ptr<MCContext> Ctx,
                std::unique_ptr<Triple> TheTriple,
                const Target *TheTarget,
                MCCodeEmitter *MCE,
                std::unique_ptr<MCObjectFileInfo> MOFI,
                std::unique_ptr<const MCAsmInfo> AsmInfo,
                std::unique_ptr<const MCInstrInfo> MII,
                std::unique_ptr<const MCSubtargetInfo> STI,
                std::unique_ptr<MCInstPrinter> InstPrinter,
                std::unique_ptr<const MCInstrAnalysis> MIA,
                std::unique_ptr<const MCRegisterInfo> MRI,
                std::unique_ptr<MCDisassembler> DisAsm,
                MCAsmBackend *MAB)
      : Ctx(std::move(Ctx)),
        TheTriple(std::move(TheTriple)),
        TheTarget(TheTarget),
        MCE(MCE),
        MOFI(std::move(MOFI)),
        AsmInfo(std::move(AsmInfo)),
        MII(std::move(MII)),
        STI(std::move(STI)),
        InstPrinter(std::move(InstPrinter)),
        MIA(std::move(MIA)),
        MRI(std::move(MRI)),
        DisAsm(std::move(DisAsm)),
        MAB(MAB) {}

  ~BinaryContext() {}
};
} // namespace flo
} // namespace llvm
#endif

381
bolt/BinaryFunction.cpp Normal file
View File

@ -0,0 +1,381 @@
//===--- BinaryFunction.cpp - Interface for machine-level function --------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <limits>
#include <string>
#include "BinaryBasicBlock.h"
#include "BinaryFunction.h"
#undef DEBUG_TYPE
#define DEBUG_TYPE "flo"
namespace llvm {
namespace flo {
/// Print the function to \p OS.
///
/// A header with the general properties of the function is always printed.
/// When \p PrintInstructions is true and an instruction printer is
/// available, the instructions follow: as a flat list with labels if no
/// basic blocks exist yet, and block by block (with predecessors and
/// successors) otherwise.
void BinaryFunction::print(raw_ostream &OS, bool PrintInstructions) const {
  StringRef SectionName;
  Section.getName(SectionName);

  // General properties of the function.
  OS << "Binary Function \"" << getName() << "\" {";
  OS << "\n State : " << CurrentState;
  OS << "\n Address : 0x" << Twine::utohexstr(Address);
  OS << "\n Size : 0x" << Twine::utohexstr(Size);
  OS << "\n MaxSize : 0x" << Twine::utohexstr(MaxSize);
  OS << "\n Offset : 0x" << Twine::utohexstr(FileOffset);
  OS << "\n Section : " << SectionName;
  OS << "\n Orc Section : " << getCodeSectionName();
  OS << "\n IsSimple : " << IsSimple;
  OS << "\n BB count : " << BasicBlocks.size();
  OS << "\n Image : 0x" << Twine::utohexstr(ImageAddress);
  OS << "\n}\n";

  if (!(PrintInstructions && BC.InstPrinter))
    return;

  // Print a comma-separated list of block names preceded by Header.
  // Nothing is printed for an empty list.
  auto PrintBlockList = [&OS](const char *Header,
                              const std::vector<BinaryBasicBlock *> &Blocks) {
    if (Blocks.empty())
      return;
    OS << Header;
    const char *Separator = "";
    for (const BinaryBasicBlock *Block : Blocks) {
      OS << Separator << Block->getName();
      Separator = ", ";
    }
    OS << '\n';
  };

  if (BasicBlocks.empty() && !Instructions.empty()) {
    // The CFG was not built yet: dump the flat instruction list.
    for (const auto &OffsetAndInst : Instructions) {
      const auto InstOffset = OffsetAndInst.first;

      // Print the label attached to this offset, if any.
      const auto LabelIt = Labels.find(InstOffset);
      if (LabelIt != Labels.end())
        OS << LabelIt->second->getName() << ":\n";

      OS << format(" %08" PRIx64 ": ", InstOffset);
      BC.InstPrinter->printInst(&OffsetAndInst.second, OS, "", *BC.STI);
      OS << "\n";
    }
  }

  // Running offset of the current instruction from the function start.
  uint64_t CurOffset = 0;

  for (const auto &BB : BasicBlocks) {
    OS << BB.getName() << " ("
       << BB.Instructions.size() << " instructions)\n";

    PrintBlockList(" Predecessors: ", BB.Predecessors);

    CurOffset = RoundUpToAlignment(CurOffset, BB.getAlignment());

    for (auto &Instr : BB) {
      OS << format(" %08" PRIx64 ": ", CurOffset);
      BC.InstPrinter->printInst(&Instr, OS, "", *BC.STI);
      OS << "\n";

      // In case we need MCInst printer:
      // Instr.dump_pretty(OS, InstructionPrinter.get());

      // Encode the instruction to find out its size and advance the offset.
      // Note: this is imprecise since happening prior to relaxation.
      SmallString<256> Code;
      SmallVector<MCFixup, 4> Fixups;
      raw_svector_ostream VecOS(Code);
      BC.MCE->encodeInstruction(Instr, VecOS, Fixups, *BC.STI);
      CurOffset += Code.size();
    }

    PrintBlockList(" Successors: ", BB.Successors);

    OS << '\n';
  }

  OS << "End of Function \"" << getName() << "\"\n";
}
// Disassemble the raw bytes in FunctionData into the Instructions map
// (offset -> instruction), creating labels for local branch targets and
// recording [from, to] offsets of local branches in LocalBranches.
//
// Direct branch and call targets are replaced by symbolic operands. The
// function is marked as not simple (and disassembly stops) on an
// undecodable instruction, an indirect branch, a branch or call whose target
// cannot be evaluated, a call into the middle of the function itself, or an
// instruction with a RIP operand.
//
// FunctionData must be exactly getSize() bytes long (checked by assertion).
//
// The state is always updated to State::Disassembled and true is always
// returned; callers have to consult isSimple() to learn whether the whole
// body was processed.
bool BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
assert(FunctionData.size() == getSize() &&
"function size does not match raw data size");
auto &Ctx = BC.Ctx;
auto &MIA = BC.MIA;
// Insert a label at the beginning of the function. This will be our first
// basic block.
Labels[0] = Ctx->createTempSymbol("BB0", false);
// Local flag (shadows the member of the same name); it is written back via
// setSimple() once the loop is done.
bool IsSimple = true;
// Offset is advanced at the bottom of the loop body by the size of the
// instruction that was just decoded.
for (uint64_t Offset = 0; IsSimple && (Offset < getSize()); ) {
MCInst Instruction;
uint64_t Size;
if (!BC.DisAsm->getInstruction(Instruction,
Size,
FunctionData.slice(Offset),
getAddress() + Offset,
nulls(),
nulls())) {
// Ignore this function. Skip to the next one.
IsSimple = false;
break;
}
// Indirect branches are not handled: give up on the function.
if (MIA->isIndirectBranch(Instruction)) {
IsSimple = false;
break;
}
if (MIA->isBranch(Instruction) || MIA->isCall(Instruction)) {
uint64_t InstructionTarget = 0;
uint64_t AbsoluteInstrAddr = getAddress() + Offset;
if (MIA->evaluateBranch(Instruction,
AbsoluteInstrAddr,
Size,
InstructionTarget)) {
// Check if the target is within the same function. Otherwise it's
// a call, possibly a tail call.
//
// If the target *is* the function address it could be either a branch
// or a recursive call.
bool IsCall = MIA->isCall(Instruction);
MCSymbol *TargetSymbol{nullptr};
uint64_t TargetOffset{0};
if (IsCall && containsAddress(InstructionTarget)) {
if (InstructionTarget == getAddress()) {
// Recursive call.
TargetSymbol = Ctx->getOrCreateSymbol(getName());
} else {
// Possibly an old-style PIC code
DEBUG(dbgs() << "FLO: internal call detected at 0x"
<< Twine::utohexstr(AbsoluteInstrAddr)
<< " in function " << getName() << "\n");
IsSimple = false;
break;
}
}
// No symbol yet means this is not a recursive call: resolve the target
// either to a local label or to a global symbol.
if (!TargetSymbol) {
// Create either local label or external symbol.
if (containsAddress(InstructionTarget)) {
// Check if there's already a registered label.
TargetOffset = InstructionTarget - getAddress();
auto LI = Labels.find(TargetOffset);
if (LI == Labels.end()) {
TargetSymbol = Ctx->createTempSymbol();
Labels[TargetOffset] = TargetSymbol;
} else {
TargetSymbol = LI->second;
}
} else {
// This is a call regardless of the opcode (e.g. tail call).
IsCall = true;
// Check if we already have a symbol at this address.
std::string Name;
auto NI = BC.GlobalAddresses.find(InstructionTarget);
if (NI != BC.GlobalAddresses.end()) {
// Any registered name will do.
Name = NI->second;
} else {
// Create a new symbol at the destination.
Name = (Twine("FUNCat0x") +
Twine::utohexstr(InstructionTarget)).str();
BC.GlobalAddresses.emplace(std::make_pair(InstructionTarget,
Name));
}
TargetSymbol = Ctx->getOrCreateSymbol(Name);
BC.GlobalSymbols[Name] = InstructionTarget;
}
}
// Drop the original operands and make the target symbol the only
// operand of the instruction.
Instruction.clear();
Instruction.addOperand(
MCOperand::createExpr(
MCSymbolRefExpr::create(TargetSymbol,
MCSymbolRefExpr::VK_None,
*Ctx)));
if (!IsCall) {
// Add local branch info.
LocalBranches.push_back({Offset, TargetOffset});
}
} else {
// Indirect call
IsSimple = false;
break;
}
} else {
if (MIA->hasRIPOperand(Instruction)) {
DEBUG(dbgs() << "FLO: rip-relative instruction found "
"(not supported yet)\n");
IsSimple = false;
break;
}
}
// Only reached when no break was taken above: the instruction that made
// the function non-simple is never recorded.
addInstruction(Offset, std::move(Instruction));
Offset += Size;
}
setSimple(IsSimple);
// TODO: clear memory if not simple function?
// Update state.
updateState(State::Disassembled);
// Print the function in the new state.
DEBUG(print(dbgs(), /* PrintInstructions = */ true));
return true;
}
/// Build basic blocks and CFG edges from the disassembled instructions.
///
/// Blocks are created in layout order from Instructions and Labels, branch
/// edges are added from LocalBranches, and fall-through edges are added
/// between adjacent blocks. On success the temporary instruction, label and
/// local branch storage is released and the state becomes State::CFG.
///
/// Returns false without changing anything if the function is not simple or
/// is not in State::Disassembled; returns true otherwise.
bool BinaryFunction::buildCFG() {
  auto &MIA = BC.MIA;

  if (!isSimple())
    return false;

  if (CurrentState != State::Disassembled)
    return false;

  assert(BasicBlocks.empty() && "basic block list should be empty");
  assert((Labels.find(0) != Labels.end()) &&
         "first instruction should always have a label");

  // Create basic blocks in the original layout order:
  //
  //  * Every instruction with associated label marks
  //    the beginning of a basic block.
  //  * Conditional instruction marks the end of a basic block,
  //    except when the following instruction is an
  //    unconditional branch, and the unconditional branch is not
  //    a destination of another branch. In the latter case, the
  //    basic block will consist of a single unconditional branch
  //    (missed optimization opportunity?).
  //
  // Created basic blocks are sorted in layout order since they are
  // created in the same order as instructions, and instructions are
  // sorted by offsets.
  //
  // InsertBB is the block currently receiving instructions (null right after
  // a terminator); PrevBB is only read while InsertBB is null, i.e. right
  // after it was set to the block that has just been terminated.
  BinaryBasicBlock *InsertBB{nullptr};
  BinaryBasicBlock *PrevBB{nullptr};
  for (auto &InstrInfo : Instructions) {
    auto LI = Labels.find(InstrInfo.first);
    if (LI != Labels.end()) {
      // Always create new BB at branch destination.
      PrevBB = InsertBB;
      InsertBB = addBasicBlock(LI->first, LI->second);
    }
    if (!InsertBB) {
      // It must be a fallthrough. Create a new block unless we see an
      // unconditional branch.
      assert(PrevBB && "no previous basic block for a fall through");
      if (MIA->isUnconditionalBranch(InstrInfo.second)) {
        // Temporarily restore inserter basic block.
        InsertBB = PrevBB;
      } else {
        InsertBB = addBasicBlock(InstrInfo.first,
                                 BC.Ctx->createTempSymbol("FT", true));
      }
    }

    InsertBB->addInstruction(InstrInfo.second);

    // How well do we detect tail calls here?
    if (MIA->isTerminator(InstrInfo.second)) {
      PrevBB = InsertBB;
      InsertBB = nullptr;
    }
  }

  // Intermediate dump.
  DEBUG(print(dbgs(), /* PrintInstructions = */ true));

  // TODO: handle properly calls to no-return functions,
  // e.g. exit(3), etc. Otherwise we'll see a false fall-through
  // blocks.

  // Add an edge for every recorded local branch: from the block containing
  // the branch instruction to the block starting at the branch target.
  for (auto &Branch : LocalBranches) {
    DEBUG(dbgs() << "registering branch [0x" << Twine::utohexstr(Branch.first)
                 << "] -> [0x" << Twine::utohexstr(Branch.second) << "]\n");
    BinaryBasicBlock *FromBB = getBasicBlockContainingOffset(Branch.first);
    assert(FromBB && "cannot find BB containing FROM branch");
    BinaryBasicBlock *ToBB = getBasicBlockAtOffset(Branch.second);
    assert(ToBB && "cannot find BB containing TO branch");

    // TODO: add weights here.
    //
    FromBB->addSuccessor(ToBB);
  }

  // Add fall-through branches.
  PrevBB = nullptr;
  bool IsPrevFT = false; // Is previous block a fall-through.
  for (auto &BB : BasicBlocks) {
    if (IsPrevFT) {
      PrevBB->addSuccessor(&BB);
    }

    MCInst &LastInst = BB.back();
    if (BB.succ_size() == 0) {
      // No branch edges: falls through unless the block ends with a
      // terminator.
      IsPrevFT = !MIA->isTerminator(LastInst);
    } else if (BB.succ_size() == 1) {
      // One branch edge: falls through only if that branch is conditional.
      IsPrevFT = MIA->isConditionalBranch(LastInst);
    } else {
      // Either ends with 2 branches, or with an indirect jump.
      IsPrevFT = false;
    }

    PrevBB = &BB;
  }

  // After the loop IsPrevFT describes the last block of the function, which
  // has no following block to fall through to.
  if (IsPrevFT) {
    // Possibly a call that does not return.
    DEBUG(dbgs() << "last block was marked as a fall-through\n");
  }

  // Clean-up memory taken by instructions and labels.
  clearInstructions();
  clearLabels();
  clearLocalBranches();

  // Update the state.
  CurrentState = State::CFG;

  // Print the function in the new state.
  DEBUG(print(dbgs(), /* PrintInstructions = */ true));

  return true;
}
} // namespace flo
} // namespace llvm

399
bolt/BinaryFunction.h Normal file
View File

@ -0,0 +1,399 @@
//===--- BinaryFunction.h - Interface for machine-level function ----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Interface to function in binary (machine) form. This is assembly-level
// code representation with the control flow.
//
// TODO: memory management for instructions.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TOOLS_LLVM_FLO_BINARY_FUNCTION_H
#define LLVM_TOOLS_LLVM_FLO_BINARY_FUNCTION_H
#include "BinaryBasicBlock.h"
#include "BinaryContext.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/ilist.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <limits>
using namespace llvm::object;
namespace llvm {
namespace flo {
/// BinaryFunction is a representation of machine-level function.
//
/// We use the term "Binary" as "Machine" was already taken.
class BinaryFunction {
public:
enum class State : char {
Empty = 0, /// Function body is empty
Disassembled, /// Function have been disassembled
CFG, /// Control flow graph have been built
Assembled, /// Function has been assembled in memory
};
static constexpr uint64_t COUNT_NO_PROFILE =
std::numeric_limits<uint64_t>::max();
private:
/// Current state of the function.
State CurrentState{State::Empty};
/// Name of the function as we know it.
std::string Name;
/// Symbol associated with this function.
SymbolRef Symbol;
/// Containing section
SectionRef Section;
/// Address of the function in memory. Also could be an offset from
/// base address for position independent binaries.
uint64_t Address;
/// Original size of the function.
uint64_t Size;
/// Offset in the file.
uint64_t FileOffset{0};
/// Maximum size this function is allowed to have.
uint64_t MaxSize{std::numeric_limits<uint64_t>::max()};
/// Alignment requirements for the function.
uint64_t Alignment{1};
/// False if the function is too complex to reconstruct its control
/// flow graph and re-assemble.
bool IsSimple{true};
BinaryContext &BC;
/// The address for the code for this function in codegen memory.
uint64_t ImageAddress{0};
/// The size of the code in memory.
uint64_t ImageSize{0};
/// Name for the section this function code should reside in.
std::string CodeSectionName;
/// The profile data for the number of times the function was executed.
uint64_t ExecutionCount{COUNT_NO_PROFILE};
/// Release storage used by instructions.
BinaryFunction &clearInstructions() {
std::map<uint64_t, MCInst> TempMap;
Instructions.swap(TempMap);
return *this;
}
/// Release storage used by instructions.
BinaryFunction &clearLabels() {
std::map<uint64_t, MCSymbol *> TempMap;
Labels.swap(TempMap);
return *this;
}
/// Release memory taken by local branch info.
BinaryFunction &clearLocalBranches() {
std::vector<std::pair<uint64_t, uint64_t>> TempVector;
LocalBranches.swap(TempVector);
return *this;
}
BinaryFunction &updateState(BinaryFunction::State State) {
CurrentState = State;
return *this;
}
public:
std::vector<std::pair<uint64_t, uint64_t>> LocalBranches;
std::map<uint64_t, MCSymbol *> Labels;
/// Temporary holder of instructions before CFG is constructed.
std::map<uint64_t, MCInst> Instructions;
// Blocks are kept sorted in the layout order. If we need to change the
// layout, the terminating instructions need to be modified.
typedef std::vector<BinaryBasicBlock> BasicBlockListType;
BasicBlockListType BasicBlocks;
typedef BasicBlockListType::iterator iterator;
typedef BasicBlockListType::const_iterator const_iterator;
typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
typedef std::reverse_iterator<iterator> reverse_iterator;
// CFG iterators.
iterator begin() { return BasicBlocks.begin(); }
const_iterator begin() const { return BasicBlocks.begin(); }
iterator end () { return BasicBlocks.end(); }
const_iterator end () const { return BasicBlocks.end(); }
reverse_iterator rbegin() { return BasicBlocks.rbegin(); }
const_reverse_iterator rbegin() const { return BasicBlocks.rbegin(); }
reverse_iterator rend () { return BasicBlocks.rend(); }
const_reverse_iterator rend () const { return BasicBlocks.rend(); }
unsigned size() const { return (unsigned)BasicBlocks.size();}
bool empty() const { return BasicBlocks.empty(); }
const BinaryBasicBlock &front() const { return BasicBlocks.front(); }
BinaryBasicBlock &front() { return BasicBlocks.front(); }
const BinaryBasicBlock & back() const { return BasicBlocks.back(); }
BinaryBasicBlock & back() { return BasicBlocks.back(); }
BinaryFunction(StringRef Name, SymbolRef Symbol, SectionRef Section,
uint64_t Address, uint64_t Size, BinaryContext &BC) :
Name(Name), Symbol(Symbol), Section(Section), Address(Address),
Size(Size), BC(BC), CodeSectionName((".text." + Name).str()) {}
/// Perform optimal code layout based on edge frequencies making necessary
/// adjustments to instructions at the end of basic blocks.
void optimizeLayout();
/// View CFG in graphviz program
void viewGraph();
/// Basic block iterator
/// Return the name of the function as extracted from the binary file.
StringRef getName() const {
return Name;
}
/// Return symbol associated with the function start.
SymbolRef getSymbol() const {
return Symbol;
}
/// Return containing file section.
SectionRef getSection() const {
return Section;
}
/// Return original address of the function (or offset from base for PIC).
uint64_t getAddress() const {
return Address;
}
/// Return offset of the function body in the binary file.
uint64_t getFileOffset() const {
return FileOffset;
}
/// Return (original) size of the function.
uint64_t getSize() const {
return Size;
}
/// Return the maximum size the body of the function could have.
uint64_t getMaxSize() const {
return MaxSize;
}
/// Return internal section name for this function.
StringRef getCodeSectionName() const {
assert(!CodeSectionName.empty() && "no section name for function");
return StringRef(CodeSectionName);
}
/// Return true if the function could be correctly processed.
bool isSimple() const {
return IsSimple;
}
/// Return true if the given address \p PC is inside the function body.
bool containsAddress(uint64_t PC) const {
return Address <= PC && PC < Address + Size;
}
/// Create a basic block at a given \p Offset in the
/// function and append it to the end of list of blocks.
/// Returns NULL if basic block already exists at the \p Offset.
BinaryBasicBlock *addBasicBlock(uint64_t Offset, MCSymbol *Label = nullptr) {
assert(!getBasicBlockAtOffset(Offset) && "basic block already exists");
if (!Label)
Label = BC.Ctx->createTempSymbol("BB", true);
BasicBlocks.emplace_back(BinaryBasicBlock(Label, Offset));
return &BasicBlocks.back();
}
BinaryBasicBlock *getOrCreateBasicBlockAt(uint64_t Offset,
MCSymbol *Label = nullptr) {
BinaryBasicBlock *BB = getBasicBlockAtOffset(Offset);
if (!BB)
BB = addBasicBlock(Offset, Label);
return BB;
}
/// Return basic block that started at offset \p Offset.
BinaryBasicBlock *getBasicBlockAtOffset(uint64_t Offset) {
BinaryBasicBlock *BB = getBasicBlockContainingOffset(Offset);
if (BB && BB->Offset == Offset)
return BB;
return nullptr;
}
/// Return basic block that originally contained offset \p Offset
/// from the function start.
BinaryBasicBlock *getBasicBlockContainingOffset(uint64_t Offset) {
if (Offset > Size)
return nullptr;
if (BasicBlocks.empty())
return nullptr;
auto I = std::lower_bound(BasicBlocks.begin(),
BasicBlocks.end(),
BinaryBasicBlock(Offset));
if (I == BasicBlocks.end())
return &BasicBlocks.back();
return &(*I);
}
/// Dump function information to debug output. If \p PrintInstructions
/// is true - include instruction disassembly.
void dump(bool PrintInstructions = false) const {
print(dbgs(), PrintInstructions);
}
/// Print function information to the \p OS stream.
void print(raw_ostream &OS, bool PrintInstructions = false) const;
void addInstruction(uint64_t Offset, MCInst &&Instruction) {
Instructions.emplace(Offset, std::forward<MCInst>(Instruction));
}
BinaryFunction &setFileOffset(uint64_t Offset) {
FileOffset = Offset;
return *this;
}
BinaryFunction &setMaxSize(uint64_t Size) {
MaxSize = Size;
return *this;
}
BinaryFunction &setSimple(bool Simple) {
IsSimple = Simple;
return *this;
}
BinaryFunction &setAlignment(uint64_t Align) {
Alignment = Align;
return *this;
}
uint64_t getAlignment() const {
return Alignment;
}
BinaryFunction &setImageAddress(uint64_t Address) {
ImageAddress = Address;
return *this;
}
/// Return the address of this function' image in memory.
uint64_t getImageAddress() const {
return ImageAddress;
}
BinaryFunction &setImageSize(uint64_t Size) {
ImageSize = Size;
return *this;
}
/// Return the size of this function' image in memory.
uint64_t getImageSize() const {
return ImageSize;
}
/// Set the profile data for the number of times the function was called.
BinaryFunction &setExecutionCount(uint64_t Count) {
ExecutionCount = Count;
return *this;
}
/// Return the profile information about the number of times
/// the function was executed.
///
/// Return COUNT_NO_PROFILE if there's no profile info.
uint64_t getExecutionCount() const {
return ExecutionCount;
}
/// Disassemble function from raw data \p FunctionData.
/// If successful, this function will populate the list of instructions
/// for this function together with offsets from the function start
/// in the input. It will also populate Labels with destinations for
/// local branches, and LocalBranches with [from, to] info.
///
/// \p FunctionData is the set bytes representing the function body.
///
/// The Function should be properly initialized before this function
/// is called. I.e. function address and size should be set.
///
/// Returns true on successful disassembly, and updates the current
/// state to State:Disassembled.
///
/// Returns false if disassembly failed.
bool disassemble(ArrayRef<uint8_t> FunctionData);
/// Builds a list of basic blocks with successor and predecessor info.
///
/// The function should be in the Disassembled state prior to the call.
///
/// Returns true on success and updates the current function state to
/// State::CFG. Returns false if the CFG cannot be built.
bool buildCFG();
/// Virtual destructor; performs no work of its own.
virtual ~BinaryFunction() = default;
};
/// Write a human-readable name for \p State to \p OS and return \p OS.
/// Values not listed below are printed as "<unknown>".
inline raw_ostream &operator<<(raw_ostream &OS,
                               const BinaryFunction::State State) {
  const char *Description = "<unknown>";
  switch (State) {
  default:
    break;
  case BinaryFunction::State::Empty:
    Description = "empty";
    break;
  case BinaryFunction::State::Disassembled:
    Description = "disassembled";
    break;
  case BinaryFunction::State::CFG:
    Description = "CFG constructed";
    break;
  case BinaryFunction::State::Assembled:
    Description = "assembled";
    break;
  }
  return OS << Description;
}
} // namespace flo
} // namespace llvm
#endif

View File

@ -2,7 +2,6 @@ set(LLVM_LINK_COMPONENTS
${LLVM_TARGETS_TO_BUILD}
CodeGen
Core
DebugInfoDWARF
MC
MCDisassembler
MCParser
@ -13,4 +12,6 @@ set(LLVM_LINK_COMPONENTS
add_llvm_tool(llvm-flo
llvm-flo.cpp
BinaryBasicBlock.cpp
BinaryFunction.cpp
)

View File

@ -19,4 +19,4 @@
type = Tool
name = llvm-flo
parent = Tools
required_libraries = DebugInfoDWARF MC MCDisassembler MCParser Object all-targets
required_libraries = MC MCDisassembler MCParser Object all-targets

View File

@ -7,12 +7,17 @@
//
//===----------------------------------------------------------------------===//
//
// This is a binary optimizer that will take 'perf' output and change
// basic block layout for better performance (a.k.a. branch straightening),
// plus some other optimizations that are better performed on a binary.
//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/STLExtras.h"
#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler.h"
@ -20,7 +25,10 @@
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
@ -38,13 +46,20 @@
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Target/TargetMachine.h"
#include "BinaryBasicBlock.h"
#include "BinaryContext.h"
#include "BinaryFunction.h"
#include <algorithm>
#include <map>
#include <system_error>
#undef DEBUG_TYPE
#define DEBUG_TYPE "flo"
using namespace llvm;
using namespace object;
using namespace flo;
// Tool options.
static cl::opt<std::string>
@ -57,11 +72,16 @@ static cl::opt<std::string>
OutputFilename("o", cl::desc("<output file>"), cl::Required);
static cl::list<std::string>
FunctionNames("funcs", cl::desc("list of functions to optimzize"),
cl::Optional);
FunctionNames("funcs",
cl::CommaSeparated,
cl::desc("list of functions to optimize"),
cl::value_desc("func1,func2,func3,..."));
static cl::opt<bool>
EliminateUnreachable("eliminate-unreachable",
cl::desc("eliminate unreachable code"),
cl::Optional);
// Tool name used for reporting.
static StringRef ToolName;
static void report_error(StringRef Message, std::error_code EC) {
@ -70,6 +90,576 @@ static void report_error(StringRef Message, std::error_code EC) {
exit(1);
}
/// If \p EC holds an error, print a "error reading file" diagnostic prefixed
/// with the tool name to errs() and terminate the process with exit code 1.
/// Does nothing when \p EC signals success.
static void error(std::error_code EC) {
  if (EC) {
    errs() << ToolName << ": error reading file: " << EC.message() << ".\n";
    exit(1);
  }
}
/// Build a one-element vector holding \p t.
/// The argument is moved into the vector, so move-only types are accepted.
template <typename T>
static std::vector<T> singletonSet(T t) {
  std::vector<T> Single;
  Single.emplace_back(std::move(t));
  return Single;
}
/// Class responsible for allocating and managing code and data sections.
///
/// Every request is delegated to SectionMemoryManager; on top of that the
/// address and size of each allocated code section are recorded by name so
/// that the sections can be remapped later.
class ExecutableFileMemoryManager : public SectionMemoryManager {
public:

  // Keep [section name] -> [allocated address, size] map for later remapping.
  std::map<std::string, std::pair<uint64_t,uint64_t>> SectionAddressInfo;

  ExecutableFileMemoryManager() {}

  /// Allocate a code section through the base class and remember where it
  /// was placed, keyed by \p SectionName.
  uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
                               unsigned SectionID,
                               StringRef SectionName) override {
    auto ret =
      SectionMemoryManager::allocateCodeSection(Size, Alignment, SectionID,
                                                SectionName);
    // raw_ostream prints pointers with their own "0x" prefix, so none is
    // added in the message text.
    DEBUG(dbgs() << "FLO: allocating code section : " << SectionName
                 << " with size " << Size << ", alignment " << Alignment
                 << " at " << ret << "\n");

    SectionAddressInfo[SectionName] = {reinterpret_cast<uint64_t>(ret), Size};

    return ret;
  }

  /// Allocate a data section through the base class. A warning is printed
  /// on every call; the allocation is not recorded in SectionAddressInfo.
  uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
                               unsigned SectionID, StringRef SectionName,
                               bool IsReadOnly) override {
    DEBUG(dbgs() << "FLO: allocating data section : " << SectionName
                 << " with size " << Size << ", alignment "
                 << Alignment << "\n");
    errs() << "FLO-WARNING: allocating data section.\n";
    return SectionMemoryManager::allocateDataSection(Size, Alignment, SectionID,
                                                     SectionName, IsReadOnly);
  }

  // Tell EE that we guarantee we don't need stubs.
  bool allowStubAllocation() const override { return false; }

  /// Forward to the base class, logging the call in debug output.
  bool finalizeMemory(std::string *ErrMsg = nullptr) override {
    DEBUG(dbgs() << "FLO: finalizeMemory()\n");
    return SectionMemoryManager::finalizeMemory(ErrMsg);
  }
};
/// Create BinaryContext for a given architecture \p ArchName and
/// triple \p TripleName.
static std::unique_ptr<BinaryContext> CreateBinaryContext(
std::string ArchName,
std::string TripleName) {
std::string Error;
std::unique_ptr<Triple> TheTriple = llvm::make_unique<Triple>(TripleName);
const Target *TheTarget = TargetRegistry::lookupTarget(ArchName,
*TheTriple,
Error);
if (!TheTarget) {
errs() << ToolName << ": " << Error;
return nullptr;
}
std::unique_ptr<const MCRegisterInfo> MRI(
TheTarget->createMCRegInfo(TripleName));
if (!MRI) {
errs() << "error: no register info for target " << TripleName << "\n";
return nullptr;
}
// Set up disassembler.
std::unique_ptr<const MCAsmInfo> AsmInfo(
TheTarget->createMCAsmInfo(*MRI, TripleName));
if (!AsmInfo) {
errs() << "error: no assembly info for target " << TripleName << "\n";
return nullptr;
}
std::unique_ptr<const MCSubtargetInfo> STI(
TheTarget->createMCSubtargetInfo(TripleName, "", ""));
if (!STI) {
errs() << "error: no subtarget info for target " << TripleName << "\n";
return nullptr;
}
std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
if (!MII) {
errs() << "error: no instruction info for target " << TripleName << "\n";
return nullptr;
}
std::unique_ptr<MCObjectFileInfo> MOFI =
llvm::make_unique<MCObjectFileInfo>();
std::unique_ptr<MCContext> Ctx =
llvm::make_unique<MCContext>(AsmInfo.get(), MRI.get(), MOFI.get());
MOFI->InitMCObjectFileInfo(*TheTriple, Reloc::Default,
CodeModel::Default, *Ctx);
std::unique_ptr<MCDisassembler> DisAsm(
TheTarget->createMCDisassembler(*STI, *Ctx));
if (!DisAsm) {
errs() << "error: no disassembler for target " << TripleName << "\n";
return nullptr;
}
std::unique_ptr<const MCInstrAnalysis> MIA(
TheTarget->createMCInstrAnalysis(MII.get()));
if (!MIA) {
errs() << "error: failed to create instruction analysis for target"
<< TripleName << "\n";
return nullptr;
}
int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
std::unique_ptr<MCInstPrinter> InstructionPrinter(
TheTarget->createMCInstPrinter(Triple(TripleName), AsmPrinterVariant,
*AsmInfo, *MII, *MRI));
if (!InstructionPrinter) {
errs() << "error: no instruction printer for target " << TripleName
<< '\n';
return nullptr;
}
InstructionPrinter->setPrintImmHex(true);
auto MCE = TheTarget->createMCCodeEmitter(*MII, *MRI, *Ctx);
auto MAB = TheTarget->createMCAsmBackend(*MRI, TripleName, "");
// Make sure we don't miss any output on core dumps.
outs().SetUnbuffered();
errs().SetUnbuffered();
dbgs().SetUnbuffered();
auto BC =
llvm::make_unique<BinaryContext>(std::move(Ctx),
std::move(TheTriple),
TheTarget,
MCE,
std::move(MOFI),
std::move(AsmInfo),
std::move(MII),
std::move(STI),
std::move(InstructionPrinter),
std::move(MIA),
std::move(MRI),
std::move(DisAsm),
MAB);
return BC;
}
/// Rewrite the functions of the ELF file \p File into the output file.
///
/// The steps are, in order:
///   1. Walk the symbol table and create a BinaryFunction for every named,
///      non-empty function symbol with a non-zero address.
///   2. Disassemble each function and build its control flow graph.
///   3. Optionally (-eliminate-unreachable) report basic blocks that have no
///      predecessors.
///   4. Emit all simple functions (restricted to -funcs when given) into an
///      in-memory object, each function in its own section.
///   5. Link that object with ObjectLinkingLayer, mapping every function
///      section to the function's original address.
///   6. Copy the input file to the output and overwrite each function whose
///      new image fits into its maximum size, padding the rest with nops.
///
/// Failures to create the binary context or to open an output file are
/// reported on errs() and abort the rewrite by returning early.
static void OptimizeFile(ELFObjectFileBase *File) {

  // FIXME: there should be some way to extract arch and triple information
  //        from the file.
  std::unique_ptr<BinaryContext> BC =
      CreateBinaryContext("x86-64", "x86_64-unknown-linux");
  if (!BC) {
    errs() << "failed to create a binary context\n";
    return;
  }

  // Store all non-zero file symbols in this map for quick address lookup.
  std::map<uint64_t, SymbolRef> FileSymRefs;

  // Entry point to the binary.
  //
  // Note: this is ELF header entry point, but we could have more entry points
  // from constructors etc.
  BinaryFunction *EntryPointFunction{nullptr};

  // Populate array of binary functions and file symbols
  // from file symbol table.
  //
  // For local symbols we want to keep track of associated FILE symbol for
  // disambiguation by name.
  std::map<uint64_t, BinaryFunction> BinaryFunctions;
  StringRef FileSymbolName;
  for (const SymbolRef &Symbol : File->symbols()) {
    // Keep undefined symbols for pretty printing?
    if (Symbol.getFlags() & SymbolRef::SF_Undefined)
      continue;

    ErrorOr<StringRef> Name = Symbol.getName();
    error(Name.getError());

    if (Symbol.getType() == SymbolRef::ST_File) {
      // Could be used for local symbol disambiguation.
      FileSymbolName = *Name;
      continue;
    }

    ErrorOr<uint64_t> AddressOrErr = Symbol.getAddress();
    error(AddressOrErr.getError());
    uint64_t Address = *AddressOrErr;
    if (Address == 0) {
      if (Symbol.getType() == SymbolRef::ST_Function)
        errs() << "FLO-WARNING: function with 0 address seen\n";
      continue;
    }

    FileSymRefs[Address] = Symbol;

    // Only consider ST_Function symbols for functions. Although this
    // assumption could be broken by assembly functions for which the type
    // could be wrong.
    if (Symbol.getType() != SymbolRef::ST_Function) {
      // FIXME: add it to the address map.
      continue;
    }

    // TODO: populate address map with PLT entries for better readability.

    // Ignore function with 0 size for now (possibly coming from assembly).
    auto SymbolSize = ELFSymbolRef(Symbol).getSize();
    if (SymbolSize == 0)
      continue;

    // There's nothing horribly wrong with anonymous symbols, but let's
    // ignore them for now.
    if (Name->empty())
      continue;

    ErrorOr<section_iterator> SectionOrErr = Symbol.getSection();
    error(SectionOrErr.getError());
    section_iterator Section = *SectionOrErr;
    if (Section == File->section_end()) {
      // Could be an absolute symbol. Could record for pretty printing.
      continue;
    }

    // Disambiguate local function name. Since we don't know if we'll see
    // a global with the same name, always modify the local function name.
    std::string UniqueFunctionName;
    if (!(Symbol.getFlags() & SymbolRef::SF_Global)) {
      unsigned LocalCount = 1;
      // Materialize the prefix as a std::string: a Twine only references its
      // operands, so keeping one in a local variable would leave it pointing
      // at temporaries that are destroyed at the end of this statement.
      const std::string LocalName =
          (*Name + "/" + FileSymbolName + "/").str();
      while (BC->GlobalSymbols.find(
                 (Twine(LocalName) + Twine(LocalCount)).str()) !=
             BC->GlobalSymbols.end()) {
        ++LocalCount;
      }
      UniqueFunctionName = (Twine(LocalName) + Twine(LocalCount)).str();
    } else {
      assert(BC->GlobalSymbols.find(*Name) == BC->GlobalSymbols.end() &&
             "global name not unique");
      UniqueFunctionName = *Name;
    }

    // Create the function and add to the map.
    BinaryFunctions.emplace(
        Address,
        BinaryFunction(UniqueFunctionName, Symbol, *Section, Address,
                       SymbolSize, *BC)
    );

    // Add the name to global symbols map.
    BC->GlobalSymbols[UniqueFunctionName] = Address;

    // Add to the reverse map.
    BC->GlobalAddresses.emplace(std::make_pair(Address, UniqueFunctionName));
  }

  // Disassemble every function and build its control flow graph.
  for (auto &BFI : BinaryFunctions) {
    BinaryFunction &Function = BFI.second;

    SectionRef Section = Function.getSection();
    assert(Section.containsSymbol(Function.getSymbol()) &&
           "symbol not in section");

    // When could it happen?
    if (!Section.isText() || Section.isVirtual() || !Section.getSize()) {
      DEBUG(dbgs() << "FLO: corresponding section non-executable or empty "
                   << "for function " << Function.getName() << "\n");
      continue;
    }

    // Set the proper maximum size value after the whole symbol table
    // has been processed.
    //
    // NOTE(review): when no symbol follows the function, the maximum size
    // is left at whatever value the function already holds.
    auto SymRefI = FileSymRefs.upper_bound(Function.getAddress());
    if (SymRefI != FileSymRefs.end()) {
      auto MaxSize = SymRefI->first - Function.getAddress();
      assert(MaxSize >= Function.getSize() &&
             "symbol seen in the middle of the function");
      Function.setMaxSize(MaxSize);
    }

    StringRef SectionContents;
    error(Section.getContents(SectionContents));

    assert(SectionContents.size() == Section.getSize() &&
           "section size mismatch");

    // Function offset from the section start.
    auto FunctionOffset = Function.getAddress() - Section.getAddress();

    // Offset of the function in the file.
    Function.setFileOffset(
        SectionContents.data() - File->getData().data() + FunctionOffset);

    ArrayRef<uint8_t> FunctionData(
        reinterpret_cast<const uint8_t *>
          (SectionContents.data()) + FunctionOffset,
        Function.getSize());

    if (!Function.disassemble(FunctionData))
      continue;

    if (!Function.buildCFG())
      continue;

  } // Iterate over all functions

  // Run optimization passes.
  //
  // FIXME: use real optimization passes.
  for (auto &BFI : BinaryFunctions) {
    auto &Function = BFI.second;
    // Detect and eliminate unreachable basic blocks. We could have those
    // filled with nops and they are used for alignment.
    //
    // FIXME: this wouldn't work with C++ exceptions until we implement
    //        support for those as there will be "invisible" edges
    //        in the graph.
    if (EliminateUnreachable) {
      bool IsFirst = true;
      for (auto &BB : Function) {
        if (!IsFirst && BB.pred_empty()) {
          outs() << "FLO: basic block " << BB.getName() << " in function "
                 << Function.getName() << " is dead\n";
          // TODO: currently lacking interface to eliminate basic block.
        }
        IsFirst = false;
      }
      DEBUG(dbgs() << "*** After unreachable block elimination ***\n");
      DEBUG(Function.print(dbgs(), /* PrintInstructions = */ true));
    }
  }

  std::error_code EC;

  // Intermediate object file; named after the output file with ".o" appended.
  const std::string ObjectFilename = OutputFilename + ".o";
  std::unique_ptr<tool_output_file> Out =
    llvm::make_unique<tool_output_file>(ObjectFilename,
                                        EC,sys::fs::F_None);

  if (EC) {
    errs() << ToolName << ": cannot open output file '" << ObjectFilename
           << "': " << EC.message() << "\n";
    return;
  }

  std::unique_ptr<tool_output_file> RealOut =
    llvm::make_unique<tool_output_file>(OutputFilename, EC, sys::fs::F_None,
                                        0777);
  if (EC) {
    errs() << ToolName << ": cannot open output file '" << OutputFilename
           << "': " << EC.message() << "\n";
    return;
  }

  // Copy input file.
  RealOut->os() << File->getData();

  std::unique_ptr<buffer_ostream> BOS =
      make_unique<buffer_ostream>(Out->os());
  raw_pwrite_stream *OS = BOS.get();

  // Implicitly MCObjectStreamer takes ownership of MCAsmBackend (MAB)
  // and MCCodeEmitter (MCE). ~MCObjectStreamer() will delete these
  // two instances.
  std::unique_ptr<MCStreamer> Streamer(
    BC->TheTarget->createMCObjectStreamer(*BC->TheTriple,
                                          *BC->Ctx,
                                          *BC->MAB,
                                          *OS,
                                          BC->MCE,
                                          *BC->STI,
                                          /* RelaxAll */ false,
                                          /* DWARFMustBeAtTheEnd */ false));

  Streamer->InitSections(false);

  // Output functions one by one.
  for (auto &BFI : BinaryFunctions) {
    auto &Function = BFI.second;

    if (!Function.isSimple())
      continue;

    // Only overwrite functions from the list if non-empty.
    if (!FunctionNames.empty()) {
      bool IsValid = false;
      for (auto &Name : FunctionNames) {
        if (Function.getName() == Name) {
          IsValid = true;
          break;
        }
      }
      if (!IsValid)
        continue;
    }

    DEBUG(dbgs() << "FLO: generating code for function \""
                 << Function.getName() << "\"\n");

    // No need for human readability?
    // FIXME: what difference does it make in reality?
    //Ctx.setUseNamesOnTempLabels(false);

    // Emit function start

    // Each function is emitted into its own section.
    MCSectionELF *FunctionSection =
      BC->Ctx->getELFSection(Function.getCodeSectionName(),
                             ELF::SHT_PROGBITS,
                             ELF::SHF_EXECINSTR | ELF::SHF_ALLOC);

    MCSection *Section = FunctionSection;
    Streamer->SwitchSection(Section);

    Streamer->EmitCodeAlignment(Function.getAlignment());

    MCSymbol *FunctionSymbol = BC->Ctx->getOrCreateSymbol(Function.getName());
    Streamer->EmitSymbolAttribute(FunctionSymbol, MCSA_ELF_TypeFunction);
    Streamer->EmitLabel(FunctionSymbol);

    // Emit code.
    for (const auto &BB : Function) {
      Streamer->EmitLabel(BB.getLabel());
      for (const auto &Instr : BB) {
        Streamer->EmitInstruction(Instr, *BC->STI);
      }
    }

    // TODO: is there any use in emitting end of function?
    //       Perhaps once we have a support for C++ exceptions.
    //auto FunctionEndLabel = Ctx.createTempSymbol("func_end");
    //Streamer->EmitLabel(FunctionEndLabel);
    //Streamer->emitELFSize(FunctionSymbol, MCExpr());
  }

  Streamer->Finish();

  // Get output object as ObjectFile.
  std::unique_ptr<MemoryBuffer> ObjectMemBuffer =
      MemoryBuffer::getMemBuffer(BOS->str(), "in-memory object file", false);
  ErrorOr<std::unique_ptr<object::ObjectFile>> ObjOrErr =
    object::ObjectFile::createObjectFile(ObjectMemBuffer->getMemBufferRef());

  if (std::error_code EC = ObjOrErr.getError()) {
    report_error(InputFilename, EC);
    return;
  }

  std::unique_ptr<ExecutableFileMemoryManager>
    EFMM(new ExecutableFileMemoryManager());

  // FIXME: use notifyObjectLoaded() to remap sections.

  DEBUG(dbgs() << "Creating OLT\n");
  // Run ObjectLinkingLayer() with custom memory manager and symbol resolver.
  orc::ObjectLinkingLayer<> OLT;

  // Symbols are resolved against the addresses recorded in the global
  // symbols map of the binary context.
  auto Resolver = orc::createLambdaResolver(
          [&](const std::string &Name) {
            DEBUG(dbgs() << "FLO: looking for " << Name << "\n");
            auto I = BC->GlobalSymbols.find(Name);
            if (I == BC->GlobalSymbols.end())
              return RuntimeDyld::SymbolInfo(nullptr);
            return RuntimeDyld::SymbolInfo(I->second,
                                           JITSymbolFlags::None);
          },
          [](const std::string &S) {
            DEBUG(dbgs() << "FLO: resolving " << S << "\n");
            return nullptr;
          }
      );
  // FIXME:
  auto ObjectsHandle = OLT.addObjectSet(
        singletonSet(std::move(ObjOrErr.get())),
        EFMM.get(),
        //std::move(EFMM),
        std::move(Resolver));
  //OLT.takeOwnershipOfBuffers(ObjectsHandle, );

  // Map every function/section current address in memory to that in
  // the output binary.
  for (auto &BFI : BinaryFunctions) {
    auto &Function = BFI.second;
    if (!Function.isSimple())
      continue;

    auto SAI = EFMM->SectionAddressInfo.find(Function.getCodeSectionName());
    if (SAI != EFMM->SectionAddressInfo.end()) {
      DEBUG(dbgs() << "FLO: mapping 0x" << Twine::utohexstr(SAI->second.first)
                   << " to 0x" << Twine::utohexstr(Function.getAddress())
                   << '\n');
      OLT.mapSectionAddress(ObjectsHandle,
          reinterpret_cast<const void*>(SAI->second.first),
          Function.getAddress());
      Function.setImageAddress(SAI->second.first);
      Function.setImageSize(SAI->second.second);
    } else {
      errs() << "FLO: cannot remap function " << Function.getName() << "\n";
    }
  }

  OLT.emitAndFinalize(ObjectsHandle);

  // FIXME: is there a less painful way to obtain assembler/writer?
  auto &Writer =
    static_cast<MCObjectStreamer*>(Streamer.get())->getAssembler().getWriter();
  Writer.setStream(RealOut->os());

  // Overwrite function in the output file.
  for (auto &BFI : BinaryFunctions) {
    auto &Function = BFI.second;

    // Functions without an image were not emitted or could not be remapped.
    if (Function.getImageAddress() == 0 || Function.getImageSize() == 0)
      continue;

    if (Function.getImageSize() > Function.getMaxSize()) {
      errs() << "FLO-WARNING: new function size (0x"
             << Twine::utohexstr(Function.getImageSize())
             << ") is larger than maximum allowed size (0x"
             << Twine::utohexstr(Function.getMaxSize())
             << ") for function " << Function.getName() << '\n';
      continue;
    }

    // Overwrite function in the output file.
    outs() << "FLO: rewriting function \"" << Function.getName() << "\"\n";
    RealOut->os().pwrite(
        reinterpret_cast<char *>(Function.getImageAddress()),
        Function.getImageSize(),
        Function.getFileOffset());

    // Write nops at the end of the function. The stream position is saved
    // and restored around the seek.
    auto Pos = RealOut->os().tell();
    RealOut->os().seek(Function.getFileOffset() + Function.getImageSize());
    BC->MAB->writeNopData(Function.getMaxSize() - Function.getImageSize(),
                          &Writer);
    RealOut->os().seek(Pos);
  }

  if (EntryPointFunction) {
    DEBUG(dbgs() << "FLO: entry point function is "
                 << EntryPointFunction->getName() << '\n');
  } else {
    DEBUG(dbgs() << "FLO: no entry point function was set\n");
  }

  // TODO: we should find a way to mark the binary as optimized by us.

  Out->keep();
  RealOut->keep();
}
int main(int argc, char **argv) {
// Print a stack trace if we signal out.
sys::PrintStackTraceOnErrorSignal();
@ -104,7 +694,7 @@ int main(int argc, char **argv) {
Binary &Binary = *BinaryOrErr.get().getBinary();
if (ELFObjectFileBase *e = dyn_cast<ELFObjectFileBase>(&Binary)) {
outs() << "mind blown : " << e << "!\n";
OptimizeFile(e);
} else {
report_error(InputFilename, object_error::invalid_file_type);
}