mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-11-30 17:21:10 +00:00
Commit FLO with control flow graph.
Summary: llvm-flo disassembles, builds control flow graph, and re-writes simple functions. (cherry picked from FBD2524024)
This commit is contained in:
parent
7927c14ff5
commit
9a2fe7ebe4
65
bolt/BinaryBasicBlock.cpp
Normal file
65
bolt/BinaryBasicBlock.cpp
Normal file
@ -0,0 +1,65 @@
|
||||
//===--- BinaryBasicBlock.cpp - Interface for assembly-level basic block --===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/MC/MCAsmInfo.h"
|
||||
#include "llvm/MC/MCContext.h"
|
||||
#include "llvm/MC/MCInst.h"
|
||||
#include "llvm/MC/MCInstPrinter.h"
|
||||
#include <limits>
|
||||
#include <string>
|
||||
|
||||
#include "BinaryBasicBlock.h"
|
||||
#include "BinaryFunction.h"
|
||||
|
||||
#undef DEBUG_TYPE
|
||||
#define DEBUG_TYPE "flo"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
namespace flo {
|
||||
|
||||
/// Strict ordering of basic blocks by their original offset in the function.
bool operator<(const BinaryBasicBlock &LHS, const BinaryBasicBlock &RHS) {
  const uint64_t LeftOffset = LHS.Offset;
  const uint64_t RightOffset = RHS.Offset;
  return LeftOffset < RightOffset;
}
|
||||
|
||||
void BinaryBasicBlock::addSuccessor(BinaryBasicBlock *Succ,
|
||||
uint64_t Count,
|
||||
uint64_t MispredictedCount) {
|
||||
Successors.push_back(Succ);
|
||||
Succ->Predecessors.push_back(this);
|
||||
|
||||
// TODO: update weights.
|
||||
}
|
||||
|
||||
void BinaryBasicBlock::removeSuccessor(BinaryBasicBlock *Succ) {
|
||||
Succ->removePredecessor(this);
|
||||
auto I = std::find(succ_begin(), succ_end(), Succ);
|
||||
assert(I != succ_end() && "no such successor!");
|
||||
|
||||
Successors.erase(I);
|
||||
|
||||
// TODO: update weights.
|
||||
}
|
||||
|
||||
/// Append \p Pred to the list of predecessors of this block. The successor
/// list of \p Pred is not modified here.
void BinaryBasicBlock::addPredecessor(BinaryBasicBlock *Pred) {
  Predecessors.emplace_back(Pred);
}
|
||||
|
||||
/// Remove the first occurrence of \p Pred from the predecessor list.
///
/// A missing \p Pred is a caller bug and trips the assertion. When assertions
/// are compiled out the list is left untouched, because erasing the end
/// iterator of a vector is undefined behavior.
void BinaryBasicBlock::removePredecessor(BinaryBasicBlock *Pred) {
  auto I = std::find(pred_begin(), pred_end(), Pred);
  assert(I != pred_end() && "Pred is not a predecessor of this block!");
  if (I == pred_end())
    return;

  Predecessors.erase(I);
}
|
||||
|
||||
} // namespace flo
|
||||
|
||||
} // namespace llvm
|
212
bolt/BinaryBasicBlock.h
Normal file
212
bolt/BinaryBasicBlock.h
Normal file
@ -0,0 +1,212 @@
|
||||
//===--- BinaryBasicBlock.h - Interface for assembly-level basic block ----===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// TODO: memory management for instructions.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_TOOLS_LLVM_FLO_BINARY_BASIC_BLOCK_H
|
||||
#define LLVM_TOOLS_LLVM_FLO_BINARY_BASIC_BLOCK_H
|
||||
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/ADT/ilist.h"
|
||||
#include "llvm/MC/MCCodeEmitter.h"
|
||||
#include "llvm/MC/MCContext.h"
|
||||
#include "llvm/MC/MCInst.h"
|
||||
#include "llvm/MC/MCInstPrinter.h"
|
||||
#include "llvm/MC/MCSubtargetInfo.h"
|
||||
#include "llvm/MC/MCSymbol.h"
|
||||
#include "llvm/Object/ObjectFile.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include <limits>
|
||||
|
||||
namespace llvm {
|
||||
|
||||
namespace flo {
|
||||
|
||||
class BinaryFunction;
|
||||
|
||||
/// The intention is to keep the structure similar to MachineBasicBlock as
|
||||
/// we might switch to it at some point.
|
||||
class BinaryBasicBlock {
|
||||
|
||||
/// Label associated with the block.
|
||||
MCSymbol *Label{nullptr};
|
||||
|
||||
/// Original offset in the function.
|
||||
uint64_t Offset{std::numeric_limits<uint64_t>::max()};
|
||||
|
||||
/// Alignment requirements for the block.
|
||||
uint64_t Alignment{1};
|
||||
|
||||
/// Vector of all instructions in the block.
|
||||
std::vector<MCInst> Instructions;
|
||||
|
||||
/// CFG information.
|
||||
std::vector<BinaryBasicBlock *> Predecessors;
|
||||
std::vector<BinaryBasicBlock *> Successors;
|
||||
|
||||
struct BinaryBranchInfo {
|
||||
uint64_t Count;
|
||||
uint64_t MispredictedCount; /// number of branches mispredicted
|
||||
};
|
||||
|
||||
/// Each successor has a corresponding BranchInfo entry in the list.
|
||||
std::vector<BinaryBranchInfo> BranchInfo;
|
||||
typedef std::vector<BinaryBranchInfo>::iterator branch_info_iterator;
|
||||
typedef std::vector<BinaryBranchInfo>::const_iterator
|
||||
const_branch_info_iterator;
|
||||
|
||||
BinaryBasicBlock() {}
|
||||
|
||||
explicit BinaryBasicBlock(
|
||||
MCSymbol *Label,
|
||||
uint64_t Offset = std::numeric_limits<uint64_t>::max())
|
||||
: Label(Label), Offset(Offset) {}
|
||||
|
||||
explicit BinaryBasicBlock(uint64_t Offset)
|
||||
: Offset(Offset) {}
|
||||
|
||||
// Exclusively managed by BinaryFunction.
|
||||
friend class BinaryFunction;
|
||||
friend bool operator<(const BinaryBasicBlock &LHS,
|
||||
const BinaryBasicBlock &RHS);
|
||||
|
||||
public:
|
||||
|
||||
// Instructions iterators.
|
||||
typedef std::vector<MCInst>::iterator iterator;
|
||||
typedef std::vector<MCInst>::const_iterator const_iterator;
|
||||
typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
|
||||
typedef std::reverse_iterator<iterator> reverse_iterator;
|
||||
|
||||
MCInst &front() { return Instructions.front(); }
|
||||
MCInst &back() { return Instructions.back(); }
|
||||
const MCInst &front() const { return Instructions.front(); }
|
||||
const MCInst &back() const { return Instructions.back(); }
|
||||
|
||||
iterator begin() { return Instructions.begin(); }
|
||||
const_iterator begin() const { return Instructions.begin(); }
|
||||
iterator end () { return Instructions.end(); }
|
||||
const_iterator end () const { return Instructions.end(); }
|
||||
reverse_iterator rbegin() { return Instructions.rbegin(); }
|
||||
const_reverse_iterator rbegin() const { return Instructions.rbegin(); }
|
||||
reverse_iterator rend () { return Instructions.rend(); }
|
||||
const_reverse_iterator rend () const { return Instructions.rend(); }
|
||||
|
||||
// CFG iterators.
|
||||
typedef std::vector<BinaryBasicBlock *>::iterator pred_iterator;
|
||||
typedef std::vector<BinaryBasicBlock *>::const_iterator const_pred_iterator;
|
||||
typedef std::vector<BinaryBasicBlock *>::iterator succ_iterator;
|
||||
typedef std::vector<BinaryBasicBlock *>::const_iterator const_succ_iterator;
|
||||
typedef std::vector<BinaryBasicBlock *>::reverse_iterator
|
||||
pred_reverse_iterator;
|
||||
typedef std::vector<BinaryBasicBlock *>::const_reverse_iterator
|
||||
const_pred_reverse_iterator;
|
||||
typedef std::vector<BinaryBasicBlock *>::reverse_iterator
|
||||
succ_reverse_iterator;
|
||||
typedef std::vector<BinaryBasicBlock *>::const_reverse_iterator
|
||||
const_succ_reverse_iterator;
|
||||
pred_iterator pred_begin() { return Predecessors.begin(); }
|
||||
const_pred_iterator pred_begin() const { return Predecessors.begin(); }
|
||||
pred_iterator pred_end() { return Predecessors.end(); }
|
||||
const_pred_iterator pred_end() const { return Predecessors.end(); }
|
||||
pred_reverse_iterator pred_rbegin()
|
||||
{ return Predecessors.rbegin();}
|
||||
const_pred_reverse_iterator pred_rbegin() const
|
||||
{ return Predecessors.rbegin();}
|
||||
pred_reverse_iterator pred_rend()
|
||||
{ return Predecessors.rend(); }
|
||||
const_pred_reverse_iterator pred_rend() const
|
||||
{ return Predecessors.rend(); }
|
||||
unsigned pred_size() const {
|
||||
return (unsigned)Predecessors.size();
|
||||
}
|
||||
bool pred_empty() const { return Predecessors.empty(); }
|
||||
|
||||
succ_iterator succ_begin() { return Successors.begin(); }
|
||||
const_succ_iterator succ_begin() const { return Successors.begin(); }
|
||||
succ_iterator succ_end() { return Successors.end(); }
|
||||
const_succ_iterator succ_end() const { return Successors.end(); }
|
||||
succ_reverse_iterator succ_rbegin()
|
||||
{ return Successors.rbegin(); }
|
||||
const_succ_reverse_iterator succ_rbegin() const
|
||||
{ return Successors.rbegin(); }
|
||||
succ_reverse_iterator succ_rend()
|
||||
{ return Successors.rend(); }
|
||||
const_succ_reverse_iterator succ_rend() const
|
||||
{ return Successors.rend(); }
|
||||
unsigned succ_size() const {
|
||||
return (unsigned)Successors.size();
|
||||
}
|
||||
bool succ_empty() const { return Successors.empty(); }
|
||||
|
||||
inline iterator_range<pred_iterator> predecessors() {
|
||||
return iterator_range<pred_iterator>(pred_begin(), pred_end());
|
||||
}
|
||||
inline iterator_range<const_pred_iterator> predecessors() const {
|
||||
return iterator_range<const_pred_iterator>(pred_begin(), pred_end());
|
||||
}
|
||||
inline iterator_range<succ_iterator> successors() {
|
||||
return iterator_range<succ_iterator>(succ_begin(), succ_end());
|
||||
}
|
||||
inline iterator_range<const_succ_iterator> successors() const {
|
||||
return iterator_range<const_succ_iterator>(succ_begin(), succ_end());
|
||||
}
|
||||
|
||||
/// Return symbol marking the start of this basic block.
|
||||
MCSymbol *getLabel() const {
|
||||
return Label;
|
||||
}
|
||||
|
||||
/// Return local name for the block.
|
||||
StringRef getName() const {
|
||||
return Label->getName();
|
||||
}
|
||||
|
||||
/// Add instruction at the end of this basic block.
|
||||
void addInstruction(MCInst &Inst) {
|
||||
Instructions.emplace_back(Inst);
|
||||
}
|
||||
|
||||
/// Return required alignment for the block.
|
||||
uint64_t getAlignment() const {
|
||||
return Alignment;
|
||||
}
|
||||
|
||||
/// Adds block to successor list, and also updates predecessor list for
|
||||
/// successor block.
|
||||
/// Set branch info for this path.
|
||||
void addSuccessor(BinaryBasicBlock *Succ,
|
||||
uint64_t Count = 0,
|
||||
uint64_t MispredictedCount = 0);
|
||||
|
||||
/// Remove /p Succ basic block from the list of successors. Update the
|
||||
/// list of predecessors of /p Succ and update branch info.
|
||||
void removeSuccessor(BinaryBasicBlock *Succ);
|
||||
|
||||
private:
|
||||
|
||||
/// Adds predecessor to the BB. Most likely you don't need to call this.
|
||||
void addPredecessor(BinaryBasicBlock *Pred);
|
||||
|
||||
/// Remove predecessor of the basic block. Don't use directly, instead
|
||||
/// use removeSuccessor() funciton.
|
||||
void removePredecessor(BinaryBasicBlock *Pred);
|
||||
};
|
||||
|
||||
bool operator<(const BinaryBasicBlock &LHS, const BinaryBasicBlock &RHS);
|
||||
|
||||
|
||||
} // namespace flo
|
||||
|
||||
} // namespace llvm
|
||||
|
||||
#endif
|
114
bolt/BinaryContext.h
Normal file
114
bolt/BinaryContext.h
Normal file
@ -0,0 +1,114 @@
|
||||
//===--- BinaryContext.h - Interface for machine-level context -----------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_TOOLS_LLVM_FLO_BINARY_CONTEXT_H
|
||||
#define LLVM_TOOLS_LLVM_FLO_BINARY_CONTEXT_H
|
||||
|
||||
#include "llvm/ADT/Triple.h"
|
||||
#include "llvm/MC/MCAsmBackend.h"
|
||||
#include "llvm/MC/MCAsmInfo.h"
|
||||
#include "llvm/MC/MCCodeEmitter.h"
|
||||
#include "llvm/MC/MCContext.h"
|
||||
#include "llvm/MC/MCDisassembler.h"
|
||||
#include "llvm/MC/MCInstPrinter.h"
|
||||
#include "llvm/MC/MCInstrAnalysis.h"
|
||||
#include "llvm/MC/MCInstrInfo.h"
|
||||
#include "llvm/MC/MCObjectFileInfo.h"
|
||||
#include "llvm/MC/MCRegisterInfo.h"
|
||||
#include "llvm/MC/MCSubtargetInfo.h"
|
||||
#include "llvm/Support/TargetRegistry.h"
|
||||
|
||||
#include <functional>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <system_error>
|
||||
|
||||
namespace llvm {
|
||||
|
||||
namespace flo {
|
||||
|
||||
/// Everything that's needed to process binaries lives here.
|
||||
class BinaryContext {
|
||||
|
||||
BinaryContext() = delete;
|
||||
|
||||
public:
|
||||
|
||||
// [name] -> [address]
|
||||
typedef std::map<std::string, uint64_t> SymbolMapType;
|
||||
SymbolMapType GlobalSymbols;
|
||||
|
||||
// [address] -> [name1], [name2], ...
|
||||
std::multimap<uint64_t, std::string> GlobalAddresses;
|
||||
|
||||
std::unique_ptr<MCContext> Ctx;
|
||||
|
||||
std::unique_ptr<Triple> TheTriple;
|
||||
|
||||
const Target *TheTarget;
|
||||
|
||||
MCCodeEmitter *MCE;
|
||||
|
||||
std::unique_ptr<MCObjectFileInfo> MOFI;
|
||||
|
||||
std::unique_ptr<const MCAsmInfo> AsmInfo;
|
||||
|
||||
std::unique_ptr<const MCInstrInfo> MII;
|
||||
|
||||
std::unique_ptr<const MCSubtargetInfo> STI;
|
||||
|
||||
std::unique_ptr<MCInstPrinter> InstPrinter;
|
||||
|
||||
std::unique_ptr<const MCInstrAnalysis> MIA;
|
||||
|
||||
std::unique_ptr<const MCRegisterInfo> MRI;
|
||||
|
||||
std::unique_ptr<MCDisassembler> DisAsm;
|
||||
|
||||
std::function<void(std::error_code)> ErrorCheck;
|
||||
|
||||
MCAsmBackend *MAB;
|
||||
|
||||
BinaryContext(std::unique_ptr<MCContext> Ctx,
|
||||
std::unique_ptr<Triple> TheTriple,
|
||||
const Target *TheTarget,
|
||||
MCCodeEmitter *MCE,
|
||||
std::unique_ptr<MCObjectFileInfo> MOFI,
|
||||
std::unique_ptr<const MCAsmInfo> AsmInfo,
|
||||
std::unique_ptr<const MCInstrInfo> MII,
|
||||
std::unique_ptr<const MCSubtargetInfo> STI,
|
||||
std::unique_ptr<MCInstPrinter> InstPrinter,
|
||||
std::unique_ptr<const MCInstrAnalysis> MIA,
|
||||
std::unique_ptr<const MCRegisterInfo> MRI,
|
||||
std::unique_ptr<MCDisassembler> DisAsm,
|
||||
MCAsmBackend *MAB) :
|
||||
Ctx(std::move(Ctx)),
|
||||
TheTriple(std::move(TheTriple)),
|
||||
TheTarget(TheTarget),
|
||||
MCE(MCE),
|
||||
MOFI(std::move(MOFI)),
|
||||
AsmInfo(std::move(AsmInfo)),
|
||||
MII(std::move(MII)),
|
||||
STI(std::move(STI)),
|
||||
InstPrinter(std::move(InstPrinter)),
|
||||
MIA(std::move(MIA)),
|
||||
MRI(std::move(MRI)),
|
||||
DisAsm(std::move(DisAsm)),
|
||||
MAB(MAB) {}
|
||||
|
||||
~BinaryContext() {}
|
||||
};
|
||||
|
||||
} // namespace flo
|
||||
|
||||
} // namespace llvm
|
||||
|
||||
#endif
|
381
bolt/BinaryFunction.cpp
Normal file
381
bolt/BinaryFunction.cpp
Normal file
@ -0,0 +1,381 @@
|
||||
//===--- BinaryFunction.cpp - Interface for machine-level function --------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/MC/MCAsmInfo.h"
|
||||
#include "llvm/MC/MCContext.h"
|
||||
#include "llvm/MC/MCInst.h"
|
||||
#include "llvm/MC/MCInstPrinter.h"
|
||||
#include "llvm/Object/ObjectFile.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include <limits>
|
||||
#include <string>
|
||||
|
||||
#include "BinaryBasicBlock.h"
|
||||
#include "BinaryFunction.h"
|
||||
|
||||
#undef DEBUG_TYPE
|
||||
#define DEBUG_TYPE "flo"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
namespace flo {
|
||||
|
||||
/// Write a human-readable description of the function to \p OS.
///
/// A summary header is always printed. When \p PrintInstructions is set and
/// an instruction printer is available, the body follows: either the raw
/// instruction list (before the CFG exists) or the basic blocks together with
/// their predecessor and successor names.
void BinaryFunction::print(raw_ostream &OS, bool PrintInstructions) const {
  StringRef SectionName;
  Section.getName(SectionName);

  // Summary header.
  OS << "Binary Function \"" << getName() << "\" {";
  OS << "\n State : " << CurrentState;
  OS << "\n Address : 0x" << Twine::utohexstr(Address);
  OS << "\n Size : 0x" << Twine::utohexstr(Size);
  OS << "\n MaxSize : 0x" << Twine::utohexstr(MaxSize);
  OS << "\n Offset : 0x" << Twine::utohexstr(FileOffset);
  OS << "\n Section : " << SectionName;
  OS << "\n Orc Section : " << getCodeSectionName();
  OS << "\n IsSimple : " << IsSimple;
  OS << "\n BB count : " << BasicBlocks.size();
  OS << "\n Image : 0x" << Twine::utohexstr(ImageAddress);
  OS << "\n}\n";

  const bool CanPrintBody = PrintInstructions && BC.InstPrinter;
  if (!CanPrintBody)
    return;

  // No blocks yet: dump the flat instruction map built by the disassembler.
  if (BasicBlocks.empty() && !Instructions.empty()) {
    for (const auto &OffsetAndInst : Instructions) {
      const auto InstOffset = OffsetAndInst.first;
      const auto &Inst = OffsetAndInst.second;

      // A label registered at this offset is printed ahead of the instruction.
      const auto LabelIt = Labels.find(InstOffset);
      if (LabelIt != Labels.end())
        OS << LabelIt->second->getName() << ":\n";

      OS << format(" %08" PRIx64 ": ", InstOffset);
      BC.InstPrinter->printInst(&Inst, OS, "", *BC.STI);
      OS << "\n";
    }
  }

  // Running offset of the current instruction, recomputed from the encoded
  // size of everything printed so far.
  uint64_t LayoutOffset{0};

  for (const auto &Block : BasicBlocks) {
    OS << Block.getName() << " ("
       << Block.Instructions.size() << " instructions)\n";

    if (!Block.Predecessors.empty()) {
      OS << " Predecessors: ";
      const char *Separator = "";
      for (auto Pred : Block.Predecessors) {
        OS << Separator << Pred->getName();
        Separator = ", ";
      }
      OS << '\n';
    }

    LayoutOffset = RoundUpToAlignment(LayoutOffset, Block.getAlignment());

    for (auto &Inst : Block) {
      OS << format(" %08" PRIx64 ": ", LayoutOffset);
      BC.InstPrinter->printInst(&Inst, OS, "", *BC.STI);
      OS << "\n";

      // In case we need MCInst printer:
      // Inst.dump_pretty(OS, InstructionPrinter.get());

      // Encode the instruction only to learn its size.
      // Note: this is imprecise since happening prior to relaxation.
      SmallString<256> Encoding;
      SmallVector<MCFixup, 4> Fixups;
      raw_svector_ostream EncodingOS(Encoding);
      BC.MCE->encodeInstruction(Inst, EncodingOS, Fixups, *BC.STI);
      LayoutOffset += Encoding.size();
    }

    if (!Block.Successors.empty()) {
      OS << " Successors: ";
      const char *Separator = "";
      for (auto Succ : Block.Successors) {
        OS << Separator << Succ->getName();
        Separator = ", ";
      }
      OS << '\n';
    }

    OS << '\n';
  }

  OS << "End of Function \"" << getName() << "\"\n";
}
|
||||
|
||||
/// Disassemble \p FunctionData into the per-offset instruction map.
///
/// Direct branch and call targets are replaced with symbol references: a
/// local label for targets inside the function, a global symbol otherwise.
/// Branches that stay inside the function are recorded in LocalBranches.
/// Anything that cannot be handled yet (undecodable bytes, indirect control
/// flow, calls into the middle of the function, RIP-relative operands) stops
/// the scan and marks the function as not simple.
///
/// Always returns true; callers tell success apart through isSimple().
bool BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
  assert(FunctionData.size() == getSize() &&
         "function size does not match raw data size");

  auto &Ctx = BC.Ctx;
  auto &MIA = BC.MIA;

  // The function entry is always the start of the first basic block.
  Labels[0] = Ctx->createTempSymbol("BB0", false);

  // Cleared as soon as we hit something we cannot rewrite.
  bool Simple = true;
  uint64_t Offset = 0;
  while (Simple && (Offset < getSize())) {
    MCInst Inst;
    uint64_t InstSize;
    if (!BC.DisAsm->getInstruction(Inst,
                                   InstSize,
                                   FunctionData.slice(Offset),
                                   getAddress() + Offset,
                                   nulls(),
                                   nulls())) {
      // Ignore this function. Skip to the next one.
      Simple = false;
      break;
    }

    if (MIA->isIndirectBranch(Inst)) {
      Simple = false;
      break;
    }

    if (!(MIA->isBranch(Inst) || MIA->isCall(Inst))) {
      // Ordinary instruction: only RIP-relative operands are a problem.
      if (MIA->hasRIPOperand(Inst)) {
        DEBUG(dbgs() << "FLO: rip-relative instruction found "
                        "(not supported yet)\n");
        Simple = false;
        break;
      }
    } else {
      uint64_t Target = 0;
      uint64_t AbsoluteInstrAddr = getAddress() + Offset;
      if (!MIA->evaluateBranch(Inst, AbsoluteInstrAddr, InstSize, Target)) {
        // Indirect call
        Simple = false;
        break;
      }

      // A target inside this function is a local branch; anything else is
      // a call, possibly a tail call. A target equal to the function address
      // can be either a branch or a recursive call.
      bool IsCall = MIA->isCall(Inst);
      MCSymbol *TargetSymbol{nullptr};
      uint64_t TargetOffset{0};

      if (IsCall && containsAddress(Target)) {
        if (Target != getAddress()) {
          // Possibly an old-style PIC code
          DEBUG(dbgs() << "FLO: internal call detected at 0x"
                       << Twine::utohexstr(AbsoluteInstrAddr)
                       << " in function " << getName() << "\n");
          Simple = false;
          break;
        }
        // Recursive call.
        TargetSymbol = Ctx->getOrCreateSymbol(getName());
      }

      if (!TargetSymbol) {
        if (!containsAddress(Target)) {
          // This is a call regardless of the opcode (e.g. tail call).
          IsCall = true;

          // Reuse a name already registered for the address, if there is one.
          std::string Name;
          auto NameIt = BC.GlobalAddresses.find(Target);
          if (NameIt == BC.GlobalAddresses.end()) {
            // Create a new symbol at the destination.
            Name = (Twine("FUNCat0x") + Twine::utohexstr(Target)).str();
            BC.GlobalAddresses.emplace(std::make_pair(Target, Name));
          } else {
            // Any registered name will do.
            Name = NameIt->second;
          }
          TargetSymbol = Ctx->getOrCreateSymbol(Name);
          BC.GlobalSymbols[Name] = Target;
        } else {
          // Local target: reuse the label at that offset or create one.
          TargetOffset = Target - getAddress();
          auto LabelIt = Labels.find(TargetOffset);
          if (LabelIt != Labels.end()) {
            TargetSymbol = LabelIt->second;
          } else {
            TargetSymbol = Ctx->createTempSymbol();
            Labels[TargetOffset] = TargetSymbol;
          }
        }
      }

      // Replace the operands with a single reference to the target symbol.
      Inst.clear();
      Inst.addOperand(
          MCOperand::createExpr(
            MCSymbolRefExpr::create(TargetSymbol,
                                    MCSymbolRefExpr::VK_None,
                                    *Ctx)));

      // Add local branch info.
      if (!IsCall)
        LocalBranches.push_back({Offset, TargetOffset});
    }

    addInstruction(Offset, std::move(Inst));

    Offset += InstSize;
  }

  setSimple(Simple);

  // TODO: clear memory if not simple function?

  // Update state.
  updateState(State::Disassembled);

  // Print the function in the new state.
  DEBUG(print(dbgs(), /* PrintInstructions = */ true));

  return true;
}
|
||||
|
||||
bool BinaryFunction::buildCFG() {
|
||||
|
||||
auto &MIA = BC.MIA;
|
||||
|
||||
if (!isSimple())
|
||||
return false;
|
||||
|
||||
if (!(CurrentState == State::Disassembled))
|
||||
return false;
|
||||
|
||||
assert(BasicBlocks.empty() && "basic block list should be empty");
|
||||
assert((Labels.find(0) != Labels.end()) &&
|
||||
"first instruction should always have a label");
|
||||
|
||||
// Create basic blocks in the original layout order:
|
||||
//
|
||||
// * Every instruction with associated label marks
|
||||
// the beginning of a basic block.
|
||||
// * Conditional instruction marks the end of a basic block,
|
||||
// except when the following instruction is an
|
||||
// unconditional branch, and the unconditional branch is not
|
||||
// a destination of another branch. In the latter case, the
|
||||
// basic block will consist of a single unconditional branch
|
||||
// (missed optimization opportunity?).
|
||||
//
|
||||
// Created basic blocks are sorted in layout order since they are
|
||||
// created in the same order as instructions, and instructions are
|
||||
// sorted by offsets.
|
||||
BinaryBasicBlock *InsertBB{nullptr};
|
||||
BinaryBasicBlock *PrevBB{nullptr};
|
||||
for (auto &InstrInfo : Instructions) {
|
||||
auto LI = Labels.find(InstrInfo.first);
|
||||
if (LI != Labels.end()) {
|
||||
// Always create new BB at branch destination.
|
||||
PrevBB = InsertBB;
|
||||
InsertBB = addBasicBlock(LI->first, LI->second);
|
||||
}
|
||||
if (!InsertBB) {
|
||||
// It must be a fallthrough. Create a new block unless we see an
|
||||
// unconditional branch.
|
||||
assert(PrevBB && "no previous basic block for a fall through");
|
||||
if (MIA->isUnconditionalBranch(InstrInfo.second)) {
|
||||
// Temporarily restore inserter basic block.
|
||||
InsertBB = PrevBB;
|
||||
} else {
|
||||
InsertBB = addBasicBlock(InstrInfo.first,
|
||||
BC.Ctx->createTempSymbol("FT", true));
|
||||
}
|
||||
}
|
||||
|
||||
InsertBB->addInstruction(InstrInfo.second);
|
||||
|
||||
// How well do we detect tail calls here?
|
||||
if (MIA->isTerminator(InstrInfo.second)) {
|
||||
PrevBB = InsertBB;
|
||||
InsertBB = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
// Intermediate dump.
|
||||
DEBUG(print(dbgs(), /* PrintInstructions = */ true));
|
||||
|
||||
// TODO: handle properly calls to no-return functions,
|
||||
// e.g. exit(3), etc. Otherwise we'll see a false fall-through
|
||||
// blocks.
|
||||
|
||||
for (auto &Branch : LocalBranches) {
|
||||
|
||||
DEBUG(dbgs() << "registering branch [0x" << Twine::utohexstr(Branch.first)
|
||||
<< "] -> [0x" << Twine::utohexstr(Branch.second) << "]\n");
|
||||
BinaryBasicBlock *FromBB = getBasicBlockContainingOffset(Branch.first);
|
||||
assert(FromBB && "cannot find BB containing FROM branch");
|
||||
BinaryBasicBlock *ToBB = getBasicBlockAtOffset(Branch.second);
|
||||
assert(ToBB && "cannot find BB containing TO branch");
|
||||
|
||||
// TODO: add weights here.
|
||||
//
|
||||
FromBB->addSuccessor(ToBB);
|
||||
}
|
||||
|
||||
// Add fall-through branches.
|
||||
PrevBB = nullptr;
|
||||
bool IsPrevFT = false; // Is previous block a fall-through.
|
||||
for (auto &BB : BasicBlocks) {
|
||||
if (IsPrevFT) {
|
||||
PrevBB->addSuccessor(&BB);
|
||||
}
|
||||
|
||||
MCInst &LastInst = BB.back();
|
||||
if (BB.succ_size() == 0) {
|
||||
IsPrevFT = MIA->isTerminator(LastInst) ? false : true;
|
||||
} else if (BB.succ_size() == 1) {
|
||||
IsPrevFT = MIA->isConditionalBranch(LastInst) ? true : false;
|
||||
} else {
|
||||
// Either ends with 2 branches, or with an indirect jump.
|
||||
IsPrevFT = false;
|
||||
}
|
||||
|
||||
PrevBB = &BB;
|
||||
}
|
||||
|
||||
if (!IsPrevFT) {
|
||||
// Possibly a call that does not return.
|
||||
DEBUG(dbgs() << "last block was marked as a fall-through\n");
|
||||
}
|
||||
|
||||
// Clean-up memory taken by instructions and labels.
|
||||
clearInstructions();
|
||||
clearLabels();
|
||||
clearLocalBranches();
|
||||
|
||||
// Update the state.
|
||||
CurrentState = State::CFG;
|
||||
|
||||
// Print the function in the new state.
|
||||
DEBUG(print(dbgs(), /* PrintInstructions = */ true));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace flo
|
||||
|
||||
} // namespace llvm
|
399
bolt/BinaryFunction.h
Normal file
399
bolt/BinaryFunction.h
Normal file
@ -0,0 +1,399 @@
|
||||
//===--- BinaryFunction.h - Interface for machine-level function ----------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Interface to function in binary (machine) form. This is assembly-level
|
||||
// code representation with the control flow.
|
||||
//
|
||||
// TODO: memory management for instructions.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_TOOLS_LLVM_FLO_BINARY_FUNCTION_H
|
||||
#define LLVM_TOOLS_LLVM_FLO_BINARY_FUNCTION_H
|
||||
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/ADT/ilist.h"
|
||||
#include "llvm/MC/MCCodeEmitter.h"
|
||||
#include "llvm/MC/MCContext.h"
|
||||
#include "llvm/MC/MCDisassembler.h"
|
||||
#include "llvm/MC/MCInst.h"
|
||||
#include "llvm/MC/MCInstrAnalysis.h"
|
||||
#include "llvm/MC/MCSubtargetInfo.h"
|
||||
#include "llvm/MC/MCSymbol.h"
|
||||
#include "llvm/Object/ObjectFile.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include <limits>
|
||||
|
||||
#include "BinaryBasicBlock.h"
|
||||
#include "BinaryContext.h"
|
||||
|
||||
using namespace llvm::object;
|
||||
|
||||
namespace llvm {
|
||||
|
||||
namespace flo {
|
||||
|
||||
/// BinaryFunction is a representation of machine-level function.
|
||||
//
|
||||
/// We use the term "Binary" as "Machine" was already taken.
|
||||
class BinaryFunction {
|
||||
public:
|
||||
enum class State : char {
|
||||
Empty = 0, /// Function body is empty
|
||||
Disassembled, /// Function have been disassembled
|
||||
CFG, /// Control flow graph have been built
|
||||
Assembled, /// Function has been assembled in memory
|
||||
};
|
||||
|
||||
static constexpr uint64_t COUNT_NO_PROFILE =
|
||||
std::numeric_limits<uint64_t>::max();
|
||||
|
||||
private:
|
||||
|
||||
/// Current state of the function.
|
||||
State CurrentState{State::Empty};
|
||||
|
||||
/// Name of the function as we know it.
|
||||
std::string Name;
|
||||
|
||||
/// Symbol associated with this function.
|
||||
SymbolRef Symbol;
|
||||
|
||||
/// Containing section
|
||||
SectionRef Section;
|
||||
|
||||
/// Address of the function in memory. Also could be an offset from
|
||||
/// base address for position independent binaries.
|
||||
uint64_t Address;
|
||||
|
||||
/// Original size of the function.
|
||||
uint64_t Size;
|
||||
|
||||
/// Offset in the file.
|
||||
uint64_t FileOffset{0};
|
||||
|
||||
/// Maximum size this function is allowed to have.
|
||||
uint64_t MaxSize{std::numeric_limits<uint64_t>::max()};
|
||||
|
||||
/// Alignment requirements for the function.
|
||||
uint64_t Alignment{1};
|
||||
|
||||
/// False if the function is too complex to reconstruct its control
|
||||
/// flow graph and re-assemble.
|
||||
bool IsSimple{true};
|
||||
|
||||
BinaryContext &BC;
|
||||
|
||||
/// The address for the code for this function in codegen memory.
|
||||
uint64_t ImageAddress{0};
|
||||
|
||||
/// The size of the code in memory.
|
||||
uint64_t ImageSize{0};
|
||||
|
||||
/// Name for the section this function code should reside in.
|
||||
std::string CodeSectionName;
|
||||
|
||||
/// The profile data for the number of times the function was executed.
|
||||
uint64_t ExecutionCount{COUNT_NO_PROFILE};
|
||||
|
||||
/// Release storage used by instructions.
|
||||
BinaryFunction &clearInstructions() {
|
||||
std::map<uint64_t, MCInst> TempMap;
|
||||
Instructions.swap(TempMap);
|
||||
return *this;
|
||||
}
|
||||
|
||||
/// Release storage used by labels.
|
||||
BinaryFunction &clearLabels() {
|
||||
std::map<uint64_t, MCSymbol *> TempMap;
|
||||
Labels.swap(TempMap);
|
||||
return *this;
|
||||
}
|
||||
|
||||
/// Release memory taken by local branch info.
|
||||
BinaryFunction &clearLocalBranches() {
|
||||
std::vector<std::pair<uint64_t, uint64_t>> TempVector;
|
||||
LocalBranches.swap(TempVector);
|
||||
return *this;
|
||||
}
|
||||
|
||||
BinaryFunction &updateState(BinaryFunction::State State) {
|
||||
CurrentState = State;
|
||||
return *this;
|
||||
}
|
||||
|
||||
public:
|
||||
std::vector<std::pair<uint64_t, uint64_t>> LocalBranches;
|
||||
|
||||
std::map<uint64_t, MCSymbol *> Labels;
|
||||
|
||||
/// Temporary holder of instructions before CFG is constructed.
|
||||
std::map<uint64_t, MCInst> Instructions;
|
||||
|
||||
// Blocks are kept sorted in the layout order. If we need to change the
|
||||
// layout, the terminating instructions need to be modified.
|
||||
typedef std::vector<BinaryBasicBlock> BasicBlockListType;
|
||||
BasicBlockListType BasicBlocks;
|
||||
|
||||
typedef BasicBlockListType::iterator iterator;
|
||||
typedef BasicBlockListType::const_iterator const_iterator;
|
||||
typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
|
||||
typedef std::reverse_iterator<iterator> reverse_iterator;
|
||||
|
||||
// CFG iterators.
|
||||
iterator begin() { return BasicBlocks.begin(); }
|
||||
const_iterator begin() const { return BasicBlocks.begin(); }
|
||||
iterator end () { return BasicBlocks.end(); }
|
||||
const_iterator end () const { return BasicBlocks.end(); }
|
||||
|
||||
reverse_iterator rbegin() { return BasicBlocks.rbegin(); }
|
||||
const_reverse_iterator rbegin() const { return BasicBlocks.rbegin(); }
|
||||
reverse_iterator rend () { return BasicBlocks.rend(); }
|
||||
const_reverse_iterator rend () const { return BasicBlocks.rend(); }
|
||||
|
||||
unsigned size() const { return (unsigned)BasicBlocks.size();}
|
||||
bool empty() const { return BasicBlocks.empty(); }
|
||||
const BinaryBasicBlock &front() const { return BasicBlocks.front(); }
|
||||
BinaryBasicBlock &front() { return BasicBlocks.front(); }
|
||||
const BinaryBasicBlock & back() const { return BasicBlocks.back(); }
|
||||
BinaryBasicBlock & back() { return BasicBlocks.back(); }
|
||||
|
||||
|
||||
/// Construct a function description from its symbol-table data.
///
/// \p Name, \p Symbol, \p Section, \p Address and \p Size are stored as-is;
/// \p BC is kept by reference. The name of the section the function code is
/// emitted into is derived from the name as ".text.<Name>".
BinaryFunction(StringRef Name, SymbolRef Symbol, SectionRef Section,
               uint64_t Address, uint64_t Size, BinaryContext &BC)
    : Name(Name),
      Symbol(Symbol),
      Section(Section),
      Address(Address),
      Size(Size),
      BC(BC),
      CodeSectionName((".text." + Name).str()) {}
|
||||
|
||||
/// Perform optimal code layout based on edge frequencies making necessary
|
||||
/// adjustments to instructions at the end of basic blocks.
|
||||
void optimizeLayout();
|
||||
|
||||
/// View CFG in graphviz program
|
||||
void viewGraph();
|
||||
|
||||
/// Basic block iterator
|
||||
|
||||
/// Return the name of the function as extracted from the binary file.
|
||||
StringRef getName() const {
|
||||
return Name;
|
||||
}
|
||||
|
||||
/// Return symbol associated with the function start.
|
||||
SymbolRef getSymbol() const {
|
||||
return Symbol;
|
||||
}
|
||||
|
||||
/// Return containing file section.
|
||||
SectionRef getSection() const {
|
||||
return Section;
|
||||
}
|
||||
|
||||
/// Return original address of the function (or offset from base for PIC).
|
||||
uint64_t getAddress() const {
|
||||
return Address;
|
||||
}
|
||||
|
||||
/// Return offset of the function body in the binary file.
|
||||
uint64_t getFileOffset() const {
|
||||
return FileOffset;
|
||||
}
|
||||
|
||||
/// Return (original) size of the function.
|
||||
uint64_t getSize() const {
|
||||
return Size;
|
||||
}
|
||||
|
||||
/// Return the maximum size the body of the function could have.
|
||||
uint64_t getMaxSize() const {
|
||||
return MaxSize;
|
||||
}
|
||||
|
||||
/// Return internal section name for this function.
|
||||
StringRef getCodeSectionName() const {
|
||||
assert(!CodeSectionName.empty() && "no section name for function");
|
||||
return StringRef(CodeSectionName);
|
||||
}
|
||||
|
||||
/// Return true if the function could be correctly processed.
|
||||
bool isSimple() const {
|
||||
return IsSimple;
|
||||
}
|
||||
|
||||
/// Return true if the given address \p PC is inside the function body.
|
||||
bool containsAddress(uint64_t PC) const {
|
||||
return Address <= PC && PC < Address + Size;
|
||||
}
|
||||
|
||||
/// Create a basic block at a given \p Offset in the
|
||||
/// function and append it to the end of list of blocks.
|
||||
/// Returns NULL if basic block already exists at the \p Offset.
|
||||
BinaryBasicBlock *addBasicBlock(uint64_t Offset, MCSymbol *Label = nullptr) {
|
||||
assert(!getBasicBlockAtOffset(Offset) && "basic block already exists");
|
||||
if (!Label)
|
||||
Label = BC.Ctx->createTempSymbol("BB", true);
|
||||
BasicBlocks.emplace_back(BinaryBasicBlock(Label, Offset));
|
||||
|
||||
return &BasicBlocks.back();
|
||||
}
|
||||
|
||||
/// Return the basic block starting at \p Offset, creating it with
/// addBasicBlock(\p Offset, \p Label) if no such block exists yet.
/// \p Label is only used when a new block is created.
BinaryBasicBlock *getOrCreateBasicBlockAt(uint64_t Offset,
                                          MCSymbol *Label = nullptr) {
  if (BinaryBasicBlock *Existing = getBasicBlockAtOffset(Offset))
    return Existing;

  return addBasicBlock(Offset, Label);
}
|
||||
|
||||
/// Return basic block that started at offset \p Offset.
|
||||
BinaryBasicBlock *getBasicBlockAtOffset(uint64_t Offset) {
|
||||
BinaryBasicBlock *BB = getBasicBlockContainingOffset(Offset);
|
||||
if (BB && BB->Offset == Offset)
|
||||
return BB;
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/// Return basic block that originally contained offset \p Offset
|
||||
/// from the function start.
|
||||
BinaryBasicBlock *getBasicBlockContainingOffset(uint64_t Offset) {
|
||||
if (Offset > Size)
|
||||
return nullptr;
|
||||
|
||||
if (BasicBlocks.empty())
|
||||
return nullptr;
|
||||
|
||||
auto I = std::lower_bound(BasicBlocks.begin(),
|
||||
BasicBlocks.end(),
|
||||
BinaryBasicBlock(Offset));
|
||||
|
||||
if (I == BasicBlocks.end())
|
||||
return &BasicBlocks.back();
|
||||
|
||||
return &(*I);
|
||||
}
|
||||
|
||||
/// Dump function information to debug output. If \p PrintInstructions
|
||||
/// is true - include instruction disassembly.
|
||||
void dump(bool PrintInstructions = false) const {
|
||||
print(dbgs(), PrintInstructions);
|
||||
}
|
||||
|
||||
/// Print function information to the \p OS stream.
|
||||
void print(raw_ostream &OS, bool PrintInstructions = false) const;
|
||||
|
||||
/// Record \p Instruction at \p Offset from the function start in the
/// temporary instruction map. The instruction is moved into the map; as with
/// any std::map::emplace, an entry already present at \p Offset is kept.
void addInstruction(uint64_t Offset, MCInst &&Instruction) {
  Instructions.emplace(Offset, std::move(Instruction));
}
|
||||
|
||||
/// Set the offset of the function body in the binary file.
/// Returns *this to allow chaining.
BinaryFunction &setFileOffset(uint64_t Offset) {
  this->FileOffset = Offset;
  return *this;
}
|
||||
|
||||
/// Set the maximum size the function body is allowed to have.
/// Returns *this to allow chaining.
BinaryFunction &setMaxSize(uint64_t Size) {
  // The parameter hides the Size member; only MaxSize is modified.
  this->MaxSize = Size;
  return *this;
}
|
||||
|
||||
/// Mark the function as simple (\p Simple == true) or as too complex to be
/// processed (\p Simple == false). Returns *this to allow chaining.
BinaryFunction &setSimple(bool Simple) {
  this->IsSimple = Simple;
  return *this;
}
|
||||
|
||||
/// Set the alignment requirement for the function.
/// Returns *this to allow chaining.
BinaryFunction &setAlignment(uint64_t Align) {
  this->Alignment = Align;
  return *this;
}
|
||||
|
||||
uint64_t getAlignment() const {
|
||||
return Alignment;
|
||||
}
|
||||
|
||||
/// Set the address of the function's code in codegen memory.
/// Returns *this to allow chaining.
BinaryFunction &setImageAddress(uint64_t Address) {
  // The parameter hides the Address member; only ImageAddress is modified.
  this->ImageAddress = Address;
  return *this;
}
|
||||
|
||||
/// Return the address of this function' image in memory.
|
||||
uint64_t getImageAddress() const {
|
||||
return ImageAddress;
|
||||
}
|
||||
|
||||
/// Set the size of the function's code in memory.
/// Returns *this to allow chaining.
BinaryFunction &setImageSize(uint64_t Size) {
  // The parameter hides the Size member; only ImageSize is modified.
  this->ImageSize = Size;
  return *this;
}
|
||||
|
||||
/// Return the size of this function' image in memory.
|
||||
uint64_t getImageSize() const {
|
||||
return ImageSize;
|
||||
}
|
||||
|
||||
/// Set the profile data for the number of times the function was called.
|
||||
BinaryFunction &setExecutionCount(uint64_t Count) {
|
||||
ExecutionCount = Count;
|
||||
return *this;
|
||||
}
|
||||
|
||||
/// Return the profile information about the number of times
|
||||
/// the function was executed.
|
||||
///
|
||||
/// Return COUNT_NO_PROFILE if there's no profile info.
|
||||
uint64_t getExecutionCount() const {
|
||||
return ExecutionCount;
|
||||
}
|
||||
|
||||
/// Disassemble function from raw data \p FunctionData.
|
||||
/// If successful, this function will populate the list of instructions
|
||||
/// for this function together with offsets from the function start
|
||||
/// in the input. It will also populate Labels with destinations for
|
||||
/// local branches, and LocalBranches with [from, to] info.
|
||||
///
|
||||
/// \p FunctionData is the set of bytes representing the function body.
|
||||
///
|
||||
/// The Function should be properly initialized before this function
|
||||
/// is called. I.e. function address and size should be set.
|
||||
///
|
||||
/// Returns true on successful disassembly, and updates the current
|
||||
/// state to State::Disassembled.
|
||||
///
|
||||
/// Returns false if disassembly failed.
|
||||
bool disassemble(ArrayRef<uint8_t> FunctionData);
|
||||
|
||||
/// Builds a list of basic blocks with successor and predecessor info.
|
||||
///
|
||||
/// The function should be in the Disassembled state prior to the call.
|
||||
///
|
||||
/// Returns true on success and updates the current function state to
|
||||
/// State::CFG. Returns false if CFG cannot be built.
|
||||
bool buildCFG();
|
||||
|
||||
/// Virtual destructor; nothing beyond member destruction is required.
virtual ~BinaryFunction() = default;
|
||||
};
|
||||
|
||||
/// Print a human-readable name of \p State to \p OS and return the stream.
/// States without a dedicated name are printed as "<unknown>".
inline raw_ostream &operator<<(raw_ostream &OS,
                               const BinaryFunction::State State) {
  const char *Text = "<unknown>";

  switch (State) {
  case BinaryFunction::State::Empty:
    Text = "empty";
    break;
  case BinaryFunction::State::Disassembled:
    Text = "disassembled";
    break;
  case BinaryFunction::State::CFG:
    Text = "CFG constructed";
    break;
  case BinaryFunction::State::Assembled:
    Text = "assembled";
    break;
  default:
    break;
  }

  OS << Text;
  return OS;
}
|
||||
|
||||
} // namespace flo
|
||||
|
||||
} // namespace llvm
|
||||
|
||||
#endif
|
@ -2,7 +2,6 @@ set(LLVM_LINK_COMPONENTS
|
||||
${LLVM_TARGETS_TO_BUILD}
|
||||
CodeGen
|
||||
Core
|
||||
DebugInfoDWARF
|
||||
MC
|
||||
MCDisassembler
|
||||
MCParser
|
||||
@ -13,4 +12,6 @@ set(LLVM_LINK_COMPONENTS
|
||||
|
||||
add_llvm_tool(llvm-flo
|
||||
llvm-flo.cpp
|
||||
BinaryBasicBlock.cpp
|
||||
BinaryFunction.cpp
|
||||
)
|
||||
|
@ -19,4 +19,4 @@
|
||||
type = Tool
|
||||
name = llvm-flo
|
||||
parent = Tools
|
||||
required_libraries = DebugInfoDWARF MC MCDisassembler MCParser Object all-targets
|
||||
required_libraries = MC MCDisassembler MCParser Object all-targets
|
||||
|
@ -7,12 +7,17 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This is a binary optimizer that will take 'perf' output and change
|
||||
// basic block layout for better performance (a.k.a. branch straightening),
|
||||
// plus some other optimizations that are better performed on a binary.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
|
||||
#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
|
||||
#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
|
||||
#include "llvm/MC/MCAsmBackend.h"
|
||||
#include "llvm/MC/MCAsmInfo.h"
|
||||
#include "llvm/MC/MCContext.h"
|
||||
#include "llvm/MC/MCDisassembler.h"
|
||||
@ -20,7 +25,10 @@
|
||||
#include "llvm/MC/MCInstrAnalysis.h"
|
||||
#include "llvm/MC/MCInstrInfo.h"
|
||||
#include "llvm/MC/MCObjectFileInfo.h"
|
||||
#include "llvm/MC/MCObjectStreamer.h"
|
||||
#include "llvm/MC/MCRegisterInfo.h"
|
||||
#include "llvm/MC/MCSection.h"
|
||||
#include "llvm/MC/MCSectionELF.h"
|
||||
#include "llvm/MC/MCStreamer.h"
|
||||
#include "llvm/MC/MCSubtargetInfo.h"
|
||||
#include "llvm/MC/MCSymbol.h"
|
||||
@ -38,13 +46,20 @@
|
||||
#include "llvm/Support/ToolOutputFile.h"
|
||||
#include "llvm/Target/TargetMachine.h"
|
||||
|
||||
#include "BinaryBasicBlock.h"
|
||||
#include "BinaryContext.h"
|
||||
#include "BinaryFunction.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#include <system_error>
|
||||
|
||||
#undef DEBUG_TYPE
|
||||
#define DEBUG_TYPE "flo"
|
||||
|
||||
using namespace llvm;
|
||||
using namespace object;
|
||||
using namespace flo;
|
||||
|
||||
// Tool options.
|
||||
static cl::opt<std::string>
|
||||
@ -57,11 +72,16 @@ static cl::opt<std::string>
|
||||
OutputFilename("o", cl::desc("<output file>"), cl::Required);
|
||||
|
||||
static cl::list<std::string>
|
||||
FunctionNames("funcs", cl::desc("list of functions to optimzize"),
|
||||
cl::Optional);
|
||||
FunctionNames("funcs",
|
||||
cl::CommaSeparated,
|
||||
cl::desc("list of functions to optimize"),
|
||||
cl::value_desc("func1,func2,func3,..."));
|
||||
|
||||
static cl::opt<bool>
|
||||
EliminateUnreachable("eliminate-unreachable",
|
||||
cl::desc("eliminate unreachable code"),
|
||||
cl::Optional);
|
||||
|
||||
// Tool name used for reporting.
|
||||
static StringRef ToolName;
|
||||
|
||||
static void report_error(StringRef Message, std::error_code EC) {
|
||||
@ -70,6 +90,576 @@ static void report_error(StringRef Message, std::error_code EC) {
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/// Terminate the tool with exit code 1 if \p EC holds an error, after
/// printing the error message to errs(). Does nothing when \p EC is clear.
static void error(std::error_code EC) {
  if (EC) {
    errs() << ToolName << ": error reading file: " << EC.message() << ".\n";
    exit(1);
  }
}
|
||||
|
||||
/// Build a one-element vector holding \p t. The argument is moved into the
/// vector, so move-only types (e.g. std::unique_ptr) are supported.
template <typename T>
static std::vector<T> singletonSet(T t) {
  std::vector<T> Result;
  Result.emplace_back(std::move(t));
  return Result;
}
|
||||
|
||||
/// Class responsible for allocating and managing code and data sections.
|
||||
class ExecutableFileMemoryManager : public SectionMemoryManager {
|
||||
public:
|
||||
|
||||
// Keep [section name] -> [allocated address, size] map for later remapping.
|
||||
std::map<std::string, std::pair<uint64_t,uint64_t>> SectionAddressInfo;
|
||||
|
||||
ExecutableFileMemoryManager() {}
|
||||
|
||||
uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
|
||||
unsigned SectionID,
|
||||
StringRef SectionName) override {
|
||||
auto ret =
|
||||
SectionMemoryManager::allocateCodeSection(Size, Alignment, SectionID,
|
||||
SectionName);
|
||||
DEBUG(dbgs() << "FLO: allocating code section : " << SectionName
|
||||
<< " with size " << Size << ", alignment " << Alignment
|
||||
<< " at 0x" << ret << "\n");
|
||||
|
||||
SectionAddressInfo[SectionName] = {reinterpret_cast<uint64_t>(ret), Size};
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
|
||||
unsigned SectionID, StringRef SectionName,
|
||||
bool IsReadOnly) override {
|
||||
DEBUG(dbgs() << "FLO: allocating data section : " << SectionName
|
||||
<< " with size " << Size << ", alignment "
|
||||
<< Alignment << "\n");
|
||||
errs() << "FLO-WARNING: allocating data section.\n";
|
||||
return SectionMemoryManager::allocateDataSection(Size, Alignment, SectionID,
|
||||
SectionName, IsReadOnly);
|
||||
}
|
||||
|
||||
// Tell EE that we guarantee we don't need stubs.
|
||||
bool allowStubAllocation() const override { return false; }
|
||||
|
||||
bool finalizeMemory(std::string *ErrMsg = nullptr) override {
|
||||
DEBUG(dbgs() << "FLO: finalizeMemory()\n");
|
||||
return SectionMemoryManager::finalizeMemory(ErrMsg);
|
||||
}
|
||||
};
|
||||
|
||||
/// Create BinaryContext for a given architecture \p ArchName and
|
||||
/// triple \p TripleName.
|
||||
static std::unique_ptr<BinaryContext> CreateBinaryContext(
|
||||
std::string ArchName,
|
||||
std::string TripleName) {
|
||||
|
||||
std::string Error;
|
||||
|
||||
std::unique_ptr<Triple> TheTriple = llvm::make_unique<Triple>(TripleName);
|
||||
const Target *TheTarget = TargetRegistry::lookupTarget(ArchName,
|
||||
*TheTriple,
|
||||
Error);
|
||||
if (!TheTarget) {
|
||||
errs() << ToolName << ": " << Error;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
std::unique_ptr<const MCRegisterInfo> MRI(
|
||||
TheTarget->createMCRegInfo(TripleName));
|
||||
if (!MRI) {
|
||||
errs() << "error: no register info for target " << TripleName << "\n";
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Set up disassembler.
|
||||
std::unique_ptr<const MCAsmInfo> AsmInfo(
|
||||
TheTarget->createMCAsmInfo(*MRI, TripleName));
|
||||
if (!AsmInfo) {
|
||||
errs() << "error: no assembly info for target " << TripleName << "\n";
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
std::unique_ptr<const MCSubtargetInfo> STI(
|
||||
TheTarget->createMCSubtargetInfo(TripleName, "", ""));
|
||||
if (!STI) {
|
||||
errs() << "error: no subtarget info for target " << TripleName << "\n";
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
|
||||
if (!MII) {
|
||||
errs() << "error: no instruction info for target " << TripleName << "\n";
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
std::unique_ptr<MCObjectFileInfo> MOFI =
|
||||
llvm::make_unique<MCObjectFileInfo>();
|
||||
std::unique_ptr<MCContext> Ctx =
|
||||
llvm::make_unique<MCContext>(AsmInfo.get(), MRI.get(), MOFI.get());
|
||||
MOFI->InitMCObjectFileInfo(*TheTriple, Reloc::Default,
|
||||
CodeModel::Default, *Ctx);
|
||||
|
||||
std::unique_ptr<MCDisassembler> DisAsm(
|
||||
TheTarget->createMCDisassembler(*STI, *Ctx));
|
||||
|
||||
if (!DisAsm) {
|
||||
errs() << "error: no disassembler for target " << TripleName << "\n";
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
std::unique_ptr<const MCInstrAnalysis> MIA(
|
||||
TheTarget->createMCInstrAnalysis(MII.get()));
|
||||
if (!MIA) {
|
||||
errs() << "error: failed to create instruction analysis for target"
|
||||
<< TripleName << "\n";
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
|
||||
std::unique_ptr<MCInstPrinter> InstructionPrinter(
|
||||
TheTarget->createMCInstPrinter(Triple(TripleName), AsmPrinterVariant,
|
||||
*AsmInfo, *MII, *MRI));
|
||||
if (!InstructionPrinter) {
|
||||
errs() << "error: no instruction printer for target " << TripleName
|
||||
<< '\n';
|
||||
return nullptr;
|
||||
}
|
||||
InstructionPrinter->setPrintImmHex(true);
|
||||
|
||||
auto MCE = TheTarget->createMCCodeEmitter(*MII, *MRI, *Ctx);
|
||||
|
||||
auto MAB = TheTarget->createMCAsmBackend(*MRI, TripleName, "");
|
||||
|
||||
// Make sure we don't miss any output on core dumps.
|
||||
outs().SetUnbuffered();
|
||||
errs().SetUnbuffered();
|
||||
dbgs().SetUnbuffered();
|
||||
|
||||
auto BC =
|
||||
llvm::make_unique<BinaryContext>(std::move(Ctx),
|
||||
std::move(TheTriple),
|
||||
TheTarget,
|
||||
MCE,
|
||||
std::move(MOFI),
|
||||
std::move(AsmInfo),
|
||||
std::move(MII),
|
||||
std::move(STI),
|
||||
std::move(InstructionPrinter),
|
||||
std::move(MIA),
|
||||
std::move(MRI),
|
||||
std::move(DisAsm),
|
||||
MAB);
|
||||
|
||||
return BC;
|
||||
}
|
||||
|
||||
/// Rewrite the simple functions of the ELF file \p File.
///
/// The steps are:
///  1. collect function symbols from the symbol table,
///  2. disassemble every function and build its control flow graph,
///  3. run the (currently diagnostic-only) unreachable block detection,
///  4. emit the selected functions into an in-memory object file, one
///     section per function,
///  5. link that object so every section is mapped to the original function
///     address, and
///  6. copy the input file to the output and overwrite each rewritten
///     function in place, padding with nops up to its maximum size.
///
/// Two files are produced: "<output>.o" with the emitted object and
/// "<output>" with the rewritten binary.
static void OptimizeFile(ELFObjectFileBase *File) {

  // FIXME: there should be some way to extract arch and triple information
  //        from the file.
  std::unique_ptr<BinaryContext> BC =
      CreateBinaryContext("x86-64", "x86_64-unknown-linux");
  if (!BC) {
    errs() << "failed to create a binary context\n";
    return;
  }

  // Store all non-zero file symbols in this map for quick address lookup.
  std::map<uint64_t, SymbolRef> FileSymRefs;

  // Entry point to the binary.
  //
  // Note: this is ELF header entry point, but we could have more entry points
  // from constructors etc.
  BinaryFunction *EntryPointFunction{nullptr};

  // Populate array of binary functions and file symbols
  // from file symbol table.
  //
  // For local symbols we want to keep track of associated FILE symbol for
  // disambiguation by name.
  std::map<uint64_t, BinaryFunction> BinaryFunctions;
  StringRef FileSymbolName;
  for (const SymbolRef &Symbol : File->symbols()) {
    // Keep undefined symbols for pretty printing?
    if (Symbol.getFlags() & SymbolRef::SF_Undefined)
      continue;

    ErrorOr<StringRef> Name = Symbol.getName();
    error(Name.getError());

    if (Symbol.getType() == SymbolRef::ST_File) {
      // Could be used for local symbol disambiguation.
      FileSymbolName = *Name;
      continue;
    }

    ErrorOr<uint64_t> AddressOrErr = Symbol.getAddress();
    error(AddressOrErr.getError());
    uint64_t Address = *AddressOrErr;
    if (Address == 0) {
      if (Symbol.getType() == SymbolRef::ST_Function)
        errs() << "FLO-WARNING: function with 0 address seen\n";
      continue;
    }

    FileSymRefs[Address] = Symbol;

    // Only consider ST_Function symbols for functions. Although this
    // assumption  could be broken by assembly functions for which the type
    // could be wrong.
    if (Symbol.getType() != SymbolRef::ST_Function) {
      // FIXME: add it to the address map.
      continue;
    }

    // TODO: populate address map with PLT entries for better readability.

    // Ignore function with 0 size for now (possibly coming from assembly).
    auto SymbolSize = ELFSymbolRef(Symbol).getSize();
    if (SymbolSize == 0)
      continue;

    // There's nothing horribly wrong with anonymous symbols, but let's
    // ignore them for now.
    if (Name->empty())
      continue;

    ErrorOr<section_iterator> SectionOrErr = Symbol.getSection();
    error(SectionOrErr.getError());
    section_iterator Section = *SectionOrErr;
    if (Section == File->section_end()) {
      // Could be an absolute symbol. Could record for pretty printing.
      continue;
    }

    // Disambiguate local function name. Since we don't know if we'll see
    // a global with the same name, always modify the local function name.
    std::string UniqueFunctionName;
    if (!(Symbol.getFlags() & SymbolRef::SF_Global)) {
      // Materialize the prefix as a std::string right away: a Twine only
      // references its operands, so keeping the concatenation in a local
      // Twine would leave it pointing at destroyed temporaries.
      const std::string LocalName =
          (*Name + "/" + FileSymbolName + "/").str();
      unsigned LocalCount = 1;
      UniqueFunctionName = (Twine(LocalName) + Twine(LocalCount)).str();
      while (BC->GlobalSymbols.find(UniqueFunctionName) !=
             BC->GlobalSymbols.end()) {
        ++LocalCount;
        UniqueFunctionName = (Twine(LocalName) + Twine(LocalCount)).str();
      }
    } else {
      assert(BC->GlobalSymbols.find(*Name) == BC->GlobalSymbols.end() &&
             "global name not unique");
      UniqueFunctionName = *Name;
    }

    // Create the function and add to the map.
    BinaryFunctions.emplace(
        Address,
        BinaryFunction(UniqueFunctionName, Symbol, *Section, Address,
                       SymbolSize, *BC)
    );

    // Add the name to global symbols map.
    BC->GlobalSymbols[UniqueFunctionName] = Address;

    // Add to the reverse map.
    BC->GlobalAddresses.emplace(std::make_pair(Address, UniqueFunctionName));
  }

  // Disassemble every function and build its control flow graph.
  for (auto &BFI : BinaryFunctions) {
    BinaryFunction &Function = BFI.second;

    SectionRef Section = Function.getSection();
    assert(Section.containsSymbol(Function.getSymbol()) &&
           "symbol not in section");

    // When could it happen?
    if (!Section.isText() || Section.isVirtual() || !Section.getSize()) {
      DEBUG(dbgs() << "FLO: corresponding section non-executable or empty "
                   << "for function " << Function.getName() << "\n");
      continue;
    }

    // Set the proper maximum size value after the whole symbol table
    // has been processed.
    auto SymRefI = FileSymRefs.upper_bound(Function.getAddress());
    if (SymRefI != FileSymRefs.end()) {
      auto MaxSize = SymRefI->first - Function.getAddress();
      assert(MaxSize >= Function.getSize() &&
             "symbol seen in the middle of the function");
      Function.setMaxSize(MaxSize);
    } else {
      // No symbol follows this function. Bound it by the end of its section
      // so that the size check and the nop padding below work with a finite
      // value instead of the "unlimited" default.
      Function.setMaxSize(Section.getAddress() + Section.getSize() -
                          Function.getAddress());
    }

    StringRef SectionContents;
    error(Section.getContents(SectionContents));

    assert(SectionContents.size() == Section.getSize() &&
           "section size mismatch");

    // Function offset from the section start.
    auto FunctionOffset = Function.getAddress() - Section.getAddress();

    // Offset of the function in the file.
    Function.setFileOffset(
        SectionContents.data() - File->getData().data() + FunctionOffset);

    ArrayRef<uint8_t> FunctionData(
        reinterpret_cast<const uint8_t *>
          (SectionContents.data()) + FunctionOffset,
        Function.getSize());

    if (!Function.disassemble(FunctionData))
      continue;

    if (!Function.buildCFG())
      continue;

  } // Iterate over all functions

  // Run optimization passes.
  //
  // FIXME: use real optimization passes.
  for (auto &BFI : BinaryFunctions) {
    auto &Function = BFI.second;
    // Detect and eliminate unreachable basic blocks. We could have those
    // filled with nops and they are used for alignment.
    //
    // FIXME: this wouldn't work with C++ exceptions until we implement
    //        support for those as there will be "invisible" edges
    //        in the graph.
    if (EliminateUnreachable) {
      bool IsFirst = true;
      for (auto &BB : Function) {
        if (!IsFirst && BB.pred_empty()) {
          outs() << "FLO: basic block " << BB.getName() << " in function "
                 << Function.getName() << " is dead\n";
          // TODO: currently lacking interface to eliminate basic block.
        }
        IsFirst = false;
      }
      DEBUG(dbgs() << "*** After unreachable block elimination ***\n");
      DEBUG(Function.print(dbgs(), /* PrintInstructions = */ true));
    }
  }

  std::error_code EC;
  std::unique_ptr<tool_output_file> Out =
      llvm::make_unique<tool_output_file>(OutputFilename + ".o",
                                          EC,sys::fs::F_None);

  if (EC) {
    // Report the failure instead of silently producing no output.
    report_error(OutputFilename + ".o", EC);
    return;
  }

  std::unique_ptr<tool_output_file> RealOut =
      llvm::make_unique<tool_output_file>(OutputFilename, EC, sys::fs::F_None,
                                          0777);
  if (EC) {
    // Report the failure instead of silently producing no output.
    report_error(OutputFilename, EC);
    return;
  }

  // Copy input file.
  RealOut->os() << File->getData();

  std::unique_ptr<buffer_ostream> BOS =
      make_unique<buffer_ostream>(Out->os());
  raw_pwrite_stream *OS = BOS.get();

  // Implicitly MCObjectStreamer takes ownership of MCAsmBackend (MAB)
  // and MCCodeEmitter (MCE). ~MCObjectStreamer() will delete these
  // two instances.
  std::unique_ptr<MCStreamer> Streamer(
      BC->TheTarget->createMCObjectStreamer(*BC->TheTriple,
                                            *BC->Ctx,
                                            *BC->MAB,
                                            *OS,
                                            BC->MCE,
                                            *BC->STI,
                                            /* RelaxAll */ false,
                                            /* DWARFMustBeAtTheEnd */ false));

  Streamer->InitSections(false);

  // Output functions one by one.
  for (auto &BFI : BinaryFunctions) {
    auto &Function = BFI.second;

    if (!Function.isSimple())
      continue;

    // Only overwrite functions from the list if non-empty.
    if (!FunctionNames.empty()) {
      const bool IsRequested =
          std::any_of(FunctionNames.begin(), FunctionNames.end(),
                      [&Function](const std::string &Name) {
                        return Function.getName() == Name;
                      });
      if (!IsRequested)
        continue;
    }

    DEBUG(dbgs() << "FLO: generating code for function \""
                 << Function.getName() << "\"\n");

    // No need for human readability?
    // FIXME: what difference does it make in reality?
    //Ctx.setUseNamesOnTempLabels(false);

    // Emit function start

    // Each function is emitted into its own section.
    MCSectionELF *FunctionSection =
        BC->Ctx->getELFSection(Function.getCodeSectionName(),
                               ELF::SHT_PROGBITS,
                               ELF::SHF_EXECINSTR | ELF::SHF_ALLOC);

    MCSection *Section = FunctionSection;
    Streamer->SwitchSection(Section);

    Streamer->EmitCodeAlignment(Function.getAlignment());

    MCSymbol *FunctionSymbol = BC->Ctx->getOrCreateSymbol(Function.getName());
    Streamer->EmitSymbolAttribute(FunctionSymbol, MCSA_ELF_TypeFunction);
    Streamer->EmitLabel(FunctionSymbol);

    // Emit code.
    for (const auto &BB : Function) {
      Streamer->EmitLabel(BB.getLabel());
      for (const auto &Instr : BB) {
        Streamer->EmitInstruction(Instr, *BC->STI);
      }
    }

    // TODO: is there any use in emitting end of function?
    //       Perhaps once we have a support for C++ exceptions.
    //auto FunctionEndLabel = Ctx.createTempSymbol("func_end");
    //Streamer->EmitLabel(FunctionEndLabel);
    //Streamer->emitELFSize(FunctionSymbol, MCExpr());
  }

  Streamer->Finish();

  // Get output object as ObjectFile.
  std::unique_ptr<MemoryBuffer> ObjectMemBuffer =
      MemoryBuffer::getMemBuffer(BOS->str(), "in-memory object file", false);
  ErrorOr<std::unique_ptr<object::ObjectFile>> ObjOrErr =
      object::ObjectFile::createObjectFile(ObjectMemBuffer->getMemBufferRef());

  if (std::error_code EC = ObjOrErr.getError()) {
    report_error(InputFilename, EC);
    return;
  }

  std::unique_ptr<ExecutableFileMemoryManager>
      EFMM(new ExecutableFileMemoryManager());

  // FIXME: use notifyObjectLoaded() to remap sections.

  DEBUG(dbgs() << "Creating OLT\n");
  // Run ObjectLinkingLayer() with custom memory manager and symbol resolver.
  orc::ObjectLinkingLayer<> OLT;

  auto Resolver = orc::createLambdaResolver(
      [&](const std::string &Name) {
        DEBUG(dbgs() << "FLO: looking for " << Name << "\n");
        auto I = BC->GlobalSymbols.find(Name);
        if (I == BC->GlobalSymbols.end())
          return RuntimeDyld::SymbolInfo(nullptr);
        return RuntimeDyld::SymbolInfo(I->second,
                                       JITSymbolFlags::None);
      },
      [](const std::string &S) {
        DEBUG(dbgs() << "FLO: resolving " << S << "\n");
        return nullptr;
      }
  );
  // FIXME:
  auto ObjectsHandle = OLT.addObjectSet(
      singletonSet(std::move(ObjOrErr.get())),
      EFMM.get(),
      //std::move(EFMM),
      std::move(Resolver));
  //OLT.takeOwnershipOfBuffers(ObjectsHandle, );

  // Map every function/section current address in memory to that in
  // the output binary.
  for (auto &BFI : BinaryFunctions) {
    auto &Function = BFI.second;
    if (!Function.isSimple())
      continue;

    auto SAI = EFMM->SectionAddressInfo.find(Function.getCodeSectionName());
    if (SAI != EFMM->SectionAddressInfo.end()) {
      DEBUG(dbgs() << "FLO: mapping 0x" << Twine::utohexstr(SAI->second.first)
                   << " to 0x" << Twine::utohexstr(Function.getAddress())
                   << '\n');
      OLT.mapSectionAddress(ObjectsHandle,
          reinterpret_cast<const void*>(SAI->second.first),
          Function.getAddress());
      Function.setImageAddress(SAI->second.first);
      Function.setImageSize(SAI->second.second);
    } else {
      errs() << "FLO: cannot remap function " << Function.getName() << "\n";
    }
  }

  OLT.emitAndFinalize(ObjectsHandle);

  // FIXME: is there a less painful way to obtain assembler/writer?
  auto &Writer =
      static_cast<MCObjectStreamer*>(Streamer.get())->getAssembler().getWriter();
  Writer.setStream(RealOut->os());

  // Overwrite function in the output file.
  for (auto &BFI : BinaryFunctions) {
    auto &Function = BFI.second;

    // Functions that were not emitted and mapped above have no image.
    if (Function.getImageAddress() == 0 || Function.getImageSize() == 0)
      continue;

    if (Function.getImageSize() > Function.getMaxSize()) {
      errs() << "FLO-WARNING: new function size (0x"
             << Twine::utohexstr(Function.getImageSize())
             << ") is larger than maximum allowed size (0x"
             << Twine::utohexstr(Function.getMaxSize())
             << ") for function " << Function.getName() << '\n';
      continue;
    }

    // Overwrite function in the output file.
    outs() << "FLO: rewriting function \"" << Function.getName() << "\"\n";
    RealOut->os().pwrite(
        reinterpret_cast<char *>(Function.getImageAddress()),
        Function.getImageSize(),
        Function.getFileOffset());

    // Write nops at the end of the function.
    auto Pos = RealOut->os().tell();
    RealOut->os().seek(Function.getFileOffset() + Function.getImageSize());
    BC->MAB->writeNopData(Function.getMaxSize() - Function.getImageSize(),
                          &Writer);
    RealOut->os().seek(Pos);
  }

  if (EntryPointFunction) {
    DEBUG(dbgs() << "FLO: entry point function is "
                 << EntryPointFunction->getName() << '\n');
  } else {
    DEBUG(dbgs() << "FLO: no entry point function was set\n");
  }

  // TODO: we should find a way to mark the binary as optimized by us.

  Out->keep();
  RealOut->keep();
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
// Print a stack trace if we signal out.
|
||||
sys::PrintStackTraceOnErrorSignal();
|
||||
@ -104,7 +694,7 @@ int main(int argc, char **argv) {
|
||||
Binary &Binary = *BinaryOrErr.get().getBinary();
|
||||
|
||||
if (ELFObjectFileBase *e = dyn_cast<ELFObjectFileBase>(&Binary)) {
|
||||
outs() << "mind blown : " << e << "!\n";
|
||||
OptimizeFile(e);
|
||||
} else {
|
||||
report_error(InputFilename, object_error::invalid_file_type);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user