BOLT: Read and tie .debug_line info to IR.

Summary:
Reads information from the DWARF .debug_line section using LLVM and
ties every MCInst to one row of a line table from the input binary. Subsequent
diffs will update this information to match the final binary layout and
output updated line tables.

(cherry picked from FBD2989813)
This commit is contained in:
Gabriel Poesia 2016-02-25 16:57:07 -08:00 committed by Maksim Panchenko
parent 62da18d32a
commit 77a6b72842
9 changed files with 264 additions and 25 deletions

View File

@ -43,5 +43,12 @@ MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address,
return Symbol;
}
// Fills OffsetToDwarfCU: every compile unit reported by the DWARF context is
// stored (as the raw pointer obtained through get()) under the value returned
// by its getOffset().
void BinaryContext::buildOffsetToDWARFCompileUnitMap() {
  for (const auto &Unit : DwCtx->compile_units()) {
    auto *const UnitPtr = Unit.get();
    const auto UnitOffset = UnitPtr->getOffset();
    OffsetToDwarfCU[UnitOffset] = UnitPtr;
  }
}
} // namespace bolt
} // namespace llvm

View File

@ -15,6 +15,8 @@
#define LLVM_TOOLS_LLVM_BOLT_BINARY_CONTEXT_H
#include "llvm/ADT/Triple.h"
#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeEmitter.h"
@ -55,8 +57,14 @@ public:
// Set of addresses we cannot relocate because we have a direct branch to it.
std::set<uint64_t> InterproceduralBranchTargets;
// Map from an offset in the .debug_info section of the binary to the
// DWARF Compilation Unit that starts at that offset.
std::map<uint32_t, DWARFCompileUnit *> OffsetToDwarfCU;
std::unique_ptr<MCContext> Ctx;
std::unique_ptr<DWARFContext> DwCtx;
std::unique_ptr<Triple> TheTriple;
const Target *TheTarget;
@ -86,6 +94,7 @@ public:
const DataReader &DR;
BinaryContext(std::unique_ptr<MCContext> Ctx,
std::unique_ptr<DWARFContext> DwCtx,
std::unique_ptr<Triple> TheTriple,
const Target *TheTarget,
std::string TripleName,
@ -98,8 +107,10 @@ public:
std::unique_ptr<const MCInstrAnalysis> MIA,
std::unique_ptr<const MCRegisterInfo> MRI,
std::unique_ptr<MCDisassembler> DisAsm,
const DataReader &DR) :
const DataReader &DR,
bool LoadDebugContext) :
Ctx(std::move(Ctx)),
DwCtx(std::move(DwCtx)),
TheTriple(std::move(TheTriple)),
TheTarget(TheTarget),
TripleName(TripleName),
@ -112,7 +123,11 @@ public:
MIA(std::move(MIA)),
MRI(std::move(MRI)),
DisAsm(std::move(DisAsm)),
DR(DR) {}
DR(DR) {
if (LoadDebugContext) {
buildOffsetToDWARFCompileUnitMap();
}
}
~BinaryContext() {}
@ -121,6 +136,11 @@ public:
/// If there are multiple symbols registered at the \p Address, then
/// return the first one.
MCSymbol *getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix);
private:
// Iterates over all DWARF compilation units and maps their offset in the
// binary to themselves in OffsetToDwarfCU.
void buildOffsetToDWARFCompileUnitMap();
};
} // namespace bolt

View File

@ -13,7 +13,9 @@
#include "BinaryBasicBlock.h"
#include "BinaryFunction.h"
#include "DataReader.h"
#include "DebugLineTableRowRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
@ -38,8 +40,35 @@ namespace opts {
static cl::opt<bool>
PrintClusters("print-clusters", cl::desc("print clusters"), cl::Optional);
// Hidden flag "print-debug-info". When set, BinaryFunction::print() looks up
// the line table of the compile unit that owns the function and appends
// " # debug line <file>:<line>[:<column>]" to every printed instruction that
// carries a line table row reference.
static cl::opt<bool>
PrintDebugInfo("print-debug-info",
cl::desc("print debug info when printing functions"),
cl::Hidden);
} // namespace opts
namespace {
// Looks up the DWARF compile unit that owns \p Address in the executable.
// .debug_aranges is queried for the address and the value it reports is used
// as the key into BC.OffsetToDwarfCU. Returns nullptr when the aranges are
// unavailable or when no compile unit is registered under that key.
DWARFCompileUnit *FindCompileUnitForAddress(uint64_t Address,
                                            const BinaryContext &BC) {
  auto Aranges = BC.DwCtx->getDebugAranges();
  if (!Aranges)
    return nullptr;

  const uint32_t UnitKey = Aranges->findAddress(Address);
  const auto Entry = BC.OffsetToDwarfCU.find(UnitKey);
  return Entry != BC.OffsetToDwarfCU.end() ? Entry->second : nullptr;
}
} // namespace
uint64_t BinaryFunction::Count = 0;
BinaryBasicBlock *
@ -135,6 +164,15 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation,
}
};
// Used in printInstruction below to print debug line information.
DWARFCompileUnit *Unit = nullptr;
const DWARFDebugLine::LineTable *LineTable = nullptr;
if (opts::PrintDebugInfo) {
Unit = FindCompileUnitForAddress(getAddress(), BC);
LineTable = Unit ? BC.DwCtx->getLineTableForUnit(Unit) : nullptr;
}
auto printInstruction = [&](const MCInst &Instruction) {
if (BC.MIA->isEHLabel(Instruction)) {
OS << " EH_LABEL: "
@ -168,6 +206,21 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation,
OS << "; action: " << Action;
}
}
// Append the source position recorded for this instruction, if any. The row
// reference is decoded from the instruction's SMLoc. Both the row index and
// the row's file index are range-checked before use, so a stale or malformed
// reference cannot index past the end of either table.
if (opts::PrintDebugInfo && LineTable) {
  auto RowRef = DebugLineTableRowRef::fromSMLoc(Instruction.getLoc());
  if (RowRef != DebugLineTableRowRef::NULL_ROW &&
      RowRef.RowIndex < LineTable->Rows.size()) {
    const auto &Row = LineTable->Rows[RowRef.RowIndex];
    const auto &FileNames = LineTable->Prologue.FileNames;
    // The file table is indexed with Row.File - 1, so a file index of 0 (or
    // one past the end of the table) has no entry to print.
    if (Row.File >= 1 && Row.File <= FileNames.size()) {
      OS << " # debug line "
         << FileNames[Row.File - 1].Name
         << ":" << Row.Line;
      if (Row.Column) {
        OS << ":" << Row.Column;
      }
    }
  }
}
OS << "\n";
// In case we need MCInst printer:
// Instr.dump_pretty(OS, InstructionPrinter.get());
@ -294,12 +347,18 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation,
OS << "End of Function \"" << getName() << "\"\n\n";
}
bool BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
bool BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData,
bool ExtractDebugLineData) {
assert(FunctionData.size() == getSize() &&
"function size does not match raw data size");
auto &Ctx = BC.Ctx;
auto &MIA = BC.MIA;
DWARFCompileUnit *CompileUnit = nullptr;
if (ExtractDebugLineData) {
CompileUnit = FindCompileUnitForAddress(getAddress(), BC);
}
// Insert a label at the beginning of the function. This will be our first
// basic block.
@ -335,16 +394,18 @@ bool BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
for (uint64_t Offset = 0; IsSimple && (Offset < getSize()); ) {
MCInst Instruction;
uint64_t Size;
uint64_t AbsoluteInstrAddr = getAddress() + Offset;
if (!BC.DisAsm->getInstruction(Instruction,
Size,
FunctionData.slice(Offset),
getAddress() + Offset,
AbsoluteInstrAddr,
nulls(),
nulls())) {
// Ignore this function. Skip to the next one.
errs() << "BOLT-WARNING: unable to disassemble instruction at offset 0x"
<< Twine::utohexstr(Offset) << " (address 0x"
<< Twine::utohexstr(getAddress() + Offset) << ") in function "
<< Twine::utohexstr(AbsoluteInstrAddr) << ") in function "
<< getName() << '\n';
IsSimple = false;
break;
@ -353,13 +414,12 @@ bool BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
if (MIA->isUnsupported(Instruction)) {
errs() << "BOLT-WARNING: unsupported instruction seen at offset 0x"
<< Twine::utohexstr(Offset) << " (address 0x"
<< Twine::utohexstr(getAddress() + Offset) << ") in function "
<< Twine::utohexstr(AbsoluteInstrAddr) << ") in function "
<< getName() << '\n';
IsSimple = false;
break;
}
uint64_t AbsoluteInstrAddr = getAddress() + Offset;
if (MIA->isBranch(Instruction) || MIA->isCall(Instruction)) {
uint64_t InstructionTarget = 0;
if (MIA->evaluateBranch(Instruction,
@ -476,6 +536,12 @@ bool BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
}
}
if (CompileUnit) {
Instruction.setLoc(
findDebugLineInformationForInstructionAt(AbsoluteInstrAddr,
CompileUnit));
}
addInstruction(Offset, std::move(Instruction));
Offset += Size;
@ -491,6 +557,45 @@ bool BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
return true;
}
// Finds the .debug_line row that covers \p Address in the line table of
// \p Unit and returns it packed into an SMLoc as a DebugLineTableRowRef
// (compile unit offset + row index). If the unit has no line table, or the
// table has no row for the address, the returned SMLoc encodes
// DebugLineTableRowRef::NULL_ROW.
SMLoc
BinaryFunction::findDebugLineInformationForInstructionAt(
    uint64_t Address,
    DWARFCompileUnit *Unit) {
  // We use the pointer in SMLoc to store an instance of DebugLineTableRowRef,
  // which occupies 64 bits. Thus, we can only proceed if the struct fits into
  // the pointer itself. Both sizes are compile-time constants, so the check
  // is done at compile time rather than with a runtime assert.
  static_assert(
      sizeof(decltype(SMLoc().getPointer())) >= sizeof(DebugLineTableRowRef),
      "Cannot fit instruction debug line information into SMLoc's pointer");

  const SMLoc NullResult = DebugLineTableRowRef::NULL_ROW.toSMLoc();

  const DWARFDebugLine::LineTable *LineTable =
      BC.DwCtx->getLineTableForUnit(Unit);
  if (!LineTable)
    return NullResult;

  const uint32_t RowIndex = LineTable->lookupAddress(Address);
  if (RowIndex == LineTable->UnknownRowIndex)
    return NullResult;

  assert(RowIndex < LineTable->Rows.size() &&
         "Line Table lookup returned invalid index.");

  // Pack through the struct's own conversion so that encoding here and
  // decoding in DebugLineTableRowRef::fromSMLoc() cannot drift apart, and so
  // that no object is written through a reinterpret_cast'ed pointer.
  const DebugLineTableRowRef InstructionLocation{
      static_cast<uint32_t>(Unit->getOffset()), RowIndex};
  return InstructionLocation.toSMLoc();
}
bool BinaryFunction::buildCFG() {
auto &MIA = BC.MIA;

View File

@ -58,7 +58,7 @@ public:
enum LayoutType : char {
/// LT_NONE - do not change layout of basic blocks
LT_NONE = 0, /// no reordering
/// LT_REVERSE - reverse the order of basic blocks, meant for testing
/// LT_REVERSE - reverse the order of basic blocks, meant for testing
/// purposes. The first basic block is left intact and the rest are
/// put in the reverse order.
LT_REVERSE,
@ -186,6 +186,14 @@ private:
return *this;
}
/// Gets debug line information for the instruction located at the given
/// address in the original binary. The SMLoc's pointer is used
/// to store this information, which is represented by a
/// DebugLineTableRowRef. The returned SMLoc encodes
/// DebugLineTableRowRef::NULL_ROW if no debug line information for this
/// instruction was found.
SMLoc findDebugLineInformationForInstructionAt(uint64_t Address,
DWARFCompileUnit *Unit);
const BinaryBasicBlock *
getOriginalLayoutSuccessor(const BinaryBasicBlock *BB) const;
@ -434,7 +442,7 @@ public:
/// function and append it to the end of list of blocks.
/// If \p DeriveAlignment is true, set the alignment of the block based
/// on the alignment of the existing offset.
///
///
/// Returns NULL if basic block already exists at the \p Offset.
BinaryBasicBlock *addBasicBlock(uint64_t Offset, MCSymbol *Label,
bool DeriveAlignment = false) {
@ -648,6 +656,10 @@ public:
///
/// \p FunctionData is the set bytes representing the function body.
///
/// \p ExtractDebugLineData is a flag indicating whether DWARF .debug_line
/// information should be looked up and tied to each disassembled
/// instruction.
///
/// The Function should be properly initialized before this function
/// is called. I.e. function address and size should be set.
///
@ -655,7 +667,8 @@ public:
/// state to State:Disassembled.
///
/// Returns false if disassembly failed.
bool disassemble(ArrayRef<uint8_t> FunctionData);
bool disassemble(ArrayRef<uint8_t> FunctionData,
bool ExtractDebugLineData = false);
/// Builds a list of basic blocks with successor and predecessor info.
///

View File

@ -17,6 +17,7 @@ add_llvm_tool(llvm-bolt
BinaryContext.cpp
BinaryFunction.cpp
DataReader.cpp
DebugLineTableRowRef.cpp
Exceptions.cpp
RewriteInstance.cpp
)

View File

@ -0,0 +1,21 @@
//===--- DebugLineTableRowRef.cpp - Identifies a row in a .debug_line table ==//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//
#include "DebugLineTableRowRef.h"
namespace llvm {
namespace bolt {
// Sentinel reference meaning "no line table row": both the compile unit field
// and the row index are set to -1U. It is compared against with
// operator==/operator!= and is what the line lookup returns when no row is
// found for an instruction.
const DebugLineTableRowRef DebugLineTableRowRef::NULL_ROW{-1U, -1U};
} // namespace bolt
} // namespace llvm

View File

@ -0,0 +1,63 @@
//===--- DebugLineTableRowRef.h - Identifies a row in a .debug_line table -===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Class that references a row in a DWARFDebugLine::LineTable by the
// .debug_info offset of the DWARF Compile Unit that owns the Line Table and
// the row index. This is tied to our IR during disassembly so that we can
// later update .debug_line information.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TOOLS_LLVM_BOLT_DEBUGLINETABLEROWREF_H
#define LLVM_TOOLS_LLVM_BOLT_DEBUGLINETABLEROWREF_H
#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
#include "llvm/Support/SMLoc.h"
#include <cstring>
namespace llvm {
namespace bolt {
struct DebugLineTableRowRef {
uint32_t DwCompileUnitIndex;
uint32_t RowIndex;
const static DebugLineTableRowRef NULL_ROW;
bool operator==(const DebugLineTableRowRef &Rhs) const {
return DwCompileUnitIndex == Rhs.DwCompileUnitIndex &&
RowIndex == Rhs.RowIndex;
}
bool operator!=(const DebugLineTableRowRef &Rhs) const {
return !(*this == Rhs);
}
static DebugLineTableRowRef fromSMLoc(const SMLoc &Loc) {
union {
decltype(Loc.getPointer()) Ptr;
DebugLineTableRowRef Ref;
} U;
U.Ptr = Loc.getPointer();
return U.Ref;
}
SMLoc toSMLoc() const {
union {
decltype(SMLoc().getPointer()) Ptr;
DebugLineTableRowRef Ref;
} U;
U.Ref = *this;
return SMLoc::getFromPointer(U.Ptr);
}
};
} // namespace bolt
} // namespace llvm
#endif

View File

@ -94,6 +94,11 @@ SplitFunctions("split-functions",
cl::desc("split functions into hot and cold distinct regions"),
cl::Optional);
// Flag "update-debug-sections". Its value is passed to the BinaryContext
// constructor, where it controls whether the compile-unit offset map is
// built, and to BinaryFunction::disassemble(), where it controls whether
// .debug_line row references are attached to disassembled instructions.
static cl::opt<bool>
UpdateDebugSections("update-debug-sections",
cl::desc("update DWARF debug sections of the executable"),
cl::Optional);
static cl::opt<BinaryFunction::LayoutType>
ReorderBlocks(
"reorder-blocks",
@ -258,7 +263,9 @@ bool ExecutableFileMemoryManager::finalizeMemory(std::string *ErrMsg) {
/// triple \p TripleName.
static std::unique_ptr<BinaryContext> CreateBinaryContext(
std::string ArchName,
std::string TripleName, const DataReader &DR) {
std::string TripleName,
const DataReader &DR,
std::unique_ptr<DWARFContext> DwCtx) {
std::string Error;
@ -343,6 +350,7 @@ static std::unique_ptr<BinaryContext> CreateBinaryContext(
auto BC =
llvm::make_unique<BinaryContext>(std::move(Ctx),
std::move(DwCtx),
std::move(TheTriple),
TheTarget,
TripleName,
@ -355,15 +363,18 @@ static std::unique_ptr<BinaryContext> CreateBinaryContext(
std::move(MIA),
std::move(MRI),
std::move(DisAsm),
DR);
DR,
opts::UpdateDebugSections);
return BC;
}
RewriteInstance::RewriteInstance(ELFObjectFileBase *File,
const DataReader &DR)
: File(File), BC(CreateBinaryContext("x86-64", "x86_64-unknown-linux", DR)),
DwCtx(new DWARFContextInMemory(*File)) {}
: File(File),
BC(CreateBinaryContext("x86-64", "x86_64-unknown-linux", DR,
std::unique_ptr<DWARFContext>(new DWARFContextInMemory(*File))))
{ }
RewriteInstance::~RewriteInstance() {}
@ -371,8 +382,8 @@ void RewriteInstance::reset() {
BinaryFunctions.clear();
FileSymRefs.clear();
auto &DR = BC->DR;
BC = CreateBinaryContext("x86-64", "x86_64-unknown-linux", DR);
DwCtx.reset(new DWARFContextInMemory(*File));
BC = CreateBinaryContext("x86-64", "x86_64-unknown-linux", DR,
std::unique_ptr<DWARFContext>(new DWARFContextInMemory(*File)));
CFIRdWrt.reset(nullptr);
SectionMM.reset(nullptr);
Out.reset(nullptr);
@ -615,7 +626,7 @@ void RewriteInstance::readSpecialSections() {
FrameHdrCopy =
std::vector<char>(FrameHdrContents.begin(), FrameHdrContents.end());
// Process debug sections.
EHFrame = DwCtx->getEHFrame();
EHFrame = BC->DwCtx->getEHFrame();
if (opts::DumpEHFrame) {
EHFrame->dump(outs());
}
@ -684,11 +695,11 @@ void RewriteInstance::disassembleFunctions() {
(SectionContents.data()) + FunctionOffset,
Function.getSize());
if (!Function.disassemble(FunctionData))
if (!Function.disassemble(FunctionData, opts::UpdateDebugSections))
continue;
if (opts::PrintAll || opts::PrintDisasm)
Function.print(errs(), "after disassembly");
Function.print(errs(), "after disassembly", true);
if (!Function.isSimple())
continue;
@ -711,7 +722,7 @@ void RewriteInstance::disassembleFunctions() {
continue;
if (opts::PrintAll || opts::PrintCFG)
Function.print(errs(), "after building cfg");
Function.print(errs(), "after building cfg", true);
TotalScore += Function.getFunctionScore();
@ -822,13 +833,13 @@ void RewriteInstance::runOptimizationPasses() {
}
if (opts::PrintAll || opts::PrintUCE)
Function.print(errs(), "after unreachable code elimination");
Function.print(errs(), "after unreachable code elimination", true);
}
if (opts::ReorderBlocks != BinaryFunction::LT_NONE) {
BFI.second.modifyLayout(opts::ReorderBlocks, opts::SplitFunctions);
if (opts::PrintAll || opts::PrintReordered)
Function.print(errs(), "after reordering blocks");
Function.print(errs(), "after reordering blocks", true);
}
// Post-processing passes.
@ -844,8 +855,7 @@ void RewriteInstance::runOptimizationPasses() {
// Update exception handling information.
Function.updateEHRanges();
if (opts::PrintAll || opts::PrintEHRanges)
Function.print(errs(), "after updating EH ranges");
Function.print(errs(), "after updating EH ranges", true);
}
}

View File

@ -161,7 +161,6 @@ private:
llvm::object::ELFObjectFileBase *File;
std::unique_ptr<BinaryContext> BC;
std::unique_ptr<DWARFContext> DwCtx;
std::unique_ptr<CFIReaderWriter> CFIRdWrt;
/// Our in-memory intermediary object file where we hold final code for
/// rewritten functions.