[obj2yaml][yaml2obj] Support for reading and dumping the MachO export trie

The MachO export trie is a serially encoded trie keyed by symbol name. This code parses the trie and preserves the structure so that it can be dumped again.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@271300 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Chris Bieneman 2016-05-31 17:26:36 +00:00
parent 980cda98b1
commit 7e74f7a10f
5 changed files with 401 additions and 10 deletions

View File

@ -71,11 +71,23 @@ struct BindOpcode {
StringRef Symbol;
};
struct ExportEntry {
uint64_t TerminalSize;
uint64_t NodeOffset;
std::string Name;
llvm::yaml::Hex64 Flags;
llvm::yaml::Hex64 Address;
llvm::yaml::Hex64 Other;
std::string ImportName;
std::vector<MachOYAML::ExportEntry> Children;
};
// Link-edit payloads carried in the YAML representation of a MachO object:
// the rebase and bind opcode streams plus the export trie.
struct LinkEditData {
std::vector<MachOYAML::RebaseOpcode> RebaseOpcodes;
std::vector<MachOYAML::BindOpcode> BindOpcodes;
std::vector<MachOYAML::BindOpcode> WeakBindOpcodes;
std::vector<MachOYAML::BindOpcode> LazyBindOpcodes;
// Root node of the export trie; the rest of the trie hangs off its Children.
MachOYAML::ExportEntry ExportTrie;
};
struct Object {
@ -95,6 +107,7 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::Hex64)
LLVM_YAML_IS_SEQUENCE_VECTOR(int64_t)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MachOYAML::RebaseOpcode)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MachOYAML::BindOpcode)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MachOYAML::ExportEntry)
namespace llvm {
namespace yaml {
@ -123,6 +136,10 @@ template <> struct MappingTraits<MachOYAML::BindOpcode> {
static void mapping(IO &IO, MachOYAML::BindOpcode &BindOpcode);
};
// YAML mapping traits for a single export trie node; declares the mapping()
// hook that reads/writes the node's keys.
template <> struct MappingTraits<MachOYAML::ExportEntry> {
static void mapping(IO &IO, MachOYAML::ExportEntry &ExportEntry);
};
template <> struct MappingTraits<MachOYAML::Section> {
static void mapping(IO &IO, MachOYAML::Section &Section);
};

View File

@ -103,6 +103,7 @@ void MappingTraits<MachOYAML::LinkEditData>::mapping(
IO.mapOptional("BindOpcodes", LinkEditData.BindOpcodes);
IO.mapOptional("WeakBindOpcodes", LinkEditData.WeakBindOpcodes);
IO.mapOptional("LazyBindOpcodes", LinkEditData.LazyBindOpcodes);
IO.mapOptional("ExportTrie", LinkEditData.ExportTrie);
}
void MappingTraits<MachOYAML::RebaseOpcode>::mapping(
@ -121,6 +122,18 @@ void MappingTraits<MachOYAML::BindOpcode>::mapping(
IO.mapOptional("Symbol", BindOpcode.Symbol);
}
// Maps one export trie node to/from YAML. The order of the calls below is the
// order in which the keys are handled.
void MappingTraits<MachOYAML::ExportEntry>::mapping(
IO &IO, MachOYAML::ExportEntry &ExportEntry) {
// TerminalSize is the only key every node must provide.
IO.mapRequired("TerminalSize", ExportEntry.TerminalSize);
IO.mapOptional("NodeOffset", ExportEntry.NodeOffset);
IO.mapOptional("Name", ExportEntry.Name);
IO.mapOptional("Flags", ExportEntry.Flags);
IO.mapOptional("Address", ExportEntry.Address);
IO.mapOptional("Other", ExportEntry.Other);
IO.mapOptional("ImportName", ExportEntry.ImportName);
// Children is a sequence of ExportEntry, so this mapping nests recursively.
IO.mapOptional("Children", ExportEntry.Children);
}
// Generic fallback: load command structs without a dedicated specialization
// contribute no additional YAML keys.
template <typename StructType>
void mapLoadCommandData(IO &IO, MachOYAML::LoadCommand &LoadCommand) {}
@ -142,6 +155,12 @@ void mapLoadCommandData<MachO::dylib_command>(
IO.mapOptional("PayloadString", LoadCommand.PayloadString);
}
// rpath_command specialization: exposes the load command's PayloadString as an
// optional YAML key.
template <>
void mapLoadCommandData<MachO::rpath_command>(
IO &IO, MachOYAML::LoadCommand &LoadCommand) {
IO.mapOptional("PayloadString", LoadCommand.PayloadString);
}
template <>
void mapLoadCommandData<MachO::dylinker_command>(
IO &IO, MachOYAML::LoadCommand &LoadCommand) {

View File

@ -0,0 +1,191 @@
# RUN: yaml2obj -format=macho %s | obj2yaml | FileCheck %s
--- !mach-o
FileHeader:
magic: 0xFEEDFACF
cputype: 0x01000007
cpusubtype: 0x80000003
filetype: 0x00000002
ncmds: 16
sizeofcmds: 1408
flags: 0x00218085
reserved: 0x00000000
LoadCommands:
- cmd: LC_SEGMENT_64
cmdsize: 72
segname: __PAGEZERO
vmaddr: 0
vmsize: 4294967296
fileoff: 0
filesize: 0
maxprot: 0
initprot: 0
nsects: 0
flags: 0
- cmd: LC_SEGMENT_64
cmdsize: 552
segname: __TEXT
vmaddr: 4294967296
vmsize: 8192
fileoff: 0
filesize: 8192
maxprot: 7
initprot: 5
nsects: 6
flags: 0
- cmd: LC_SEGMENT_64
cmdsize: 312
segname: __DATA
vmaddr: 4294975488
vmsize: 4096
fileoff: 8192
filesize: 4096
maxprot: 7
initprot: 3
nsects: 3
flags: 0
- cmd: LC_SEGMENT_64
cmdsize: 72
segname: __LINKEDIT
vmaddr: 4294979584
vmsize: 4096
fileoff: 12288
filesize: 2508
maxprot: 7
initprot: 1
nsects: 0
flags: 0
- cmd: LC_DYLD_INFO_ONLY
cmdsize: 48
rebase_off: 12288
rebase_size: 8
bind_off: 12296
bind_size: 96
weak_bind_off: 0
weak_bind_size: 0
lazy_bind_off: 12392
lazy_bind_size: 624
export_off: 13016
export_size: 48
- cmd: LC_SYMTAB
cmdsize: 24
symoff: 13080
nsyms: 30
stroff: 13700
strsize: 1096
- cmd: LC_DYSYMTAB
cmdsize: 80
ilocalsym: 0
nlocalsym: 9
iextdefsym: 9
nextdefsym: 2
iundefsym: 11
nundefsym: 19
tocoff: 0
ntoc: 0
modtaboff: 0
nmodtab: 0
extrefsymoff: 0
nextrefsyms: 0
indirectsymoff: 13560
nindirectsyms: 35
extreloff: 0
nextrel: 0
locreloff: 0
nlocrel: 0
- cmd: LC_LOAD_DYLINKER
cmdsize: 32
name: 12
PayloadString: /usr/lib/dyld
ZeroPadBytes: 7
- cmd: LC_UUID
cmdsize: 24
uuid: 461A1B28-822F-3F38-B670-645419E636F5
- cmd: LC_VERSION_MIN_MACOSX
cmdsize: 16
version: 658176
sdk: 658176
- cmd: LC_SOURCE_VERSION
cmdsize: 16
version: 0
- cmd: LC_MAIN
cmdsize: 24
entryoff: 4448
stacksize: 0
- cmd: LC_LOAD_DYLIB
cmdsize: 48
dylib:
name: 24
timestamp: 2
current_version: 7864576
compatibility_version: 65536
PayloadString: '/usr/lib/libc++.1.dylib'
ZeroPadBytes: 1
- cmd: LC_LOAD_DYLIB
cmdsize: 56
dylib:
name: 24
timestamp: 2
current_version: 80349697
compatibility_version: 65536
PayloadString: /usr/lib/libSystem.B.dylib
ZeroPadBytes: 6
- cmd: LC_FUNCTION_STARTS
cmdsize: 16
dataoff: 13064
datasize: 16
- cmd: LC_DATA_IN_CODE
cmdsize: 16
dataoff: 13080
datasize: 0
LinkEditData:
ExportTrie:
TerminalSize: 0
NodeOffset: 0
Name: ''
Flags: 0x0000000000000000
Address: 0x0000000000000000
Other: 0x0000000000000000
ImportName: ''
Children:
- TerminalSize: 0
NodeOffset: 5
Name: _
Flags: 0x0000000000000000
Address: 0x0000000000000000
Other: 0x0000000000000000
ImportName: ''
Children:
- TerminalSize: 2
NodeOffset: 33
Name: _mh_execute_header
Flags: 0x0000000000000000
Address: 0x0000000000000000
Other: 0x0000000000000000
ImportName: ''
- TerminalSize: 3
NodeOffset: 37
Name: main
Flags: 0x0000000000000000
Address: 0x0000000000001160
Other: 0x0000000000000000
ImportName: ''
...
#CHECK: ExportTrie:
#CHECK: TerminalSize: 0
#CHECK: NodeOffset: 0
#CHECK: Name: ''
#CHECK: Children:
#CHECK: - TerminalSize: 0
#CHECK: NodeOffset: 5
#CHECK: Name: _
#CHECK: Children:
#CHECK: - TerminalSize: 2
#CHECK: NodeOffset: 33
#CHECK: Name: _mh_execute_header
#CHECK: Address: 0x0000000000000000
#CHECK: - TerminalSize: 3
#CHECK: NodeOffset: 37
#CHECK: Name: main
#CHECK: Address: 0x0000000000001160

View File

@ -32,6 +32,7 @@ class MachODumper {
void dumpRebaseOpcodes(std::unique_ptr<MachOYAML::Object> &Y);
void dumpBindOpcodes(std::vector<MachOYAML::BindOpcode> &BindOpcodes,
ArrayRef<uint8_t> OpcodeBuffer, bool Lazy = false);
void dumpExportTrie(std::unique_ptr<MachOYAML::Object> &Y);
public:
MachODumper(const object::MachOObjectFile &O) : Obj(O) {}
@ -149,6 +150,13 @@ const char *MachODumper::processLoadCommandData<MachO::dylinker_command>(
return readString<MachO::dylinker_command>(LC, LoadCmd);
}
// rpath_command specialization: delegates to readString<rpath_command> and
// returns whatever pointer it yields.
template <>
const char *MachODumper::processLoadCommandData<MachO::rpath_command>(
MachOYAML::LoadCommand &LC,
const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) {
return readString<MachO::rpath_command>(LC, LoadCmd);
}
Expected<std::unique_ptr<MachOYAML::Object>> MachODumper::dump() {
auto Y = make_unique<MachOYAML::Object>();
dumpHeader(Y);
@ -199,8 +207,9 @@ void MachODumper::dumpLinkEdit(std::unique_ptr<MachOYAML::Object> &Y) {
dumpBindOpcodes(Y->LinkEdit.BindOpcodes, Obj.getDyldInfoBindOpcodes());
dumpBindOpcodes(Y->LinkEdit.WeakBindOpcodes,
Obj.getDyldInfoWeakBindOpcodes());
dumpBindOpcodes(Y->LinkEdit.LazyBindOpcodes,
Obj.getDyldInfoLazyBindOpcodes(), true);
dumpBindOpcodes(Y->LinkEdit.LazyBindOpcodes, Obj.getDyldInfoLazyBindOpcodes(),
true);
dumpExportTrie(Y);
}
void MachODumper::dumpRebaseOpcodes(std::unique_ptr<MachOYAML::Object> &Y) {
@ -244,6 +253,13 @@ void MachODumper::dumpRebaseOpcodes(std::unique_ptr<MachOYAML::Object> &Y) {
}
}
// Builds a StringRef over the NUL-terminated byte string that begins at Start.
// The terminator is excluded from the result and no bytes are copied.
StringRef ReadStringRef(const uint8_t *Start) {
  const uint8_t *Terminator = Start;
  while (*Terminator != 0)
    ++Terminator;
  const char *Chars = reinterpret_cast<const char *>(Start);
  return StringRef(Chars, Terminator - Start);
}
void MachODumper::dumpBindOpcodes(
std::vector<MachOYAML::BindOpcode> &BindOpcodes,
ArrayRef<uint8_t> OpcodeBuffer, bool Lazy) {
@ -257,7 +273,6 @@ void MachODumper::dumpBindOpcodes(
unsigned Count;
uint64_t ULEB = 0;
int64_t SLEB = 0;
const uint8_t *SymStart;
switch (BindOp.Opcode) {
case MachO::BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB:
@ -282,12 +297,8 @@ void MachODumper::dumpBindOpcodes(
break;
case MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM:
SymStart = ++OpCode;
while (*OpCode) {
++OpCode;
}
BindOp.Symbol = StringRef(reinterpret_cast<const char *>(SymStart),
OpCode - SymStart);
BindOp.Symbol = ReadStringRef(OpCode + 1);
OpCode += BindOp.Symbol.size() + 1;
break;
default:
break;
@ -302,6 +313,117 @@ void MachODumper::dumpBindOpcodes(
}
}
/*!
* \brief Processes a node from the export trie, and its children.
*
* To my knowledge there is no documentation of the encoded format of this data
* other than in the heads of the Apple linker engineers. To that end hopefully
* this comment and the implementation below can serve to light the way for
* anyone crazy enough to come down this path in the future.
*
* This function reads and preserves the trie structure of the export trie. To
* my knowledge there is no code anywhere else that reads the data and preserves
* the Trie. LD64 (sources available at opensource.apple.com) has a similar
* implementation that parses the export trie into a vector. That code as well
* as LLVM's libObject MachO implementation were the basis for this.
*
* The export trie is an encoded trie. The node serialization is a bit awkward.
* The below pseudo-code is the best description I've come up with for it.
*
* struct SerializedNode {
* ULEB128 TerminalSize;
* struct TerminalData { <-- This is only present if TerminalSize > 0
* ULEB128 Flags;
* ULEB128 Address; <-- Present if ( !(Flags & REEXPORT) )
* ULEB128 Other; <-- Present if ( Flags & REEXPORT ||
* Flags & STUB_AND_RESOLVER )
* char[] ImportName; <-- Present if ( Flags & REEXPORT )
* }
* uint8_t ChildrenCount;
* Pair<char[], ULEB128> ChildNameOffsetPair[ChildrenCount];
* SerializedNode Children[ChildrenCount]
* }
*
* Terminal nodes are nodes that represent actual exports. They can appear
* anywhere in the tree other than at the root; they do not need to be leaf
* nodes. When reading the data out of the trie this routine reads it in-order,
* but it puts the child names and offsets directly into the child nodes. This
* results in looping over the children twice during serialization and
* de-serialization, but it makes the YAML representation more human readable.
*
* Below is an example of the graph from a "Hello World" executable:
*
* -------
* | '' |
* -------
* |
* -------
* | '_' |
* -------
* |
* |----------------------------------------|
* | |
* ------------------------ ---------------------
* | '_mh_execute_header' | | 'main' |
* | Flags: 0x00000000 | | Flags: 0x00000000 |
* | Addr: 0x00000000 | | Addr: 0x00001160 |
* ------------------------ ---------------------
*
* This graph represents the trie for the exports "__mh_execute_header" and
* "_main". In the graph only the "_main" and "__mh_execute_header" nodes are
* terminal.
*/
const uint8_t *processExportNode(const uint8_t *CurrPtr,
                                 const uint8_t *const End,
                                 MachOYAML::ExportEntry &Entry) {
  // Nothing left to parse: leave Entry as it is and hand the cursor back.
  if (CurrPtr >= End)
    return CurrPtr;
  unsigned Count = 0;
  Entry.TerminalSize = decodeULEB128(CurrPtr, &Count);
  CurrPtr += Count;
  // Terminal data is only present when TerminalSize is non-zero.
  if (Entry.TerminalSize != 0) {
    Entry.Flags = decodeULEB128(CurrPtr, &Count);
    CurrPtr += Count;
    if (Entry.Flags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT) {
      // Re-exports carry no address; they store Other and an import name.
      Entry.Address = 0;
      Entry.Other = decodeULEB128(CurrPtr, &Count);
      CurrPtr += Count;
      Entry.ImportName = std::string(reinterpret_cast<const char *>(CurrPtr));
      // Step over the name and its NUL terminator so that the children count
      // is read from the byte that follows the terminal data, not from the
      // first character of the import name.
      CurrPtr += Entry.ImportName.length() + 1;
    } else {
      Entry.Address = decodeULEB128(CurrPtr, &Count);
      CurrPtr += Count;
      if (Entry.Flags & MachO::EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) {
        Entry.Other = decodeULEB128(CurrPtr, &Count);
        CurrPtr += Count;
      } else
        Entry.Other = 0;
    }
  }
  // Do not read the children count past the end of the buffer.
  if (CurrPtr >= End)
    return CurrPtr;
  const uint8_t ChildrenCount = *CurrPtr++;
  if (ChildrenCount == 0)
    return CurrPtr;
  Entry.Children.insert(Entry.Children.begin(),
                        static_cast<size_t>(ChildrenCount),
                        MachOYAML::ExportEntry());
  // First pass: the (name, offset) pairs are stored on the parent, but are
  // recorded directly on the child entries.
  for (auto &Child : Entry.Children) {
    Child.Name = std::string(reinterpret_cast<const char *>(CurrPtr));
    CurrPtr += Child.Name.length() + 1;
    Child.NodeOffset = decodeULEB128(CurrPtr, &Count);
    CurrPtr += Count;
  }
  // Second pass: the child nodes themselves are read in order, each one
  // starting where the previous one stopped.
  for (auto &Child : Entry.Children) {
    CurrPtr = processExportNode(CurrPtr, End, Child);
  }
  return CurrPtr;
}
// Parses the object's dyld info export trie bytes into Y->LinkEdit.ExportTrie,
// which becomes the root node of the dumped trie.
void MachODumper::dumpExportTrie(std::unique_ptr<MachOYAML::Object> &Y) {
  const auto TrieBytes = Obj.getDyldInfoExportsTrie();
  MachOYAML::ExportEntry &Root = Y->LinkEdit.ExportTrie;
  processExportNode(TrieBytes.begin(), TrieBytes.end(), Root);
}
Error macho2yaml(raw_ostream &Out, const object::MachOObjectFile &Obj) {
MachODumper Dumper(Obj);
Expected<std::unique_ptr<MachOYAML::Object>> YAML = Dumper.dump();

View File

@ -48,7 +48,9 @@ private:
Error writeLinkEditData(raw_ostream &OS);
void writeBindOpcodes(raw_ostream &OS, uint64_t offset,
std::vector<MachOYAML::BindOpcode> &BindOpcodes);
Error writeExportTrie(raw_ostream &OS);
void dumpExportEntry(raw_ostream &OS, MachOYAML::ExportEntry &Entry);
void ZeroToOffset(raw_ostream &OS, size_t offset);
MachOYAML::Object &Obj;
@ -161,6 +163,12 @@ size_t writeLoadCommandData<MachO::dylinker_command>(MachOYAML::LoadCommand &LC,
return writePayloadString(LC, OS);
}
// rpath_command specialization: emits the load command's payload string via
// writePayloadString and returns that call's result.
template <>
size_t writeLoadCommandData<MachO::rpath_command>(MachOYAML::LoadCommand &LC,
raw_ostream &OS) {
return writePayloadString(LC, OS);
}
void ZeroFillBytes(raw_ostream &OS, size_t Size) {
std::vector<uint8_t> FillData;
FillData.insert(FillData.begin(), Size, 0);
@ -282,11 +290,41 @@ void MachOWriter::writeBindOpcodes(
}
if (!Opcode.Symbol.empty()) {
OS.write(Opcode.Symbol.data(), Opcode.Symbol.size());
OS.write("\0", 1);
OS.write('\0');
}
}
}
// Serializes one export trie node followed by its whole subtree.
//
// Layout written per node: TerminalSize, the terminal data (only when
// TerminalSize is non-zero), a one-byte children count, the (name, offset)
// pair of every child, and finally the child nodes themselves in order.
//
// Every integer is emitted as ULEB128 because that is how the trie is decoded
// when it is read back. A signed encoding would produce different bytes for
// values whose top bit is set and longer-than-necessary encodings for others.
void MachOWriter::dumpExportEntry(raw_ostream &OS,
                                  MachOYAML::ExportEntry &Entry) {
  encodeULEB128(Entry.TerminalSize, OS);
  if (Entry.TerminalSize > 0) {
    encodeULEB128(Entry.Flags, OS);
    if (Entry.Flags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT) {
      // Re-exports store Other and a NUL-terminated import name, no address.
      encodeULEB128(Entry.Other, OS);
      OS << Entry.ImportName;
      OS.write('\0');
    } else {
      encodeULEB128(Entry.Address, OS);
      if (Entry.Flags & MachO::EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER)
        encodeULEB128(Entry.Other, OS);
    }
  }
  // The children count occupies a single byte.
  OS.write(static_cast<uint8_t>(Entry.Children.size()));
  // Iterate by reference: copying a child would duplicate its entire subtree.
  for (auto &Child : Entry.Children) {
    OS << Child.Name;
    OS.write('\0');
    encodeULEB128(Child.NodeOffset, OS);
  }
  for (auto &Child : Entry.Children)
    dumpExportEntry(OS, Child);
}
// Writes the export trie to OS, starting from the root entry held in
// Obj.LinkEdit.ExportTrie. Always returns success.
Error MachOWriter::writeExportTrie(raw_ostream &OS) {
dumpExportEntry(OS, Obj.LinkEdit.ExportTrie);
return Error::success();
}
Error MachOWriter::writeLinkEditData(raw_ostream &OS) {
MachOYAML::LinkEditData &LinkEdit = Obj.LinkEdit;
MachO::dyld_info_command *DyldInfoOnlyCmd = 0;
@ -318,6 +356,10 @@ Error MachOWriter::writeLinkEditData(raw_ostream &OS) {
writeBindOpcodes(OS, DyldInfoOnlyCmd->lazy_bind_off,
LinkEdit.LazyBindOpcodes);
ZeroToOffset(OS, DyldInfoOnlyCmd->export_off);
if(auto Err = writeExportTrie(OS))
return Err;
// Fill to the end of the string table
ZeroToOffset(OS, SymtabCmd->stroff + SymtabCmd->strsize);