[lld-macho] Emit indirect symbol table

Makes it a little easier to read objdump's disassembly.

Reviewed By: #lld-macho, gkm

Differential Revision: https://reviews.llvm.org/D87178
This commit is contained in:
Jez Ng 2020-09-04 18:02:07 -07:00
parent cd7cb0c303
commit 5d26bd3b75
6 changed files with 156 additions and 9 deletions

View File

@ -63,6 +63,8 @@ public:
uint64_t fileOff = 0;
uint32_t align = 1;
uint32_t flags = 0;
uint32_t reserved1 = 0;
uint32_t reserved2 = 0;
private:
Kind sectionKind;

View File

@ -72,6 +72,8 @@ public:
uint32_t stubsIndex = UINT32_MAX;
uint32_t symtabIndex = UINT32_MAX;
protected:
Symbol(Kind k, StringRefZ name) : symbolKind(k), name(name) {}

View File

@ -296,7 +296,10 @@ void macho::addNonLazyBindingEntries(const Symbol *sym,
}
StubsSection::StubsSection()
: SyntheticSection(segment_names::text, "__stubs") {}
: SyntheticSection(segment_names::text, "__stubs") {
flags = MachO::S_SYMBOL_STUBS;
reserved2 = target->stubSize;
}
uint64_t StubsSection::getSize() const {
return entries.size() * target->stubSize;
@ -464,9 +467,12 @@ uint64_t SymtabSection::getRawSize() const {
void SymtabSection::finalizeContents() {
// TODO support other symbol types
for (Symbol *sym : symtab->getSymbols())
if (isa<Defined>(sym))
for (Symbol *sym : symtab->getSymbols()) {
if (isa<Defined>(sym) || sym->isInGot() || sym->isInStubs()) {
sym->symtabIndex = symbols.size();
symbols.push_back({sym, stringTableSection.addString(sym->getName())});
}
}
}
void SymtabSection::writeTo(uint8_t *buf) const {
@ -486,6 +492,47 @@ void SymtabSection::writeTo(uint8_t *buf) const {
}
}
// Constructs the indirect symbol table as a LinkEditSection placed in the
// segment named by segment_names::linkEdit, under the section name
// section_names::indirectSymbolTable.
IndirectSymtabSection::IndirectSymtabSection()
: LinkEditSection(segment_names::linkEdit,
section_names::indirectSymbolTable) {}
// Returns the total number of entries in the indirect symbol table: one for
// every GOT entry, one for every TLV pointer entry, and one for every stub.
uint32_t IndirectSymtabSection::getNumSymbols() const {
  const auto gotCount = in.got->getEntries().size();
  const auto tlvCount = in.tlvPointers->getEntries().size();
  const auto stubCount = in.stubs->getEntries().size();
  return gotCount + tlvCount + stubCount;
}
// The table is needed as soon as any one of the sections whose entries it
// describes (GOT, TLV pointers, stubs) is needed. The sections are queried in
// that order and the first positive answer short-circuits the rest.
bool IndirectSymtabSection::isNeeded() const {
  if (in.got->isNeeded())
    return true;
  if (in.tlvPointers->isNeeded())
    return true;
  return in.stubs->isNeeded();
}
// Records, in each participating section's reserved1 field, the index at which
// that section's sub-array begins inside the indirect symbol table. The
// sub-arrays are laid out as: GOT entries, then TLV pointer entries, then
// stub entries.
void IndirectSymtabSection::finalizeContents() {
  // The GOT sub-array opens the table.
  const uint32_t gotStart = 0;
  in.got->reserved1 = gotStart;

  // TLV pointers follow immediately after the last GOT entry.
  const uint32_t tlvStart = gotStart + in.got->getEntries().size();
  in.tlvPointers->reserved1 = tlvStart;

  // Stubs and lazy pointers correspond 1:1, so both sections are pointed at
  // the very same sub-array rather than each getting its own copy.
  const uint32_t stubsStart = tlvStart + in.tlvPointers->getEntries().size();
  in.lazyPointers->reserved1 = stubsStart;
  in.stubs->reserved1 = stubsStart;
}
void IndirectSymtabSection::writeTo(uint8_t *buf) const {
uint32_t off = 0;
for (const Symbol *sym : in.got->getEntries()) {
write32le(buf + off * sizeof(uint32_t), sym->symtabIndex);
++off;
}
for (const Symbol *sym : in.tlvPointers->getEntries()) {
write32le(buf + off * sizeof(uint32_t), sym->symtabIndex);
++off;
}
for (const Symbol *sym : in.stubs->getEntries()) {
write32le(buf + off * sizeof(uint32_t), sym->symtabIndex);
++off;
}
}
// Constructs the string table as a LinkEditSection placed in the segment named
// by segment_names::linkEdit, under the section name
// section_names::stringTable.
StringTableSection::StringTableSection()
: LinkEditSection(segment_names::linkEdit, section_names::stringTable) {}

View File

@ -33,6 +33,7 @@ constexpr const char weakBinding[] = "__weak_binding";
constexpr const char lazyBinding[] = "__lazy_binding";
constexpr const char export_[] = "__export";
constexpr const char symbolTable[] = "__symbol_table";
// Section name given to the indirect symbol table (IndirectSymtabSection).
constexpr const char indirectSymbolTable[] = "__ind_sym_tab";
constexpr const char stringTable[] = "__string_table";
constexpr const char got[] = "__got";
constexpr const char threadPtrs[] = "__thread_ptrs";
@ -391,6 +392,28 @@ private:
std::vector<SymtabEntry> symbols;
};
// The indirect symbol table is a list of 32-bit integers that serve as indices
// into the (actual) symbol table. The indirect symbol table is a
// concatenation of several sub-arrays of indices, each sub-array belonging to
// a separate section. The starting offset of each sub-array is stored in the
// reserved1 header field of the respective section.
//
// These sub-arrays provide symbol information for sections that store
// contiguous sequences of symbol references. These references can be pointers
// (e.g. those in the GOT and TLVP sections) or assembly sequences (e.g.
// function stubs).
// Link-edit section holding the indirect symbol table: a flat array of 32-bit
// indices into the symbol table.
class IndirectSymtabSection : public LinkEditSection {
public:
IndirectSymtabSection();
// Stores the starting index of each section's sub-array in that section's
// reserved1 field.
void finalizeContents();
// Number of 32-bit entries in the table.
uint32_t getNumSymbols() const;
// Size in bytes of the table contents: one uint32_t per entry.
uint64_t getRawSize() const override {
return getNumSymbols() * sizeof(uint32_t);
}
bool isNeeded() const override;
// Writes the table entries to buf.
void writeTo(uint8_t *buf) const override;
};
struct InStruct {
MachHeaderSection *header = nullptr;
BindingSection *binding = nullptr;

View File

@ -60,6 +60,7 @@ public:
MachHeaderSection *header = nullptr;
StringTableSection *stringTableSection = nullptr;
SymtabSection *symtabSection = nullptr;
IndirectSymtabSection *indirectSymtabSection = nullptr;
UnwindInfoSection *unwindInfoSection = nullptr;
};
@ -105,13 +106,20 @@ public:
// The LC_DYSYMTAB load command. Besides the command header, it publishes the
// file offset and entry count of the indirect symbol table; every other field
// of dysymtab_command is left untouched by writeTo().
class LCDysymtab : public LoadCommand {
public:
  LCDysymtab(IndirectSymtabSection *indirectSymtabSection)
      : indirectSymtabSection(indirectSymtabSection) {}

  uint32_t getSize() const override { return sizeof(dysymtab_command); }

  void writeTo(uint8_t *buf) const override {
    auto &dysymtab = *reinterpret_cast<dysymtab_command *>(buf);

    // Command header.
    dysymtab.cmd = LC_DYSYMTAB;
    dysymtab.cmdsize = getSize();

    // Location and length of the indirect symbol table within the file.
    dysymtab.indirectsymoff = indirectSymtabSection->fileOff;
    dysymtab.nindirectsyms = indirectSymtabSection->getNumSymbols();
  }

  // Section whose file offset and entry count this command describes.
  IndirectSymtabSection *indirectSymtabSection = nullptr;
};
class LCSegment : public LoadCommand {
@ -163,6 +171,8 @@ public:
sectHdr->align = Log2_32(osec->align);
sectHdr->flags = osec->flags;
sectHdr->size = osec->getSize();
sectHdr->reserved1 = osec->reserved1;
sectHdr->reserved2 = osec->reserved2;
}
}
@ -339,7 +349,7 @@ void Writer::createLoadCommands() {
in.header->addLoadCommand(
make<LCDyldInfo>(in.binding, in.weakBinding, in.lazyBinding, in.exports));
in.header->addLoadCommand(make<LCSymtab>(symtabSection, stringTableSection));
in.header->addLoadCommand(make<LCDysymtab>());
in.header->addLoadCommand(make<LCDysymtab>(indirectSymtabSection));
for (StringRef path : config->runtimePaths)
in.header->addLoadCommand(make<LCRPath>(path));
@ -438,11 +448,12 @@ static int sectionOrder(OutputSection *osec) {
.Default(0);
} else if (segname == segment_names::linkEdit) {
return StringSwitch<int>(osec->name)
.Case(section_names::binding, -6)
.Case(section_names::weakBinding, -5)
.Case(section_names::lazyBinding, -4)
.Case(section_names::export_, -3)
.Case(section_names::symbolTable, -2)
.Case(section_names::binding, -7)
.Case(section_names::weakBinding, -6)
.Case(section_names::lazyBinding, -5)
.Case(section_names::export_, -4)
.Case(section_names::symbolTable, -3)
.Case(section_names::indirectSymbolTable, -2)
.Case(section_names::stringTable, -1)
.Default(0);
}
@ -494,6 +505,7 @@ void Writer::createOutputSections() {
stringTableSection = make<StringTableSection>();
unwindInfoSection = make<UnwindInfoSection>(); // TODO(gkm): only when no -r
symtabSection = make<SymtabSection>(*stringTableSection);
indirectSymtabSection = make<IndirectSymtabSection>();
switch (config->outputType) {
case MH_EXECUTE:
@ -614,6 +626,7 @@ void Writer::run() {
in.lazyBinding->finalizeContents();
in.exports->finalizeContents();
symtabSection->finalizeContents();
indirectSymtabSection->finalizeContents();
// Now that __LINKEDIT is filled out, do a proper calculation of its
// addresses and offsets.

View File

@ -0,0 +1,60 @@
## Checks that the linked executable carries an indirect symbol table: the
## disassembly of _main must be annotated with the names of the referenced
## symbols, and llvm-objdump --indirect-symbols must list the entries of the
## __stubs, __thread_ptrs, __la_symbol_ptr and __got sections by name.
# REQUIRES: x86
# RUN: split-file %s %t
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/libfoo.s -o %t/libfoo.o
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o
# RUN: lld -flavor darwinnew -dylib %t/libfoo.o -o %t/libfoo.dylib -syslibroot %S/Inputs/MacOSX.sdk -lSystem
# RUN: lld -flavor darwinnew %t/test.o %t/libfoo.dylib -o %t/test -syslibroot %S/Inputs/MacOSX.sdk -lSystem
# RUN: llvm-objdump --macho -d --no-show-raw-insn --indirect-symbols %t/test | FileCheck %s
# CHECK: (__TEXT,__text) section
# CHECK-NEXT: _main:
# CHECK-NEXT: movq {{.*}}(%rip), %rax ## literal pool symbol address: _foo
# CHECK-NEXT: movq {{.*}}(%rip), %rax ## literal pool symbol address: _bar
# CHECK-NEXT: movq {{.*}}(%rip), %rax ## literal pool symbol address: _foo_tlv
# CHECK-NEXT: movq {{.*}}(%rip), %rax ## literal pool symbol address: _bar_tlv
# CHECK-NEXT: callq {{.*}} ## symbol stub for: _foo_fn
# CHECK-NEXT: callq {{.*}} ## symbol stub for: _bar_fn
# CHECK-NEXT: retq
# CHECK: Indirect symbols for (__TEXT,__stubs) 2 entries
# CHECK-NEXT: address index name
# CHECK-NEXT: _bar_fn
# CHECK-NEXT: _foo_fn
# CHECK-NEXT: Indirect symbols for (__DATA,__thread_ptrs) 2 entries
# CHECK-NEXT: address index name
# CHECK-NEXT: _bar_tlv
# CHECK-NEXT: _foo_tlv
# CHECK-NEXT: Indirect symbols for (__DATA,__la_symbol_ptr) 2 entries
# CHECK-NEXT: address index name
# CHECK-NEXT: _bar_fn
# CHECK-NEXT: _foo_fn
# CHECK-NEXT: Indirect symbols for (__DATA_CONST,__got) 3 entries
# CHECK-NEXT: address index name
# CHECK-NEXT: _bar
# CHECK-NEXT: _foo
# CHECK-NEXT: _stub_binder
#--- libfoo.s
## Input linked into libfoo.dylib. It only needs to define the symbols that
## test.s refers to, so every symbol is a bare label with no contents.
.globl _foo, _foo_fn, _bar, _bar_fn
_foo:
_foo_fn:
_bar:
_bar_fn:
## The thread-local symbols are placed in a thread_local_variables section.
.section __DATA,__thread_vars,thread_local_variables
.globl _foo_tlv, _bar_tlv
_foo_tlv:
_bar_tlv:
#--- test.s
## Input linked into the executable under test. _main refers to the dylib's
## symbols in three ways: through @GOTPCREL loads, through @TLVP loads, and
## through direct calls.
.globl _main
_main:
movq _foo@GOTPCREL(%rip), %rax
movq _bar@GOTPCREL(%rip), %rax
mov _foo_tlv@TLVP(%rip), %rax
mov _bar_tlv@TLVP(%rip), %rax
callq _foo_fn
callq _bar_fn
ret