[lld-macho][nfc] Refactor in preparation for 32-bit support

The main challenge was handling the different on-disk structures (e.g.
`mach_header` vs `mach_header_64`). I tried to strike a balance between
sprinkling `target->wordSize == 8` checks everywhere (branchy = slow, and ugly)
and templatizing everything (causes code bloat, also ugly). I think I struck a
decent balance by judicious use of type erasure.

Note that LLD-ELF has a similar architecture, though it seems to use more templating.

Linking chromium_framework takes about the same time before and after this
change:

      N           Min           Max        Median           Avg        Stddev
  x  20          4.52          4.67         4.595        4.5945   0.044423204
  +  20           4.5          4.71         4.575         4.582   0.056344803
  No difference proven at 95.0% confidence

Reviewed By: #lld-macho, oontvoo

Differential Revision: https://reviews.llvm.org/D99633
This commit is contained in:
Jez Ng 2021-04-02 18:46:18 -04:00
parent 8156d899ab
commit 817d98d841
16 changed files with 285 additions and 140 deletions

View File

@ -28,7 +28,7 @@ namespace {
struct ARM64 : TargetInfo {
ARM64();
int64_t getEmbeddedAddend(MemoryBufferRef, const section_64 &,
int64_t getEmbeddedAddend(MemoryBufferRef, uint64_t offset,
const relocation_info) const override;
void relocateOne(uint8_t *loc, const Reloc &, uint64_t va,
uint64_t pc) const override;
@ -77,7 +77,7 @@ const RelocAttrs &ARM64::getRelocAttrs(uint8_t type) const {
return relocAttrsArray[type];
}
int64_t ARM64::getEmbeddedAddend(MemoryBufferRef mb, const section_64 &sec,
int64_t ARM64::getEmbeddedAddend(MemoryBufferRef mb, uint64_t offset,
const relocation_info rel) const {
if (rel.r_type != ARM64_RELOC_UNSIGNED &&
rel.r_type != ARM64_RELOC_SUBTRACTOR) {
@ -88,7 +88,7 @@ int64_t ARM64::getEmbeddedAddend(MemoryBufferRef mb, const section_64 &sec,
}
auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
const uint8_t *loc = buf + sec.offset + rel.r_address;
const uint8_t *loc = buf + offset + rel.r_address;
switch (rel.r_length) {
case 2:
return static_cast<int32_t>(read32le(loc));
@ -221,7 +221,8 @@ void ARM64::writeStub(uint8_t *buf8, const Symbol &sym) const {
auto *buf32 = reinterpret_cast<uint32_t *>(buf8);
uint64_t pcPageBits =
pageBits(in.stubs->addr + sym.stubsIndex * sizeof(stubCode));
uint64_t lazyPointerVA = in.lazyPointers->addr + sym.stubsIndex * WordSize;
uint64_t lazyPointerVA =
in.lazyPointers->addr + sym.stubsIndex * LP64::wordSize;
buf32[0] = encodePage21({&sym, "stub"}, stubCode[0],
pageBits(lazyPointerVA) - pcPageBits);
buf32[1] = encodePageOff12(stubCode[1], lazyPointerVA);
@ -249,7 +250,7 @@ void ARM64::writeStubHelperHeader(uint8_t *buf8) const {
buf32[1] = encodePageOff12(stubHelperHeaderCode[1], loaderVA);
buf32[2] = stubHelperHeaderCode[2];
uint64_t binderVA =
in.got->addr + in.stubHelper->stubBinder->gotIndex * WordSize;
in.got->addr + in.stubHelper->stubBinder->gotIndex * LP64::wordSize;
buf32[3] = encodePage21(d, stubHelperHeaderCode[3],
pageBits(binderVA) - pcPageBits(3));
buf32[4] = encodePageOff12(stubHelperHeaderCode[4], binderVA);
@ -291,7 +292,7 @@ void ARM64::relaxGotLoad(uint8_t *loc, uint8_t type) const {
write32le(loc, instruction);
}
ARM64::ARM64() {
ARM64::ARM64() : TargetInfo(LP64()) {
cpuType = CPU_TYPE_ARM64;
cpuSubtype = CPU_SUBTYPE_ARM64_ALL;

View File

@ -25,7 +25,7 @@ namespace {
struct X86_64 : TargetInfo {
X86_64();
int64_t getEmbeddedAddend(MemoryBufferRef, const section_64 &,
int64_t getEmbeddedAddend(MemoryBufferRef, uint64_t offset,
const relocation_info) const override;
void relocateOne(uint8_t *loc, const Reloc &, uint64_t va,
uint64_t relocVA) const override;
@ -77,10 +77,10 @@ static int pcrelOffset(uint8_t type) {
}
}
int64_t X86_64::getEmbeddedAddend(MemoryBufferRef mb, const section_64 &sec,
int64_t X86_64::getEmbeddedAddend(MemoryBufferRef mb, uint64_t offset,
relocation_info rel) const {
auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
const uint8_t *loc = buf + sec.offset + rel.r_address;
const uint8_t *loc = buf + offset + rel.r_address;
switch (rel.r_length) {
case 2:
@ -142,7 +142,7 @@ void X86_64::writeStub(uint8_t *buf, const Symbol &sym) const {
memcpy(buf, stub, 2); // just copy the two nonzero bytes
uint64_t stubAddr = in.stubs->addr + sym.stubsIndex * sizeof(stub);
writeRipRelative({&sym, "stub"}, buf, stubAddr, sizeof(stub),
in.lazyPointers->addr + sym.stubsIndex * WordSize);
in.lazyPointers->addr + sym.stubsIndex * LP64::wordSize);
}
static constexpr uint8_t stubHelperHeader[] = {
@ -159,7 +159,7 @@ void X86_64::writeStubHelperHeader(uint8_t *buf) const {
in.imageLoaderCache->getVA());
writeRipRelative(d, buf, in.stubHelper->addr, 0xf,
in.got->addr +
in.stubHelper->stubBinder->gotIndex * WordSize);
in.stubHelper->stubBinder->gotIndex * LP64::wordSize);
}
static constexpr uint8_t stubHelperEntry[] = {
@ -182,7 +182,7 @@ void X86_64::relaxGotLoad(uint8_t *loc, uint8_t type) const {
loc[-2] = 0x8d;
}
X86_64::X86_64() {
X86_64::X86_64() : TargetInfo(LP64()) {
cpuType = CPU_TYPE_X86_64;
cpuSubtype = CPU_SUBTYPE_X86_64_ALL;

View File

@ -1104,7 +1104,11 @@ bool macho::link(ArrayRef<const char *> argsArr, bool canExitEarly,
"\n>>> referenced from option -exported_symbol(s_list)");
}
createSyntheticSections();
if (target->wordSize == 8)
createSyntheticSections<LP64>();
else
createSyntheticSections<ILP32>();
createSyntheticSymbols();
for (const Arg *arg : args.filtered(OPT_sectcreate)) {
@ -1127,7 +1131,10 @@ bool macho::link(ArrayRef<const char *> argsArr, bool canExitEarly,
}
// Write to an output file.
writeResult();
if (target->wordSize == 8)
writeResult<LP64>();
else
writeResult<ILP32>();
depTracker->write(getLLDVersion(), inputFiles, config->outputFile);
}

View File

@ -10,6 +10,7 @@
#include "Driver.h"
#include "InputFiles.h"
#include "ObjC.h"
#include "Target.h"
#include "lld/Common/Args.h"
#include "lld/Common/ErrorHandler.h"

View File

@ -153,11 +153,12 @@ Optional<MemoryBufferRef> macho::readFile(StringRef path) {
InputFile::InputFile(Kind kind, const InterfaceFile &interface)
: id(idCount++), fileKind(kind), name(saver.save(interface.getPath())) {}
void ObjFile::parseSections(ArrayRef<section_64> sections) {
template <class Section>
void ObjFile::parseSections(ArrayRef<Section> sections) {
subsections.reserve(sections.size());
auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
for (const section_64 &sec : sections) {
for (const Section &sec : sections) {
InputSection *isec = make<InputSection>();
isec->file = this;
isec->name =
@ -204,7 +205,8 @@ static InputSection *findContainingSubsection(SubsectionMapping &map,
return it->isec;
}
static bool validateRelocationInfo(InputFile *file, const section_64 &sec,
template <class Section>
static bool validateRelocationInfo(InputFile *file, const Section &sec,
relocation_info rel) {
const RelocAttrs &relocAttrs = target->getRelocAttrs(rel.r_type);
bool valid = true;
@ -235,7 +237,9 @@ static bool validateRelocationInfo(InputFile *file, const section_64 &sec,
return valid;
}
void ObjFile::parseRelocations(const section_64 &sec,
template <class Section>
void ObjFile::parseRelocations(ArrayRef<Section> sectionHeaders,
const Section &sec,
SubsectionMapping &subsecMap) {
auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
ArrayRef<relocation_info> relInfos(
@ -279,7 +283,7 @@ void ObjFile::parseRelocations(const section_64 &sec,
if (relInfo.r_address & R_SCATTERED)
fatal("TODO: Scattered relocations not supported");
int64_t embeddedAddend = target->getEmbeddedAddend(mb, sec, relInfo);
int64_t embeddedAddend = target->getEmbeddedAddend(mb, sec.offset, relInfo);
assert(!(embeddedAddend && pairedAddend));
int64_t totalAddend = pairedAddend + embeddedAddend;
Reloc r;
@ -293,7 +297,7 @@ void ObjFile::parseRelocations(const section_64 &sec,
} else {
SubsectionMapping &referentSubsecMap =
subsections[relInfo.r_symbolnum - 1];
const section_64 &referentSec = sectionHeaders[relInfo.r_symbolnum - 1];
const Section &referentSec = sectionHeaders[relInfo.r_symbolnum - 1];
uint64_t referentOffset;
if (relInfo.r_pcrel) {
// The implicit addend for pcrel section relocations is the pcrel offset
@ -330,9 +334,10 @@ void ObjFile::parseRelocations(const section_64 &sec,
}
}
static macho::Symbol *createDefined(const structs::nlist_64 &sym,
StringRef name, InputSection *isec,
uint64_t value, uint64_t size) {
template <class NList>
static macho::Symbol *createDefined(const NList &sym, StringRef name,
InputSection *isec, uint64_t value,
uint64_t size) {
// Symbol scope is determined by sym.n_type & (N_EXT | N_PEXT):
// N_EXT: Global symbols
// N_EXT | N_PEXT: Linkage unit (think: dylib) scoped
@ -378,8 +383,9 @@ static bool hasCompatVersion(const InputFile *input,
// Absolute symbols are defined symbols that do not have an associated
// InputSection. They cannot be weak.
static macho::Symbol *createAbsolute(const structs::nlist_64 &sym,
InputFile *file, StringRef name) {
template <class NList>
static macho::Symbol *createAbsolute(const NList &sym, InputFile *file,
StringRef name) {
if (sym.n_type & (N_EXT | N_PEXT)) {
assert((sym.n_type & N_EXT) && "invalid input");
return symtab->addDefined(name, file, nullptr, sym.n_value, /*size=*/0,
@ -390,7 +396,8 @@ static macho::Symbol *createAbsolute(const structs::nlist_64 &sym,
/*isExternal=*/false, /*isPrivateExtern=*/false);
}
macho::Symbol *ObjFile::parseNonSectionSymbol(const structs::nlist_64 &sym,
template <class NList>
macho::Symbol *ObjFile::parseNonSectionSymbol(const NList &sym,
StringRef name) {
uint8_t type = sym.n_type & N_TYPE;
switch (type) {
@ -414,14 +421,18 @@ macho::Symbol *ObjFile::parseNonSectionSymbol(const structs::nlist_64 &sym,
}
}
void ObjFile::parseSymbols(ArrayRef<structs::nlist_64> nList,
template <class LP>
void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
ArrayRef<typename LP::nlist> nList,
const char *strtab, bool subsectionsViaSymbols) {
using Section = typename LP::section;
using NList = typename LP::nlist;
// Precompute the boundaries of symbols within a section.
// If subsectionsViaSymbols is True then the corresponding subsections will be
// created, otherwise these boundaries are used for the calculation of symbols
// sizes only.
for (const structs::nlist_64 &sym : nList) {
for (const NList &sym : nList) {
if ((sym.n_type & N_TYPE) == N_SECT && !(sym.n_desc & N_ALT_ENTRY) &&
!subsections[sym.n_sect - 1].empty()) {
SubsectionMapping &subsectionMapping = subsections[sym.n_sect - 1];
@ -462,7 +473,7 @@ void ObjFile::parseSymbols(ArrayRef<structs::nlist_64> nList,
symbols.resize(nList.size());
for (size_t i = 0, n = nList.size(); i < n; ++i) {
const structs::nlist_64 &sym = nList[i];
const NList &sym = nList[i];
StringRef name = strtab + sym.n_strx;
if ((sym.n_type & N_TYPE) != N_SECT) {
@ -470,7 +481,7 @@ void ObjFile::parseSymbols(ArrayRef<structs::nlist_64> nList,
continue;
}
const section_64 &sec = sectionHeaders[sym.n_sect - 1];
const Section &sec = sectionHeaders[sym.n_sect - 1];
SubsectionMapping &subsecMap = subsections[sym.n_sect - 1];
// parseSections() may have chosen not to parse this section.
@ -521,9 +532,20 @@ OpaqueFile::OpaqueFile(MemoryBufferRef mb, StringRef segName,
// Constructs an ObjFile over `mb`, stores the modification time and archive
// name, then parses the buffer with the structure layout selected by the
// target's word size.
ObjFile::ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName)
    : InputFile(ObjKind, mb), modTime(modTime) {
  this->archiveName = std::string(archiveName);
  // An 8-byte word selects the LP64 layout; any other word size uses ILP32.
  const bool is64Bit = target->wordSize == 8;
  if (!is64Bit) {
    parse<ILP32>();
    return;
  }
  parse<LP64>();
}
template <class LP> void ObjFile::parse() {
using Header = typename LP::mach_header;
using SegmentCommand = typename LP::segment_command;
using Section = typename LP::section;
using NList = typename LP::nlist;
auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
auto *hdr = reinterpret_cast<const mach_header_64 *>(mb.getBufferStart());
auto *hdr = reinterpret_cast<const Header *>(mb.getBufferStart());
Architecture arch = getArchitectureFromCpuType(hdr->cputype, hdr->cpusubtype);
if (arch != config->target.Arch) {
@ -546,28 +568,29 @@ ObjFile::ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName)
parseLCLinkerOption(this, c->count, data);
}
if (const load_command *cmd = findCommand(hdr, LC_SEGMENT_64)) {
auto *c = reinterpret_cast<const segment_command_64 *>(cmd);
sectionHeaders = ArrayRef<section_64>{
reinterpret_cast<const section_64 *>(c + 1), c->nsects};
ArrayRef<Section> sectionHeaders;
if (const load_command *cmd = findCommand(hdr, LP::segmentLCType)) {
auto *c = reinterpret_cast<const SegmentCommand *>(cmd);
sectionHeaders =
ArrayRef<Section>{reinterpret_cast<const Section *>(c + 1), c->nsects};
parseSections(sectionHeaders);
}
// TODO: Error on missing LC_SYMTAB?
if (const load_command *cmd = findCommand(hdr, LC_SYMTAB)) {
auto *c = reinterpret_cast<const symtab_command *>(cmd);
ArrayRef<structs::nlist_64> nList(
reinterpret_cast<const structs::nlist_64 *>(buf + c->symoff), c->nsyms);
ArrayRef<NList> nList(reinterpret_cast<const NList *>(buf + c->symoff),
c->nsyms);
const char *strtab = reinterpret_cast<const char *>(buf) + c->stroff;
bool subsectionsViaSymbols = hdr->flags & MH_SUBSECTIONS_VIA_SYMBOLS;
parseSymbols(nList, strtab, subsectionsViaSymbols);
parseSymbols<LP>(sectionHeaders, nList, strtab, subsectionsViaSymbols);
}
// The relocations may refer to the symbols, so we parse them after we have
// parsed all the symbols.
for (size_t i = 0, n = subsections.size(); i < n; ++i)
if (!subsections[i].empty())
parseRelocations(sectionHeaders[i], subsections[i]);
parseRelocations(sectionHeaders, sectionHeaders[i], subsections[i]);
parseDebugInfo();
}
@ -678,8 +701,16 @@ DylibFile::DylibFile(MemoryBufferRef mb, DylibFile *umbrella,
if (umbrella == nullptr)
umbrella = this;
if (target->wordSize == 8)
parse<LP64>(umbrella);
else
parse<ILP32>(umbrella);
}
template <class LP> void DylibFile::parse(DylibFile *umbrella) {
using Header = typename LP::mach_header;
auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
auto *hdr = reinterpret_cast<const mach_header_64 *>(mb.getBufferStart());
auto *hdr = reinterpret_cast<const Header *>(mb.getBufferStart());
// Initialize dylibName.
if (const load_command *cmd = findCommand(hdr, LC_ID_DYLIB)) {
@ -716,8 +747,7 @@ DylibFile::DylibFile(MemoryBufferRef mb, DylibFile *umbrella,
return;
}
const uint8_t *p =
reinterpret_cast<const uint8_t *>(hdr) + sizeof(mach_header_64);
const uint8_t *p = reinterpret_cast<const uint8_t *>(hdr) + sizeof(Header);
for (uint32_t i = 0, n = hdr->ncmds; i < n; ++i) {
auto *cmd = reinterpret_cast<const load_command *>(p);
p += cmd->cmdsize;
@ -888,3 +918,6 @@ BitcodeFile::BitcodeFile(MemoryBufferRef mbref)
for (const lto::InputFile::Symbol &objSym : obj->symbols())
symbols.push_back(createBitcodeSymbol(objSym, *this));
}
// Explicit instantiations of the LP64 flavours of the two parse() templates.
// NOTE(review): only LP64 is listed here although the constructors also call
// parse<ILP32>() -- confirm whether ILP32 instantiations are wanted as well.
template void ObjFile::parse<LP64>();
template void DylibFile::parse<LP64>(DylibFile *umbrella);

View File

@ -101,15 +101,20 @@ public:
llvm::DWARFUnit *compileUnit = nullptr;
const uint32_t modTime;
ArrayRef<llvm::MachO::section_64> sectionHeaders;
std::vector<InputSection *> debugSections;
private:
void parseSections(ArrayRef<llvm::MachO::section_64>);
void parseSymbols(ArrayRef<lld::structs::nlist_64> nList, const char *strtab,
template <class LP> void parse();
template <class Section> void parseSections(ArrayRef<Section>);
template <class LP>
void parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
ArrayRef<typename LP::nlist> nList, const char *strtab,
bool subsectionsViaSymbols);
Symbol *parseNonSectionSymbol(const structs::nlist_64 &sym, StringRef name);
void parseRelocations(const llvm::MachO::section_64 &, SubsectionMapping &);
template <class NList>
Symbol *parseNonSectionSymbol(const NList &sym, StringRef name);
template <class Section>
void parseRelocations(ArrayRef<Section> sectionHeaders, const Section &,
SubsectionMapping &);
void parseDebugInfo();
};
@ -130,7 +135,7 @@ public:
// the root dylib to ensure symbols in the child library are correctly bound
// to the root. On the other hand, if a dylib is being directly loaded
// (through an -lfoo flag), then `umbrella` should be a nullptr.
explicit DylibFile(MemoryBufferRef mb, DylibFile *umbrella = nullptr,
explicit DylibFile(MemoryBufferRef mb, DylibFile *umbrella,
bool isBundleLoader = false);
explicit DylibFile(const llvm::MachO::InterfaceFile &interface,
@ -152,6 +157,9 @@ public:
// implemented in the bundle. When used like this, it is very similar
// to a Dylib, so we re-used the same class to represent it.
bool isBundleLoader;
private:
template <class LP> void parse(DylibFile *umbrella = nullptr);
};
// .a file
@ -180,11 +188,9 @@ extern llvm::SetVector<InputFile *> inputFiles;
llvm::Optional<MemoryBufferRef> readFile(StringRef path);
template <class CommandType = llvm::MachO::load_command>
const CommandType *findCommand(const llvm::MachO::mach_header_64 *hdr,
uint32_t type) {
const uint8_t *p = reinterpret_cast<const uint8_t *>(hdr) +
sizeof(llvm::MachO::mach_header_64);
template <class CommandType = llvm::MachO::load_command, class Header>
const CommandType *findCommand(const Header *hdr, uint32_t type) {
const uint8_t *p = reinterpret_cast<const uint8_t *>(hdr) + sizeof(Header);
for (uint32_t i = 0, n = hdr->ncmds; i < n; ++i) {
auto *cmd = reinterpret_cast<const CommandType *>(p);

View File

@ -41,10 +41,10 @@ static uint64_t resolveSymbolVA(uint8_t *loc, const Symbol &sym, uint8_t type) {
return in.stubs->addr + sym.stubsIndex * target->stubSize;
} else if (relocAttrs.hasAttr(RelocAttrBits::GOT)) {
if (sym.isInGot())
return in.got->addr + sym.gotIndex * WordSize;
return in.got->addr + sym.gotIndex * target->wordSize;
} else if (relocAttrs.hasAttr(RelocAttrBits::TLV)) {
if (sym.isInGot())
return in.tlvPointers->addr + sym.gotIndex * WordSize;
return in.tlvPointers->addr + sym.gotIndex * target->wordSize;
assert(isa<Defined>(&sym));
}
return sym.getVA();

View File

@ -11,6 +11,7 @@
#include "Driver.h"
#include "InputFiles.h"
#include "Symbols.h"
#include "Target.h"
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Strings.h"

View File

@ -29,6 +29,14 @@ struct nlist_64 {
llvm::support::ulittle64_t n_value;
};
// Symbol table entry with a 32-bit n_value; this is the type ILP32 exposes as
// its `nlist`. All multi-byte fields are fixed-width little-endian integers.
struct nlist {
// Offset added to the string table base to find the symbol's name.
llvm::support::ulittle32_t n_strx;
// Type and scope bits; the parser masks this with N_TYPE, N_EXT and N_PEXT.
uint8_t n_type;
// Section number; the parser indexes its section tables with n_sect - 1.
uint8_t n_sect;
// Descriptor flags; the parser tests N_ALT_ENTRY on this field.
llvm::support::ulittle16_t n_desc;
// Symbol value. 32 bits wide here, whereas nlist_64 stores a 64-bit n_value.
llvm::support::ulittle32_t n_value;
};
} // namespace structs
} // namespace lld

View File

@ -9,20 +9,25 @@
#include "ObjC.h"
#include "InputFiles.h"
#include "OutputSegment.h"
#include "Target.h"
#include "llvm/BinaryFormat/MachO.h"
using namespace llvm;
using namespace llvm::MachO;
using namespace lld;
using namespace lld::macho;
bool macho::hasObjCSection(MemoryBufferRef mb) {
auto *hdr = reinterpret_cast<const mach_header_64 *>(mb.getBufferStart());
if (const load_command *cmd = findCommand(hdr, LC_SEGMENT_64)) {
auto *c = reinterpret_cast<const segment_command_64 *>(cmd);
auto sectionHeaders = ArrayRef<section_64>{
reinterpret_cast<const section_64 *>(c + 1), c->nsects};
for (const section_64 &sec : sectionHeaders) {
template <class LP> static bool hasObjCSection(MemoryBufferRef mb) {
using Section = typename LP::section;
auto *hdr =
reinterpret_cast<const typename LP::mach_header *>(mb.getBufferStart());
if (const load_command *cmd = findCommand(hdr, LP::segmentLCType)) {
auto *c = reinterpret_cast<const typename LP::segment_command *>(cmd);
auto sectionHeaders =
ArrayRef<Section>{reinterpret_cast<const Section *>(c + 1), c->nsects};
for (const Section &sec : sectionHeaders) {
StringRef sectname(sec.sectname,
strnlen(sec.sectname, sizeof(sec.sectname)));
StringRef segname(sec.segname, strnlen(sec.segname, sizeof(sec.segname)));
@ -34,3 +39,10 @@ bool macho::hasObjCSection(MemoryBufferRef mb) {
}
return false;
}
// Public entry point: forwards `mb` to the templated hasObjCSection<LP>()
// implementation, choosing LP64 when the target word is 8 bytes and ILP32
// otherwise.
bool macho::hasObjCSection(MemoryBufferRef mb) {
  return target->wordSize == 8 ? ::hasObjCSection<LP64>(mb)
                               : ::hasObjCSection<ILP32>(mb);
}

View File

@ -65,8 +65,21 @@ void MachHeaderSection::addLoadCommand(LoadCommand *lc) {
sizeOfCmds += lc->getSize();
}
uint64_t MachHeaderSection::getSize() const {
return sizeof(mach_header_64) + sizeOfCmds + config->headerPad;
// MachHeaderSection itself stays non-templated; this subclass carries the
// layout parameter LP, so the layout-specific getSize()/writeTo() are reached
// through virtual dispatch (type erasure).
template <class LP> class MachHeaderSectionImpl : public MachHeaderSection {
public:
  MachHeaderSectionImpl() = default;

  // Layout-dependent overrides, defined out of line.
  void writeTo(uint8_t *buf) const override;
  uint64_t getSize() const override;
};
// Creates (via make<>) the MachHeaderSection implementation for layout LP and
// returns it as the non-templated base type.
template <class LP> MachHeaderSection *macho::makeMachHeaderSection() {
  using Impl = MachHeaderSectionImpl<LP>;
  return make<Impl>();
}
// Size of the header section: the sum of sizeof(LP::mach_header), the
// accumulated load command sizes and the configured header padding.
template <class LP> uint64_t MachHeaderSectionImpl<LP>::getSize() const {
  constexpr size_t headerSize = sizeof(typename LP::mach_header);
  return headerSize + sizeOfCmds + config->headerPad;
}
static uint32_t cpuSubtype() {
@ -81,9 +94,10 @@ static uint32_t cpuSubtype() {
return subtype;
}
void MachHeaderSection::writeTo(uint8_t *buf) const {
auto *hdr = reinterpret_cast<mach_header_64 *>(buf);
hdr->magic = MH_MAGIC_64;
template <class LP>
void MachHeaderSectionImpl<LP>::writeTo(uint8_t *buf) const {
auto *hdr = reinterpret_cast<typename LP::mach_header *>(buf);
hdr->magic = LP::magic;
hdr->cputype = target->cpuType;
hdr->cpusubtype = cpuSubtype();
hdr->filetype = config->outputType;
@ -177,7 +191,7 @@ static void encodeRebase(const OutputSection *osec, uint64_t outSecOff,
}
++lastRebase.consecutiveCount;
// DO_REBASE causes dyld to both perform the binding and increment the offset
lastRebase.offset += WordSize;
lastRebase.offset += target->wordSize;
}
void RebaseSection::finalizeContents() {
@ -208,7 +222,7 @@ void RebaseSection::writeTo(uint8_t *buf) const {
NonLazyPointerSectionBase::NonLazyPointerSectionBase(const char *segname,
const char *name)
: SyntheticSection(segname, name) {
align = WordSize;
align = target->wordSize;
flags = S_NON_LAZY_SYMBOL_POINTERS;
}
@ -235,14 +249,14 @@ void NonLazyPointerSectionBase::addEntry(Symbol *sym) {
assert(!sym->isInGot());
sym->gotIndex = entries.size() - 1;
addNonLazyBindingEntries(sym, isec, sym->gotIndex * WordSize);
addNonLazyBindingEntries(sym, isec, sym->gotIndex * target->wordSize);
}
}
void NonLazyPointerSectionBase::writeTo(uint8_t *buf) const {
for (size_t i = 0, n = entries.size(); i < n; ++i)
if (auto *defined = dyn_cast<Defined>(entries[i]))
write64le(&buf[i * WordSize], defined->getVA());
write64le(&buf[i * target->wordSize], defined->getVA());
}
BindingSection::BindingSection()
@ -295,7 +309,7 @@ static void encodeBinding(const Symbol *sym, const OutputSection *osec,
<< static_cast<uint8_t>(BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER)
<< static_cast<uint8_t>(BIND_OPCODE_DO_BIND);
// DO_BIND causes dyld to both perform the binding and increment the offset
lastBinding.offset += WordSize;
lastBinding.offset += target->wordSize;
}
// Non-weak bindings need to have their dylib ordinal encoded as well.
@ -463,20 +477,20 @@ void StubHelperSection::setup() {
ImageLoaderCacheSection::ImageLoaderCacheSection() {
segname = segment_names::data;
name = "__data";
uint8_t *arr = bAlloc.Allocate<uint8_t>(WordSize);
memset(arr, 0, WordSize);
data = {arr, WordSize};
align = WordSize;
uint8_t *arr = bAlloc.Allocate<uint8_t>(target->wordSize);
memset(arr, 0, target->wordSize);
data = {arr, target->wordSize};
align = target->wordSize;
}
LazyPointerSection::LazyPointerSection()
: SyntheticSection(segment_names::data, "__la_symbol_ptr") {
align = WordSize;
align = target->wordSize;
flags = S_LAZY_SYMBOL_POINTERS;
}
uint64_t LazyPointerSection::getSize() const {
return in.stubs->getEntries().size() * WordSize;
return in.stubs->getEntries().size() * target->wordSize;
}
bool LazyPointerSection::isNeeded() const {
@ -496,7 +510,7 @@ void LazyPointerSection::writeTo(uint8_t *buf) const {
} else {
write64le(buf + off, sym->getVA());
}
off += WordSize;
off += target->wordSize;
}
}
@ -517,7 +531,8 @@ void LazyBindingSection::writeTo(uint8_t *buf) const {
void LazyBindingSection::addEntry(DylibSymbol *dysym) {
if (entries.insert(dysym)) {
dysym->stubsHelperIndex = entries.size() - 1;
in.rebase->addEntry(in.lazyPointers->isec, dysym->stubsIndex * WordSize);
in.rebase->addEntry(in.lazyPointers->isec,
dysym->stubsIndex * target->wordSize);
}
}
@ -533,7 +548,7 @@ uint32_t LazyBindingSection::encode(const DylibSymbol &sym) {
os << static_cast<uint8_t>(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB |
dataSeg->index);
uint64_t offset = in.lazyPointers->addr - dataSeg->firstSection()->addr +
sym.stubsIndex * WordSize;
sym.stubsIndex * target->wordSize;
encodeULEB128(offset, os);
encodeDylibOrdinal(ordinalForDylibSymbol(sym), os);
@ -621,10 +636,6 @@ SymtabSection::SymtabSection(StringTableSection &stringTableSection)
: LinkEditSection(segment_names::linkEdit, section_names::symbolTable),
stringTableSection(stringTableSection) {}
uint64_t SymtabSection::getRawSize() const {
return getNumSymbols() * sizeof(structs::nlist_64);
}
void SymtabSection::emitBeginSourceStab(DWARFUnit *compileUnit) {
StabsEntry stab(N_SO);
SmallString<261> dir(compileUnit->getCompilationDir());
@ -781,8 +792,21 @@ uint32_t SymtabSection::getNumSymbols() const {
undefinedSymbols.size();
}
void SymtabSection::writeTo(uint8_t *buf) const {
auto *nList = reinterpret_cast<structs::nlist_64 *>(buf);
// Concrete SymtabSection for layout LP. Keeping the template parameter on this
// subclass (type erasure) lets the rest of the linker work with the plain,
// non-templated SymtabSection.
template <class LP> class SymtabSectionImpl : public SymtabSection {
public:
  // explicit: a StringTableSection must never convert implicitly into a
  // symbol table section.
  explicit SymtabSectionImpl(StringTableSection &stringTableSection)
      : SymtabSection(stringTableSection) {}
  // Byte size of the symbol table; depends on sizeof(LP::nlist).
  uint64_t getRawSize() const override;
  // Writes the symbol table into buf as LP::nlist records.
  void writeTo(uint8_t *buf) const override;
};
// Raw size of the symbol table: one LP::nlist record per symbol.
template <class LP> uint64_t SymtabSectionImpl<LP>::getRawSize() const {
  constexpr size_t entrySize = sizeof(typename LP::nlist);
  return getNumSymbols() * entrySize;
}
template <class LP> void SymtabSectionImpl<LP>::writeTo(uint8_t *buf) const {
auto *nList = reinterpret_cast<typename LP::nlist *>(buf);
// Emit the stabs entries before the "real" symbols. We cannot emit them
// after as that would render Symbol::symtabIndex inaccurate.
for (const StabsEntry &entry : stabs) {
@ -845,6 +869,12 @@ void SymtabSection::writeTo(uint8_t *buf) const {
}
}
// Creates (via make<>) the SymtabSection implementation for layout LP, bound
// to the given string table, and returns it as the non-templated base type.
template <class LP>
SymtabSection *
macho::makeSymtabSection(StringTableSection &stringTableSection) {
  using Impl = SymtabSectionImpl<LP>;
  return make<Impl>(stringTableSection);
}
IndirectSymtabSection::IndirectSymtabSection()
: LinkEditSection(segment_names::linkEdit,
section_names::indirectSymbolTable) {}
@ -1050,3 +1080,8 @@ void macho::createSyntheticSymbols() {
// so that's what's implemented here.
addHeaderSymbol("___dso_handle");
}
// makeMachHeaderSection and makeSymtabSection are templates defined in this
// file; instantiate them explicitly for both supported layouts.
// NOTE(review): presumably required because their callers live in other
// translation units -- confirm.
template MachHeaderSection *macho::makeMachHeaderSection<LP64>();
template MachHeaderSection *macho::makeMachHeaderSection<ILP32>();
template SymtabSection *macho::makeSymtabSection<LP64>(StringTableSection &);
template SymtabSection *macho::makeSymtabSection<ILP32>(StringTableSection &);

View File

@ -53,7 +53,7 @@ class LinkEditSection : public SyntheticSection {
public:
LinkEditSection(const char *segname, const char *name)
: SyntheticSection(segname, name) {
align = WordSize; // mimic ld64
align = target->wordSize;
}
// Sections in __LINKEDIT are special: their offsets are recorded in the
@ -77,24 +77,24 @@ public:
// The header of the Mach-O file, which must have a file offset of zero.
class MachHeaderSection : public SyntheticSection {
public:
MachHeaderSection();
void addLoadCommand(LoadCommand *);
bool isHidden() const override { return true; }
uint64_t getSize() const override;
void writeTo(uint8_t *buf) const override;
private:
protected:
MachHeaderSection();
std::vector<LoadCommand *> loadCommands;
uint32_t sizeOfCmds = 0;
};
template <class LP> MachHeaderSection *makeMachHeaderSection();
// A hidden section that exists solely for the purpose of creating the
// __PAGEZERO segment, which is used to catch null pointer dereferences.
class PageZeroSection : public SyntheticSection {
public:
PageZeroSection();
bool isHidden() const override { return true; }
uint64_t getSize() const override { return PageZeroSize; }
uint64_t getSize() const override { return target->pageZeroSize; }
uint64_t getFileSize() const override { return 0; }
void writeTo(uint8_t *buf) const override {}
};
@ -111,7 +111,9 @@ public:
bool isNeeded() const override { return !entries.empty(); }
uint64_t getSize() const override { return entries.size() * WordSize; }
// Section size: one target-word-sized slot per entry.
uint64_t getSize() const override {
  const auto slotCount = entries.size();
  return slotCount * target->wordSize;
}
void writeTo(uint8_t *buf) const override;
@ -309,7 +311,7 @@ public:
class ImageLoaderCacheSection : public InputSection {
public:
ImageLoaderCacheSection();
uint64_t getSize() const override { return WordSize; }
uint64_t getSize() const override { return target->wordSize; }
};
// Note that this section may also be targeted by non-lazy bindings. In
@ -406,7 +408,6 @@ struct StabsEntry {
// range (start index and total number) of those symbols in the symbol table.
class SymtabSection : public LinkEditSection {
public:
SymtabSection(StringTableSection &);
void finalizeContents();
uint32_t getNumSymbols() const;
uint32_t getNumLocalSymbols() const {
@ -414,8 +415,6 @@ public:
}
uint32_t getNumExternalSymbols() const { return externalSymbols.size(); }
uint32_t getNumUndefinedSymbols() const { return undefinedSymbols.size(); }
uint64_t getRawSize() const override;
void writeTo(uint8_t *buf) const override;
private:
void emitBeginSourceStab(llvm::DWARFUnit *compileUnit);
@ -424,6 +423,9 @@ private:
void emitEndFunStab(Defined *);
void emitStabs();
protected:
SymtabSection(StringTableSection &);
StringTableSection &stringTableSection;
// STABS symbols are always local symbols, but we represent them with special
// entries because they may use fields like n_sect and n_desc differently.
@ -433,6 +435,8 @@ private:
std::vector<SymtabEntry> undefinedSymbols;
};
template <class LP> SymtabSection *makeSymtabSection(StringTableSection &);
// The indirect symbol table is a list of 32-bit integers that serve as indices
// into the (actual) symbol table. The indirect symbol table is a
// concatenation of several sub-arrays of indices, each sub-array belonging to

View File

@ -9,6 +9,7 @@
#ifndef LLD_MACHO_TARGET_H
#define LLD_MACHO_TARGET_H
#include "MachOStructs.h"
#include "Relocations.h"
#include "llvm/ADT/BitmaskEnum.h"
@ -26,21 +27,20 @@ class Symbol;
class DylibSymbol;
class InputSection;
enum : uint64_t {
// We are currently only supporting 64-bit targets since macOS and iOS are
// deprecating 32-bit apps.
WordSize = 8,
PageZeroSize = 1ull << 32, // XXX should be 4096 for 32-bit targets
MaxAlignmentPowerOf2 = 32,
};
class TargetInfo {
public:
template <class LP> TargetInfo(LP) {
// Having these values available in TargetInfo allows us to access them
// without having to resort to templates.
pageZeroSize = LP::pageZeroSize;
wordSize = LP::wordSize;
}
virtual ~TargetInfo() = default;
// Validate the relocation structure and get its addend.
virtual int64_t
getEmbeddedAddend(llvm::MemoryBufferRef, const llvm::MachO::section_64 &,
getEmbeddedAddend(llvm::MemoryBufferRef, uint64_t offset,
const llvm::MachO::relocation_info) const = 0;
virtual void relocateOne(uint8_t *loc, const Reloc &, uint64_t va,
uint64_t relocVA) const = 0;
@ -70,14 +70,42 @@ public:
uint32_t cpuType;
uint32_t cpuSubtype;
size_t pageZeroSize;
size_t stubSize;
size_t stubHelperHeaderSize;
size_t stubHelperEntrySize;
size_t wordSize;
};
TargetInfo *createX86_64TargetInfo();
TargetInfo *createARM64TargetInfo();
// Layout traits for 64-bit Mach-O: the on-disk structure types plus the
// constants that differ between the 64-bit and 32-bit formats. Used as the LP
// template argument throughout the linker.
struct LP64 {
  using mach_header = llvm::MachO::mach_header_64;
  using nlist = structs::nlist_64;
  using segment_command = llvm::MachO::segment_command_64;
  using section = llvm::MachO::section_64;

  static constexpr uint32_t magic = llvm::MachO::MH_MAGIC_64;
  static constexpr uint32_t segmentLCType = llvm::MachO::LC_SEGMENT_64;

  // 4 GiB. Deliberately uint64_t rather than size_t: on a host whose size_t is
  // 32 bits wide, 1ull << 32 would silently wrap to 0.
  static constexpr uint64_t pageZeroSize = 1ull << 32;
  static constexpr size_t wordSize = 8;
};
// Layout traits for 32-bit Mach-O; the counterpart of LP64 with the 32-bit
// on-disk structure types and constants.
struct ILP32 {
  // On-disk structure types.
  using section = llvm::MachO::section;
  using segment_command = llvm::MachO::segment_command;
  using nlist = structs::nlist;
  using mach_header = llvm::MachO::mach_header;

  // Format constants.
  static constexpr size_t wordSize = 4;
  static constexpr size_t pageZeroSize = 1ull << 12;
  static constexpr uint32_t segmentLCType = llvm::MachO::LC_SEGMENT;
  static constexpr uint32_t magic = llvm::MachO::MH_MAGIC;
};
extern TargetInfo *target;
} // namespace macho

View File

@ -403,7 +403,7 @@ void UnwindInfoSection::writeTo(uint8_t *buf) const {
// Personalities
for (const uint32_t &personality : personalities)
*i32p++ = in.got->addr + (personality - 1) * WordSize;
*i32p++ = in.got->addr + (personality - 1) * target->wordSize;
// Level-1 index
uint32_t lsdaOffset =

View File

@ -49,8 +49,8 @@ public:
void scanRelocations();
void scanSymbols();
void createOutputSections();
void createLoadCommands();
template <class LP> void createOutputSections();
template <class LP> void createLoadCommands();
void finalizeAddressses();
void finalizeLinkEditSegment();
void assignAddresses(OutputSegment *);
@ -61,7 +61,7 @@ public:
void writeCodeSignature();
void writeOutputFile();
void run();
template <class LP> void run();
std::unique_ptr<FileOutputBuffer> &buffer;
uint64_t addr = 0;
@ -171,20 +171,23 @@ public:
IndirectSymtabSection *indirectSymtabSection;
};
class LCSegment : public LoadCommand {
template <class LP> class LCSegment : public LoadCommand {
public:
LCSegment(StringRef name, OutputSegment *seg) : name(name), seg(seg) {}
uint32_t getSize() const override {
return sizeof(segment_command_64) +
seg->numNonHiddenSections() * sizeof(section_64);
uint32_t getSize() const {
return sizeof(typename LP::segment_command) +
seg->numNonHiddenSections() * sizeof(typename LP::section);
}
void writeTo(uint8_t *buf) const override {
auto *c = reinterpret_cast<segment_command_64 *>(buf);
buf += sizeof(segment_command_64);
void writeTo(uint8_t *buf) const {
using SegmentCommand = typename LP::segment_command;
using Section = typename LP::section;
c->cmd = LC_SEGMENT_64;
auto *c = reinterpret_cast<SegmentCommand *>(buf);
buf += sizeof(SegmentCommand);
c->cmd = LP::segmentLCType;
c->cmdsize = getSize();
memcpy(c->segname, name.data(), name.size());
c->fileoff = seg->fileOff;
@ -202,15 +205,15 @@ public:
for (const OutputSection *osec : seg->getSections()) {
if (!isZeroFill(osec->flags)) {
assert(osec->fileOff >= seg->fileOff);
c->filesize = std::max(
c->filesize = std::max<uint64_t>(
c->filesize, osec->fileOff + osec->getFileSize() - seg->fileOff);
}
if (osec->isHidden())
continue;
auto *sectHdr = reinterpret_cast<section_64 *>(buf);
buf += sizeof(section_64);
auto *sectHdr = reinterpret_cast<Section *>(buf);
buf += sizeof(Section);
memcpy(sectHdr->sectname, osec->name.data(), osec->name.size());
memcpy(sectHdr->segname, name.data(), name.size());
@ -342,7 +345,7 @@ public:
LCRPath(StringRef path) : path(path) {}
uint32_t getSize() const override {
return alignTo(sizeof(rpath_command) + path.size() + 1, WordSize);
return alignTo(sizeof(rpath_command) + path.size() + 1, target->wordSize);
}
void writeTo(uint8_t *buf) const override {
@ -459,9 +462,9 @@ static void prepareBranchTarget(Symbol *sym) {
if (in.stubs->addEntry(dysym)) {
if (sym->isWeakDef()) {
in.binding->addEntry(dysym, in.lazyPointers->isec,
sym->stubsIndex * WordSize);
sym->stubsIndex * target->wordSize);
in.weakBinding->addEntry(sym, in.lazyPointers->isec,
sym->stubsIndex * WordSize);
sym->stubsIndex * target->wordSize);
} else {
in.lazyBinding->addEntry(dysym);
}
@ -469,9 +472,10 @@ static void prepareBranchTarget(Symbol *sym) {
} else if (auto *defined = dyn_cast<Defined>(sym)) {
if (defined->isExternalWeakDef()) {
if (in.stubs->addEntry(sym)) {
in.rebase->addEntry(in.lazyPointers->isec, sym->stubsIndex * WordSize);
in.rebase->addEntry(in.lazyPointers->isec,
sym->stubsIndex * target->wordSize);
in.weakBinding->addEntry(sym, in.lazyPointers->isec,
sym->stubsIndex * WordSize);
sym->stubsIndex * target->wordSize);
}
}
}
@ -555,10 +559,10 @@ void Writer::scanSymbols() {
}
}
void Writer::createLoadCommands() {
template <class LP> void Writer::createLoadCommands() {
uint8_t segIndex = 0;
for (OutputSegment *seg : outputSegments) {
in.header->addLoadCommand(make<LCSegment>(seg->name, seg));
in.header->addLoadCommand(make<LCSegment<LP>>(seg->name, seg));
seg->index = segIndex++;
}
@ -788,12 +792,12 @@ static NamePair maybeRenameSection(NamePair key) {
return key;
}
void Writer::createOutputSections() {
template <class LP> void Writer::createOutputSections() {
TimeTraceScope timeScope("Create output sections");
// First, create hidden sections
stringTableSection = make<StringTableSection>();
unwindInfoSection = make<UnwindInfoSection>(); // TODO(gkm): only when no -r
symtabSection = make<SymtabSection>(*stringTableSection);
symtabSection = makeSymtabSection<LP>(*stringTableSection);
indirectSymtabSection = make<IndirectSymtabSection>();
if (config->adhocCodesign)
codeSignatureSection = make<CodeSignatureSection>();
@ -958,26 +962,26 @@ void Writer::writeOutputFile() {
error("failed to write to the output file: " + toString(std::move(e)));
}
void Writer::run() {
template <class LP> void Writer::run() {
prepareBranchTarget(config->entry);
scanRelocations();
if (in.stubHelper->isNeeded())
in.stubHelper->setup();
scanSymbols();
createOutputSections();
createOutputSections<LP>();
// No more sections nor segments are created beyond this point.
sortSegmentsAndSections();
createLoadCommands();
createLoadCommands<LP>();
finalizeAddressses();
finalizeLinkEditSegment();
writeMapFile();
writeOutputFile();
}
void macho::writeResult() { Writer().run(); }
template <class LP> void macho::writeResult() { Writer().run<LP>(); }
void macho::createSyntheticSections() {
in.header = make<MachHeaderSection>();
template <class LP> void macho::createSyntheticSections() {
in.header = makeMachHeaderSection<LP>();
in.rebase = make<RebaseSection>();
in.binding = make<BindingSection>();
in.weakBinding = make<WeakBindingSection>();
@ -992,3 +996,8 @@ void macho::createSyntheticSections() {
}
OutputSection *macho::firstTLVDataSection = nullptr;
// Explicit instantiations for both supported layouts (LP64 and ILP32).
// NOTE(review): the template definitions appear in this source file rather
// than in the header, so presumably callers in other translation units depend
// on these instantiations to link -- confirm before removing either one.
template void macho::writeResult<LP64>();
template void macho::writeResult<ILP32>();
template void macho::createSyntheticSections<LP64>();
template void macho::createSyntheticSections<ILP32>();

View File

@ -25,9 +25,9 @@ public:
virtual void writeTo(uint8_t *buf) const = 0;
};
void writeResult();
template <class LP> void writeResult();
void createSyntheticSections();
template <class LP> void createSyntheticSections();
// Add bindings for symbols that need weak or non-lazy bindings.
void addNonLazyBindingEntries(const Symbol *, const InputSection *,