From 811444d7a173e696f975f8d41626f6809439f726 Mon Sep 17 00:00:00 2001 From: Jez Ng Date: Tue, 15 Dec 2020 21:05:06 -0500 Subject: [PATCH] [lld-macho] Add support for weak references Weak references need not necessarily be satisfied at runtime (but they must still be satisfied at link time). So symbol resolution still works as per usual, but we now pass around a flag -- ultimately emitting it in the bind table -- to indicate if a given dylib symbol is a weak reference. ld64's behavior for symbols that have both weak and strong references is a bit bizarre. For non-function symbols, it will emit a weak import. For function symbols (those referenced by BRANCH relocs), it will emit a regular import. I'm not sure what value there is in that behavior, and since emulating it will make our implementation more complex, I've decided to treat regular weakrefs like function symbol ones for now. Fixes PR48511. Reviewed By: #lld-macho, thakis Differential Revision: https://reviews.llvm.org/D93369 --- lld/MachO/Driver.cpp | 5 +- lld/MachO/InputFiles.cpp | 2 +- lld/MachO/SymbolTable.cpp | 27 ++++-- lld/MachO/SymbolTable.h | 2 +- lld/MachO/Symbols.h | 33 ++++++-- lld/MachO/SyntheticSections.cpp | 35 +++++--- lld/test/MachO/symtab.s | 2 + lld/test/MachO/weak-reference.s | 144 ++++++++++++++++++++++++++++++++ 8 files changed, 222 insertions(+), 28 deletions(-) create mode 100644 lld/test/MachO/weak-reference.s diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp index c522a082a306..db89dd60a20b 100644 --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -285,7 +285,7 @@ static InputFile *addFile(StringRef path, bool forceLoadArchive) { } else if (config->forceLoadObjC) { for (const object::Archive::Symbol &sym : file->symbols()) if (sym.getName().startswith(objc::klass)) - symtab->addUndefined(sym.getName()); + symtab->addUndefined(sym.getName(), /*isWeakRef=*/false); // TODO: no need to look for ObjC sections for a given archive member if // we already found that it contains an ObjC symbol. We should also @@ -723,7 +723,8 @@ bool macho::link(llvm::ArrayRef argsArr, bool canExitEarly, symtab = make(); target = createTargetInfo(args); - config->entry = symtab->addUndefined(args.getLastArgValue(OPT_e, "_main")); + config->entry = symtab->addUndefined(args.getLastArgValue(OPT_e, "_main"), + /*isWeakRef=*/false); config->outputFile = args.getLastArgValue(OPT_o, "a.out"); config->installName = args.getLastArgValue(OPT_install_name, config->outputFile); diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp index 3ae3b976afa3..a32e8caf3d29 100644 --- a/lld/MachO/InputFiles.cpp +++ b/lld/MachO/InputFiles.cpp @@ -283,7 +283,7 @@ macho::Symbol *ObjFile::parseNonSectionSymbol(const structs::nlist_64 &sym, switch (type) { case N_UNDF: return sym.n_value == 0 - ? symtab->addUndefined(name) + ? symtab->addUndefined(name, sym.n_desc & N_WEAK_REF) : symtab->addCommon(name, this, sym.n_value, 1 << GET_COMM_ALIGN(sym.n_desc)); case N_ABS: diff --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp index ecf8f94239e1..93a2508951a5 100644 --- a/lld/MachO/SymbolTable.cpp +++ b/lld/MachO/SymbolTable.cpp @@ -62,15 +62,21 @@ Symbol *SymbolTable::addDefined(StringRef name, InputSection *isec, return s; } -Symbol *SymbolTable::addUndefined(StringRef name) { +Symbol *SymbolTable::addUndefined(StringRef name, bool isWeakRef) { Symbol *s; bool wasInserted; std::tie(s, wasInserted) = insert(name); + auto refState = isWeakRef ? RefState::Weak : RefState::Strong; + if (wasInserted) - replaceSymbol(s, name); - else if (LazySymbol *lazy = dyn_cast(s)) + replaceSymbol(s, name, refState); + else if (auto *lazy = dyn_cast(s)) lazy->fetchArchiveMember(); + else if (auto *dynsym = dyn_cast(s)) + dynsym->refState = std::max(dynsym->refState, refState); + else if (auto *undefined = dyn_cast(s)) + undefined->refState = std::max(undefined->refState, refState); return s; } @@ -101,14 +107,21 @@ Symbol *SymbolTable::addDylib(StringRef name, DylibFile *file, bool isWeakDef, bool wasInserted; std::tie(s, wasInserted) = insert(name); - if (!wasInserted && isWeakDef) - if (auto *defined = dyn_cast(s)) - if (!defined->isWeakDef()) + auto refState = RefState::Unreferenced; + if (!wasInserted) { + if (auto *defined = dyn_cast(s)) { + if (isWeakDef && !defined->isWeakDef()) defined->overridesWeakDef = true; + } else if (auto *undefined = dyn_cast(s)) { + refState = undefined->refState; + } else if (auto *dysym = dyn_cast(s)) { + refState = dysym->refState; + } + } if (wasInserted || isa(s) || (isa(s) && !isWeakDef && s->isWeakDef())) - replaceSymbol(s, file, name, isWeakDef, isTlv); + replaceSymbol(s, file, name, isWeakDef, refState, isTlv); return s; } diff --git a/lld/MachO/SymbolTable.h b/lld/MachO/SymbolTable.h index b4c7ec8604ff..fbf36619983c 100644 --- a/lld/MachO/SymbolTable.h +++ b/lld/MachO/SymbolTable.h @@ -35,7 +35,7 @@ public: Symbol *addDefined(StringRef name, InputSection *isec, uint32_t value, bool isWeakDef); - Symbol *addUndefined(StringRef name); + Symbol *addUndefined(StringRef name, bool isWeakRef); Symbol *addCommon(StringRef name, InputFile *, uint64_t size, uint32_t align); diff --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h index 671a1ec200fe..80898e085818 100644 --- a/lld/MachO/Symbols.h +++ b/lld/MachO/Symbols.h @@ -55,7 +55,12 @@ public: llvm_unreachable("attempt to get an offset from a non-defined symbol"); } - virtual bool isWeakDef() const { llvm_unreachable("cannot be weak"); } + virtual bool isWeakDef() const { llvm_unreachable("cannot be weak def"); } + + // Only undefined or dylib symbols can be weak references. A weak reference + // need not be satisfied at runtime, e.g. due to the symbol not being + // available on a given target platform. + virtual bool isWeakRef() const { llvm_unreachable("cannot be a weak ref"); } virtual bool isTlv() const { llvm_unreachable("cannot be TLV"); } @@ -111,11 +116,21 @@ private: const bool external : 1; }; +// Indicates whether & how a dylib symbol is referenced. +enum class RefState : uint8_t { Unreferenced = 0, Weak = 1, Strong = 2 }; + class Undefined : public Symbol { public: - Undefined(StringRefZ name) : Symbol(UndefinedKind, name) {} + Undefined(StringRefZ name, RefState refState) + : Symbol(UndefinedKind, name), refState(refState) { + assert(refState != RefState::Unreferenced); + } + + bool isWeakRef() const override { return refState == RefState::Weak; } static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; } + + RefState refState : 2; }; // On Unix, it is traditionally allowed to write variable definitions without @@ -149,10 +164,14 @@ public: class DylibSymbol : public Symbol { public: - DylibSymbol(DylibFile *file, StringRefZ name, bool isWeakDef, bool isTlv) - : Symbol(DylibKind, name), file(file), weakDef(isWeakDef), tlv(isTlv) {} + DylibSymbol(DylibFile *file, StringRefZ name, bool isWeakDef, + RefState refState, bool isTlv) + : Symbol(DylibKind, name), file(file), refState(refState), + weakDef(isWeakDef), tlv(isTlv) {} bool isWeakDef() const override { return weakDef; } + bool isWeakRef() const override { return refState == RefState::Weak; } + bool isReferenced() const { return refState != RefState::Unreferenced; } bool isTlv() const override { return tlv; } bool hasStubsHelper() const { return stubsHelperIndex != UINT32_MAX; } @@ -162,9 +181,11 @@ public: uint32_t stubsHelperIndex = UINT32_MAX; uint32_t lazyBindOffset = UINT32_MAX; + RefState refState : 2; + private: - const bool weakDef; - const bool tlv; + const bool weakDef : 1; + const bool tlv : 1; }; class LazySymbol : public Symbol { diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp index c2c121005899..433e6aac0259 100644 --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -227,7 +227,8 @@ struct Binding { // lastBinding. static void encodeBinding(const Symbol *sym, const OutputSection *osec, uint64_t outSecOff, int64_t addend, - Binding &lastBinding, raw_svector_ostream &os) { + bool isWeakBinding, Binding &lastBinding, + raw_svector_ostream &os) { using namespace llvm::MachO; OutputSegment *seg = osec->parent; uint64_t offset = osec->getSegmentOffset() + outSecOff; @@ -249,8 +250,11 @@ static void encodeBinding(const Symbol *sym, const OutputSection *osec, lastBinding.addend = addend; } - os << static_cast(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM) - << sym->getName() << '\0' + uint8_t flags = BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM; + if (!isWeakBinding && sym->isWeakRef()) + flags |= BIND_SYMBOL_FLAGS_WEAK_IMPORT; + + os << flags << sym->getName() << '\0' << static_cast(BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER) << static_cast(BIND_OPCODE_DO_BIND); // DO_BIND causes dyld to both perform the binding and increment the offset @@ -308,10 +312,11 @@ void BindingSection::finalizeContents() { encodeDylibOrdinal(b.dysym, lastBinding, os); if (auto *isec = b.target.section.dyn_cast()) { encodeBinding(b.dysym, isec->parent, isec->outSecOff + b.target.offset, - b.addend, lastBinding, os); + b.addend, /*isWeakBinding=*/false, lastBinding, os); } else { auto *osec = b.target.section.get(); - encodeBinding(b.dysym, osec, b.target.offset, b.addend, lastBinding, os); + encodeBinding(b.dysym, osec, b.target.offset, b.addend, + /*isWeakBinding=*/false, lastBinding, os); } } if (!bindings.empty()) @@ -341,10 +346,11 @@ void WeakBindingSection::finalizeContents() { for (const WeakBindingEntry &b : bindings) { if (auto *isec = b.target.section.dyn_cast()) { encodeBinding(b.symbol, isec->parent, isec->outSecOff + b.target.offset, - b.addend, lastBinding, os); + b.addend, /*isWeakBinding=*/true, lastBinding, os); } else { auto *osec = b.target.section.get(); - encodeBinding(b.symbol, osec, b.target.offset, b.addend, lastBinding, os); + encodeBinding(b.symbol, osec, b.target.offset, b.addend, + /*isWeakBinding=*/true, lastBinding, os); } } if (!bindings.empty() || !definitions.empty()) @@ -436,6 +442,7 @@ void StubHelperSection::setup() { "Needed to perform lazy binding."); return; } + stubBinder->refState = RefState::Strong; in.got->addEntry(stubBinder); inputSections.push_back(in.imageLoaderCache); @@ -525,8 +532,11 @@ uint32_t LazyBindingSection::encode(const DylibSymbol &sym) { encodeULEB128(sym.file->ordinal, os); } - os << static_cast(MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM) - << sym.getName() << '\0' + uint8_t flags = MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM; + if (sym.isWeakRef()) + flags |= MachO::BIND_SYMBOL_FLAGS_WEAK_IMPORT; + + os << flags << sym.getName() << '\0' << static_cast(MachO::BIND_OPCODE_DO_BIND) << static_cast(MachO::BIND_OPCODE_DONE); return opstreamOffset; @@ -705,8 +715,9 @@ void SymtabSection::finalizeContents() { if (auto *defined = dyn_cast(sym)) { assert(defined->isExternal()); externalSymbols.push_back({sym, strx}); - } else if (sym->isInGot() || sym->isInStubs()) { - undefinedSymbols.push_back({sym, strx}); + } else if (auto *dysym = dyn_cast(sym)) { + if (dysym->isReferenced()) + undefinedSymbols.push_back({sym, strx}); } } @@ -756,6 +767,8 @@ void SymtabSection::writeTo(uint8_t *buf) const { } else if (auto *dysym = dyn_cast(entry.sym)) { uint16_t n_desc = nList->n_desc; MachO::SET_LIBRARY_ORDINAL(n_desc, dysym->file->ordinal); + nList->n_type = MachO::N_EXT; + n_desc |= dysym->isWeakRef() ? MachO::N_WEAK_REF : 0; nList->n_desc = n_desc; } ++nList; diff --git a/lld/test/MachO/symtab.s b/lld/test/MachO/symtab.s index 2fe550a330f1..222a35798909 100644 --- a/lld/test/MachO/symtab.s +++ b/lld/test/MachO/symtab.s @@ -59,6 +59,7 @@ # CHECK-NEXT: } # CHECK-NEXT: Symbol { # CHECK-NEXT: Name: dyld_stub_binder (15) +# CHECK-NEXT: Extern # CHECK-NEXT: Type: Undef (0x0) # CHECK-NEXT: Section: (0x0) # CHECK-NEXT: RefType: UndefinedNonLazy (0x0) @@ -68,6 +69,7 @@ # CHECK-NEXT: } # CHECK-NEXT: Symbol { # CHECK-NEXT: Name: _dynamic (80) +# CHECK-NEXT: Extern # CHECK-NEXT: Type: Undef (0x0) # CHECK-NEXT: Section: (0x0) # CHECK-NEXT: RefType: UndefinedNonLazy (0x0) diff --git a/lld/test/MachO/weak-reference.s b/lld/test/MachO/weak-reference.s new file mode 100644 index 000000000000..a95002b141a0 --- /dev/null +++ b/lld/test/MachO/weak-reference.s @@ -0,0 +1,144 @@ +# REQUIRES: x86 +# RUN: rm -rf %t; split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/libfoo.s -o %t/libfoo.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/strongref.s -o %t/strongref.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/invalid.s -o %t/invalid.o +# RUN: %lld -lSystem -dylib %t/libfoo.o -o %t/libfoo.dylib + +# RUN: %lld -lSystem %t/test.o %t/libfoo.dylib -o %t/test +# RUN: llvm-objdump --macho --syms --bind %t/test | FileCheck %s --check-prefixes=SYMS,BIND +## llvm-objdump doesn't print out all the flags info for lazy & weak bindings, +## so we use obj2yaml instead to test them. +# RUN: obj2yaml %t/test | FileCheck %s --check-prefix=YAML + +# RUN: %lld -lSystem %t/libfoo.dylib %t/test.o -o %t/test +# RUN: llvm-objdump --macho --syms --bind %t/test | FileCheck %s --check-prefixes=SYMS,BIND +# RUN: obj2yaml %t/test | FileCheck %s --check-prefix=YAML + +# SYMS: SYMBOL TABLE: +# SYMS-DAG: 0000000000000000 w *UND* _foo +# SYMS-DAG: 0000000000000000 w *UND* _foo_fn +# SYMS-DAG: 0000000000000000 w *UND* _foo_tlv +# SYMS-DAG: 0000000000000000 w *UND* _weak_foo +# SYMS-DAG: 0000000000000000 w *UND* _weak_foo_fn + +# BIND: Bind table: +# BIND-NEXT: segment section address type addend dylib symbol +# BIND-DAG: __DATA __data 0x{{[0-9a-f]+}} pointer 0 libfoo _foo (weak_import) +# BIND-DAG: __DATA_CONST __got 0x{{[0-9a-f]+}} pointer 0 libfoo _foo (weak_import) +# BIND-DAG: __DATA __thread_ptrs 0x{{[0-9a-f]+}} pointer 0 libfoo _foo_tlv (weak_import) +# BIND-DAG: __DATA __data 0x{{[0-9a-f]+}} pointer 0 libfoo _weak_foo (weak_import) +# BIND-DAG: __DATA __la_symbol_ptr 0x{{[0-9a-f]+}} pointer 0 libfoo _weak_foo_fn (weak_import) + +# YAML-LABEL: WeakBindOpcodes: +# YAML: - Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM +# YAML-NEXT: Imm: 0 +# YAML-NEXT: Symbol: _weak_foo +# YAML: - Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM +# YAML-NEXT: Imm: 0 +# YAML-NEXT: Symbol: _weak_foo_fn +# YAML-LABEL: LazyBindOpcodes: +# YAML: - Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM +# YAML-NEXT: Imm: 1 +# YAML-NEXT: Symbol: _foo_fn + +## Check that if both strong & weak references are present in inputs, the weak +## reference takes priority. NOTE: ld64 actually emits a strong reference if +## the reference is to a function symbol or a TLV. I'm not sure if there's a +## good reason for that, so I'm deviating here for a simpler implementation. +# RUN: %lld -lSystem %t/test.o %t/strongref.o %t/libfoo.dylib -o %t/with-strong +# RUN: llvm-objdump --macho --bind %t/with-strong | FileCheck %s --check-prefix=STRONG-BIND +# RUN: obj2yaml %t/with-strong | FileCheck %s --check-prefix=STRONG-YAML +# RUN: %lld -lSystem %t/strongref.o %t/test.o %t/libfoo.dylib -o %t/with-strong +# RUN: llvm-objdump --macho --bind %t/with-strong | FileCheck %s --check-prefix=STRONG-BIND +# RUN: obj2yaml %t/with-strong | FileCheck %s --check-prefix=STRONG-YAML +# RUN: %lld -lSystem %t/libfoo.dylib %t/strongref.o %t/test.o -o %t/with-strong +# RUN: llvm-objdump --macho --bind %t/with-strong | FileCheck %s --check-prefix=STRONG-BIND +# RUN: obj2yaml %t/with-strong | FileCheck %s --check-prefix=STRONG-YAML +# RUN: %lld -lSystem %t/libfoo.dylib %t/test.o %t/strongref.o -o %t/with-strong +# RUN: llvm-objdump --macho --bind %t/with-strong | FileCheck %s --check-prefix=STRONG-BIND +# RUN: obj2yaml %t/with-strong | FileCheck %s --check-prefix=STRONG-YAML +# RUN: %lld -lSystem %t/test.o %t/libfoo.dylib %t/strongref.o -o %t/with-strong +# RUN: llvm-objdump --macho --bind %t/with-strong | FileCheck %s --check-prefix=STRONG-BIND +# RUN: obj2yaml %t/with-strong | FileCheck %s --check-prefix=STRONG-YAML +# RUN: %lld -lSystem %t/strongref.o %t/libfoo.dylib %t/test.o -o %t/with-strong +# RUN: llvm-objdump --macho --bind %t/with-strong | FileCheck %s --check-prefix=STRONG-BIND +# RUN: obj2yaml %t/with-strong | FileCheck %s --check-prefix=STRONG-YAML + +# STRONG-BIND: Bind table: +# STRONG-BIND-NEXT: segment section address type addend dylib symbol +# STRONG-BIND-DAG: __DATA __data 0x{{[0-9a-f]+}} pointer 0 libfoo _foo{{$}} +# STRONG-BIND-DAG: __DATA __data 0x{{[0-9a-f]+}} pointer 0 libfoo _foo{{$}} +# STRONG-BIND-DAG: __DATA_CONST __got 0x{{[0-9a-f]+}} pointer 0 libfoo _foo{{$}} +# STRONG-BIND-DAG: __DATA __thread_ptrs 0x{{[0-9a-f]+}} pointer 0 libfoo _foo_tlv{{$}} +# STRONG-BIND-DAG: __DATA __data 0x{{[0-9a-f]+}} pointer 0 libfoo _weak_foo{{$}} +# STRONG-BIND-DAG: __DATA __data 0x{{[0-9a-f]+}} pointer 0 libfoo _weak_foo{{$}} +# STRONG-BIND-DAG: __DATA __la_symbol_ptr 0x{{[0-9a-f]+}} pointer 0 libfoo _weak_foo_fn{{$}} + +# STRONG-YAML-LABEL: WeakBindOpcodes: +# STRONG-YAML: - Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM +# STRONG-YAML-NEXT: Imm: 0 +# STRONG-YAML-NEXT: Symbol: _weak_foo +# STRONG-YAML: - Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM +# STRONG-YAML-NEXT: Imm: 0 +# STRONG-YAML-NEXT: Symbol: _weak_foo +# STRONG-YAML: - Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM +# STRONG-YAML-NEXT: Imm: 0 +# STRONG-YAML-NEXT: Symbol: _weak_foo_fn +# STRONG-YAML-LABEL: LazyBindOpcodes: +# STRONG-YAML: - Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM +# STRONG-YAML-NEXT: Imm: 0 +# STRONG-YAML-NEXT: Symbol: _foo_fn + +## Weak references must still be satisfied at link time. +# RUN: not %lld -lSystem %t/invalid.o -o /dev/null 2>&1 | FileCheck %s \ +# RUN: --check-prefix=INVALID -DDIR=%t +# INVALID: error: undefined symbol _missing, referenced from [[DIR]]/invalid.o + +#--- libfoo.s +.globl _foo, _foo_fn, _weak_foo, _weak_foo_fn +.weak_definition _weak_foo, _weak_foo_fn +_foo: +_foo_fn: +_weak_foo: +_weak_foo_fn: + +.section __DATA,__thread_vars,thread_local_variables +.globl _foo_tlv +_foo_tlv: + +#--- test.s +.globl _main +.weak_reference _foo_fn, _foo, _weak_foo, _weak_foo_fn, _foo_tlv + +_main: + mov _foo@GOTPCREL(%rip), %rax + mov _foo_tlv@TLVP(%rip), %rax + callq _foo_fn + callq _weak_foo_fn + ret + +.data + .quad _foo + .quad _weak_foo + +#--- strongref.s +.globl _strongref +_strongref: + mov _foo@GOTPCREL(%rip), %rax + mov _foo_tlv@TLVP(%rip), %rax + callq _foo_fn + callq _weak_foo_fn + ret + +.data + .quad _foo + .quad _weak_foo + +#--- invalid.s +.globl _main +.weak_reference _missing +_main: + callq _missing + ret