[lld-macho] Add support for weak references

Weak references need not necessarily be satisfied at runtime (but they must
still be satisfied at link time). So symbol resolution still works as per usual,
but we now pass around a flag -- ultimately emitting it in the bind table -- to
indicate if a given dylib symbol is a weak reference.

ld64's behavior for symbols that have both weak and strong references is
a bit bizarre. For non-function symbols, it will emit a weak import. For
function symbols (those referenced by BRANCH relocs), it will emit a
regular import. I'm not sure what value there is in that behavior, and
since emulating it will make our implementation more complex, I've
decided to treat regular weakrefs like function symbol ones for now.

Fixes PR48511.

Reviewed By: #lld-macho, thakis

Differential Revision: https://reviews.llvm.org/D93369
This commit is contained in:
Jez Ng 2020-12-15 21:05:06 -05:00
parent c5046ebdf6
commit 811444d7a1
8 changed files with 222 additions and 28 deletions

View File

@ -285,7 +285,7 @@ static InputFile *addFile(StringRef path, bool forceLoadArchive) {
} else if (config->forceLoadObjC) {
for (const object::Archive::Symbol &sym : file->symbols())
if (sym.getName().startswith(objc::klass))
symtab->addUndefined(sym.getName());
symtab->addUndefined(sym.getName(), /*isWeakRef=*/false);
// TODO: no need to look for ObjC sections for a given archive member if
// we already found that it contains an ObjC symbol. We should also
@ -723,7 +723,8 @@ bool macho::link(llvm::ArrayRef<const char *> argsArr, bool canExitEarly,
symtab = make<SymbolTable>();
target = createTargetInfo(args);
config->entry = symtab->addUndefined(args.getLastArgValue(OPT_e, "_main"));
config->entry = symtab->addUndefined(args.getLastArgValue(OPT_e, "_main"),
/*isWeakRef=*/false);
config->outputFile = args.getLastArgValue(OPT_o, "a.out");
config->installName =
args.getLastArgValue(OPT_install_name, config->outputFile);

View File

@ -283,7 +283,7 @@ macho::Symbol *ObjFile::parseNonSectionSymbol(const structs::nlist_64 &sym,
switch (type) {
case N_UNDF:
return sym.n_value == 0
? symtab->addUndefined(name)
? symtab->addUndefined(name, sym.n_desc & N_WEAK_REF)
: symtab->addCommon(name, this, sym.n_value,
1 << GET_COMM_ALIGN(sym.n_desc));
case N_ABS:

View File

@ -62,15 +62,21 @@ Symbol *SymbolTable::addDefined(StringRef name, InputSection *isec,
return s;
}
Symbol *SymbolTable::addUndefined(StringRef name) {
Symbol *SymbolTable::addUndefined(StringRef name, bool isWeakRef) {
Symbol *s;
bool wasInserted;
std::tie(s, wasInserted) = insert(name);
auto refState = isWeakRef ? RefState::Weak : RefState::Strong;
if (wasInserted)
replaceSymbol<Undefined>(s, name);
else if (LazySymbol *lazy = dyn_cast<LazySymbol>(s))
replaceSymbol<Undefined>(s, name, refState);
else if (auto *lazy = dyn_cast<LazySymbol>(s))
lazy->fetchArchiveMember();
else if (auto *dynsym = dyn_cast<DylibSymbol>(s))
dynsym->refState = std::max(dynsym->refState, refState);
else if (auto *undefined = dyn_cast<Undefined>(s))
undefined->refState = std::max(undefined->refState, refState);
return s;
}
@ -101,14 +107,21 @@ Symbol *SymbolTable::addDylib(StringRef name, DylibFile *file, bool isWeakDef,
bool wasInserted;
std::tie(s, wasInserted) = insert(name);
if (!wasInserted && isWeakDef)
if (auto *defined = dyn_cast<Defined>(s))
if (!defined->isWeakDef())
auto refState = RefState::Unreferenced;
if (!wasInserted) {
if (auto *defined = dyn_cast<Defined>(s)) {
if (isWeakDef && !defined->isWeakDef())
defined->overridesWeakDef = true;
} else if (auto *undefined = dyn_cast<Undefined>(s)) {
refState = undefined->refState;
} else if (auto *dysym = dyn_cast<DylibSymbol>(s)) {
refState = dysym->refState;
}
}
if (wasInserted || isa<Undefined>(s) ||
(isa<DylibSymbol>(s) && !isWeakDef && s->isWeakDef()))
replaceSymbol<DylibSymbol>(s, file, name, isWeakDef, isTlv);
replaceSymbol<DylibSymbol>(s, file, name, isWeakDef, refState, isTlv);
return s;
}

View File

@ -35,7 +35,7 @@ public:
Symbol *addDefined(StringRef name, InputSection *isec, uint32_t value,
bool isWeakDef);
Symbol *addUndefined(StringRef name);
Symbol *addUndefined(StringRef name, bool isWeakRef);
Symbol *addCommon(StringRef name, InputFile *, uint64_t size, uint32_t align);

View File

@ -55,7 +55,12 @@ public:
llvm_unreachable("attempt to get an offset from a non-defined symbol");
}
virtual bool isWeakDef() const { llvm_unreachable("cannot be weak"); }
virtual bool isWeakDef() const { llvm_unreachable("cannot be weak def"); }
// Only undefined or dylib symbols can be weak references. A weak reference
// need not be satisfied at runtime, e.g. due to the symbol not being
// available on a given target platform.
virtual bool isWeakRef() const { llvm_unreachable("cannot be a weak ref"); }
virtual bool isTlv() const { llvm_unreachable("cannot be TLV"); }
@ -111,11 +116,21 @@ private:
const bool external : 1;
};
// Indicates whether & how a dylib symbol is referenced.
enum class RefState : uint8_t { Unreferenced = 0, Weak = 1, Strong = 2 };
class Undefined : public Symbol {
public:
Undefined(StringRefZ name) : Symbol(UndefinedKind, name) {}
Undefined(StringRefZ name, RefState refState)
: Symbol(UndefinedKind, name), refState(refState) {
assert(refState != RefState::Unreferenced);
}
bool isWeakRef() const override { return refState == RefState::Weak; }
static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; }
RefState refState : 2;
};
// On Unix, it is traditionally allowed to write variable definitions without
@ -149,10 +164,14 @@ public:
class DylibSymbol : public Symbol {
public:
DylibSymbol(DylibFile *file, StringRefZ name, bool isWeakDef, bool isTlv)
: Symbol(DylibKind, name), file(file), weakDef(isWeakDef), tlv(isTlv) {}
DylibSymbol(DylibFile *file, StringRefZ name, bool isWeakDef,
RefState refState, bool isTlv)
: Symbol(DylibKind, name), file(file), refState(refState),
weakDef(isWeakDef), tlv(isTlv) {}
bool isWeakDef() const override { return weakDef; }
bool isWeakRef() const override { return refState == RefState::Weak; }
bool isReferenced() const { return refState != RefState::Unreferenced; }
bool isTlv() const override { return tlv; }
bool hasStubsHelper() const { return stubsHelperIndex != UINT32_MAX; }
@ -162,9 +181,11 @@ public:
uint32_t stubsHelperIndex = UINT32_MAX;
uint32_t lazyBindOffset = UINT32_MAX;
RefState refState : 2;
private:
const bool weakDef;
const bool tlv;
const bool weakDef : 1;
const bool tlv : 1;
};
class LazySymbol : public Symbol {

View File

@ -227,7 +227,8 @@ struct Binding {
// lastBinding.
static void encodeBinding(const Symbol *sym, const OutputSection *osec,
uint64_t outSecOff, int64_t addend,
Binding &lastBinding, raw_svector_ostream &os) {
bool isWeakBinding, Binding &lastBinding,
raw_svector_ostream &os) {
using namespace llvm::MachO;
OutputSegment *seg = osec->parent;
uint64_t offset = osec->getSegmentOffset() + outSecOff;
@ -249,8 +250,11 @@ static void encodeBinding(const Symbol *sym, const OutputSection *osec,
lastBinding.addend = addend;
}
os << static_cast<uint8_t>(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM)
<< sym->getName() << '\0'
uint8_t flags = BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM;
if (!isWeakBinding && sym->isWeakRef())
flags |= BIND_SYMBOL_FLAGS_WEAK_IMPORT;
os << flags << sym->getName() << '\0'
<< static_cast<uint8_t>(BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER)
<< static_cast<uint8_t>(BIND_OPCODE_DO_BIND);
// DO_BIND causes dyld to both perform the binding and increment the offset
@ -308,10 +312,11 @@ void BindingSection::finalizeContents() {
encodeDylibOrdinal(b.dysym, lastBinding, os);
if (auto *isec = b.target.section.dyn_cast<const InputSection *>()) {
encodeBinding(b.dysym, isec->parent, isec->outSecOff + b.target.offset,
b.addend, lastBinding, os);
b.addend, /*isWeakBinding=*/false, lastBinding, os);
} else {
auto *osec = b.target.section.get<const OutputSection *>();
encodeBinding(b.dysym, osec, b.target.offset, b.addend, lastBinding, os);
encodeBinding(b.dysym, osec, b.target.offset, b.addend,
/*isWeakBinding=*/false, lastBinding, os);
}
}
if (!bindings.empty())
@ -341,10 +346,11 @@ void WeakBindingSection::finalizeContents() {
for (const WeakBindingEntry &b : bindings) {
if (auto *isec = b.target.section.dyn_cast<const InputSection *>()) {
encodeBinding(b.symbol, isec->parent, isec->outSecOff + b.target.offset,
b.addend, lastBinding, os);
b.addend, /*isWeakBinding=*/true, lastBinding, os);
} else {
auto *osec = b.target.section.get<const OutputSection *>();
encodeBinding(b.symbol, osec, b.target.offset, b.addend, lastBinding, os);
encodeBinding(b.symbol, osec, b.target.offset, b.addend,
/*isWeakBinding=*/true, lastBinding, os);
}
}
if (!bindings.empty() || !definitions.empty())
@ -436,6 +442,7 @@ void StubHelperSection::setup() {
"Needed to perform lazy binding.");
return;
}
stubBinder->refState = RefState::Strong;
in.got->addEntry(stubBinder);
inputSections.push_back(in.imageLoaderCache);
@ -525,8 +532,11 @@ uint32_t LazyBindingSection::encode(const DylibSymbol &sym) {
encodeULEB128(sym.file->ordinal, os);
}
os << static_cast<uint8_t>(MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM)
<< sym.getName() << '\0'
uint8_t flags = MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM;
if (sym.isWeakRef())
flags |= MachO::BIND_SYMBOL_FLAGS_WEAK_IMPORT;
os << flags << sym.getName() << '\0'
<< static_cast<uint8_t>(MachO::BIND_OPCODE_DO_BIND)
<< static_cast<uint8_t>(MachO::BIND_OPCODE_DONE);
return opstreamOffset;
@ -705,8 +715,9 @@ void SymtabSection::finalizeContents() {
if (auto *defined = dyn_cast<Defined>(sym)) {
assert(defined->isExternal());
externalSymbols.push_back({sym, strx});
} else if (sym->isInGot() || sym->isInStubs()) {
undefinedSymbols.push_back({sym, strx});
} else if (auto *dysym = dyn_cast<DylibSymbol>(sym)) {
if (dysym->isReferenced())
undefinedSymbols.push_back({sym, strx});
}
}
@ -756,6 +767,8 @@ void SymtabSection::writeTo(uint8_t *buf) const {
} else if (auto *dysym = dyn_cast<DylibSymbol>(entry.sym)) {
uint16_t n_desc = nList->n_desc;
MachO::SET_LIBRARY_ORDINAL(n_desc, dysym->file->ordinal);
nList->n_type = MachO::N_EXT;
n_desc |= dysym->isWeakRef() ? MachO::N_WEAK_REF : 0;
nList->n_desc = n_desc;
}
++nList;

View File

@ -59,6 +59,7 @@
# CHECK-NEXT: }
# CHECK-NEXT: Symbol {
# CHECK-NEXT: Name: dyld_stub_binder (15)
# CHECK-NEXT: Extern
# CHECK-NEXT: Type: Undef (0x0)
# CHECK-NEXT: Section: (0x0)
# CHECK-NEXT: RefType: UndefinedNonLazy (0x0)
@ -68,6 +69,7 @@
# CHECK-NEXT: }
# CHECK-NEXT: Symbol {
# CHECK-NEXT: Name: _dynamic (80)
# CHECK-NEXT: Extern
# CHECK-NEXT: Type: Undef (0x0)
# CHECK-NEXT: Section: (0x0)
# CHECK-NEXT: RefType: UndefinedNonLazy (0x0)

View File

@ -0,0 +1,144 @@
# REQUIRES: x86
# RUN: rm -rf %t; split-file %s %t
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/libfoo.s -o %t/libfoo.o
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/strongref.s -o %t/strongref.o
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/invalid.s -o %t/invalid.o
# RUN: %lld -lSystem -dylib %t/libfoo.o -o %t/libfoo.dylib
# RUN: %lld -lSystem %t/test.o %t/libfoo.dylib -o %t/test
# RUN: llvm-objdump --macho --syms --bind %t/test | FileCheck %s --check-prefixes=SYMS,BIND
## llvm-objdump doesn't print out all the flags info for lazy & weak bindings,
## so we use obj2yaml instead to test them.
# RUN: obj2yaml %t/test | FileCheck %s --check-prefix=YAML
# RUN: %lld -lSystem %t/libfoo.dylib %t/test.o -o %t/test
# RUN: llvm-objdump --macho --syms --bind %t/test | FileCheck %s --check-prefixes=SYMS,BIND
# RUN: obj2yaml %t/test | FileCheck %s --check-prefix=YAML
# SYMS: SYMBOL TABLE:
# SYMS-DAG: 0000000000000000 w *UND* _foo
# SYMS-DAG: 0000000000000000 w *UND* _foo_fn
# SYMS-DAG: 0000000000000000 w *UND* _foo_tlv
# SYMS-DAG: 0000000000000000 w *UND* _weak_foo
# SYMS-DAG: 0000000000000000 w *UND* _weak_foo_fn
# BIND: Bind table:
# BIND-NEXT: segment section address type addend dylib symbol
# BIND-DAG: __DATA __data 0x{{[0-9a-f]+}} pointer 0 libfoo _foo (weak_import)
# BIND-DAG: __DATA_CONST __got 0x{{[0-9a-f]+}} pointer 0 libfoo _foo (weak_import)
# BIND-DAG: __DATA __thread_ptrs 0x{{[0-9a-f]+}} pointer 0 libfoo _foo_tlv (weak_import)
# BIND-DAG: __DATA __data 0x{{[0-9a-f]+}} pointer 0 libfoo _weak_foo (weak_import)
# BIND-DAG: __DATA __la_symbol_ptr 0x{{[0-9a-f]+}} pointer 0 libfoo _weak_foo_fn (weak_import)
# YAML-LABEL: WeakBindOpcodes:
# YAML: - Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM
# YAML-NEXT: Imm: 0
# YAML-NEXT: Symbol: _weak_foo
# YAML: - Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM
# YAML-NEXT: Imm: 0
# YAML-NEXT: Symbol: _weak_foo_fn
# YAML-LABEL: LazyBindOpcodes:
# YAML: - Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM
# YAML-NEXT: Imm: 1
# YAML-NEXT: Symbol: _foo_fn
## Check that if both strong & weak references are present in inputs, the weak
## reference takes priority. NOTE: ld64 actually emits a strong reference if
## the reference is to a function symbol or a TLV. I'm not sure if there's a
## good reason for that, so I'm deviating here for a simpler implementation.
# RUN: %lld -lSystem %t/test.o %t/strongref.o %t/libfoo.dylib -o %t/with-strong
# RUN: llvm-objdump --macho --bind %t/with-strong | FileCheck %s --check-prefix=STRONG-BIND
# RUN: obj2yaml %t/with-strong | FileCheck %s --check-prefix=STRONG-YAML
# RUN: %lld -lSystem %t/strongref.o %t/test.o %t/libfoo.dylib -o %t/with-strong
# RUN: llvm-objdump --macho --bind %t/with-strong | FileCheck %s --check-prefix=STRONG-BIND
# RUN: obj2yaml %t/with-strong | FileCheck %s --check-prefix=STRONG-YAML
# RUN: %lld -lSystem %t/libfoo.dylib %t/strongref.o %t/test.o -o %t/with-strong
# RUN: llvm-objdump --macho --bind %t/with-strong | FileCheck %s --check-prefix=STRONG-BIND
# RUN: obj2yaml %t/with-strong | FileCheck %s --check-prefix=STRONG-YAML
# RUN: %lld -lSystem %t/libfoo.dylib %t/test.o %t/strongref.o -o %t/with-strong
# RUN: llvm-objdump --macho --bind %t/with-strong | FileCheck %s --check-prefix=STRONG-BIND
# RUN: obj2yaml %t/with-strong | FileCheck %s --check-prefix=STRONG-YAML
# RUN: %lld -lSystem %t/test.o %t/libfoo.dylib %t/strongref.o -o %t/with-strong
# RUN: llvm-objdump --macho --bind %t/with-strong | FileCheck %s --check-prefix=STRONG-BIND
# RUN: obj2yaml %t/with-strong | FileCheck %s --check-prefix=STRONG-YAML
# RUN: %lld -lSystem %t/strongref.o %t/libfoo.dylib %t/test.o -o %t/with-strong
# RUN: llvm-objdump --macho --bind %t/with-strong | FileCheck %s --check-prefix=STRONG-BIND
# RUN: obj2yaml %t/with-strong | FileCheck %s --check-prefix=STRONG-YAML
# STRONG-BIND: Bind table:
# STRONG-BIND-NEXT: segment section address type addend dylib symbol
# STRONG-BIND-DAG: __DATA __data 0x{{[0-9a-f]+}} pointer 0 libfoo _foo{{$}}
# STRONG-BIND-DAG: __DATA __data 0x{{[0-9a-f]+}} pointer 0 libfoo _foo{{$}}
# STRONG-BIND-DAG: __DATA_CONST __got 0x{{[0-9a-f]+}} pointer 0 libfoo _foo{{$}}
# STRONG-BIND-DAG: __DATA __thread_ptrs 0x{{[0-9a-f]+}} pointer 0 libfoo _foo_tlv{{$}}
# STRONG-BIND-DAG: __DATA __data 0x{{[0-9a-f]+}} pointer 0 libfoo _weak_foo{{$}}
# STRONG-BIND-DAG: __DATA __data 0x{{[0-9a-f]+}} pointer 0 libfoo _weak_foo{{$}}
# STRONG-BIND-DAG: __DATA __la_symbol_ptr 0x{{[0-9a-f]+}} pointer 0 libfoo _weak_foo_fn{{$}}
# STRONG-YAML-LABEL: WeakBindOpcodes:
# STRONG-YAML: - Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM
# STRONG-YAML-NEXT: Imm: 0
# STRONG-YAML-NEXT: Symbol: _weak_foo
# STRONG-YAML: - Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM
# STRONG-YAML-NEXT: Imm: 0
# STRONG-YAML-NEXT: Symbol: _weak_foo
# STRONG-YAML: - Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM
# STRONG-YAML-NEXT: Imm: 0
# STRONG-YAML-NEXT: Symbol: _weak_foo_fn
# STRONG-YAML-LABEL: LazyBindOpcodes:
# STRONG-YAML: - Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM
# STRONG-YAML-NEXT: Imm: 0
# STRONG-YAML-NEXT: Symbol: _foo_fn
## Weak references must still be satisfied at link time.
# RUN: not %lld -lSystem %t/invalid.o -o /dev/null 2>&1 | FileCheck %s \
# RUN: --check-prefix=INVALID -DDIR=%t
# INVALID: error: undefined symbol _missing, referenced from [[DIR]]/invalid.o
#--- libfoo.s
.globl _foo, _foo_fn, _weak_foo, _weak_foo_fn
.weak_definition _weak_foo, _weak_foo_fn
_foo:
_foo_fn:
_weak_foo:
_weak_foo_fn:
.section __DATA,__thread_vars,thread_local_variables
.globl _foo_tlv
_foo_tlv:
#--- test.s
.globl _main
.weak_reference _foo_fn, _foo, _weak_foo, _weak_foo_fn, _foo_tlv
_main:
mov _foo@GOTPCREL(%rip), %rax
mov _foo_tlv@TLVP(%rip), %rax
callq _foo_fn
callq _weak_foo_fn
ret
.data
.quad _foo
.quad _weak_foo
#--- strongref.s
.globl _strongref
_strongref:
mov _foo@GOTPCREL(%rip), %rax
mov _foo_tlv@TLVP(%rip), %rax
callq _foo_fn
callq _weak_foo_fn
ret
.data
.quad _foo
.quad _weak_foo
#--- invalid.s
.globl _main
.weak_reference _missing
_main:
callq _missing
ret