[lld][WebAssembly] Match the ELF linker in transitioning away from archive indexes. (#78658)

The ELF linker transitioned away from archive indexes in
https://reviews.llvm.org/D117284.

This paves the way for supporting `--start-lib`/`--end-lib` (See #77960)

The ELF linker unified library handling with `--start-lib`/`--end-lib` and removed
the ArchiveFile class in https://reviews.llvm.org/D119074.
This commit is contained in:
Sam Clegg 2024-01-19 16:20:29 -08:00 committed by GitHub
parent c71a5bf940
commit bcc9b9d80c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 99 additions and 142 deletions

View File

@ -50,5 +50,9 @@ MachO Improvements
WebAssembly Improvements
------------------------
* Indexes are no longer required on archive files. Instead symbol information
is read from object files within the archive. This matches the behaviour of
the ELF linker.
Fixes
#####

View File

@ -1,14 +0,0 @@
# Tests error on archive file without a symbol table
# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s
# RUN: llvm-as -o %t.archive.o %S/Inputs/archive1.ll
# RUN: rm -f %t.a
# RUN: llvm-ar crS %t.a %t.archive.o
# RUN: not wasm-ld -o out.wasm %t.o %t.a 2>&1 | FileCheck %s
.globl _start
_start:
.functype _start () -> ()
end_function
# CHECK: archive has no index; run ranlib to add one

View File

@ -5,7 +5,7 @@
# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux -o %t.dir/elf.o %s
# RUN: llvm-ar rcs %t.dir/libfoo.a %t.dir/elf.o
# RUN: not wasm-ld %t.dir/libfoo.a -o /dev/null 2>&1 | FileCheck %s
# CHECK: error: unknown file type: {{.*}}libfoo.a(elf.o)
# CHECK: warning: {{.*}}libfoo.a: archive member 'elf.o' is neither Wasm object file nor LLVM bitcode
.globl _start
_start:

View File

@ -272,9 +272,11 @@ void LinkerDriver::addFile(StringRef path) {
if (fs::exists(importFile))
readImportFile(importFile.str());
auto members = getArchiveMembers(mbref);
// Handle -whole-archive.
if (inWholeArchive) {
for (const auto &[m, offset] : getArchiveMembers(mbref)) {
for (const auto &[m, offset] : members) {
auto *object = createObjectFile(m, path, offset);
// Mark object as live; object members are normally not
// live by default but -whole-archive is designed to treat
@ -289,12 +291,15 @@ void LinkerDriver::addFile(StringRef path) {
std::unique_ptr<Archive> file =
CHECK(Archive::create(mbref), path + ": failed to parse archive");
if (!file->isEmpty() && !file->hasSymbolTable()) {
error(mbref.getBufferIdentifier() +
": archive has no index; run ranlib to add one");
for (const auto &[m, offset] : members) {
auto magic = identify_magic(m.getBuffer());
if (magic == file_magic::wasm_object || magic == file_magic::bitcode)
files.push_back(createObjectFile(m, path, offset, true));
else
warn(path + ": archive member '" + m.getBufferIdentifier() +
"' is neither Wasm object file nor LLVM bitcode");
}
files.push_back(make<ArchiveFile>(mbref));
return;
}
case file_magic::bitcode:
@ -732,16 +737,10 @@ static Symbol *handleUndefined(StringRef name, const char *option) {
static void handleLibcall(StringRef name) {
Symbol *sym = symtab->find(name);
if (!sym)
return;
if (auto *lazySym = dyn_cast<LazySymbol>(sym)) {
MemoryBufferRef mb = lazySym->getMemberBuffer();
if (isBitcode(mb)) {
if (!config->whyExtract.empty())
ctx.whyExtractRecords.emplace_back("<libcall>", sym->getFile(), *sym);
lazySym->extract();
}
if (sym && sym->isLazy() && isa<BitcodeFile>(sym->getFile())) {
if (!config->whyExtract.empty())
ctx.whyExtractRecords.emplace_back("<libcall>", sym->getFile(), *sym);
cast<LazySymbol>(sym)->extract();
}
}

View File

@ -75,7 +75,7 @@ std::optional<MemoryBufferRef> readFile(StringRef path) {
}
InputFile *createObjectFile(MemoryBufferRef mb, StringRef archiveName,
uint64_t offsetInArchive) {
uint64_t offsetInArchive, bool lazy) {
file_magic magic = identify_magic(mb.getBuffer());
if (magic == file_magic::wasm_object) {
std::unique_ptr<Binary> bin =
@ -83,18 +83,11 @@ InputFile *createObjectFile(MemoryBufferRef mb, StringRef archiveName,
auto *obj = cast<WasmObjectFile>(bin.get());
if (obj->isSharedObject())
return make<SharedFile>(mb);
return make<ObjFile>(mb, archiveName);
return make<ObjFile>(mb, archiveName, lazy);
}
if (magic == file_magic::bitcode)
return make<BitcodeFile>(mb, archiveName, offsetInArchive);
std::string name = mb.getBufferIdentifier().str();
if (!archiveName.empty()) {
name = archiveName.str() + "(" + name + ")";
}
fatal("unknown file type: " + name);
assert(magic == file_magic::bitcode);
return make<BitcodeFile>(mb, archiveName, offsetInArchive, lazy);
}
// Relocations contain either symbol or type indices. This function takes a
@ -391,9 +384,30 @@ static bool shouldMerge(const WasmSegment &seg) {
return true;
}
void ObjFile::parse(bool ignoreComdats) {
// Parse a memory buffer as a wasm file.
LLVM_DEBUG(dbgs() << "Parsing object: " << toString(this) << "\n");
// Registers every symbol that this object file defines as a lazy symbol in
// the global symbol table. Symbols that are not defined here are skipped.
void ObjFile::parseLazy() {
  LLVM_DEBUG(dbgs() << "ObjFile::parseLazy: " << toString(this) << "\n");
  for (const SymbolRef &ref : wasmObj->symbols()) {
    const WasmSymbol &candidate =
        wasmObj->getWasmSymbol(ref.getRawDataRefImpl());
    if (candidate.isDefined()) {
      symtab->addLazy(candidate.Info.Name, this);
      // addLazy() can end up extracting this very file when the name was
      // already present as an undefined symbol. Once the file is no longer
      // lazy there is nothing left for this function to do.
      if (!lazy)
        return;
    }
  }
}
ObjFile::ObjFile(MemoryBufferRef m, StringRef archiveName, bool lazy)
: InputFile(ObjectKind, m) {
this->lazy = lazy;
this->archiveName = std::string(archiveName);
// If this isn't part of an archive, it's eagerly linked, so mark it live.
if (archiveName.empty())
markLive();
std::unique_ptr<Binary> bin = CHECK(createBinary(mb), toString(this));
auto *obj = dyn_cast<WasmObjectFile>(bin.get());
@ -406,6 +420,11 @@ void ObjFile::parse(bool ignoreComdats) {
wasmObj.reset(obj);
checkArch(obj->getArch());
}
void ObjFile::parse(bool ignoreComdats) {
// Parse a memory buffer as a wasm file.
LLVM_DEBUG(dbgs() << "ObjFile::parse: " << toString(this) << "\n");
// Build up a map of function indices to table indices for use when
// verifying the existing table index relocations
@ -717,43 +736,6 @@ void StubFile::parse() {
}
}
// Opens the underlying buffer as an archive and registers every entry of the
// archive's symbol table as a lazy symbol.
void ArchiveFile::parse() {
  LLVM_DEBUG(dbgs() << "Parsing library: " << toString(this) << "\n");
  file = CHECK(Archive::create(mb), toString(this));

  // The counter only feeds the debug trace below.
  int numSymbols = 0;
  for (const Archive::Symbol &entry : file->symbols()) {
    symtab->addLazy(this, &entry);
    numSymbols++;
  }
  LLVM_DEBUG(dbgs() << "Read " << numSymbols << " symbols\n");
  (void)numSymbols;
}
// Loads the archive member that defines `sym` and hands the resulting input
// file to the symbol table. Each member is loaded at most once.
void ArchiveFile::addMember(const Archive::Symbol *sym) {
  const Archive::Child &member =
      CHECK(sym->getMember(),
            "could not get the member for symbol " + sym->getName());

  // Members that reference each other can request the same member again;
  // only the first request for a given child offset goes any further.
  const bool firstRequest = seen.insert(member.getChildOffset()).second;
  if (!firstRequest)
    return;

  LLVM_DEBUG(dbgs() << "loading lazy: " << sym->getName() << "\n");
  LLVM_DEBUG(dbgs() << "from archive: " << toString(this) << "\n");

  MemoryBufferRef memberBuffer =
      CHECK(member.getMemoryBufferRef(),
            "could not get the buffer for the member defining symbol " +
                sym->getName());

  InputFile *extracted =
      createObjectFile(memberBuffer, getName(), member.getChildOffset());
  symtab->addFile(extracted, sym->getName());
}
static uint8_t mapVisibility(GlobalValue::VisibilityTypes gvVisibility) {
switch (gvVisibility) {
case GlobalValue::DefaultVisibility:
@ -790,8 +772,9 @@ static Symbol *createBitcodeSymbol(const std::vector<bool> &keptComdats,
}
BitcodeFile::BitcodeFile(MemoryBufferRef m, StringRef archiveName,
uint64_t offsetInArchive)
uint64_t offsetInArchive, bool lazy)
: InputFile(BitcodeKind, m) {
this->lazy = lazy;
this->archiveName = std::string(archiveName);
std::string path = mb.getBufferIdentifier().str();
@ -817,6 +800,20 @@ BitcodeFile::BitcodeFile(MemoryBufferRef m, StringRef archiveName,
bool BitcodeFile::doneLTO = false;
// Registers every symbol that this bitcode file defines as a lazy symbol in
// the global symbol table. Undefined symbols are skipped.
void BitcodeFile::parseLazy() {
  // Iterate the symbols directly: the element index is not needed here, so
  // there is no reason to go through llvm::enumerate.
  for (const auto &irSym : obj->symbols()) {
    if (irSym.isUndefined())
      continue;
    // Pass the symbol table a copy of the name obtained from saver().
    StringRef name = saver().save(irSym.getName());
    symtab->addLazy(name, this);
    // addLazy() may trigger this->extract() if an existing symbol is an
    // undefined symbol. If that happens, this function has served its purpose,
    // and we can exit from the loop early.
    if (!lazy)
      break;
  }
}
void BitcodeFile::parse(StringRef symName) {
if (doneLTO) {
error(toString(this) + ": attempt to add bitcode file after LTO (" + symName + ")");

View File

@ -14,7 +14,6 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/LTO/LTO.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/Wasm.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/TargetParser/Triple.h"
@ -45,7 +44,6 @@ public:
// Discriminator for the concrete input file class. The classof()
// implementations visible in this header compare kind() against these values.
enum Kind {
ObjectKind, // ObjFile (wasm object file)
SharedKind, // presumably SharedFile -- confirm against its classof()
ArchiveKind, // ArchiveFile (ar archive)
BitcodeKind, // BitcodeFile
StubKind, // presumably StubFile -- confirm against its classof()
};
@ -69,6 +67,11 @@ public:
// Marks this input file as live.
void markLive() { live = true; }
// Returns whether this input file is currently marked live.
bool isLive() const { return live; }
// True if this file exists in an archive file and has not yet been
// extracted.
// TODO(sbc): Use this to implement --start-lib/--end-lib.
bool lazy = false;
protected:
// Records the memory buffer and the file kind. The file starts out live
// unless config->gcSections is set.
InputFile(Kind k, MemoryBufferRef m)
: mb(m), fileKind(k), live(!config->gcSections) {}
@ -85,35 +88,14 @@ private:
bool live;
};
// .a file (ar archive).
// Input file of kind ArchiveKind wrapping an ar archive buffer.
class ArchiveFile : public InputFile {
public:
// Wraps the given buffer; the archive itself is not opened here.
explicit ArchiveFile(MemoryBufferRef m) : InputFile(ArchiveKind, m) {}
// Supports isa<>/cast<>/dyn_cast<> on InputFile pointers.
static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; }
// Loads the archive member that defines the given archive symbol.
void addMember(const llvm::object::Archive::Symbol *sym);
// Parses the buffer as an archive.
void parse();
private:
// The parsed archive.
std::unique_ptr<llvm::object::Archive> file;
// Set of 64-bit keys; presumably the child offsets of members already
// loaded by addMember() -- confirm against its definition.
llvm::DenseSet<uint64_t> seen;
};
// .o file (wasm object file)
class ObjFile : public InputFile {
public:
explicit ObjFile(MemoryBufferRef m, StringRef archiveName)
: InputFile(ObjectKind, m) {
this->archiveName = std::string(archiveName);
// If this isn't part of an archive, it's eagerly linked, so mark it live.
if (archiveName.empty())
markLive();
}
ObjFile(MemoryBufferRef m, StringRef archiveName, bool lazy = false);
static bool classof(const InputFile *f) { return f->kind() == ObjectKind; }
void parse(bool ignoreComdats = false);
void parseLazy();
// Returns the underlying wasm file.
const WasmObjectFile *getWasmObj() const { return wasmObj.get(); }
@ -173,10 +155,11 @@ public:
class BitcodeFile : public InputFile {
public:
BitcodeFile(MemoryBufferRef m, StringRef archiveName,
uint64_t offsetInArchive);
uint64_t offsetInArchive, bool lazy);
static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; }
void parse(StringRef symName);
void parseLazy();
std::unique_ptr<llvm::lto::InputFile> obj;
// Set to true once LTO is complete in order to prevent further bitcode objects
@ -196,14 +179,10 @@ public:
llvm::DenseMap<StringRef, std::vector<StringRef>> symbolDependencies;
};
// Returns true when the magic of the buffer's contents identifies it as LLVM
// bitcode.
inline bool isBitcode(MemoryBufferRef mb) {
  const llvm::file_magic magic = identify_magic(mb.getBuffer());
  return magic == llvm::file_magic::bitcode;
}
// Will report a fatal() error if the input buffer is not a valid bitcode
// or wasm object file.
InputFile *createObjectFile(MemoryBufferRef mb, StringRef archiveName = "",
uint64_t offsetInArchive = 0);
uint64_t offsetInArchive = 0, bool lazy = false);
// Opens a given file.
std::optional<MemoryBufferRef> readFile(StringRef path);

View File

@ -26,9 +26,13 @@ SymbolTable *symtab;
void SymbolTable::addFile(InputFile *file, StringRef symName) {
log("Processing: " + toString(file));
// .a file
if (auto *f = dyn_cast<ArchiveFile>(file)) {
f->parse();
// Lazy object file
if (file->lazy) {
if (auto *f = dyn_cast<BitcodeFile>(file)) {
f->parseLazy();
} else {
cast<ObjFile>(file)->parseLazy();
}
return;
}
@ -737,16 +741,15 @@ TableSymbol *SymbolTable::resolveIndirectFunctionTable(bool required) {
return nullptr;
}
void SymbolTable::addLazy(ArchiveFile *file, const Archive::Symbol *sym) {
LLVM_DEBUG(dbgs() << "addLazy: " << sym->getName() << "\n");
StringRef name = sym->getName();
void SymbolTable::addLazy(StringRef name, InputFile *file) {
LLVM_DEBUG(dbgs() << "addLazy: " << name << "\n");
Symbol *s;
bool wasInserted;
std::tie(s, wasInserted) = insertName(name);
if (wasInserted) {
replaceSymbol<LazySymbol>(s, name, 0, file, *sym);
replaceSymbol<LazySymbol>(s, name, 0, file);
return;
}
@ -763,15 +766,15 @@ void SymbolTable::addLazy(ArchiveFile *file, const Archive::Symbol *sym) {
if (auto *f = dyn_cast<UndefinedFunction>(s))
oldSig = f->signature;
LLVM_DEBUG(dbgs() << "replacing existing weak undefined symbol\n");
auto newSym = replaceSymbol<LazySymbol>(s, name, WASM_SYMBOL_BINDING_WEAK,
file, *sym);
auto newSym =
replaceSymbol<LazySymbol>(s, name, WASM_SYMBOL_BINDING_WEAK, file);
newSym->signature = oldSig;
return;
}
LLVM_DEBUG(dbgs() << "replacing existing undefined\n");
const InputFile *oldFile = s->getFile();
file->addMember(sym);
replaceSymbol<LazySymbol>(s, name, 0, file)->extract();
if (!config->whyExtract.empty())
ctx.whyExtractRecords.emplace_back(toString(oldFile), s->getFile(), *s);
}

View File

@ -83,7 +83,7 @@ public:
TableSymbol *resolveIndirectFunctionTable(bool required);
void addLazy(ArchiveFile *f, const llvm::object::Archive::Symbol *sym);
void addLazy(StringRef name, InputFile *f);
bool addComdat(StringRef name);

View File

@ -13,6 +13,7 @@
#include "InputFiles.h"
#include "OutputSections.h"
#include "OutputSegment.h"
#include "SymbolTable.h"
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Memory.h"
#include "llvm/Demangle/Demangle.h"
@ -426,23 +427,16 @@ const OutputSectionSymbol *SectionSymbol::getOutputSectionSymbol() const {
}
void LazySymbol::extract() {
cast<ArchiveFile>(file)->addMember(&archiveSymbol);
if (file->lazy) {
file->lazy = false;
symtab->addFile(file, name);
}
}
// Gives this lazy symbol weak binding: the bits covered by
// WASM_SYMBOL_BINDING_MASK are cleared and WASM_SYMBOL_BINDING_WEAK is set,
// while every other flag bit is preserved.
void LazySymbol::setWeak() {
  // Plain assignment is required here. With `|=` the masked term is a subset
  // of `flags`, so OR-ing it back in would leave any previously set binding
  // bits in place instead of replacing them.
  flags = (flags & ~WASM_SYMBOL_BINDING_MASK) | WASM_SYMBOL_BINDING_WEAK;
}
// Returns the memory buffer of the archive member that defines this lazy
// symbol. Both lookups go through CHECK with a message naming this symbol.
MemoryBufferRef LazySymbol::getMemberBuffer() {
  Archive::Child member =
      CHECK(archiveSymbol.getMember(),
            "could not get the member for symbol " + toString(*this));
  MemoryBufferRef memberBuffer =
      CHECK(member.getMemoryBufferRef(),
            "could not get the buffer for the member defining symbol " +
                toString(*this));
  return memberBuffer;
}
// Emits a "<file>: reference to <name>" message for the given symbol name
// and the input file that refers to it.
void printTraceSymbolUndefined(StringRef name, const InputFile* file) {
  const std::string origin = toString(file);
  message(origin + ": reference to " + name);
}

View File

@ -497,14 +497,12 @@ public:
// symbols into consideration.
class LazySymbol : public Symbol {
public:
LazySymbol(StringRef name, uint32_t flags, InputFile *file,
const llvm::object::Archive::Symbol &sym)
: Symbol(name, LazyKind, flags, file), archiveSymbol(sym) {}
LazySymbol(StringRef name, uint32_t flags, InputFile *file)
: Symbol(name, LazyKind, flags, file) {}
static bool classof(const Symbol *s) { return s->kind() == LazyKind; }
void extract();
void setWeak();
MemoryBufferRef getMemberBuffer();
// Lazy symbols can have a signature because they can replace an
// UndefinedFunction in which case we need to be able to preserve the
@ -512,9 +510,6 @@ public:
// TODO(sbc): This repetition of the signature field is inelegant. Revisit
// the use of class hierarchy to represent symbol taxonomy.
const WasmSignature *signature = nullptr;
private:
llvm::object::Archive::Symbol archiveSymbol;
};
// linker-generated symbols