Reland [lld-macho]Implement bundle_loader

Reland 1a0afcf518717f61d45a1cdc6ad1a6436ec663b1
  https://reviews.llvm.org/D95913

New change: fix UB bug caused by copying empty path/name. (since the executable does not have a name)
This commit is contained in:
Vy Nguyen 2021-02-22 13:03:02 -05:00
parent bb20cf2f1c
commit 5a856f5b44
9 changed files with 133 additions and 31 deletions

View File

@ -261,7 +261,8 @@ static std::vector<ArchiveMember> getArchiveMembers(MemoryBufferRef mb) {
return v;
}
static InputFile *addFile(StringRef path, bool forceLoadArchive) {
static InputFile *addFile(StringRef path, bool forceLoadArchive,
bool isBundleLoader = false) {
Optional<MemoryBufferRef> buffer = readFile(path);
if (!buffer)
return nullptr;
@ -325,6 +326,16 @@ static InputFile *addFile(StringRef path, bool forceLoadArchive) {
case file_magic::bitcode:
newFile = make<BitcodeFile>(mbref);
break;
case file_magic::macho_executable:
case file_magic::macho_bundle:
// We only allow executable and bundle type here if it is used
// as a bundle loader.
if (!isBundleLoader)
error(path + ": unhandled file type");
if (Optional<DylibFile *> dylibFile =
loadDylib(mbref, nullptr, isBundleLoader))
newFile = *dylibFile;
break;
default:
error(path + ": unhandled file type");
}
@ -756,6 +767,11 @@ bool macho::link(ArrayRef<const char *> argsArr, bool canExitEarly,
config->printEachFile = args.hasArg(OPT_t);
config->printWhyLoad = args.hasArg(OPT_why_load);
config->outputType = getOutputType(args);
if (const opt::Arg *arg = args.getLastArg(OPT_bundle_loader)) {
if (config->outputType != MH_BUNDLE)
error("-bundle_loader can only be used with MachO bundle output");
addFile(arg->getValue(), false, true);
}
config->ltoObjPath = args.getLastArgValue(OPT_object_path_lto);
config->ltoNewPassManager =
args.hasFlag(OPT_no_lto_legacy_pass_manager, OPT_lto_legacy_pass_manager,
@ -817,6 +833,7 @@ bool macho::link(ArrayRef<const char *> argsArr, bool canExitEarly,
const auto &opt = arg->getOption();
warnIfDeprecatedOption(opt);
warnIfUnimplementedOption(opt);
// TODO: are any of these better handled via filtered() or getLastArg()?
switch (opt.getID()) {
case OPT_INPUT:

View File

@ -44,7 +44,8 @@ std::string createResponseFile(const llvm::opt::InputArgList &args);
llvm::Optional<std::string> resolveDylibPath(llvm::StringRef path);
llvm::Optional<DylibFile *> loadDylib(llvm::MemoryBufferRef mbref,
DylibFile *umbrella = nullptr);
DylibFile *umbrella = nullptr,
bool isBundleLoader = false);
llvm::Optional<InputFile *> loadArchiveMember(MemoryBufferRef, uint32_t modTime,
StringRef archiveName,

View File

@ -176,7 +176,8 @@ Optional<std::string> macho::resolveDylibPath(StringRef path) {
static DenseMap<CachedHashStringRef, DylibFile *> loadedDylibs;
Optional<DylibFile *> macho::loadDylib(MemoryBufferRef mbref,
DylibFile *umbrella) {
DylibFile *umbrella,
bool isBundleLoader) {
StringRef path = mbref.getBufferIdentifier();
DylibFile *&file = loadedDylibs[CachedHashStringRef(path)];
if (file)
@ -190,11 +191,13 @@ Optional<DylibFile *> macho::loadDylib(MemoryBufferRef mbref,
": " + toString(result.takeError()));
return {};
}
file = make<DylibFile>(**result, umbrella);
file = make<DylibFile>(**result, umbrella, isBundleLoader);
} else {
assert(magic == file_magic::macho_dynamically_linked_shared_lib ||
magic == file_magic::macho_dynamically_linked_shared_lib_stub);
file = make<DylibFile>(mbref, umbrella);
magic == file_magic::macho_dynamically_linked_shared_lib_stub ||
magic == file_magic::macho_executable ||
magic == file_magic::macho_bundle);
file = make<DylibFile>(mbref, umbrella, isBundleLoader);
}
return file;
}

View File

@ -605,8 +605,11 @@ void loadReexport(StringRef path, DylibFile *umbrella) {
inputFiles.insert(*reexport);
}
DylibFile::DylibFile(MemoryBufferRef mb, DylibFile *umbrella)
: InputFile(DylibKind, mb), refState(RefState::Unreferenced) {
DylibFile::DylibFile(MemoryBufferRef mb, DylibFile *umbrella,
bool isBundleLoader)
: InputFile(DylibKind, mb), refState(RefState::Unreferenced),
isBundleLoader(isBundleLoader) {
assert(!isBundleLoader || !umbrella);
if (umbrella == nullptr)
umbrella = this;
@ -619,7 +622,9 @@ DylibFile::DylibFile(MemoryBufferRef mb, DylibFile *umbrella)
currentVersion = read32le(&c->dylib.current_version);
compatibilityVersion = read32le(&c->dylib.compatibility_version);
dylibName = reinterpret_cast<const char *>(cmd) + read32le(&c->dylib.name);
} else {
} else if (!isBundleLoader) {
// macho_executable and macho_bundle don't have LC_ID_DYLIB,
// so it's OK.
error("dylib " + toString(this) + " missing LC_ID_DYLIB load command");
return;
}
@ -658,8 +663,12 @@ DylibFile::DylibFile(MemoryBufferRef mb, DylibFile *umbrella)
}
}
DylibFile::DylibFile(const InterfaceFile &interface, DylibFile *umbrella)
: InputFile(DylibKind, interface), refState(RefState::Unreferenced) {
DylibFile::DylibFile(const InterfaceFile &interface, DylibFile *umbrella,
bool isBundleLoader)
: InputFile(DylibKind, interface), refState(RefState::Unreferenced),
isBundleLoader(isBundleLoader) {
// FIXME: Add test for the missing TBD code path.
if (umbrella == nullptr)
umbrella = this;

View File

@ -125,20 +125,28 @@ public:
// the root dylib to ensure symbols in the child library are correctly bound
// to the root. On the other hand, if a dylib is being directly loaded
// (through an -lfoo flag), then `umbrella` should be a nullptr.
explicit DylibFile(MemoryBufferRef mb, DylibFile *umbrella = nullptr);
explicit DylibFile(MemoryBufferRef mb, DylibFile *umbrella = nullptr,
bool isBundleLoader = false);
explicit DylibFile(const llvm::MachO::InterfaceFile &interface,
DylibFile *umbrella = nullptr);
DylibFile *umbrella = nullptr,
bool isBundleLoader = false);
static bool classof(const InputFile *f) { return f->kind() == DylibKind; }
StringRef dylibName;
uint32_t compatibilityVersion = 0;
uint32_t currentVersion = 0;
uint64_t ordinal = 0; // Ordinal numbering starts from 1, so 0 is a sentinel
int64_t ordinal = 0; // Ordinal numbering starts from 1, so 0 is a sentinel
RefState refState;
bool reexport = false;
bool forceWeakImport = false;
// An executable can be used as a bundle loader that will load the output
// file being linked, and that contains symbols referenced, but not
// implemented in the bundle. When used like this, it is very similar
// to a Dylib, so we re-used the same class to represent it.
bool isBundleLoader;
};
// .a file

View File

@ -396,7 +396,6 @@ def grp_bundle : OptionGroup<"bundle">, HelpText<"CREATING A BUNDLE">;
def bundle_loader : Separate<["-"], "bundle_loader">,
MetaVarName<"<executable>">,
HelpText<"Resolve undefined symbols from <executable>">,
Flags<[HelpHidden]>,
Group<grp_bundle>;
def grp_object : OptionGroup<"object">, HelpText<"CREATING AN OBJECT FILE">;

View File

@ -227,7 +227,7 @@ struct Binding {
OutputSegment *segment = nullptr;
uint64_t offset = 0;
int64_t addend = 0;
uint8_t ordinal = 0;
int16_t ordinal = 0;
};
} // namespace
@ -274,18 +274,24 @@ static void encodeBinding(const Symbol *sym, const OutputSection *osec,
}
// Non-weak bindings need to have their dylib ordinal encoded as well.
static void encodeDylibOrdinal(const DylibSymbol *dysym, Binding &lastBinding,
static void encodeDylibOrdinal(const DylibSymbol *dysym, Binding *lastBinding,
raw_svector_ostream &os) {
using namespace llvm::MachO;
if (lastBinding.ordinal != dysym->getFile()->ordinal) {
if (dysym->getFile()->ordinal <= BIND_IMMEDIATE_MASK) {
if (lastBinding == nullptr ||
lastBinding->ordinal != dysym->getFile()->ordinal) {
if (dysym->getFile()->ordinal <= 0) {
os << static_cast<uint8_t>(
BIND_OPCODE_SET_DYLIB_SPECIAL_IMM |
(dysym->getFile()->ordinal & BIND_IMMEDIATE_MASK));
} else if (dysym->getFile()->ordinal <= BIND_IMMEDIATE_MASK) {
os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM |
dysym->getFile()->ordinal);
} else {
os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB);
encodeULEB128(dysym->getFile()->ordinal, os);
}
lastBinding.ordinal = dysym->getFile()->ordinal;
if (lastBinding != nullptr)
lastBinding->ordinal = dysym->getFile()->ordinal;
}
}
@ -321,7 +327,7 @@ void BindingSection::finalizeContents() {
return a.target.getVA() < b.target.getVA();
});
for (const BindingEntry &b : bindings) {
encodeDylibOrdinal(b.dysym, lastBinding, os);
encodeDylibOrdinal(b.dysym, &lastBinding, os);
if (auto *isec = b.target.section.dyn_cast<const InputSection *>()) {
encodeBinding(b.dysym, isec->parent, isec->outSecOff + b.target.offset,
b.addend, /*isWeakBinding=*/false, lastBinding, os);
@ -543,13 +549,7 @@ uint32_t LazyBindingSection::encode(const DylibSymbol &sym) {
uint64_t offset = in.lazyPointers->addr - dataSeg->firstSection()->addr +
sym.stubsIndex * WordSize;
encodeULEB128(offset, os);
if (sym.getFile()->ordinal <= MachO::BIND_IMMEDIATE_MASK) {
os << static_cast<uint8_t>(MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_IMM |
sym.getFile()->ordinal);
} else {
os << static_cast<uint8_t>(MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB);
encodeULEB128(sym.getFile()->ordinal, os);
}
encodeDylibOrdinal(&sym, nullptr, os);
uint8_t flags = MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM;
if (sym.isWeakRef())
@ -810,7 +810,12 @@ void SymtabSection::writeTo(uint8_t *buf) const {
nList->n_desc |= defined->isExternalWeakDef() ? MachO::N_WEAK_DEF : 0;
} else if (auto *dysym = dyn_cast<DylibSymbol>(entry.sym)) {
uint16_t n_desc = nList->n_desc;
MachO::SET_LIBRARY_ORDINAL(n_desc, dysym->getFile()->ordinal);
if (dysym->getFile()->isBundleLoader)
MachO::SET_LIBRARY_ORDINAL(n_desc, MachO::EXECUTABLE_ORDINAL);
else
MachO::SET_LIBRARY_ORDINAL(
n_desc, static_cast<uint8_t>(dysym->getFile()->ordinal));
nList->n_type = MachO::N_EXT;
n_desc |= dysym->isWeakRef() ? MachO::N_WEAK_REF : 0;
nList->n_desc = n_desc;

View File

@ -497,9 +497,18 @@ void Writer::createLoadCommands() {
in.header->addLoadCommand(make<LCBuildVersion>(config->platform));
uint64_t dylibOrdinal = 1;
int64_t dylibOrdinal = 1;
for (InputFile *file : inputFiles) {
if (auto *dylibFile = dyn_cast<DylibFile>(file)) {
if (dylibFile->isBundleLoader) {
dylibFile->ordinal = MachO::BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE;
// Shortcut since bundle-loader does not re-export the symbols.
dylibFile->reexport = false;
continue;
}
dylibFile->ordinal = dylibOrdinal++;
LoadCommandType lcType =
dylibFile->forceWeakImport || dylibFile->refState == RefState::Weak
? LC_LOAD_WEAK_DYLIB
@ -507,7 +516,6 @@ void Writer::createLoadCommands() {
in.header->addLoadCommand(make<LCDylib>(lcType, dylibFile->dylibName,
dylibFile->compatibilityVersion,
dylibFile->currentVersion));
dylibFile->ordinal = dylibOrdinal++;
if (dylibFile->reexport)
in.header->addLoadCommand(

View File

@ -0,0 +1,52 @@
# REQUIRES: x86
# RUN: rm -rf %t; split-file %s %t
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/2.s -o %t/2.o
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/3.s -o %t/3.o
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/main.s -o %t/main.o
# RUN: %lld -lSystem -dylib -install_name %t/my_lib.dylib -o %t/mylib.dylib %t/2.o
# RUN: %lld %t/2.o %t/main.o -o %t/main
# RUN: %lld -lSystem -bundle -bundle_loader %t/main -o %t/bundle.bundle %t/3.o %t/mylib.dylib
# Check bundle.bundle to ensure the `my_func` symbol is from executable
# RUN: llvm-nm -m %t/bundle.bundle | FileCheck %s --check-prefix BUNDLE
# BUNDLE: (undefined) external _main (from executable)
# BUNDLE: (undefined) external my_func (from executable)
# RUN: llvm-objdump --macho --lazy-bind %t/bundle.bundle | FileCheck %s --check-prefix BUNDLE-OBJ
# BUNDLE-OBJ: segment section address dylib symbol
# BUNDLE-OBJ: __DATA __la_symbol_ptr 0x{{[0-9a-f]*}} main-executable my_fun
# RUN: %lld -lSystem -bundle -bundle_loader %t/main -o %t/bundle2.bundle %t/3.o %t/2.o
# Check bundle2.bundle to ensure that _main is still from executable
# but my_func is not.
# RUN: llvm-nm -m %t/bundle2.bundle | FileCheck %s --check-prefix BUNDLE2
# BUNDLE2: (undefined) external _main (from executable)
# BUNDLE2: (__TEXT,__text) external my_func
# Test that bundle_loader can only be used with MachO bundle output.
# RUN: not %lld -lSystem -bundle_loader %t/main -o %t/bundle3.bundle 2>&1 | FileCheck %s --check-prefix ERROR
# ERROR: -bundle_loader can only be used with MachO bundle output
#--- 2.s
# my_lib: This contains the exported function
.globl my_func
my_func:
retq
#--- 3.s
# my_user.s: This is the user/caller of the
# exported function
.text
my_user:
callq my_func()
retq
#--- main.s
# main.s: dummy exec/main loads the exported function.
# This is basically a way to say `my_user` should get
# `my_func` from this executable.
.globl _main
.text
_main:
retq