[lld-macho] LTO: Unset VisibleToRegularObj where possible

This allows LLVM's LTO to internalize symbols that are not referenced
directly by regular objects. Naturally, this means we need to track
which symbols are referenced by regular objects. The approach taken here
is similar to LLD-COFF's: like the COFF port, we extend
`SymbolTable::insert()` to set the isVisibleToRegularObj bit. (LLD-ELF
relies on the Symbol constructor and `Symbol::mergeProperties()`, but
the Mach-O port does not have a `mergeProperties()` equivalent.)

From what I can tell, ld64 (which uses libLTO) doesn't do this
optimization at all. I'm not even sure libLTO provides a way to do this.
Not having ld64's behavior as a reference implementation is unfortunate;
instead, I am relying on LLD-ELF/COFF's behavior as references while
erring on the conservative side. In particular, LLD-MachO will only do
this optimization for executables right now.

We also don't attempt it when `-flat_namespace` is used -- otherwise
we'd need scan the symbol table to find matches for every un-namespaced
symbol reference, which is expensive.

internalize.ll is based off the LLD-ELF tests `internalize-basic.ll` and
`internalize-undef.ll`. Looks like @davide added some of LLD-ELF's internalize
tests, so adding him as a reviewer...

Reviewed By: #lld-macho, gkm

Differential Revision: https://reviews.llvm.org/D99105
This commit is contained in:
Jez Ng 2021-04-15 21:14:29 -04:00
parent f7c9de0de5
commit eb5b7d4497
6 changed files with 119 additions and 25 deletions

View File

@ -25,6 +25,7 @@
using namespace lld;
using namespace lld::macho;
using namespace llvm;
using namespace llvm::MachO;
using namespace llvm::sys;
static lto::Config createConfig() {
@ -39,6 +40,9 @@ static lto::Config createConfig() {
};
c.TimeTraceEnabled = config->timeTraceEnabled;
c.TimeTraceGranularity = config->timeTraceGranularity;
if (config->saveTemps)
checkError(c.addSaveTemps(config->outputFile.str() + ".",
/*UseInputModulePath=*/true));
return c;
}
@ -67,6 +71,12 @@ void BitcodeCompiler::add(BitcodeFile &f) {
// be removed.
r.Prevailing = !objSym.isUndefined() && sym->getFile() == &f;
// FIXME: What about other output types? And we can probably be less
// restrictive with -flat_namespace, but it's an infrequent use case.
r.VisibleToRegularObj = config->outputType != MH_EXECUTE ||
config->namespaceKind == NamespaceKind::flat ||
sym->isUsedInRegularObj;
// Un-define the symbol so that we don't get duplicate symbol errors when we
// load the ObjFile emitted by LTO compilation.
if (r.Prevailing)
@ -74,7 +84,6 @@ void BitcodeCompiler::add(BitcodeFile &f) {
RefState::Strong);
// TODO: set the other resolution configs properly
r.VisibleToRegularObj = true;
}
checkError(ltoObj->add(std::move(f.obj), resols));
}

View File

@ -24,17 +24,22 @@ Symbol *SymbolTable::find(CachedHashStringRef cachedName) {
return symVector[it->second];
}
std::pair<Symbol *, bool> SymbolTable::insert(StringRef name) {
std::pair<Symbol *, bool> SymbolTable::insert(StringRef name,
const InputFile *file) {
auto p = symMap.insert({CachedHashStringRef(name), (int)symVector.size()});
// Name already present in the symbol table.
if (!p.second)
return {symVector[p.first->second], false};
Symbol *sym;
if (!p.second) {
// Name already present in the symbol table.
sym = symVector[p.first->second];
} else {
// Name is a new symbol.
sym = reinterpret_cast<Symbol *>(make<SymbolUnion>());
symVector.push_back(sym);
}
// Name is a new symbol.
Symbol *sym = reinterpret_cast<Symbol *>(make<SymbolUnion>());
symVector.push_back(sym);
return {sym, true};
sym->isUsedInRegularObj |= !file || isa<ObjFile>(file);
return {sym, p.second};
}
Defined *SymbolTable::addDefined(StringRef name, InputFile *file,
@ -44,7 +49,7 @@ Defined *SymbolTable::addDefined(StringRef name, InputFile *file,
Symbol *s;
bool wasInserted;
bool overridesWeakDef = false;
std::tie(s, wasInserted) = insert(name);
std::tie(s, wasInserted) = insert(name, file);
if (!wasInserted) {
if (auto *defined = dyn_cast<Defined>(s)) {
@ -77,7 +82,7 @@ Symbol *SymbolTable::addUndefined(StringRef name, InputFile *file,
bool isWeakRef) {
Symbol *s;
bool wasInserted;
std::tie(s, wasInserted) = insert(name);
std::tie(s, wasInserted) = insert(name, file);
RefState refState = isWeakRef ? RefState::Weak : RefState::Strong;
@ -96,7 +101,7 @@ Symbol *SymbolTable::addCommon(StringRef name, InputFile *file, uint64_t size,
uint32_t align, bool isPrivateExtern) {
Symbol *s;
bool wasInserted;
std::tie(s, wasInserted) = insert(name);
std::tie(s, wasInserted) = insert(name, file);
if (!wasInserted) {
if (auto *common = dyn_cast<CommonSymbol>(s)) {
@ -117,7 +122,7 @@ Symbol *SymbolTable::addDylib(StringRef name, DylibFile *file, bool isWeakDef,
bool isTlv) {
Symbol *s;
bool wasInserted;
std::tie(s, wasInserted) = insert(name);
std::tie(s, wasInserted) = insert(name, file);
RefState refState = RefState::Unreferenced;
if (!wasInserted) {
@ -149,7 +154,7 @@ Symbol *SymbolTable::addLazy(StringRef name, ArchiveFile *file,
const object::Archive::Symbol &sym) {
Symbol *s;
bool wasInserted;
std::tie(s, wasInserted) = insert(name);
std::tie(s, wasInserted) = insert(name, file);
if (wasInserted)
replaceSymbol<LazySymbol>(s, file, sym);

View File

@ -60,7 +60,7 @@ public:
Symbol *find(StringRef name) { return find(llvm::CachedHashStringRef(name)); }
private:
std::pair<Symbol *, bool> insert(StringRef name);
std::pair<Symbol *, bool> insert(StringRef name, const InputFile *);
llvm::DenseMap<llvm::CachedHashStringRef, int> symMap;
std::vector<Symbol *> symVector;
};

View File

@ -85,12 +85,17 @@ public:
protected:
Symbol(Kind k, StringRefZ name, InputFile *file)
: symbolKind(k), nameData(name.data), nameSize(name.size), file(file) {}
: symbolKind(k), nameData(name.data), nameSize(name.size), file(file),
isUsedInRegularObj(!file || isa<ObjFile>(file)) {}
Kind symbolKind;
const char *nameData;
mutable uint32_t nameSize;
InputFile *file;
public:
// True if this symbol was referenced by a regular (non-bitcode) object.
bool isUsedInRegularObj;
};
class Defined : public Symbol {
@ -249,7 +254,10 @@ T *replaceSymbol(Symbol *s, ArgT &&...arg) {
assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr &&
"Not a Symbol");
return new (s) T(std::forward<ArgT>(arg)...);
bool isUsedInRegularObj = s->isUsedInRegularObj;
T *sym = new (s) T(std::forward<ArgT>(arg)...);
sym->isUsedInRegularObj |= isUsedInRegularObj;
return sym;
}
} // namespace macho

View File

@ -0,0 +1,72 @@
; REQUIRES: x86
;; Check that we internalize bitcode symbols (only) where possible, i.e. when
;; they are not referenced by undefined symbols originating from non-bitcode
;; files.
; RUN: rm -rf %t; split-file %s %t
; RUN: llvm-as %t/test.s -o %t/test.o
; RUN: llvm-as %t/baz.s -o %t/baz.o
; RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/regular.s -o %t/regular.o
; RUN: %lld -pie -lSystem %t/test.o %t/baz.o %t/regular.o -o %t/test -save-temps
; RUN: llvm-dis < %t/test.0.2.internalize.bc | FileCheck %s
; RUN: llvm-objdump --macho --syms %t/test | FileCheck %s --check-prefix=SYMTAB
;; Check that main is not internalized. This covers the case of bitcode symbols
;; referenced by undefined symbols that don't belong to any InputFile.
; CHECK: define void @main()
;; Check that the foo and bar functions are correctly internalized.
; CHECK: define internal void @bar()
; CHECK: define internal void @foo()
;; Check that a bitcode symbol that is referenced by a regular object file isn't
;; internalized.
; CHECK: define void @used_in_regular_obj()
;; Check that a bitcode symbol that is defined in another bitcode file gets
;; internalized.
; CHECK: define internal void @baz()
; Check foo and bar are not emitted to the .symtab
; SYMTAB-LABEL: SYMBOL TABLE:
; SYMTAB-NEXT: g F __TEXT,__text _main
; SYMTAB-NEXT: g F __TEXT,__text _used_in_regular_obj
; SYMTAB-NEXT: g F __TEXT,__text __mh_execute_header
; SYMTAB-EMPTY:
;--- test.s
target triple = "x86_64-apple-macosx10.15.0"
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
declare void @baz()
define void @main() {
call void @bar()
call void @baz()
ret void
}
define void @bar() {
ret void
}
define hidden void @foo() {
ret void
}
define void @used_in_regular_obj() {
ret void
}
;--- baz.s
target triple = "x86_64-apple-macosx10.15.0"
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
define void @baz() {
ret void
}
;--- regular.s
.data
.quad _used_in_regular_obj

View File

@ -5,15 +5,15 @@
; RUN: rm -rf %t; split-file %s %t
; RUN: llvm-as %t/foo.ll -o %t/foo.o
; RUN: llvm-as %t/test.ll -o %t/test.o
; RUN: %lld -save-temps %t/foo.o %t/test.o -o %t/test
; RUN: llvm-as %t/bar.ll -o %t/bar.o
; RUN: %lld -dylib -save-temps %t/foo.o %t/bar.o -o %t/test
; RUN: llvm-objdump -d --no-show-raw-insn %t/test.lto.o | FileCheck %s --check-prefix=ALL
; RUN: llvm-objdump -d --no-show-raw-insn %t/test | FileCheck %s --check-prefix=ALL
; RUN: rm -rf %t; split-file %s %t
; RUN: opt -module-summary %t/foo.ll -o %t/foo.o
; RUN: opt -module-summary %t/test.ll -o %t/test.o
; RUN: %lld -save-temps %t/foo.o %t/test.o -o %t/test
; RUN: opt -module-summary %t/bar.ll -o %t/bar.o
; RUN: %lld -dylib -save-temps %t/foo.o %t/bar.o -o %t/test
; RUN: llvm-objdump -d --no-show-raw-insn %t/test1.lto.o | FileCheck %s --check-prefix=FOO
; RUN: llvm-objdump -d --no-show-raw-insn %t/test2.lto.o | FileCheck %s --check-prefix=MAIN
; RUN: llvm-objdump -d --no-show-raw-insn %t/test | FileCheck %s --check-prefix=ALL
@ -21,12 +21,12 @@
; FOO: <_foo>:
; FOO-NEXT: retq
; MAIN: <_main>:
; MAIN: <_bar>:
; MAIN-NEXT: retq
; ALL: <_foo>:
; ALL-NEXT: retq
; ALL: <_main>:
; ALL: <_bar>:
; ALL-NEXT: retq
;--- foo.ll
@ -38,11 +38,11 @@ define void @foo() {
ret void
}
;--- test.ll
;--- bar.ll
target triple = "x86_64-apple-macosx10.15.0"
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
define void @main() {
define void @bar() {
ret void
}