[lld/mac] Implement -dead_strip

Also adds support for live_support sections, no_dead_strip sections,
.no_dead_strip symbols.

Chromium Framework 345MB unstripped -> 250MB stripped
(vs 290MB unstripped -> 236MB stripped with ld64).

Doing dead stripping is a bit faster than not, because so much less
data needs to be processed:

    % ministat lld_*
    x lld_nostrip.txt
    + lld_strip.txt
        N           Min           Max        Median           Avg        Stddev
    x  10      3.929414       4.07692     4.0269079     4.0089678   0.044214794
    +  10     3.8129408     3.9025559     3.8670411     3.8642573   0.024779651
    Difference at 95.0% confidence
            -0.144711 +/- 0.0336749
            -3.60967% +/- 0.839989%
            (Student's t, pooled s = 0.0358398)

This interacts with many parts of the linker. I tried to add test coverage
for all added `isLive()` checks, so that some test will fail if any of them
is removed. I checked that the test expectations for the most part match
ld64's behavior (except for live-support-iterations.s, see the comment
in the test). Interacts with:
- debug info
- export tries
- import opcodes
- flags like -exported_symbol(s_list)
- -U / dynamic_lookup
- mod_init_funcs, mod_term_funcs
- weak symbol handling
- unwind info
- stubs
- map files
- -sectcreate
- undefined, dylib, common, defined (both absolute and normal) symbols

It's possible it interacts with more features I didn't think of,
of course.

I also did some manual testing:
- check-llvm check-clang check-lld work with lld with this patch
  as host linker and -dead_strip enabled
- Chromium still starts
- Chromium's base_unittests still pass, including unwind tests

Implementation-wise, this is InputSection-based, so it'll work for
object files with .subsections_via_symbols (which includes all
object files generated by clang). I first based this on the COFF
implementation, but later realized that things are more similar to ELF.
I think it'd be good to refactor MarkLive.cpp to look more like the ELF
part at some point, but I'd like to get a working state checked in first.

Mechanical parts:
- Rename canOmitFromOutput to wasCoalesced (no behavior change)
  since it really is for weak coalesced symbols
- Add noDeadStrip to Defined, corresponding to N_NO_DEAD_STRIP
  (`.no_dead_strip` in asm)

Fixes PR49276.

Differential Revision: https://reviews.llvm.org/D103324
This commit is contained in:
Nico Weber 2021-05-07 17:10:05 -04:00
parent 66a1ecd2cf
commit a5645513db
24 changed files with 1098 additions and 58 deletions

View File

@ -19,6 +19,7 @@ add_lld_library(lldMachO2
InputSection.cpp InputSection.cpp
LTO.cpp LTO.cpp
MapFile.cpp MapFile.cpp
MarkLive.cpp
ObjC.cpp ObjC.cpp
OutputSection.cpp OutputSection.cpp
OutputSegment.cpp OutputSegment.cpp

View File

@ -299,7 +299,8 @@ void ConcatOutputSection::finalize() {
r.referent = thunkInfo.sym = symtab->addDefined( r.referent = thunkInfo.sym = symtab->addDefined(
thunkName, /*file=*/nullptr, thunkInfo.isec, /*value=*/0, thunkName, /*file=*/nullptr, thunkInfo.isec, /*value=*/0,
/*size=*/thunkSize, /*isWeakDef=*/false, /*isPrivateExtern=*/true, /*size=*/thunkSize, /*isWeakDef=*/false, /*isPrivateExtern=*/true,
/*isThumb=*/false, /*isReferencedDynamically=*/false); /*isThumb=*/false, /*isReferencedDynamically=*/false,
/*noDeadStrip=*/false);
target->populateThunk(thunkInfo.isec, funcSym); target->populateThunk(thunkInfo.isec, funcSym);
finalizeOne(thunkInfo.isec); finalizeOne(thunkInfo.isec);
thunks.push_back(thunkInfo.isec); thunks.push_back(thunkInfo.isec);

View File

@ -111,6 +111,7 @@ struct Configuration {
llvm::StringRef thinLTOJobs; llvm::StringRef thinLTOJobs;
bool deadStripDylibs = false; bool deadStripDylibs = false;
bool demangle = false; bool demangle = false;
bool deadStrip = false;
PlatformInfo platformInfo; PlatformInfo platformInfo;
NamespaceKind namespaceKind = NamespaceKind::twolevel; NamespaceKind namespaceKind = NamespaceKind::twolevel;
UndefinedSymbolTreatment undefinedSymbolTreatment = UndefinedSymbolTreatment undefinedSymbolTreatment =

View File

@ -10,6 +10,7 @@
#include "Config.h" #include "Config.h"
#include "InputFiles.h" #include "InputFiles.h"
#include "LTO.h" #include "LTO.h"
#include "MarkLive.h"
#include "ObjC.h" #include "ObjC.h"
#include "OutputSection.h" #include "OutputSection.h"
#include "OutputSegment.h" #include "OutputSegment.h"
@ -541,12 +542,15 @@ static void replaceCommonSymbols() {
isec->flags = S_ZEROFILL; isec->flags = S_ZEROFILL;
inputSections.push_back(isec); inputSections.push_back(isec);
// FIXME: CommonSymbol should store isReferencedDynamically, noDeadStrip
// and pass them on here.
replaceSymbol<Defined>(sym, sym->getName(), isec->file, isec, /*value=*/0, replaceSymbol<Defined>(sym, sym->getName(), isec->file, isec, /*value=*/0,
/*size=*/0, /*size=*/0,
/*isWeakDef=*/false, /*isWeakDef=*/false,
/*isExternal=*/true, common->privateExtern, /*isExternal=*/true, common->privateExtern,
/*isThumb=*/false, /*isThumb=*/false,
/*isReferencedDynamically=*/false); /*isReferencedDynamically=*/false,
/*noDeadStrip=*/false);
} }
} }
@ -967,6 +971,9 @@ bool macho::link(ArrayRef<const char *> argsArr, bool canExitEarly,
depTracker = depTracker =
make<DependencyTracker>(args.getLastArgValue(OPT_dependency_info)); make<DependencyTracker>(args.getLastArgValue(OPT_dependency_info));
// Must be set before any InputSections and Symbols are created.
config->deadStrip = args.hasArg(OPT_dead_strip);
config->systemLibraryRoots = getSystemLibraryRoots(args); config->systemLibraryRoots = getSystemLibraryRoots(args);
if (const char *path = getReproduceOption(args)) { if (const char *path = getReproduceOption(args)) {
// Note that --reproduce is a debug option so you can ignore it // Note that --reproduce is a debug option so you can ignore it
@ -1285,6 +1292,9 @@ bool macho::link(ArrayRef<const char *> argsArr, bool canExitEarly,
} }
} }
if (config->deadStrip)
markLive();
// Write to an output file. // Write to an output file.
if (target->wordSize == 8) if (target->wordSize == 8)
writeResult<LP64>(); writeResult<LP64>();

View File

@ -488,10 +488,10 @@ static macho::Symbol *createDefined(const NList &sym, StringRef name,
if (isWeakDefCanBeHidden) if (isWeakDefCanBeHidden)
isPrivateExtern = true; isPrivateExtern = true;
return symtab->addDefined(name, isec->file, isec, value, size, return symtab->addDefined(
sym.n_desc & N_WEAK_DEF, isPrivateExtern, name, isec->file, isec, value, size, sym.n_desc & N_WEAK_DEF,
sym.n_desc & N_ARM_THUMB_DEF, isPrivateExtern, sym.n_desc & N_ARM_THUMB_DEF,
sym.n_desc & REFERENCED_DYNAMICALLY); sym.n_desc & REFERENCED_DYNAMICALLY, sym.n_desc & N_NO_DEAD_STRIP);
} }
assert(!isWeakDefCanBeHidden && assert(!isWeakDefCanBeHidden &&
@ -499,7 +499,8 @@ static macho::Symbol *createDefined(const NList &sym, StringRef name,
return make<Defined>( return make<Defined>(
name, isec->file, isec, value, size, sym.n_desc & N_WEAK_DEF, name, isec->file, isec, value, size, sym.n_desc & N_WEAK_DEF,
/*isExternal=*/false, /*isPrivateExtern=*/false, /*isExternal=*/false, /*isPrivateExtern=*/false,
sym.n_desc & N_ARM_THUMB_DEF, sym.n_desc & REFERENCED_DYNAMICALLY); sym.n_desc & N_ARM_THUMB_DEF, sym.n_desc & REFERENCED_DYNAMICALLY,
sym.n_desc & N_NO_DEAD_STRIP);
} }
// Absolute symbols are defined symbols that do not have an associated // Absolute symbols are defined symbols that do not have an associated
@ -512,13 +513,15 @@ static macho::Symbol *createAbsolute(const NList &sym, InputFile *file,
return symtab->addDefined(name, file, nullptr, sym.n_value, /*size=*/0, return symtab->addDefined(name, file, nullptr, sym.n_value, /*size=*/0,
/*isWeakDef=*/false, sym.n_type & N_PEXT, /*isWeakDef=*/false, sym.n_type & N_PEXT,
sym.n_desc & N_ARM_THUMB_DEF, sym.n_desc & N_ARM_THUMB_DEF,
/*isReferencedDynamically=*/false); /*isReferencedDynamically=*/false,
sym.n_desc & N_NO_DEAD_STRIP);
} }
return make<Defined>(name, file, nullptr, sym.n_value, /*size=*/0, return make<Defined>(name, file, nullptr, sym.n_value, /*size=*/0,
/*isWeakDef=*/false, /*isWeakDef=*/false,
/*isExternal=*/false, /*isPrivateExtern=*/false, /*isExternal=*/false, /*isPrivateExtern=*/false,
sym.n_desc & N_ARM_THUMB_DEF, sym.n_desc & N_ARM_THUMB_DEF,
/*isReferencedDynamically=*/false); /*isReferencedDynamically=*/false,
sym.n_desc & N_NO_DEAD_STRIP);
} }
template <class NList> template <class NList>
@ -614,7 +617,7 @@ void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
auto *nextIsec = make<InputSection>(*isec); auto *nextIsec = make<InputSection>(*isec);
nextIsec->data = isec->data.slice(symbolOffset); nextIsec->data = isec->data.slice(symbolOffset);
nextIsec->numRefs = 0; nextIsec->numRefs = 0;
nextIsec->canOmitFromOutput = false; nextIsec->wasCoalesced = false;
isec->data = isec->data.slice(0, symbolOffset); isec->data = isec->data.slice(0, symbolOffset);
// By construction, the symbol will be at offset zero in the new // By construction, the symbol will be at offset zero in the new
@ -640,6 +643,7 @@ OpaqueFile::OpaqueFile(MemoryBufferRef mb, StringRef segName,
isec->segname = segName.take_front(16); isec->segname = segName.take_front(16);
const auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart()); const auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
isec->data = {buf, mb.getBufferSize()}; isec->data = {buf, mb.getBufferSize()};
isec->live = true;
subsections.push_back({{0, isec}}); subsections.push_back({{0, isec}});
} }
@ -1027,7 +1031,8 @@ static macho::Symbol *createBitcodeSymbol(const lto::InputFile::Symbol &objSym,
return symtab->addDefined(name, &file, /*isec=*/nullptr, /*value=*/0, return symtab->addDefined(name, &file, /*isec=*/nullptr, /*value=*/0,
/*size=*/0, objSym.isWeak(), isPrivateExtern, /*size=*/0, objSym.isWeak(), isPrivateExtern,
/*isThumb=*/false, /*isThumb=*/false,
/*isReferencedDynamically=*/false); /*isReferencedDynamically=*/false,
/*noDeadStrip=*/false);
} }
BitcodeFile::BitcodeFile(MemoryBufferRef mbref) BitcodeFile::BitcodeFile(MemoryBufferRef mbref)

View File

@ -9,6 +9,7 @@
#ifndef LLD_MACHO_INPUT_SECTION_H #ifndef LLD_MACHO_INPUT_SECTION_H
#define LLD_MACHO_INPUT_SECTION_H #define LLD_MACHO_INPUT_SECTION_H
#include "Config.h"
#include "Relocations.h" #include "Relocations.h"
#include "lld/Common/LLVM.h" #include "lld/Common/LLVM.h"
@ -47,17 +48,17 @@ public:
// How many symbols refer to this InputSection. // How many symbols refer to this InputSection.
uint32_t numRefs = 0; uint32_t numRefs = 0;
// True if this InputSection could not be written to the output file. // With subsections_via_symbols, most symbols have their own InputSection,
// With subsections_via_symbols, most symbol have its own InputSection,
// and for weak symbols (e.g. from inline functions), only the // and for weak symbols (e.g. from inline functions), only the
// InputSection from one translation unit will make it to the output, // InputSection from one translation unit will make it to the output,
// while all copies in other translation units are coalesced into the // while all copies in other translation units are coalesced into the
// first and not copied to the output. // first and not copied to the output.
bool canOmitFromOutput = false; bool wasCoalesced = false;
bool shouldOmitFromOutput() const { bool isCoalescedWeak() const { return wasCoalesced && numRefs == 0; }
return canOmitFromOutput && numRefs == 0; bool shouldOmitFromOutput() const { return !live || isCoalescedWeak(); }
}
bool live = !config->deadStrip;
ArrayRef<uint8_t> data; ArrayRef<uint8_t> data;
std::vector<Reloc> relocs; std::vector<Reloc> relocs;

View File

@ -64,11 +64,9 @@ static std::vector<Defined *> getSymbols() {
for (InputFile *file : inputFiles) for (InputFile *file : inputFiles)
if (isa<ObjFile>(file)) if (isa<ObjFile>(file))
for (Symbol *sym : file->symbols) { for (Symbol *sym : file->symbols) {
if (sym == nullptr) if (auto *d = dyn_cast_or_null<Defined>(sym))
continue; if (d->isLive() && d->isec && d->getFile() == file) {
if (auto *d = dyn_cast<Defined>(sym)) assert(!d->isec->isCoalescedWeak() &&
if (d->isec && d->getFile() == file) {
assert(!d->isec->shouldOmitFromOutput() &&
"file->symbols should store resolved symbols"); "file->symbols should store resolved symbols");
v.push_back(d); v.push_back(d);
} }

189
lld/MachO/MarkLive.cpp Normal file
View File

@ -0,0 +1,189 @@
//===- MarkLive.cpp -------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "MarkLive.h"
#include "Config.h"
#include "OutputSegment.h"
#include "SymbolTable.h"
#include "Symbols.h"
#include "UnwindInfoSection.h"
#include "mach-o/compact_unwind_encoding.h"
#include "llvm/Support/TimeProfiler.h"
namespace lld {
namespace macho {
using namespace llvm;
using namespace llvm::MachO;
// Set live bit on for each reachable chunk. Unmarked (unreachable)
// InputSections will be ignored by Writer, so they will be excluded
// from the final output.
void markLive() {
TimeTraceScope timeScope("markLive");
// We build up a worklist of sections which have been marked as live. We only
// push into the worklist when we discover an unmarked section, and we mark
// as we push, so sections never appear twice in the list.
SmallVector<InputSection *, 256> worklist;
auto enqueue = [&](InputSection *s) {
if (s->live)
return;
s->live = true;
worklist.push_back(s);
};
auto addSym = [&](Symbol *s) {
s->used = true;
if (auto *d = dyn_cast<Defined>(s))
if (d->isec)
enqueue(d->isec);
};
// Add GC roots.
if (config->entry)
addSym(config->entry);
for (Symbol *sym : symtab->getSymbols()) {
if (auto *defined = dyn_cast<Defined>(sym)) {
// -exported_symbol(s_list)
if (!config->exportedSymbols.empty() &&
config->exportedSymbols.match(defined->getName())) {
// FIXME: Instead of doing this here, maybe the Driver code doing
// the matching should add them to explicitUndefineds? Then the
// explicitUndefineds code below would handle this automatically.
assert(!defined->privateExtern &&
"should have been rejected by driver");
addSym(defined);
continue;
}
// public symbols explicitly marked .no_dead_strip
if (defined->referencedDynamically || defined->noDeadStrip) {
addSym(defined);
continue;
}
// FIXME: When we implement these flags, make symbols from them GC roots:
// * -reexported_symbol(s_list)
// * -alias(-list)
// * -init
// In dylibs and bundles, all external functions are GC roots.
// FIXME: -export_dynamic should enable this for executables too.
if (config->outputType != MH_EXECUTE && !defined->privateExtern) {
addSym(defined);
continue;
}
}
}
// -u symbols
for (Symbol *sym : config->explicitUndefineds)
if (auto *defined = dyn_cast<Defined>(sym))
addSym(defined);
// local symbols explicitly marked .no_dead_strip
for (const InputFile *file : inputFiles)
if (auto *objFile = dyn_cast<ObjFile>(file))
for (Symbol *sym : objFile->symbols)
if (auto *defined = dyn_cast_or_null<Defined>(sym))
if (!defined->isExternal() && defined->noDeadStrip)
addSym(defined);
if (auto *stubBinder =
dyn_cast_or_null<DylibSymbol>(symtab->find("dyld_stub_binder")))
addSym(stubBinder);
for (InputSection *isec : inputSections) {
// Sections marked no_dead_strip
if (isec->flags & S_ATTR_NO_DEAD_STRIP) {
enqueue(isec);
continue;
}
// mod_init_funcs, mod_term_funcs sections
if (sectionType(isec->flags) == S_MOD_INIT_FUNC_POINTERS ||
sectionType(isec->flags) == S_MOD_TERM_FUNC_POINTERS) {
enqueue(isec);
continue;
}
// Dead strip runs before UnwindInfoSection handling so we need to keep
// __LD,__compact_unwind alive here.
// But that section contains absolute references to __TEXT,__text and
// keeps most code alive due to that. So we can't just enqueue() the
// section: We must skip the relocations for the functionAddress
// in each CompactUnwindEntry.
// See also scanEhFrameSection() in lld/ELF/MarkLive.cpp.
if (isec->segname == segment_names::ld &&
isec->name == section_names::compactUnwind) {
isec->live = true;
const int compactUnwindEntrySize =
target->wordSize == 8 ? sizeof(CompactUnwindEntry<uint64_t>)
: sizeof(CompactUnwindEntry<uint32_t>);
for (const Reloc &r : isec->relocs) {
// This is the relocation for the address of the function itself.
// Ignore it, else these would keep everything alive.
if (r.offset % compactUnwindEntrySize == 0)
continue;
if (auto *s = r.referent.dyn_cast<Symbol *>())
addSym(s);
else {
auto *referentIsec = r.referent.get<InputSection *>();
assert(!referentIsec->isCoalescedWeak());
enqueue(referentIsec);
}
}
continue;
}
}
do {
// Mark things reachable from GC roots as live.
while (!worklist.empty()) {
InputSection *s = worklist.pop_back_val();
assert(s->live && "We mark as live when pushing onto the worklist!");
// Mark all symbols listed in the relocation table for this section.
for (const Reloc &r : s->relocs) {
if (auto *s = r.referent.dyn_cast<Symbol *>()) {
addSym(s);
} else {
auto *referentIsec = r.referent.get<InputSection *>();
assert(!referentIsec->isCoalescedWeak());
enqueue(referentIsec);
}
}
}
// S_ATTR_LIVE_SUPPORT sections are live if they point _to_ a live section.
// Process them in a second pass.
for (InputSection *isec : inputSections) {
// FIXME: Check if copying all S_ATTR_LIVE_SUPPORT sections into a
// separate vector and only walking that here is faster.
if (!(isec->flags & S_ATTR_LIVE_SUPPORT) || isec->live)
continue;
for (const Reloc &r : isec->relocs) {
bool referentLive;
if (auto *s = r.referent.dyn_cast<Symbol *>())
referentLive = s->isLive();
else
referentLive = r.referent.get<InputSection *>()->live;
if (referentLive)
enqueue(isec);
}
}
// S_ATTR_LIVE_SUPPORT could have marked additional sections live,
// which in turn could mark additional S_ATTR_LIVE_SUPPORT sections live.
// Iterate. In practice, the second iteration won't mark additional
// S_ATTR_LIVE_SUPPORT sections live.
} while (!worklist.empty());
}
} // namespace macho
} // namespace lld

20
lld/MachO/MarkLive.h Normal file
View File

@ -0,0 +1,20 @@
//===- MarkLive.h -----------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLD_MACHO_MARKLIVE_H
#define LLD_MACHO_MARKLIVE_H
namespace lld {
namespace macho {
// Dead-strip marking pass: sets the `live` bit on every InputSection that is
// reachable from a GC root and the `used` bit on every symbol referenced
// along the way. Sections left unmarked are omitted from the output.
void markLive();
} // namespace macho
} // namespace lld
#endif // LLD_MACHO_MARKLIVE_H

View File

@ -218,7 +218,6 @@ def grp_opts : OptionGroup<"opts">, HelpText<"OPTIMIZATIONS">;
def dead_strip : Flag<["-"], "dead_strip">, def dead_strip : Flag<["-"], "dead_strip">,
HelpText<"Remove unreachable functions and data">, HelpText<"Remove unreachable functions and data">,
Flags<[HelpHidden]>,
Group<grp_opts>; Group<grp_opts>;
def order_file : Separate<["-"], "order_file">, def order_file : Separate<["-"], "order_file">,
MetaVarName<"<file>">, MetaVarName<"<file>">,

View File

@ -46,7 +46,8 @@ Defined *SymbolTable::addDefined(StringRef name, InputFile *file,
InputSection *isec, uint64_t value, InputSection *isec, uint64_t value,
uint64_t size, bool isWeakDef, uint64_t size, bool isWeakDef,
bool isPrivateExtern, bool isThumb, bool isPrivateExtern, bool isThumb,
bool isReferencedDynamically) { bool isReferencedDynamically,
bool noDeadStrip) {
Symbol *s; Symbol *s;
bool wasInserted; bool wasInserted;
bool overridesWeakDef = false; bool overridesWeakDef = false;
@ -63,6 +64,7 @@ Defined *SymbolTable::addDefined(StringRef name, InputFile *file,
// If one of them isn't private extern, the merged symbol isn't. // If one of them isn't private extern, the merged symbol isn't.
defined->privateExtern &= isPrivateExtern; defined->privateExtern &= isPrivateExtern;
defined->referencedDynamically |= isReferencedDynamically; defined->referencedDynamically |= isReferencedDynamically;
defined->noDeadStrip |= noDeadStrip;
// FIXME: Handle this for bitcode files. // FIXME: Handle this for bitcode files.
// FIXME: We currently only do this if both symbols are weak. // FIXME: We currently only do this if both symbols are weak.
@ -70,7 +72,7 @@ Defined *SymbolTable::addDefined(StringRef name, InputFile *file,
// case where !isWeakDef && defined->isWeakDef() right // case where !isWeakDef && defined->isWeakDef() right
// requires some care and testing). // requires some care and testing).
if (isec) if (isec)
isec->canOmitFromOutput = true; isec->wasCoalesced = true;
} }
return defined; return defined;
@ -89,7 +91,7 @@ Defined *SymbolTable::addDefined(StringRef name, InputFile *file,
Defined *defined = replaceSymbol<Defined>( Defined *defined = replaceSymbol<Defined>(
s, name, file, isec, value, size, isWeakDef, /*isExternal=*/true, s, name, file, isec, value, size, isWeakDef, /*isExternal=*/true,
isPrivateExtern, isThumb, isReferencedDynamically); isPrivateExtern, isThumb, isReferencedDynamically, noDeadStrip);
defined->overridesWeakDef = overridesWeakDef; defined->overridesWeakDef = overridesWeakDef;
return defined; return defined;
} }
@ -188,7 +190,8 @@ Defined *SymbolTable::addSynthetic(StringRef name, InputSection *isec,
bool referencedDynamically) { bool referencedDynamically) {
Defined *s = addDefined(name, nullptr, isec, value, /*size=*/0, Defined *s = addDefined(name, nullptr, isec, value, /*size=*/0,
/*isWeakDef=*/false, isPrivateExtern, /*isWeakDef=*/false, isPrivateExtern,
/*isThumb=*/false, referencedDynamically); /*isThumb=*/false, referencedDynamically,
/*noDeadStrip=*/false);
s->includeInSymtab = includeInSymtab; s->includeInSymtab = includeInSymtab;
return s; return s;
} }

View File

@ -40,7 +40,7 @@ public:
Defined *addDefined(StringRef name, InputFile *, InputSection *, Defined *addDefined(StringRef name, InputFile *, InputSection *,
uint64_t value, uint64_t size, bool isWeakDef, uint64_t value, uint64_t size, bool isWeakDef,
bool isPrivateExtern, bool isThumb, bool isPrivateExtern, bool isThumb,
bool isReferencedDynamically); bool isReferencedDynamically, bool noDeadStrip);
Symbol *addUndefined(StringRef name, InputFile *, bool isWeakRef); Symbol *addUndefined(StringRef name, InputFile *, bool isWeakRef);

View File

@ -31,7 +31,29 @@ uint64_t Symbol::getStubVA() const { return in.stubs->getVA(stubsIndex); }
uint64_t Symbol::getGotVA() const { return in.got->getVA(gotIndex); } uint64_t Symbol::getGotVA() const { return in.got->getVA(gotIndex); }
uint64_t Symbol::getTlvVA() const { return in.tlvPointers->getVA(gotIndex); } uint64_t Symbol::getTlvVA() const { return in.tlvPointers->getVA(gotIndex); }
bool Symbol::isLive() const {
if (isa<DylibSymbol>(this) || isa<Undefined>(this))
return used;
if (auto *d = dyn_cast<Defined>(this)) {
// Non-absolute symbols might be alive because their section is
// no_dead_strip or live_support. In that case, the section will know
// that it's live but `used` might be false. Non-absolute symbols always
// have to use the section's `live` bit as source of truth.
return d->isAbsolute() ? used : d->isec->live;
}
assert(!isa<CommonSymbol>(this) &&
"replaceCommonSymbols() runs before dead code stripping, and isLive() "
"should only be called after dead code stripping");
// Assume any other kind of symbol is live.
return true;
}
uint64_t Defined::getVA() const { uint64_t Defined::getVA() const {
assert(isLive() && "this should only be called for live symbols");
if (isAbsolute()) if (isAbsolute())
return value; return value;

View File

@ -51,6 +51,8 @@ public:
return {nameData, nameSize}; return {nameData, nameSize};
} }
bool isLive() const;
virtual uint64_t getVA() const { return 0; } virtual uint64_t getVA() const { return 0; }
virtual uint64_t getFileOffset() const { virtual uint64_t getFileOffset() const {
@ -96,7 +98,8 @@ public:
protected: protected:
Symbol(Kind k, StringRefZ name, InputFile *file) Symbol(Kind k, StringRefZ name, InputFile *file)
: symbolKind(k), nameData(name.data), nameSize(name.size), file(file), : symbolKind(k), nameData(name.data), nameSize(name.size), file(file),
isUsedInRegularObj(!file || isa<ObjFile>(file)) {} isUsedInRegularObj(!file || isa<ObjFile>(file)),
used(!config->deadStrip) {}
Kind symbolKind; Kind symbolKind;
const char *nameData; const char *nameData;
@ -105,19 +108,22 @@ protected:
public: public:
// True if this symbol was referenced by a regular (non-bitcode) object. // True if this symbol was referenced by a regular (non-bitcode) object.
bool isUsedInRegularObj; bool isUsedInRegularObj : 1;
// True if an undefined or dylib symbol is used from a live section.
bool used : 1;
}; };
class Defined : public Symbol { class Defined : public Symbol {
public: public:
Defined(StringRefZ name, InputFile *file, InputSection *isec, uint64_t value, Defined(StringRefZ name, InputFile *file, InputSection *isec, uint64_t value,
uint64_t size, bool isWeakDef, bool isExternal, bool isPrivateExtern, uint64_t size, bool isWeakDef, bool isExternal, bool isPrivateExtern,
bool isThumb, bool isReferencedDynamically) bool isThumb, bool isReferencedDynamically, bool noDeadStrip)
: Symbol(DefinedKind, name, file), isec(isec), value(value), size(size), : Symbol(DefinedKind, name, file), isec(isec), value(value), size(size),
overridesWeakDef(false), privateExtern(isPrivateExtern), overridesWeakDef(false), privateExtern(isPrivateExtern),
includeInSymtab(true), thumb(isThumb), includeInSymtab(true), thumb(isThumb),
referencedDynamically(isReferencedDynamically), weakDef(isWeakDef), referencedDynamically(isReferencedDynamically),
external(isExternal) { noDeadStrip(noDeadStrip), weakDef(isWeakDef), external(isExternal) {
if (isec) if (isec)
isec->numRefs++; isec->numRefs++;
} }
@ -156,7 +162,14 @@ public:
// symbol table by tools like strip. In theory, this could be set on arbitrary // symbol table by tools like strip. In theory, this could be set on arbitrary
// symbols in input object files. In practice, it's used solely for the // symbols in input object files. In practice, it's used solely for the
// synthetic __mh_execute_header symbol. // synthetic __mh_execute_header symbol.
// This is information for the static linker, and it's also written to the
// output file's symbol table for tools running later (such as `strip`).
bool referencedDynamically : 1; bool referencedDynamically : 1;
// Set on symbols that should not be removed by dead code stripping.
// Set for example on `__attribute__((used))` globals, or on some Objective-C
// metadata. This is information only for the static linker and not written
// to the output.
bool noDeadStrip : 1;
private: private:
const bool weakDef : 1; const bool weakDef : 1;

View File

@ -469,11 +469,15 @@ void StubHelperSection::setup() {
in.got->addEntry(stubBinder); in.got->addEntry(stubBinder);
inputSections.push_back(in.imageLoaderCache); inputSections.push_back(in.imageLoaderCache);
// Since this isn't in the symbol table or in any input file, the noDeadStrip
// argument doesn't matter. It's kept alive by ImageLoaderCacheSection()
// setting `live` to true on the backing InputSection.
dyldPrivate = dyldPrivate =
make<Defined>("__dyld_private", nullptr, in.imageLoaderCache, 0, 0, make<Defined>("__dyld_private", nullptr, in.imageLoaderCache, 0, 0,
/*isWeakDef=*/false, /*isWeakDef=*/false,
/*isExternal=*/false, /*isPrivateExtern=*/false, /*isExternal=*/false, /*isPrivateExtern=*/false,
/*isThumb=*/false, /*isReferencedDynamically=*/false); /*isThumb=*/false, /*isReferencedDynamically=*/false,
/*noDeadStrip=*/false);
} }
ImageLoaderCacheSection::ImageLoaderCacheSection() { ImageLoaderCacheSection::ImageLoaderCacheSection() {
@ -483,6 +487,7 @@ ImageLoaderCacheSection::ImageLoaderCacheSection() {
memset(arr, 0, target->wordSize); memset(arr, 0, target->wordSize);
data = {arr, target->wordSize}; data = {arr, target->wordSize};
align = target->wordSize; align = target->wordSize;
live = true;
} }
LazyPointerSection::LazyPointerSection() LazyPointerSection::LazyPointerSection()
@ -571,7 +576,7 @@ void ExportSection::finalizeContents() {
trieBuilder.setImageBase(in.header->addr); trieBuilder.setImageBase(in.header->addr);
for (const Symbol *sym : symtab->getSymbols()) { for (const Symbol *sym : symtab->getSymbols()) {
if (const auto *defined = dyn_cast<Defined>(sym)) { if (const auto *defined = dyn_cast<Defined>(sym)) {
if (defined->privateExtern) if (defined->privateExtern || !defined->isLive())
continue; continue;
trieBuilder.addSymbol(*defined); trieBuilder.addSymbol(*defined);
hasWeakSymbol = hasWeakSymbol || sym->isWeakDef(); hasWeakSymbol = hasWeakSymbol || sym->isWeakDef();
@ -590,7 +595,7 @@ void FunctionStartsSection::finalizeContents() {
uint64_t addr = in.header->addr; uint64_t addr = in.header->addr;
for (const Symbol *sym : symtab->getSymbols()) { for (const Symbol *sym : symtab->getSymbols()) {
if (const auto *defined = dyn_cast<Defined>(sym)) { if (const auto *defined = dyn_cast<Defined>(sym)) {
if (!defined->isec || !isCodeSection(defined->isec)) if (!defined->isec || !isCodeSection(defined->isec) || !defined->isLive())
continue; continue;
// TODO: Add support for thumbs, in that case // TODO: Add support for thumbs, in that case
// the lowest bit of nextAddr needs to be set to 1. // the lowest bit of nextAddr needs to be set to 1.
@ -667,6 +672,8 @@ void SymtabSection::emitStabs() {
for (const SymtabEntry &entry : for (const SymtabEntry &entry :
concat<SymtabEntry>(localSymbols, externalSymbols)) { concat<SymtabEntry>(localSymbols, externalSymbols)) {
Symbol *sym = entry.sym; Symbol *sym = entry.sym;
assert(sym->isLive() &&
"dead symbols should not be in localSymbols, externalSymbols");
if (auto *defined = dyn_cast<Defined>(sym)) { if (auto *defined = dyn_cast<Defined>(sym)) {
if (defined->isAbsolute()) if (defined->isAbsolute())
continue; continue;
@ -729,12 +736,8 @@ void SymtabSection::finalizeContents() {
for (const InputFile *file : inputFiles) { for (const InputFile *file : inputFiles) {
if (auto *objFile = dyn_cast<ObjFile>(file)) { if (auto *objFile = dyn_cast<ObjFile>(file)) {
for (Symbol *sym : objFile->symbols) { for (Symbol *sym : objFile->symbols) {
if (sym == nullptr) if (auto *defined = dyn_cast_or_null<Defined>(sym)) {
continue; if (!defined->isExternal() && defined->isLive()) {
// TODO: when we implement -dead_strip, we should filter out symbols
// that belong to dead sections.
if (auto *defined = dyn_cast<Defined>(sym)) {
if (!defined->isExternal()) {
StringRef name = defined->getName(); StringRef name = defined->getName();
if (!name.startswith("l") && !name.startswith("L")) if (!name.startswith("l") && !name.startswith("L"))
addSymbol(localSymbols, sym); addSymbol(localSymbols, sym);
@ -750,6 +753,8 @@ void SymtabSection::finalizeContents() {
addSymbol(localSymbols, dyldPrivate); addSymbol(localSymbols, dyldPrivate);
for (Symbol *sym : symtab->getSymbols()) { for (Symbol *sym : symtab->getSymbols()) {
if (!sym->isLive())
continue;
if (auto *defined = dyn_cast<Defined>(sym)) { if (auto *defined = dyn_cast<Defined>(sym)) {
if (!defined->includeInSymtab) if (!defined->includeInSymtab)
continue; continue;

View File

@ -220,7 +220,7 @@ struct WeakBindingEntry {
// other dylibs should coalesce to. // other dylibs should coalesce to.
// //
// 2) Weak bindings: These tell dyld that a given symbol reference should // 2) Weak bindings: These tell dyld that a given symbol reference should
// coalesce to a non-weak definition if one is found. Note that unlike in the // coalesce to a non-weak definition if one is found. Note that unlike the
// entries in the BindingSection, the bindings here only refer to these // entries in the BindingSection, the bindings here only refer to these
// symbols by name, but do not specify which dylib to load them from. // symbols by name, but do not specify which dylib to load them from.
class WeakBindingSection : public LinkEditSection { class WeakBindingSection : public LinkEditSection {

View File

@ -89,19 +89,11 @@ using namespace lld::macho;
// compact_unwind_encoding.h for an overview of the format we are encoding // compact_unwind_encoding.h for an overview of the format we are encoding
// here. // here.
// TODO(gkm): prune __eh_frame entries superseded by __unwind_info // TODO(gkm): prune __eh_frame entries superseded by __unwind_info, PR50410
// TODO(gkm): how do we align the 2nd-level pages? // TODO(gkm): how do we align the 2nd-level pages?
using EncodingMap = llvm::DenseMap<compact_unwind_encoding_t, size_t>; using EncodingMap = llvm::DenseMap<compact_unwind_encoding_t, size_t>;
// One fixed-size record of a __compact_unwind input section.
// UnwindInfoSectionImpl<Ptr>::prepareRelocations() reduces relocation offsets
// modulo sizeof(CompactUnwindEntry<Ptr>), so the field order and sizes here
// matter.
// NOTE(review): Ptr is presumably the target's pointer-sized integer type --
// confirm against the instantiations.
template <class Ptr> struct CompactUnwindEntry {
Ptr functionAddress;
uint32_t functionLength;
compact_unwind_encoding_t encoding;
// Personality references that arrive as section relocations are replaced by
// synthetic symbols with GOT entries in prepareRelocations().
Ptr personality;
Ptr lsda;
};
struct SecondLevelPage { struct SecondLevelPage {
uint32_t kind; uint32_t kind;
size_t entryIndex; size_t entryIndex;
@ -146,6 +138,11 @@ void UnwindInfoSectionImpl<Ptr>::prepareRelocations(InputSection *isec) {
assert(!isec->shouldOmitFromOutput() && assert(!isec->shouldOmitFromOutput() &&
"__compact_unwind section should not be omitted"); "__compact_unwind section should not be omitted");
// FIXME: This could skip relocations for CompactUnwindEntries that
// point to dead-stripped functions. That might save some amount of
// work. But since there are usually just a few personality functions
// that are referenced from many places, and at least some of those
// references are likely live, it wouldn't reduce the number of GOT entries.
for (Reloc &r : isec->relocs) { for (Reloc &r : isec->relocs) {
assert(target->hasAttr(r.type, RelocAttrBits::UNSIGNED)); assert(target->hasAttr(r.type, RelocAttrBits::UNSIGNED));
if (r.offset % sizeof(CompactUnwindEntry<Ptr>) != if (r.offset % sizeof(CompactUnwindEntry<Ptr>) !=
@ -177,17 +174,20 @@ void UnwindInfoSectionImpl<Ptr>::prepareRelocations(InputSection *isec) {
} }
if (auto *referentIsec = r.referent.dyn_cast<InputSection *>()) { if (auto *referentIsec = r.referent.dyn_cast<InputSection *>()) {
assert(!referentIsec->shouldOmitFromOutput()); assert(!referentIsec->isCoalescedWeak());
// Personality functions can be referenced via section relocations // Personality functions can be referenced via section relocations
// if they live in the same object file. Create placeholder synthetic // if they live in the same object file. Create placeholder synthetic
// symbols for them in the GOT. // symbols for them in the GOT.
Symbol *&s = personalityTable[{referentIsec, r.addend}]; Symbol *&s = personalityTable[{referentIsec, r.addend}];
if (s == nullptr) { if (s == nullptr) {
// This runs after dead stripping, so the noDeadStrip argument does not
// matter.
s = make<Defined>("<internal>", /*file=*/nullptr, referentIsec, s = make<Defined>("<internal>", /*file=*/nullptr, referentIsec,
r.addend, /*size=*/0, /*isWeakDef=*/false, r.addend, /*size=*/0, /*isWeakDef=*/false,
/*isExternal=*/false, /*isPrivateExtern=*/false, /*isExternal=*/false, /*isPrivateExtern=*/false,
/*isThumb=*/false, /*isReferencedDynamically=*/false); /*isThumb=*/false, /*isReferencedDynamically=*/false,
/*noDeadStrip=*/false);
in.got->addEntry(s); in.got->addEntry(s);
} }
r.referent = s; r.referent = s;

View File

@ -20,6 +20,14 @@
namespace lld { namespace lld {
namespace macho { namespace macho {
// One fixed-size record of a __compact_unwind input section.
// UnwindInfoSectionImpl<Ptr>::prepareRelocations() reduces relocation offsets
// modulo sizeof(CompactUnwindEntry<Ptr>), so the field order and sizes here
// matter.
// NOTE(review): Ptr is presumably the target's pointer-sized integer type --
// confirm against the instantiations.
template <class Ptr> struct CompactUnwindEntry {
Ptr functionAddress;
uint32_t functionLength;
compact_unwind_encoding_t encoding;
// Personality references that arrive as section relocations are replaced by
// synthetic symbols with GOT entries in prepareRelocations().
Ptr personality;
Ptr lsda;
};
class UnwindInfoSection : public SyntheticSection { class UnwindInfoSection : public SyntheticSection {
public: public:
bool isNeeded() const override { return compactUnwindSection != nullptr; } bool isNeeded() const override { return compactUnwindSection != nullptr; }

View File

@ -609,9 +609,10 @@ void Writer::scanSymbols() {
TimeTraceScope timeScope("Scan symbols"); TimeTraceScope timeScope("Scan symbols");
for (const Symbol *sym : symtab->getSymbols()) { for (const Symbol *sym : symtab->getSymbols()) {
if (const auto *defined = dyn_cast<Defined>(sym)) { if (const auto *defined = dyn_cast<Defined>(sym)) {
if (defined->overridesWeakDef) if (defined->overridesWeakDef && defined->isLive())
in.weakBinding->addNonWeakDefinition(defined); in.weakBinding->addNonWeakDefinition(defined);
} else if (const auto *dysym = dyn_cast<DylibSymbol>(sym)) { } else if (const auto *dysym = dyn_cast<DylibSymbol>(sym)) {
// This branch intentionally doesn't check isLive().
if (dysym->isDynamicLookup()) if (dysym->isDynamicLookup())
continue; continue;
dysym->getFile()->refState = dysym->getFile()->refState =

View File

@ -6,5 +6,5 @@ install-name: '/usr/lib/libc++abi.dylib'
current-version: 1281 current-version: 1281
exports: exports:
- archs: [ i386, x86_64, arm64 ] - archs: [ i386, x86_64, arm64 ]
symbols: [ ___gxx_personality_v0 ] symbols: [ ___cxa_allocate_exception, ___cxa_begin_catch, ___cxa_end_catch, ___cxa_throw, ___gxx_personality_v0, __ZTIi ]
... ...

737
lld/test/MachO/dead-strip.s Normal file
View File

@ -0,0 +1,737 @@
# REQUIRES: x86
# RUN: rm -rf %t; split-file %s %t
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \
# RUN: %t/basics.s -o %t/basics.o
## Check that .private_extern symbols are marked as local in the symbol table
## and aren't in the export trie.
## Dead-stripped symbols should also not be in a map file output.
# RUN: %lld -lSystem -dead_strip -map %t/map -u _ref_private_extern_u \
# RUN: %t/basics.o -o %t/basics
# RUN: llvm-objdump --syms --section-headers %t/basics | \
# RUN: FileCheck --check-prefix=EXEC --implicit-check-not _unref %s
# RUN: llvm-objdump --macho --section=__DATA,__ref_section \
# RUN: --exports-trie --indirect-symbols %t/basics | \
# RUN: FileCheck --check-prefix=EXECDATA --implicit-check-not _unref %s
# RUN: llvm-otool -l %t/basics | grep -q 'segname __PAGEZERO'
# RUN: FileCheck --check-prefix=MAP --implicit-check-not _unref %s < %t/map
# EXEC-LABEL: Sections:
# EXEC-LABEL: Name
# EXEC-NEXT: __text
# EXEC-NEXT: __got
# EXEC-NEXT: __ref_section
# EXEC-NEXT: __common
# EXEC-LABEL: SYMBOL TABLE:
# EXEC-NEXT: l {{.*}} _ref_data
# EXEC-NEXT: l {{.*}} _ref_local
# EXEC-NEXT: l {{.*}} _ref_from_no_dead_strip_globl
# EXEC-NEXT: l {{.*}} _no_dead_strip_local
# EXEC-NEXT: l {{.*}} _ref_from_no_dead_strip_local
# EXEC-NEXT: l {{.*}} _ref_private_extern_u
# EXEC-NEXT: l {{.*}} _main
# EXEC-NEXT: l {{.*}} _ref_private_extern
# EXEC-NEXT: g {{.*}} _ref_com
# EXEC-NEXT: g {{.*}} _no_dead_strip_globl
# EXEC-NEXT: g {{.*}} __mh_execute_header
# EXECDATA-LABEL: Indirect symbols
# EXECDATA-NEXT: name
# EXECDATA-NEXT: _ref_com
# EXECDATA-LABEL: Contents of (__DATA,__ref_section) section
# EXECDATA-NEXT: 04 00 00 00 00 00 00 00 05 00 00 00 00 00 00 00
# EXECDATA-LABEL: Exports trie:
# EXECDATA-NEXT: __mh_execute_header
# EXECDATA-NEXT: _ref_com
# EXECDATA-NEXT: _no_dead_strip_globl
# MAP: _main
# RUN: %lld -dylib -dead_strip -u _ref_private_extern_u %t/basics.o -o %t/basics.dylib
# RUN: llvm-objdump --syms %t/basics.dylib | \
# RUN: FileCheck --check-prefix=DYLIB --implicit-check-not _unref %s
# RUN: %lld -bundle -dead_strip -u _ref_private_extern_u %t/basics.o -o %t/basics.dylib
# RUN: llvm-objdump --syms %t/basics.dylib | \
# RUN: FileCheck --check-prefix=DYLIB --implicit-check-not _unref %s
# DYLIB-LABEL: SYMBOL TABLE:
# DYLIB-NEXT: l {{.*}} _ref_data
# DYLIB-NEXT: l {{.*}} _ref_local
# DYLIB-NEXT: l {{.*}} _ref_from_no_dead_strip_globl
# DYLIB-NEXT: l {{.*}} _no_dead_strip_local
# DYLIB-NEXT: l {{.*}} _ref_from_no_dead_strip_local
# DYLIB-NEXT: l {{.*}} _ref_private_extern_u
# DYLIB-NEXT: l {{.*}} _ref_private_extern
# DYLIB-NEXT: g {{.*}} _ref_com
# DYLIB-NEXT: g {{.*}} _unref_com
# DYLIB-NEXT: g {{.*}} _unref_extern
# DYLIB-NEXT: g {{.*}} _no_dead_strip_globl
## Absolute symbol handling.
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \
# RUN: %t/abs.s -o %t/abs.o
# RUN: %lld -lSystem -dead_strip %t/abs.o -o %t/abs
# RUN: llvm-objdump --macho --syms --exports-trie %t/abs | \
# RUN: FileCheck --check-prefix=ABS %s
#ABS-LABEL: SYMBOL TABLE:
#ABS-NEXT: g {{.*}} _main
#ABS-NEXT: g *ABS* _abs1
#ABS-NEXT: g {{.*}} __mh_execute_header
#ABS-LABEL: Exports trie:
#ABS-NEXT: __mh_execute_header
#ABS-NEXT: _main
#ABS-NEXT: _abs1 [absolute]
## Check that symbols from -exported_symbol(s_list) are preserved.
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \
# RUN: %t/exported-symbol.s -o %t/exported-symbol.o
# RUN: %lld -lSystem -dead_strip -exported_symbol _my_exported_symbol \
# RUN: %t/exported-symbol.o -o %t/exported-symbol
# RUN: llvm-objdump --syms %t/exported-symbol | \
# RUN: FileCheck --check-prefix=EXPORTEDSYMBOL --implicit-check-not _unref %s
# EXPORTEDSYMBOL-LABEL: SYMBOL TABLE:
# EXPORTEDSYMBOL-NEXT: l {{.*}} _main
# EXPORTEDSYMBOL-NEXT: l {{.*}} __mh_execute_header
# EXPORTEDSYMBOL-NEXT: g {{.*}} _my_exported_symbol
## Check that mod_init_funcs and mod_term_funcs are not stripped.
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \
# RUN: %t/mod-funcs.s -o %t/mod-funcs.o
# RUN: %lld -lSystem -dead_strip %t/mod-funcs.o -o %t/mod-funcs
# RUN: llvm-objdump --syms %t/mod-funcs | \
# RUN: FileCheck --check-prefix=MODFUNCS --implicit-check-not _unref %s
# MODFUNCS-LABEL: SYMBOL TABLE:
# MODFUNCS-NEXT: l {{.*}} _ref_from_init
# MODFUNCS-NEXT: l {{.*}} _ref_init
# MODFUNCS-NEXT: l {{.*}} _ref_from_term
# MODFUNCS-NEXT: l {{.*}} _ref_term
# MODFUNCS-NEXT: g {{.*}} _main
# MODFUNCS-NEXT: g {{.*}} __mh_execute_header
## Check that DylibSymbols in dead subsections are stripped: They should
## not be in the import table and should have no import stubs.
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \
# RUN: %t/dylib.s -o %t/dylib.o
# RUN: %lld -dylib -dead_strip %t/dylib.o -o %t/dylib.dylib
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \
# RUN: %t/strip-dylib-ref.s -o %t/strip-dylib-ref.o
# RUN: %lld -lSystem -dead_strip %t/strip-dylib-ref.o %t/dylib.dylib \
# RUN: -o %t/strip-dylib-ref -U _ref_undef_fun -U _unref_undef_fun
# RUN: llvm-objdump --syms --bind --lazy-bind --weak-bind %t/strip-dylib-ref | \
# RUN: FileCheck --check-prefix=STRIPDYLIB --implicit-check-not _unref %s
# STRIPDYLIB: SYMBOL TABLE:
# STRIPDYLIB-NEXT: l {{.*}} __dyld_private
# STRIPDYLIB-NEXT: g {{.*}} _main
# STRIPDYLIB-NEXT: g {{.*}} __mh_execute_header
# STRIPDYLIB-NEXT: *UND* _ref_undef_fun
# STRIPDYLIB-NEXT: *UND* dyld_stub_binder
# STRIPDYLIB-NEXT: *UND* _ref_dylib_fun
# STRIPDYLIB: Bind table:
# STRIPDYLIB: Lazy bind table:
# STRIPDYLIB: __DATA __la_symbol_ptr {{.*}} flat-namespace _ref_undef_fun
# STRIPDYLIB: __DATA __la_symbol_ptr {{.*}} dylib _ref_dylib_fun
# STRIPDYLIB: Weak bind table:
## Stubs smoke check: There should be two stubs entries, not four, but we
## don't verify that they belong to _ref_undef_fun and _ref_dylib_fun.
# RUN: llvm-objdump -d --section=__stubs --section=__stub_helper \
# RUN: %t/strip-dylib-ref |FileCheck --check-prefix=STUBS %s
# STUBS-LABEL: <__stubs>:
# STUBS-NEXT: jmpq
# STUBS-NEXT: jmpq
# STUBS-NOT: jmpq
# STUBS-LABEL: <__stub_helper>:
# STUBS: pushq $0
# STUBS: jmp
# STUBS: jmp
# STUBS-NOT: jmp
## An undefined symbol referenced from a dead-stripped function shouldn't
## produce a diagnostic:
# RUN: %lld -lSystem -dead_strip %t/strip-dylib-ref.o %t/dylib.dylib \
# RUN: -o %t/strip-dylib-ref -U _ref_undef_fun
## S_ATTR_LIVE_SUPPORT tests.
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \
# RUN: %t/live-support.s -o %t/live-support.o
# RUN: %lld -lSystem -dead_strip %t/live-support.o %t/dylib.dylib \
# RUN: -U _ref_undef_fun -U _unref_undef_fun -o %t/live-support
# RUN: llvm-objdump --syms %t/live-support | \
# RUN: FileCheck --check-prefix=LIVESUPP --implicit-check-not _unref %s
# LIVESUPP-LABEL: SYMBOL TABLE:
# LIVESUPP-NEXT: l {{.*}} _ref_ls_fun_fw
# LIVESUPP-NEXT: l {{.*}} _ref_ls_fun_bw
# LIVESUPP-NEXT: l {{.*}} _ref_ls_dylib_fun
# LIVESUPP-NEXT: l {{.*}} _ref_ls_undef_fun
# LIVESUPP-NEXT: l {{.*}} __dyld_private
# LIVESUPP-NEXT: g {{.*}} _main
# LIVESUPP-NEXT: g {{.*}} _bar
# LIVESUPP-NEXT: g {{.*}} _foo
# LIVESUPP-NEXT: g {{.*}} __mh_execute_header
# LIVESUPP-NEXT: *UND* _ref_undef_fun
# LIVESUPP-NEXT: *UND* dyld_stub_binder
# LIVESUPP-NEXT: *UND* _ref_dylib_fun
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \
# RUN: %t/live-support-iterations.s -o %t/live-support-iterations.o
# RUN: %lld -lSystem -dead_strip %t/live-support-iterations.o \
# RUN: -o %t/live-support-iterations
# RUN: llvm-objdump --syms %t/live-support-iterations | \
# RUN: FileCheck --check-prefix=LIVESUPP2 --implicit-check-not _unref %s
# LIVESUPP2-LABEL: SYMBOL TABLE:
# LIVESUPP2-NEXT: l {{.*}} _bar
# LIVESUPP2-NEXT: l {{.*}} _foo_refd
# LIVESUPP2-NEXT: l {{.*}} _bar_refd
# LIVESUPP2-NEXT: l {{.*}} _baz
# LIVESUPP2-NEXT: l {{.*}} _baz_refd
# LIVESUPP2-NEXT: l {{.*}} _foo
# LIVESUPP2-NEXT: g {{.*}} _main
# LIVESUPP2-NEXT: g {{.*}} __mh_execute_header
## Dead stripping should not remove the __TEXT,__unwind_info
## and __TEXT,__gcc_except_tab sections, but it should still
## remove the unreferenced function __Z5unrefv.
## The reference to ___gxx_personality_v0 should also not be
## stripped.
## (Need to use darwin19.0.0 to make llvm-mc emit __LD,__compact_unwind.)
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin19.0.0 \
# RUN: %t/unwind.s -o %t/unwind.o
# RUN: %lld -lc++ -lSystem -dead_strip %t/unwind.o -o %t/unwind
# RUN: llvm-objdump --syms %t/unwind | \
# RUN: FileCheck --check-prefix=UNWIND --implicit-check-not unref %s
# RUN: llvm-otool -l %t/unwind | grep -q 'sectname __unwind_info'
# RUN: llvm-otool -l %t/unwind | grep -q 'sectname __gcc_except_tab'
# UNWIND-LABEL: SYMBOL TABLE:
# UNWIND-NEXT: l O __TEXT,__gcc_except_tab GCC_except_table1
# UNWIND-NEXT: l O __DATA,__data __dyld_private
# UNWIND-NEXT: g F __TEXT,__text _main
# UNWIND-NEXT: g F __TEXT,__text __mh_execute_header
# UNWIND-NEXT: *UND* ___cxa_allocate_exception
# UNWIND-NEXT: *UND* ___cxa_end_catch
# UNWIND-NEXT: *UND* __ZTIi
# UNWIND-NEXT: *UND* ___cxa_throw
# UNWIND-NEXT: *UND* ___gxx_personality_v0
# UNWIND-NEXT: *UND* ___cxa_begin_catch
# UNWIND-NEXT: *UND* dyld_stub_binder
## If a dead stripped function has a strong ref to a dylib symbol but
## a live function only a weak ref, the dylib is still not a WEAK_DYLIB.
## This matches ld64.
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \
# RUN: %t/weak-ref.s -o %t/weak-ref.o
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \
# RUN: %t/strong-dead-ref.s -o %t/strong-dead-ref.o
# RUN: %lld -lSystem -dead_strip %t/weak-ref.o %t/strong-dead-ref.o \
# RUN: %t/dylib.dylib -o %t/weak-ref
# RUN: llvm-otool -l %t/weak-ref | FileCheck -DDIR=%t --check-prefix=WEAK %s
# WEAK: cmd LC_LOAD_DYLIB
# WEAK-NEXT: cmdsize
# WEAK-NEXT: name /usr/lib/libSystem.dylib
# WEAK: cmd LC_LOAD_DYLIB
# WEAK-NEXT: cmdsize
# WEAK-NEXT: name [[DIR]]/dylib.dylib
## A strong symbol that would override a weak import does not emit the
## "this overrides a weak import" opcode if it is dead-stripped.
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \
# RUN: %t/weak-dylib.s -o %t/weak-dylib.o
# RUN: %lld -dylib -dead_strip %t/weak-dylib.o -o %t/weak-dylib.dylib
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \
# RUN: %t/dead-weak-override.s -o %t/dead-weak-override.o
# RUN: %lld -dead_strip %t/dead-weak-override.o %t/weak-dylib.dylib \
# RUN: -o %t/dead-weak-override
# RUN: llvm-objdump --macho --weak-bind --private-header \
# RUN: %t/dead-weak-override | FileCheck --check-prefix=DEADWEAK %s
# DEADWEAK-NOT: WEAK_DEFINES
# DEADWEAK: Weak bind table:
# DEADWEAK: segment section address type addend symbol
# DEADWEAK-NOT: strong _weak_in_dylib
## Stripped symbols should not be in the debug info stabs entries.
# RUN: llvm-mc -g -filetype=obj -triple=x86_64-apple-macos \
# RUN: %t/debug.s -o %t/debug.o
# RUN: %lld -lSystem -dead_strip %t/debug.o -o %t/debug
# RUN: dsymutil -s %t/debug | FileCheck --check-prefix=EXECSTABS %s
# EXECSTABS-NOT: N_FUN {{.*}} '_unref'
# EXECSTABS: N_FUN {{.*}} '_main'
# EXECSTABS-NOT: N_FUN {{.*}} '_unref'
#--- basics.s
## Input for the basic -dead_strip checks (EXEC, EXECDATA, MAP and DYLIB
## prefixes). Symbols named _ref* are expected to survive. Symbols named
## _unref* are expected to be stripped from the executable; the dylib and
## bundle links keep the exported _unref_com and _unref_extern.
## _ref_com is referenced from _main below; _unref_com is referenced only
## from the dead _unref_private_extern.
.comm _ref_com, 1
.comm _unref_com, 1
## Nothing references this section and it is not marked no_dead_strip, so
## the EXEC section list is expected not to contain it.
.section __DATA,__unref_section
_unref_data:
.quad 4
l_unref_data:
.quad 5
## Referenced by no_dead_strip == S_ATTR_NO_DEAD_STRIP
.section __DATA,__ref_section,regular,no_dead_strip
## Referenced because in no_dead_strip section.
_ref_data:
.quad 4
## This is a local symbol so it's not in the symbol table, but
## it is still in the section data.
l_ref_data:
.quad 5
.text
# Exported symbols should not be stripped from dylibs
# or bundles, but they should be stripped from executables.
.globl _unref_extern
_unref_extern:
callq _ref_local
retq
# Unreferenced local symbols should be stripped.
_unref_local:
retq
# Same for unreferenced private externs.
.globl _unref_private_extern
.private_extern _unref_private_extern
_unref_private_extern:
# This shouldn't create an indirect symbol since it's
# a reference from a dead function.
movb _unref_com@GOTPCREL(%rip), %al
retq
# Referenced local symbols should not be stripped.
_ref_local:
callq _ref_private_extern
retq
# Same for referenced private externs.
# This one is referenced by a relocation.
.globl _ref_private_extern
.private_extern _ref_private_extern
_ref_private_extern:
retq
# This one is referenced by a -u flag.
.globl _ref_private_extern_u
.private_extern _ref_private_extern_u
_ref_private_extern_u:
retq
# Entry point should not be stripped for executables, even if hidden.
# For shared libraries this is stripped since it's just a regular hidden
# symbol there.
.globl _main
.private_extern _main
_main:
movb _ref_com@GOTPCREL(%rip), %al
callq _ref_local
retq
# Things marked no_dead_strip should not be stripped either.
# (clang emits this e.g. for `__attribute__((used))` globals.)
# Both for .globl symbols...
.globl _no_dead_strip_globl
.no_dead_strip _no_dead_strip_globl
_no_dead_strip_globl:
callq _ref_from_no_dead_strip_globl
retq
## Kept alive only through the call from _no_dead_strip_globl above.
_ref_from_no_dead_strip_globl:
retq
# ...and for locals.
.no_dead_strip _no_dead_strip_local
_no_dead_strip_local:
callq _ref_from_no_dead_strip_local
retq
## Kept alive only through the call from _no_dead_strip_local above.
_ref_from_no_dead_strip_local:
retq
.subsections_via_symbols
#--- exported-symbol.s
## Input for the -exported_symbol check (EXPORTEDSYMBOL prefix). The link
## passes `-exported_symbol _my_exported_symbol`; the expectations are that
## _my_exported_symbol stays global, _main and __mh_execute_header become
## local, and _unref_symbol is stripped.
.text
## Global but neither exported nor referenced.
.globl _unref_symbol
_unref_symbol:
retq
## Not referenced by any code here; named only on the linker command line.
.globl _my_exported_symbol
_my_exported_symbol:
retq
.globl _main
_main:
retq
.subsections_via_symbols
#--- abs.s
## Input for the absolute-symbol check (ABS prefix). Of _abs1 through _abs4
## only _abs1, which is marked .no_dead_strip, is expected to show up in the
## symbol table and the export trie.
.globl _abs1, _abs2, _abs3
.no_dead_strip _abs1
_abs1 = 1
_abs2 = 2
_abs3 = 3
.section __DATA,__foo,regular,no_dead_strip
# Absolute symbols are not in a section, so the no_dead_strip
# on the section above has no effect.
.globl _abs4
_abs4 = 4
.text
.globl _main
_main:
# This is relaxed away, so there's no relocation here and
# _abs3 isn't in the exported symbol table.
mov _abs3, %rax
retq
.subsections_via_symbols
#--- mod-funcs.s
## Roughly based on `clang -O2 -S` output for `struct A { A(); ~A(); }; A a;`
## for mod_init_funcs. mod_term_funcs then similar to that.
## The MODFUNCS expectations keep _ref_init/_ref_term (pointed to from the
## mod_init_funcs/mod_term_funcs sections below) and the functions they call,
## and drop __unref.
.section __TEXT,__StaticInit,regular,pure_instructions
## Not referenced from anywhere in this file.
__unref:
retq
## Reached only through the call in _ref_init.
_ref_from_init:
retq
_ref_init:
callq _ref_from_init
retq
## Reached only through the call in _ref_term.
_ref_from_term:
retq
_ref_term:
callq _ref_from_term
retq
.globl _main
_main:
retq
## The only reference to _ref_init.
.section __DATA,__mod_init_func,mod_init_funcs
.quad _ref_init
## The only reference to _ref_term.
.section __DATA,__mod_term_func,mod_term_funcs
.quad _ref_term
.subsections_via_symbols
#--- dylib.s
## Linked into dylib.dylib, which the strip-dylib-ref, live-support and
## weak-ref tests link against. It provides one function that those tests
## call from live code and one that they only call from dead code.
.text
.globl _ref_dylib_fun
_ref_dylib_fun:
retq
.globl _unref_dylib_fun
_unref_dylib_fun:
retq
.subsections_via_symbols
#--- strip-dylib-ref.s
## Input for the STRIPDYLIB and STUBS checks: imports and stubs should only
## be created for symbols referenced from live code.
.text
## Local and unreferenced, hence dead. _unref_dylib_fun and _unref_undef_fun
## are referenced only from here, so no bind entries or stubs are expected
## for them.
_unref:
callq _ref_dylib_fun
callq _unref_dylib_fun
callq _ref_undef_fun
callq _unref_undef_fun
retq
## Live entry point; its two callees are the only expected lazy-bind entries.
.globl _main
_main:
callq _ref_dylib_fun
callq _ref_undef_fun
retq
.subsections_via_symbols
#--- live-support.s
## In practice, live_support is used for instruction profiling
## data and asan. (Also for __eh_frame, but that needs special handling
## in the linker anyways.)
## This test isn't based on anything happening in real code though.
.section __TEXT,__ref_ls_fw,regular,live_support
_ref_ls_fun_fw:
# This is called by _main and is kept alive by normal
# forward liveness propagation. The live_support attribute
# does nothing in this case.
retq
## Not referenced by anything and references nothing itself, so it is
## expected to be stripped.
.section __TEXT,__unref_ls_fw,regular,live_support
_unref_ls_fun_fw:
retq
.section __TEXT,__ref_ls_bw,regular,live_support
_ref_ls_fun_bw:
# This _calls_ something that's alive but isn't referenced itself. This is
# kept alive only due to this being in a live_support section.
callq _foo
# _bar on the other hand is kept alive since it's called from here.
callq _bar
retq
## Kept alive by a live symbol from a dynamic library.
_ref_ls_dylib_fun:
callq _ref_dylib_fun
retq
## Kept alive by a live undefined symbol.
_ref_ls_undef_fun:
callq _ref_undef_fun
retq
## All symbols in this live_support section reference dead symbols
## and are hence dead themselves.
.section __TEXT,__unref_ls_bw,regular,live_support
_unref_ls_fun_bw:
callq _unref
retq
_unref_ls_dylib_fun_bw:
callq _unref_dylib_fun
retq
_unref_ls_undef_fun_bw:
callq _unref_undef_fun
retq
.text
## Global, but only referenced from the dead _unref_ls_fun_bw above.
.globl _unref
_unref:
retq
## Referenced only from _ref_ls_fun_bw.
.globl _bar
_bar:
retq
.globl _foo
_foo:
callq _ref_ls_fun_fw
retq
.globl _main
_main:
callq _ref_ls_fun_fw
callq _foo
callq _ref_dylib_fun
callq _ref_undef_fun
retq
.subsections_via_symbols
#--- live-support-iterations.s
.section __TEXT,_ls,regular,live_support
## This is a live_support subsection that only becomes
## live after _foo below is processed. This means the algorithm of
## 1. mark things reachable from gc roots live
## 2. go through live_support sections and mark live the ones pointing
## to live symbols or sections
## needs more than one iteration, since _bar won't be live when step 2
## runs for the first time.
## (ld64 gets this wrong -- it has different output based on if _bar is
## before _foo or after it.)
_bar:
callq _foo_refd
callq _bar_refd
retq
## Same here. This is maybe more interesting since it references a live_support
## symbol instead of a "normal" symbol.
_baz:
callq _foo_refd
callq _baz_refd
retq
## References the live entry point _main directly.
_foo:
callq _main
callq _foo_refd
retq
## Test no_dead_strip on a symbol in a live_support section.
## ld64 ignores this, but that doesn't look intentional. So lld honors it.
## NOTE(review): the directive below has no symbol operand, and _quux is not
## part of the LIVESUPP2 expectations -- confirm whether
## `.no_dead_strip _quux` was intended.
.no_dead_strip
_quux:
retq
.text
.globl _main
_main:
movq $0, %rax
retq
## The three locals below are referenced only from the live_support
## functions above.
_foo_refd:
retq
_bar_refd:
retq
_baz_refd:
retq
.subsections_via_symbols
#--- unwind.s
## This is the output of `clang -O2 -S throw.cc` where throw.cc
## looks like this:
## void unref() {}
## int main() {
## try {
## throw 0;
## } catch (int i) {
## return i;
## }
## }
## Input for the UNWIND checks: _main, its exception table and the
## ___gxx_personality_v0 reference are expected to survive, while the
## unreferenced __Z5unrefv is expected to be stripped.
.section __TEXT,__text,regular,pure_instructions
.globl __Z5unrefv
.p2align 4, 0x90
__Z5unrefv:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
popq %rbp
retq
.cfi_endproc
.globl _main
.p2align 4, 0x90
_main:
Lfunc_begin0:
.cfi_startproc
## The two directives below name the personality function and the LSDA
## (Lexception0 in __gcc_except_tab) for _main.
.cfi_personality 155, ___gxx_personality_v0
.cfi_lsda 16, Lexception0
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
pushq %rbx
pushq %rax
.cfi_offset %rbx, -24
movl $4, %edi
callq ___cxa_allocate_exception
movl $0, (%rax)
Ltmp0:
movq __ZTIi@GOTPCREL(%rip), %rsi
movq %rax, %rdi
xorl %edx, %edx
callq ___cxa_throw
Ltmp1:
ud2
LBB1_2:
Ltmp2:
movq %rax, %rdi
callq ___cxa_begin_catch
movl (%rax), %ebx
callq ___cxa_end_catch
movl %ebx, %eax
addq $8, %rsp
popq %rbx
popq %rbp
retq
Lfunc_end0:
.cfi_endproc
.section __TEXT,__gcc_except_tab
.p2align 2
GCC_except_table1:
Lexception0:
.byte 255 ## @LPStart Encoding = omit
.byte 155 ## @TType Encoding = indirect pcrel sdata4
.uleb128 Lttbase0-Lttbaseref0
Lttbaseref0:
.byte 1 ## Call site Encoding = uleb128
.uleb128 Lcst_end0-Lcst_begin0
Lcst_begin0:
.uleb128 Lfunc_begin0-Lfunc_begin0 ## >> Call Site 1 <<
.uleb128 Ltmp0-Lfunc_begin0 ## Call between Lfunc_begin0 and Ltmp0
.byte 0 ## has no landing pad
.byte 0 ## On action: cleanup
.uleb128 Ltmp0-Lfunc_begin0 ## >> Call Site 2 <<
.uleb128 Ltmp1-Ltmp0 ## Call between Ltmp0 and Ltmp1
.uleb128 Ltmp2-Lfunc_begin0 ## jumps to Ltmp2
.byte 1 ## On action: 1
.uleb128 Ltmp1-Lfunc_begin0 ## >> Call Site 3 <<
.uleb128 Lfunc_end0-Ltmp1 ## Call between Ltmp1 and Lfunc_end0
.byte 0 ## has no landing pad
.byte 0 ## On action: cleanup
Lcst_end0:
.byte 1 ## >> Action Record 1 <<
## Catch TypeInfo 1
.byte 0 ## No further actions
.p2align 2
## >> Catch TypeInfos <<
.long __ZTIi@GOTPCREL+4 ## TypeInfo 1
Lttbase0:
.p2align 2
## -- End function
.subsections_via_symbols
#--- weak-ref.s
## Live half of the weak-ref test: _main holds the only live reference into
## dylib.dylib, and that reference is declared weak.
.text
.weak_reference _ref_dylib_fun
.globl _main
_main:
callq _ref_dylib_fun
retq
.subsections_via_symbols
#--- strong-dead-ref.s
## Dead half of the weak-ref test: the local, unreferenced _unref holds a
## strong reference into dylib.dylib. The WEAK checks expect a plain
## LC_LOAD_DYLIB for dylib.dylib even though this function is stripped.
.text
.globl _unref_dylib_fun
_unref:
callq _unref_dylib_fun
retq
.subsections_via_symbols
#--- weak-dylib.s
## Linked into weak-dylib.dylib: exports a weak definition that
## dead-weak-override.s redefines as a strong symbol.
.text
.globl _weak_in_dylib
.weak_definition _weak_in_dylib
_weak_in_dylib:
retq
.subsections_via_symbols
#--- dead-weak-override.s
## Overrides the _weak_in_dylib symbol in weak-dylib, but is dead stripped.
## The DEADWEAK checks expect neither a WEAK_DEFINES header flag nor a
## "strong _weak_in_dylib" weak-bind entry.
.text
## NOTE(review): the directive below is commented out; presumably enabling
## it keeps the override live -- confirm before relying on that.
#.no_dead_strip _weak_in_dylib
.globl _weak_in_dylib
_weak_in_dylib:
retq
.globl _main
_main:
retq
.subsections_via_symbols
#--- debug.s
## Assembled with -g for the EXECSTABS checks: an N_FUN stab is expected for
## _main but not for the unreferenced _unref.
.text
.globl _unref
_unref:
retq
.globl _main
_main:
retq
.subsections_via_symbols

View File

@ -9,7 +9,7 @@
## (but not in other types of files) ## (but not in other types of files)
# RUN: llvm-mc %t/dylib.s -triple=x86_64-apple-macos10.15 -filetype=obj -o %t/dylib.o # RUN: llvm-mc %t/dylib.s -triple=x86_64-apple-macos10.15 -filetype=obj -o %t/dylib.o
# RUN: %lld -pie -dylib %t/dylib.o -o %t/dylib.out # RUN: %lld -pie -dylib -dead_strip %t/dylib.o -o %t/dylib.out
# RUN: llvm-objdump -m --syms %t/dylib.out | FileCheck %s --check-prefix DYLIB # RUN: llvm-objdump -m --syms %t/dylib.out | FileCheck %s --check-prefix DYLIB
# RUN: not %lld -pie -o /dev/null %t/dylib.o 2>&1 | FileCheck %s --check-prefix ERR-DYLIB # RUN: not %lld -pie -o /dev/null %t/dylib.o 2>&1 | FileCheck %s --check-prefix ERR-DYLIB
@ -21,7 +21,7 @@
## Test that in an executable, we can link against __mh_execute_header ## Test that in an executable, we can link against __mh_execute_header
# RUN: llvm-mc %t/main.s -triple=x86_64-apple-macos10.15 -filetype=obj -o %t/exec.o # RUN: llvm-mc %t/main.s -triple=x86_64-apple-macos10.15 -filetype=obj -o %t/exec.o
# RUN: %lld -pie %t/exec.o -o %t/exec.out # RUN: %lld -pie -dead_strip -lSystem %t/exec.o -o %t/exec.out
## But it would be an error trying to reference __mh_execute_header in a dylib ## But it would be an error trying to reference __mh_execute_header in a dylib
# RUN: not %lld -pie -o /dev/null -dylib %t/exec.o 2>&1 | FileCheck %s --check-prefix ERR-EXEC # RUN: not %lld -pie -o /dev/null -dylib %t/exec.o 2>&1 | FileCheck %s --check-prefix ERR-EXEC
@ -34,6 +34,7 @@
_main: _main:
mov __mh_execute_header@GOTPCREL(%rip), %rax mov __mh_execute_header@GOTPCREL(%rip), %rax
ret ret
.subsections_via_symbols
#--- dylib.s #--- dylib.s
.text .text
@ -41,3 +42,4 @@ _main:
_main: _main:
mov __mh_dylib_header@GOTPCREL(%rip), %rax mov __mh_dylib_header@GOTPCREL(%rip), %rax
ret ret
.subsections_via_symbols

View File

@ -10,6 +10,16 @@
# RUN: -o %t %t.o # RUN: -o %t %t.o
# RUN: llvm-objdump -s %t | FileCheck %s # RUN: llvm-objdump -s %t | FileCheck %s
## -dead_strip does not strip -sectcreate sections,
## but also doesn't set S_ATTR_NO_DEAD_STRIP on them.
# RUN: %lld -dead_strip \
# RUN: -sectcreate SEG SEC1 %t1 \
# RUN: -segcreate SEG SEC2 %t3 \
# RUN: -sectcreate SEG SEC1 %t2 \
# RUN: -o %t %t.o
# RUN: llvm-objdump -s %t | FileCheck --check-prefix=STRIPPED %s
# RUN: llvm-readobj --sections %t | FileCheck --check-prefix=STRIPPEDSEC %s
# CHECK: Contents of section __TEXT,__text: # CHECK: Contents of section __TEXT,__text:
# CHECK: Contents of section __DATA,__data: # CHECK: Contents of section __DATA,__data:
# CHECK: my string!. # CHECK: my string!.
@ -19,6 +29,17 @@
# CHECK: Contents of section SEG,SEC2: # CHECK: Contents of section SEG,SEC2:
# CHECK: -sectcreate 2. # CHECK: -sectcreate 2.
# STRIPPED: Contents of section __TEXT,__text:
# STRIPPED-NOT: Contents of section __DATA,__data:
# STRIPPED-NOT: my string!.
# STRIPPED: Contents of section SEG,SEC1:
# STRIPPED: -sectcreate 1.1.
# STRIPPED: -sectcreate 1.2.
# STRIPPED: Contents of section SEG,SEC2:
# STRIPPED: -sectcreate 2.
# STRIPPEDSEC-NOT: NoDeadStrip
.text .text
.global _main .global _main
_main: _main:
@ -29,3 +50,5 @@ _main:
.global my_string .global my_string
my_string: my_string:
.string "my string!" .string "my string!"
.subsections_via_symbols

View File

@ -36,6 +36,7 @@ static_library("MachO2") {
"InputSection.cpp", "InputSection.cpp",
"LTO.cpp", "LTO.cpp",
"MapFile.cpp", "MapFile.cpp",
"MarkLive.cpp",
"ObjC.cpp", "ObjC.cpp",
"OutputSection.cpp", "OutputSection.cpp",
"OutputSegment.cpp", "OutputSegment.cpp",