LTO: Reduce memory consumption by creating an in-memory symbol table for InputFiles. NFCI.

Introduce symbol table data structures that can be potentially written to
disk, have the LTO library build those data structures using temporarily
constructed modules and redirect the LTO library implementation to go through
those data structures. This allows us to remove the LLVMContext and Modules
owned by InputFile.

With this change I measured a peak memory consumption decrease from 5.4GB to
2.8GB in a no-op incremental ThinLTO link of Chromium on Linux. The impact on
memory consumption is larger in COFF linkers where we are currently forced
to materialize all metadata in order to read linker options. Peak memory
consumption linking a large piece of Chromium for Windows with full LTO and
debug info decreases from >64GB (OOM) to 15GB.

Part of PR27551.

Differential Revision: https://reviews.llvm.org/D31364

llvm-svn: 299168
This commit is contained in:
Peter Collingbourne 2017-03-31 02:28:30 +00:00
parent af2dfce683
commit d9717aa0e4
8 changed files with 663 additions and 298 deletions

View File

@ -355,13 +355,12 @@ void BitcodeFile::parse() {
SymbolBody *Alias = Symtab->addUndefined(Saver.save(Fallback));
checkAndSetWeakAlias(Symtab, this, Sym->body(), Alias);
} else {
Expected<int> ComdatIndex = ObjSym.getComdatIndex();
bool IsCOMDAT = ComdatIndex && *ComdatIndex != -1;
bool IsCOMDAT = ObjSym.getComdatIndex() != -1;
Sym = Symtab->addRegular(this, SymName, IsCOMDAT);
}
SymbolBodies.push_back(Sym->body());
}
Directives = check(Obj->getLinkerOpts());
Directives = Obj->getCOFFLinkerOpts();
}
MachineTypes BitcodeFile::getMachineType() {

View File

@ -818,7 +818,7 @@ static Symbol *createBitcodeSymbol(const std::vector<bool> &KeptComdats,
uint8_t Visibility = mapVisibility(ObjSym.getVisibility());
bool CanOmitFromDynSym = ObjSym.canBeOmittedFromSymbolTable();
int C = check(ObjSym.getComdatIndex(), F->LogName);
int C = ObjSym.getComdatIndex();
if (C != -1 && !KeptComdats[C])
return Symtab<ELFT>::X->addUndefined(NameRef, /*IsLocal=*/false, Binding,
Visibility, Type, CanOmitFromDynSym,
@ -855,10 +855,8 @@ void BitcodeFile::parse(DenseSet<CachedHashStringRef> &ComdatGroups) {
Obj = check(lto::InputFile::create(MBRef), this->LogName);
std::vector<bool> KeptComdats;
for (StringRef S : Obj->getComdatTable()) {
StringRef N = Saver.save(S);
KeptComdats.push_back(ComdatGroups.insert(CachedHashStringRef(N)).second);
}
for (StringRef S : Obj->getComdatTable())
KeptComdats.push_back(ComdatGroups.insert(CachedHashStringRef(S)).second);
for (const lto::InputFile::Symbol &ObjSym : Obj->symbols())
Symbols.push_back(createBitcodeSymbol<ELFT>(KeptComdats, ObjSym, this));

View File

@ -24,7 +24,7 @@
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/LTO/Config.h"
#include "llvm/Linker/IRMover.h"
#include "llvm/Object/ModuleSymbolTable.h"
#include "llvm/Object/IRSymtab.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/thread.h"
@ -79,21 +79,26 @@ class LTO;
struct SymbolResolution;
class ThinBackendProc;
/// An input file. This is a wrapper for ModuleSymbolTable that exposes only the
/// An input file. This is a symbol table wrapper that only exposes the
/// information that an LTO client should need in order to do symbol resolution.
class InputFile {
public:
class Symbol;
private:
// FIXME: Remove LTO class friendship once we have bitcode symbol tables.
friend LTO;
InputFile() = default;
// FIXME: Remove the LLVMContext once we have bitcode symbol tables.
LLVMContext Ctx;
struct InputModule;
std::vector<InputModule> Mods;
ModuleSymbolTable SymTab;
std::vector<BitcodeModule> Mods;
SmallVector<char, 0> Strtab;
std::vector<Symbol> Symbols;
std::vector<StringRef> Comdats;
DenseMap<const Comdat *, unsigned> ComdatMap;
// [begin, end) for each module
std::vector<std::pair<size_t, size_t>> ModuleSymIndices;
StringRef SourceFileName, COFFLinkerOpts;
std::vector<StringRef> ComdatTable;
public:
~InputFile();
@ -101,170 +106,48 @@ public:
/// Create an InputFile.
static Expected<std::unique_ptr<InputFile>> create(MemoryBufferRef Object);
class symbol_iterator;
/// This is a wrapper for ArrayRef<ModuleSymbolTable::Symbol>::iterator that
/// exposes only the information that an LTO client should need in order to do
/// symbol resolution.
///
/// This object is ephemeral; it is only valid as long as an iterator obtained
/// from symbols() refers to it.
class Symbol {
friend symbol_iterator;
/// The purpose of this class is to only expose the symbol information that an
/// LTO client should need in order to do symbol resolution.
class Symbol : irsymtab::Symbol {
friend LTO;
ArrayRef<ModuleSymbolTable::Symbol>::iterator I;
const ModuleSymbolTable &SymTab;
const InputFile *File;
uint32_t Flags;
SmallString<64> Name;
bool shouldSkip() {
return !(Flags & object::BasicSymbolRef::SF_Global) ||
(Flags & object::BasicSymbolRef::SF_FormatSpecific);
}
void skip() {
ArrayRef<ModuleSymbolTable::Symbol>::iterator E = SymTab.symbols().end();
while (I != E) {
Flags = SymTab.getSymbolFlags(*I);
if (!shouldSkip())
break;
++I;
}
if (I == E)
return;
Name.clear();
{
raw_svector_ostream OS(Name);
SymTab.printSymbolName(OS, *I);
}
}
bool isGV() const { return I->is<GlobalValue *>(); }
GlobalValue *getGV() const { return I->get<GlobalValue *>(); }
public:
Symbol(ArrayRef<ModuleSymbolTable::Symbol>::iterator I,
const ModuleSymbolTable &SymTab, const InputFile *File)
: I(I), SymTab(SymTab), File(File) {
skip();
}
Symbol(const irsymtab::Symbol &S) : irsymtab::Symbol(S) {}
bool isUndefined() const {
return Flags & object::BasicSymbolRef::SF_Undefined;
}
bool isCommon() const { return Flags & object::BasicSymbolRef::SF_Common; }
bool isWeak() const { return Flags & object::BasicSymbolRef::SF_Weak; }
bool isIndirect() const {
return Flags & object::BasicSymbolRef::SF_Indirect;
}
/// For COFF weak externals, returns the name of the symbol that is used
/// as a fallback if the weak external remains undefined.
std::string getCOFFWeakExternalFallback() const {
assert((Flags & object::BasicSymbolRef::SF_Weak) &&
(Flags & object::BasicSymbolRef::SF_Indirect) &&
"symbol is not a weak external");
std::string Name;
raw_string_ostream OS(Name);
SymTab.printSymbolName(
OS,
cast<GlobalValue>(
cast<GlobalAlias>(getGV())->getAliasee()->stripPointerCasts()));
OS.flush();
return Name;
}
/// Returns the mangled name of the global.
StringRef getName() const { return Name; }
GlobalValue::VisibilityTypes getVisibility() const {
if (isGV())
return getGV()->getVisibility();
return GlobalValue::DefaultVisibility;
}
bool canBeOmittedFromSymbolTable() const {
return isGV() && llvm::canBeOmittedFromSymbolTable(getGV());
}
bool isTLS() const {
// FIXME: Expose a thread-local flag for module asm symbols.
return isGV() && getGV()->isThreadLocal();
}
// Returns the index of the comdat this symbol is in or -1 if the symbol
// is not in a comdat.
// FIXME: We have to return Expected<int> because aliases point to an
// arbitrary ConstantExpr and that might not actually be a constant. That
// means we might not be able to find what an alias is aliased to and
// so find its comdat.
Expected<int> getComdatIndex() const;
uint64_t getCommonSize() const {
assert(Flags & object::BasicSymbolRef::SF_Common);
if (!isGV())
return 0;
return getGV()->getParent()->getDataLayout().getTypeAllocSize(
getGV()->getType()->getElementType());
}
unsigned getCommonAlignment() const {
assert(Flags & object::BasicSymbolRef::SF_Common);
if (!isGV())
return 0;
return getGV()->getAlignment();
}
};
class symbol_iterator {
Symbol Sym;
public:
symbol_iterator(ArrayRef<ModuleSymbolTable::Symbol>::iterator I,
const ModuleSymbolTable &SymTab, const InputFile *File)
: Sym(I, SymTab, File) {}
symbol_iterator &operator++() {
++Sym.I;
Sym.skip();
return *this;
}
symbol_iterator operator++(int) {
symbol_iterator I = *this;
++*this;
return I;
}
const Symbol &operator*() const { return Sym; }
const Symbol *operator->() const { return &Sym; }
bool operator!=(const symbol_iterator &Other) const {
return Sym.I != Other.Sym.I;
}
using irsymtab::Symbol::isUndefined;
using irsymtab::Symbol::isCommon;
using irsymtab::Symbol::isWeak;
using irsymtab::Symbol::isIndirect;
using irsymtab::Symbol::getName;
using irsymtab::Symbol::getVisibility;
using irsymtab::Symbol::canBeOmittedFromSymbolTable;
using irsymtab::Symbol::isTLS;
using irsymtab::Symbol::getComdatIndex;
using irsymtab::Symbol::getCommonSize;
using irsymtab::Symbol::getCommonAlignment;
using irsymtab::Symbol::getCOFFWeakExternalFallback;
};
/// A range over the symbols in this InputFile.
iterator_range<symbol_iterator> symbols() {
return llvm::make_range(
symbol_iterator(SymTab.symbols().begin(), SymTab, this),
symbol_iterator(SymTab.symbols().end(), SymTab, this));
}
ArrayRef<Symbol> symbols() const { return Symbols; }
/// Returns linker options specified in the input file.
Expected<std::string> getLinkerOpts();
StringRef getCOFFLinkerOpts() const { return COFFLinkerOpts; }
/// Returns the path to the InputFile.
StringRef getName() const;
/// Returns the source file path specified at compile time.
StringRef getSourceFileName() const;
StringRef getSourceFileName() const { return SourceFileName; }
// Returns a table with all the comdats used by this file.
ArrayRef<StringRef> getComdatTable() const { return Comdats; }
ArrayRef<StringRef> getComdatTable() const { return ComdatTable; }
private:
iterator_range<symbol_iterator> module_symbols(InputModule &IM);
/// Returns the slice of Symbols that belongs to the I'th module, using the
/// [begin, end) index pair recorded for that module in ModuleSymIndices.
ArrayRef<Symbol> module_symbols(unsigned I) const {
  const std::pair<size_t, size_t> &Bounds = ModuleSymIndices[I];
  const Symbol *First = Symbols.data() + Bounds.first;
  const Symbol *Last = Symbols.data() + Bounds.second;
  return ArrayRef<Symbol>(First, Last);
}
};
/// This class wraps an output stream for a native object. Most clients should
@ -452,20 +335,20 @@ private:
// Global mapping from mangled symbol names to resolutions.
StringMap<GlobalResolution> GlobalResolutions;
void addSymbolToGlobalRes(SmallPtrSet<GlobalValue *, 8> &Used,
const InputFile::Symbol &Sym, SymbolResolution Res,
void addSymbolToGlobalRes(const InputFile::Symbol &Sym, SymbolResolution Res,
unsigned Partition);
// These functions take a range of symbol resolutions [ResI, ResE) and consume
// the resolutions used by a single input module by incrementing ResI. After
// these functions return, [ResI, ResE) will refer to the resolution range for
// the remaining modules in the InputFile.
Error addModule(InputFile &Input, InputFile::InputModule &IM,
Error addModule(InputFile &Input, unsigned ModI,
const SymbolResolution *&ResI, const SymbolResolution *ResE);
Error addRegularLTO(BitcodeModule BM, const SymbolResolution *&ResI,
Error addRegularLTO(BitcodeModule BM,
ArrayRef<InputFile::Symbol> Syms,
const SymbolResolution *&ResI,
const SymbolResolution *ResE);
Error addThinLTO(BitcodeModule BM, Module &M,
iterator_range<InputFile::symbol_iterator> Syms,
Error addThinLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
const SymbolResolution *&ResI, const SymbolResolution *ResE);
Error runRegularLTO(AddStreamFn AddStream);

View File

@ -0,0 +1,298 @@
//===- IRSymtab.h - data definitions for IR symbol tables -------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains data definitions and a reader and builder for a symbol
// table for LLVM IR. Its purpose is to allow linkers and other consumers of
// bitcode files to efficiently read the symbol table for symbol resolution
// purposes without needing to construct a module in memory.
//
// As with most object files the symbol table has two parts: the symbol table
// itself and a string table which is referenced by the symbol table.
//
// A symbol table corresponds to a single bitcode file, which may consist of
// multiple modules, so symbol tables may likewise contain symbols for multiple
// modules.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_OBJECT_IRSYMTAB_H
#define LLVM_OBJECT_IRSYMTAB_H
#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Object/SymbolicFile.h"
#include "llvm/Support/Endian.h"
namespace llvm {
namespace irsymtab {
namespace storage {
// The data structures in this namespace define the low-level serialization
// format. Clients that just want to read a symbol table should use the
// irsymtab::Reader class.
typedef support::ulittle32_t Word;
/// A reference to a string in the string table.
struct Str {
  /// Byte offset of the string's first character from the start of the string
  /// table.
  Word Offset;

  /// Resolves this reference against \p Strtab. The result is built from a
  /// bare character pointer, so the referenced string is expected to be
  /// NUL-terminated inside the table.
  StringRef get(StringRef Strtab) const {
    const char *Begin = Strtab.data() + Offset;
    return StringRef(Begin);
  }
};
/// A reference to a range of objects in the symbol table.
template <typename T> struct Range {
  /// Byte offset of the first object from the start of the symbol table.
  Word Offset;
  /// Number of objects of type T in the range.
  Word Size;

  /// Resolves this reference against \p Symtab, viewing the bytes starting at
  /// Offset as an array of Size objects of type T.
  ArrayRef<T> get(StringRef Symtab) const {
    const T *First = reinterpret_cast<const T *>(Symtab.data() + Offset);
    return ArrayRef<T>(First, Size);
  }
};
/// Describes the range of a particular module's symbols within the symbol
/// table, as a half-open pair of indices into the symbol array.
struct Module {
  /// Index of the module's first symbol.
  Word Begin;
  /// Index one past the module's last symbol.
  Word End;
};
/// This is equivalent to an IR comdat.
struct Comdat {
/// The comdat's name, stored as a reference into the string table.
Str Name;
};
/// Contains the information needed by linkers for symbol resolution, as well as
/// by the LTO implementation itself.
struct Symbol {
/// The mangled symbol name.
Str Name;
/// The unmangled symbol name, or the empty string if this is not an IR
/// symbol.
Str IRName;
/// The index into Header::Comdats, or -1 if not a comdat member.
Word ComdatIndex;
/// Bit field of symbol properties; bit positions are given by FlagBits.
Word Flags;
/// Bit positions within Flags. The enumerator order fixes the layout:
/// FB_visibility occupies bits 0-1 and every later enumerator is a single
/// bit, starting with FB_undefined at bit 2 and ending with FB_unnamed_addr
/// at bit 11. Reordering or inserting enumerators changes the stored format.
enum FlagBits {
FB_visibility, // 2 bits
FB_undefined = FB_visibility + 2,
FB_weak,
FB_common,
FB_indirect,
FB_used,
FB_tls,
FB_may_omit,
FB_global,
FB_format_specific,
FB_unnamed_addr,
};
/// The index into the Uncommon table, or -1 if this symbol does not have an
/// Uncommon.
Word UncommonIndex;
};
/// Holds the symbol fields that only a few symbols need. A Symbol refers to
/// its Uncommon record, if it has one, through Symbol::UncommonIndex.
struct Uncommon {
  /// Size of a common symbol.
  Word CommonSize;
  /// Alignment of a common symbol.
  Word CommonAlign;

  /// COFF-specific: the name of the symbol that a weak external resolves to
  /// if not defined.
  Str COFFWeakExternFallbackName;
};
/// The record located at the very start of the symbol table. It locates every
/// other table in the symbol table and carries the per-file strings.
struct Header {
/// Per-module [Begin, End) symbol index ranges.
Range<Module> Modules;
/// The comdat table; Symbol::ComdatIndex indexes into it.
Range<Comdat> Comdats;
/// The symbols of all modules in the file.
Range<Symbol> Symbols;
/// The Uncommon records; Symbol::UncommonIndex indexes into it.
Range<Uncommon> Uncommons;
/// The source file path, as a reference into the string table.
Str SourceFileName;
/// COFF-specific: linker directives.
Str COFFLinkerOpts;
};
}
/// Fills in Symtab and Strtab with a valid symbol and string table for Mods.
///
/// \param Mods   the modules whose symbols are to be described.
/// \param Symtab receives the serialized symbol table.
/// \param Strtab receives the string table referenced by \p Symtab.
/// \returns an Error describing the failure if the tables could not be built.
///
/// The two buffers can afterwards be handed to irsymtab::Reader.
Error build(ArrayRef<Module *> Mods, SmallVector<char, 0> &Symtab,
SmallVector<char, 0> &Strtab);
/// This represents a symbol that has been read from a storage::Symbol and
/// possibly a storage::Uncommon.
///
/// The accessors decode the Flags word using the bit positions defined by
/// storage::Symbol::FlagBits.
struct Symbol {
  // Copied from storage::Symbol.
  StringRef Name, IRName;
  int ComdatIndex;
  uint32_t Flags;

  // Copied from storage::Uncommon.
  uint32_t CommonSize, CommonAlign;
  StringRef COFFWeakExternFallbackName;

  /// Returns the mangled symbol name.
  StringRef getName() const { return Name; }

  /// Returns the unmangled symbol name, or the empty string if this is not an
  /// IR symbol.
  StringRef getIRName() const { return IRName; }

  /// Returns the index into the comdat table (see Reader::getComdatTable()), or
  /// -1 if not a comdat member.
  int getComdatIndex() const { return ComdatIndex; }

  using S = storage::Symbol;

  /// Returns the visibility stored in the two FB_visibility bits of Flags.
  GlobalValue::VisibilityTypes getVisibility() const {
    return GlobalValue::VisibilityTypes((Flags >> S::FB_visibility) & 3);
  }

  // Each predicate below tests the single flag bit of the same name.
  bool isUndefined() const { return (Flags >> S::FB_undefined) & 1; }
  bool isWeak() const { return (Flags >> S::FB_weak) & 1; }
  bool isCommon() const { return (Flags >> S::FB_common) & 1; }
  bool isIndirect() const { return (Flags >> S::FB_indirect) & 1; }
  bool isUsed() const { return (Flags >> S::FB_used) & 1; }
  bool isTLS() const { return (Flags >> S::FB_tls) & 1; }
  bool canBeOmittedFromSymbolTable() const {
    return (Flags >> S::FB_may_omit) & 1;
  }
  bool isGlobal() const { return (Flags >> S::FB_global) & 1; }
  bool isFormatSpecific() const { return (Flags >> S::FB_format_specific) & 1; }
  bool isUnnamedAddr() const { return (Flags >> S::FB_unnamed_addr) & 1; }

  /// Returns the size of a common symbol. Must only be called when isCommon().
  ///
  /// The return type is uint64_t rather than size_t so that callers can pass
  /// the result to std::max together with another uint64_t on every host;
  /// size_t and uint64_t are distinct types on some platforms, which makes
  /// template argument deduction fail.
  uint64_t getCommonSize() const {
    assert(isCommon());
    return CommonSize;
  }

  /// Returns the alignment of a common symbol. Must only be called when
  /// isCommon().
  uint32_t getCommonAlignment() const {
    assert(isCommon());
    return CommonAlign;
  }

  /// COFF-specific: for weak externals, returns the name of the symbol that is
  /// used as a fallback if the weak external remains undefined.
  StringRef getCOFFWeakExternalFallback() const {
    assert(isWeak() && isIndirect());
    return COFFWeakExternFallbackName;
  }
};
/// This class can be used to read a Symtab and Strtab produced by
/// irsymtab::build.
class Reader {
StringRef Symtab, Strtab;
ArrayRef<storage::Module> Modules;
ArrayRef<storage::Comdat> Comdats;
ArrayRef<storage::Symbol> Symbols;
ArrayRef<storage::Uncommon> Uncommons;
StringRef str(storage::Str S) const { return S.get(Strtab); }
template <typename T> ArrayRef<T> range(storage::Range<T> R) const {
return R.get(Symtab);
}
const storage::Header &header() const {
return *reinterpret_cast<const storage::Header *>(Symtab.data());
}
public:
class SymbolRef;
Reader() = default;
Reader(StringRef Symtab, StringRef Strtab) : Symtab(Symtab), Strtab(Strtab) {
Modules = range(header().Modules);
Comdats = range(header().Comdats);
Symbols = range(header().Symbols);
Uncommons = range(header().Uncommons);
}
typedef iterator_range<object::content_iterator<SymbolRef>> symbol_range;
/// Returns the symbol table for the entire bitcode file.
/// The symbols enumerated by this method are ephemeral, but they can be
/// copied into an irsymtab::Symbol object.
symbol_range symbols() const;
/// Returns a slice of the symbol table for the I'th module in the file.
/// The symbols enumerated by this method are ephemeral, but they can be
/// copied into an irsymtab::Symbol object.
symbol_range module_symbols(unsigned I) const;
/// Returns the source file path specified at compile time.
StringRef getSourceFileName() const { return str(header().SourceFileName); }
/// Returns a table with all the comdats used by this file.
std::vector<StringRef> getComdatTable() const {
std::vector<StringRef> ComdatTable;
ComdatTable.reserve(Comdats.size());
for (auto C : Comdats)
ComdatTable.push_back(str(C.Name));
return ComdatTable;
}
/// COFF-specific: returns linker options specified in the input file.
StringRef getCOFFLinkerOpts() const { return str(header().COFFLinkerOpts); }
};
/// Ephemeral symbols produced by Reader::symbols() and
/// Reader::module_symbols().
///
/// A SymbolRef is a cursor over the storage symbols in [SymI, SymE). The
/// inherited irsymtab::Symbol fields always describe the symbol at SymI; they
/// are refreshed by read() on construction and after every moveNext(). When
/// SymI == SymE the cursor is an end marker and the inherited fields carry no
/// meaning.
class Reader::SymbolRef : public Symbol {
  const storage::Symbol *SymI, *SymE;
  const Reader *R;

public:
  /// Creates a cursor positioned at \p SymI within a range ending at \p SymE,
  /// resolving strings and Uncommon records through \p R.
  SymbolRef(const storage::Symbol *SymI, const storage::Symbol *SymE,
            const Reader *R)
      : SymI(SymI), SymE(SymE), R(R) {
    read();
  }

  /// Loads the symbol at the current position into the inherited fields.
  /// Does nothing when the cursor is at the end of its range.
  void read() {
    if (SymI == SymE)
      return;

    Name = R->str(SymI->Name);
    IRName = R->str(SymI->IRName);
    ComdatIndex = SymI->ComdatIndex;
    Flags = SymI->Flags;

    // Give the Uncommon-derived fields defined values first. Without this, a
    // symbol that has no Uncommon record would expose uninitialized data (for
    // the first symbol read) or the values of an earlier symbol (after
    // moveNext()), and those would be propagated by every copy of the symbol.
    CommonSize = 0;
    CommonAlign = 0;
    COFFWeakExternFallbackName = StringRef();

    uint32_t UncI = SymI->UncommonIndex;
    if (UncI != -1u) {
      const storage::Uncommon &Unc = R->Uncommons[UncI];
      CommonSize = Unc.CommonSize;
      CommonAlign = Unc.CommonAlign;
      COFFWeakExternFallbackName = R->str(Unc.COFFWeakExternFallbackName);
    }
  }

  /// Advances to the next storage symbol and reloads the inherited fields.
  void moveNext() {
    ++SymI;
    read();
  }

  /// Two cursors are equal when they point at the same storage symbol.
  bool operator==(const SymbolRef &Other) const { return SymI == Other.SymI; }
};
/// Builds a range covering every symbol in the file: a cursor at the first
/// storage symbol paired with an end cursor at one past the last.
inline Reader::symbol_range Reader::symbols() const {
  const storage::Symbol *First = Symbols.begin();
  const storage::Symbol *Last = Symbols.end();
  return {SymbolRef(First, Last, this), SymbolRef(Last, Last, this)};
}
/// Builds a range covering only the symbols of the I'th module, using the
/// [Begin, End) indices recorded for that module.
inline Reader::symbol_range Reader::module_symbols(unsigned I) const {
  const storage::Module &Mod = Modules[I];
  const storage::Symbol *Base = Symbols.begin();
  const storage::Symbol *First = Base + Mod.Begin;
  const storage::Symbol *Last = Base + Mod.End;
  return {SymbolRef(First, Last, this), SymbolRef(Last, Last, this)};
}
}
}
#endif

View File

@ -305,14 +305,6 @@ void llvm::thinLTOInternalizeAndPromoteInIndex(
thinLTOInternalizeAndPromoteGUID(I.second, I.first, isExported);
}
struct InputFile::InputModule {
BitcodeModule BM;
std::unique_ptr<Module> Mod;
// The range of ModuleSymbolTable entries for this input module.
size_t SymBegin, SymEnd;
};
// Requires a destructor for std::vector<InputModule>.
InputFile::~InputFile() = default;
@ -333,87 +325,51 @@ Expected<std::unique_ptr<InputFile>> InputFile::create(MemoryBufferRef Object) {
return make_error<StringError>("Bitcode file does not contain any modules",
inconvertibleErrorCode());
// Create an InputModule for each module in the InputFile, and add it to the
// ModuleSymbolTable.
File->Mods = *BMsOrErr;
LLVMContext Ctx;
std::vector<Module *> Mods;
std::vector<std::unique_ptr<Module>> OwnedMods;
for (auto BM : *BMsOrErr) {
Expected<std::unique_ptr<Module>> MOrErr =
BM.getLazyModule(File->Ctx, /*ShouldLazyLoadMetadata*/ true,
BM.getLazyModule(Ctx, /*ShouldLazyLoadMetadata*/ true,
/*IsImporting*/ false);
if (!MOrErr)
return MOrErr.takeError();
size_t SymBegin = File->SymTab.symbols().size();
File->SymTab.addModule(MOrErr->get());
size_t SymEnd = File->SymTab.symbols().size();
if ((*MOrErr)->getDataLayoutStr().empty())
return make_error<StringError>("input module has no datalayout",
inconvertibleErrorCode());
for (const auto &C : (*MOrErr)->getComdatSymbolTable()) {
auto P = File->ComdatMap.insert(
std::make_pair(&C.second, File->Comdats.size()));
assert(P.second);
(void)P;
File->Comdats.push_back(C.first());
}
Mods.push_back(MOrErr->get());
OwnedMods.push_back(std::move(*MOrErr));
}
File->Mods.push_back({BM, std::move(*MOrErr), SymBegin, SymEnd});
SmallVector<char, 0> Symtab;
if (Error E = irsymtab::build(Mods, Symtab, File->Strtab))
return std::move(E);
irsymtab::Reader R({Symtab.data(), Symtab.size()},
{File->Strtab.data(), File->Strtab.size()});
File->SourceFileName = R.getSourceFileName();
File->COFFLinkerOpts = R.getCOFFLinkerOpts();
File->ComdatTable = R.getComdatTable();
for (unsigned I = 0; I != Mods.size(); ++I) {
size_t Begin = File->Symbols.size();
for (const irsymtab::Reader::SymbolRef &Sym : R.module_symbols(I))
// Skip symbols that are irrelevant to LTO. Note that this condition needs
// to match the one in Skip() in LTO::addRegularLTO().
if (Sym.isGlobal() && !Sym.isFormatSpecific())
File->Symbols.push_back(Sym);
File->ModuleSymIndices.push_back({Begin, File->Symbols.size()});
}
return std::move(File);
}
Expected<int> InputFile::Symbol::getComdatIndex() const {
if (!isGV())
return -1;
const GlobalObject *GO = getGV()->getBaseObject();
if (!GO)
return make_error<StringError>("Unable to determine comdat of alias!",
inconvertibleErrorCode());
if (const Comdat *C = GO->getComdat()) {
auto I = File->ComdatMap.find(C);
assert(I != File->ComdatMap.end());
return I->second;
}
return -1;
}
Expected<std::string> InputFile::getLinkerOpts() {
std::string LinkerOpts;
raw_string_ostream LOS(LinkerOpts);
// Extract linker options from module metadata.
for (InputModule &Mod : Mods) {
std::unique_ptr<Module> &M = Mod.Mod;
if (auto E = M->materializeMetadata())
return std::move(E);
if (Metadata *Val = M->getModuleFlag("Linker Options")) {
MDNode *LinkerOptions = cast<MDNode>(Val);
for (const MDOperand &MDOptions : LinkerOptions->operands())
for (const MDOperand &MDOption : cast<MDNode>(MDOptions)->operands())
LOS << " " << cast<MDString>(MDOption)->getString();
}
}
// Synthesize export flags for symbols with dllexport storage.
const Triple TT(Mods[0].Mod->getTargetTriple());
Mangler M;
for (const ModuleSymbolTable::Symbol &Sym : SymTab.symbols())
if (auto *GV = Sym.dyn_cast<GlobalValue*>())
emitLinkerFlagsForGlobalCOFF(LOS, GV, TT, M);
LOS.flush();
return LinkerOpts;
}
StringRef InputFile::getName() const {
return Mods[0].BM.getModuleIdentifier();
}
StringRef InputFile::getSourceFileName() const {
return Mods[0].Mod->getSourceFileName();
}
iterator_range<InputFile::symbol_iterator>
InputFile::module_symbols(InputModule &IM) {
return llvm::make_range(
symbol_iterator(SymTab.symbols().data() + IM.SymBegin, SymTab, this),
symbol_iterator(SymTab.symbols().data() + IM.SymEnd, SymTab, this));
return Mods[0].getModuleIdentifier();
}
LTO::RegularLTOState::RegularLTOState(unsigned ParallelCodeGenParallelismLevel,
@ -437,21 +393,17 @@ LTO::LTO(Config Conf, ThinBackend Backend,
LTO::~LTO() = default;
// Add the given symbol to the GlobalResolutions map, and resolve its partition.
void LTO::addSymbolToGlobalRes(SmallPtrSet<GlobalValue *, 8> &Used,
const InputFile::Symbol &Sym,
void LTO::addSymbolToGlobalRes(const InputFile::Symbol &Sym,
SymbolResolution Res, unsigned Partition) {
GlobalValue *GV = Sym.isGV() ? Sym.getGV() : nullptr;
auto &GlobalRes = GlobalResolutions[Sym.getName()];
if (GV) {
GlobalRes.UnnamedAddr &= GV->hasGlobalUnnamedAddr();
if (Res.Prevailing)
GlobalRes.IRName = GV->getName();
}
GlobalRes.UnnamedAddr &= Sym.isUnnamedAddr();
if (Res.Prevailing)
GlobalRes.IRName = Sym.getIRName();
// Set the partition to external if we know it is used elsewhere, e.g.
// it is visible to a regular object, is referenced from llvm.compiler_used,
// or was already recorded as being referenced from a different partition.
if (Res.VisibleToRegularObj || (GV && Used.count(GV)) ||
if (Res.VisibleToRegularObj || Sym.isUsed() ||
(GlobalRes.Partition != GlobalResolution::Unknown &&
GlobalRes.Partition != Partition)) {
GlobalRes.Partition = GlobalResolution::External;
@ -495,41 +447,32 @@ Error LTO::add(std::unique_ptr<InputFile> Input,
writeToResolutionFile(*Conf.ResolutionFile, Input.get(), Res);
const SymbolResolution *ResI = Res.begin();
for (InputFile::InputModule &IM : Input->Mods)
if (Error Err = addModule(*Input, IM, ResI, Res.end()))
for (unsigned I = 0; I != Input->Mods.size(); ++I)
if (Error Err = addModule(*Input, I, ResI, Res.end()))
return Err;
assert(ResI == Res.end());
return Error::success();
}
Error LTO::addModule(InputFile &Input, InputFile::InputModule &IM,
Error LTO::addModule(InputFile &Input, unsigned ModI,
const SymbolResolution *&ResI,
const SymbolResolution *ResE) {
// FIXME: move to backend
Module &M = *IM.Mod;
if (M.getDataLayoutStr().empty())
return make_error<StringError>("input module has no datalayout",
inconvertibleErrorCode());
if (!Conf.OverrideTriple.empty())
M.setTargetTriple(Conf.OverrideTriple);
else if (M.getTargetTriple().empty())
M.setTargetTriple(Conf.DefaultTriple);
Expected<bool> HasThinLTOSummary = IM.BM.hasSummary();
Expected<bool> HasThinLTOSummary = Input.Mods[ModI].hasSummary();
if (!HasThinLTOSummary)
return HasThinLTOSummary.takeError();
auto ModSyms = Input.module_symbols(ModI);
if (*HasThinLTOSummary)
return addThinLTO(IM.BM, M, Input.module_symbols(IM), ResI, ResE);
return addThinLTO(Input.Mods[ModI], ModSyms, ResI, ResE);
else
return addRegularLTO(IM.BM, ResI, ResE);
return addRegularLTO(Input.Mods[ModI], ModSyms, ResI, ResE);
}
// Add a regular LTO object to the link.
Error LTO::addRegularLTO(BitcodeModule BM, const SymbolResolution *&ResI,
Error LTO::addRegularLTO(BitcodeModule BM,
ArrayRef<InputFile::Symbol> Syms,
const SymbolResolution *&ResI,
const SymbolResolution *ResE) {
if (!RegularLTO.CombinedModule) {
RegularLTO.CombinedModule =
@ -550,9 +493,6 @@ Error LTO::addRegularLTO(BitcodeModule BM, const SymbolResolution *&ResI,
ModuleSymbolTable SymTab;
SymTab.addModule(&M);
SmallPtrSet<GlobalValue *, 8> Used;
collectUsedGlobalVariables(M, Used, /*CompilerUsed*/ false);
std::vector<GlobalValue *> Keep;
for (GlobalVariable &GV : M.globals())
@ -564,17 +504,35 @@ Error LTO::addRegularLTO(BitcodeModule BM, const SymbolResolution *&ResI,
if (GlobalObject *GO = GA.getBaseObject())
AliasedGlobals.insert(GO);
for (const InputFile::Symbol &Sym :
make_range(InputFile::symbol_iterator(SymTab.symbols().begin(), SymTab,
nullptr),
InputFile::symbol_iterator(SymTab.symbols().end(), SymTab,
nullptr))) {
// In this function we need IR GlobalValues matching the symbols in Syms
// (which is not backed by a module), so we need to enumerate them in the same
// order. The symbol enumeration order of a ModuleSymbolTable intentionally
// matches the order of an irsymtab, but when we read the irsymtab in
// InputFile::create we omit some symbols that are irrelevant to LTO. The
// Skip() function skips the same symbols from the module as InputFile does
// from the symbol table.
auto MsymI = SymTab.symbols().begin(), MsymE = SymTab.symbols().end();
auto Skip = [&]() {
while (MsymI != MsymE) {
auto Flags = SymTab.getSymbolFlags(*MsymI);
if ((Flags & object::BasicSymbolRef::SF_Global) &&
!(Flags & object::BasicSymbolRef::SF_FormatSpecific))
return;
++MsymI;
}
};
Skip();
for (const InputFile::Symbol &Sym : Syms) {
assert(ResI != ResE);
SymbolResolution Res = *ResI++;
addSymbolToGlobalRes(Used, Sym, Res, 0);
addSymbolToGlobalRes(Sym, Res, 0);
if (Sym.isGV()) {
GlobalValue *GV = Sym.getGV();
assert(MsymI != MsymE);
ModuleSymbolTable::Symbol Msym = *MsymI++;
Skip();
if (GlobalValue *GV = Msym.dyn_cast<GlobalValue *>()) {
if (Res.Prevailing) {
if (Sym.isUndefined())
continue;
@ -612,7 +570,7 @@ Error LTO::addRegularLTO(BitcodeModule BM, const SymbolResolution *&ResI,
if (Sym.isCommon()) {
// FIXME: We should figure out what to do about commons defined by asm.
// For now they aren't reported correctly by ModuleSymbolTable.
auto &CommonRes = RegularLTO.Commons[Sym.getGV()->getName()];
auto &CommonRes = RegularLTO.Commons[Sym.getIRName()];
CommonRes.Size = std::max(CommonRes.Size, Sym.getCommonSize());
CommonRes.Align = std::max(CommonRes.Align, Sym.getCommonAlignment());
CommonRes.Prevailing |= Res.Prevailing;
@ -620,6 +578,7 @@ Error LTO::addRegularLTO(BitcodeModule BM, const SymbolResolution *&ResI,
// FIXME: use proposed local attribute for FinalDefinitionInLinkageUnit.
}
assert(MsymI == MsymE);
return RegularLTO.Mover->move(std::move(*MOrErr), Keep,
[](GlobalValue &, IRMover::ValueAdder) {},
@ -627,15 +586,10 @@ Error LTO::addRegularLTO(BitcodeModule BM, const SymbolResolution *&ResI,
}
// Add a ThinLTO object to the link.
// FIXME: This function should not need to take as many parameters once we have
// a bitcode symbol table.
Error LTO::addThinLTO(BitcodeModule BM, Module &M,
iterator_range<InputFile::symbol_iterator> Syms,
Error LTO::addThinLTO(BitcodeModule BM,
ArrayRef<InputFile::Symbol> Syms,
const SymbolResolution *&ResI,
const SymbolResolution *ResE) {
SmallPtrSet<GlobalValue *, 8> Used;
collectUsedGlobalVariables(M, Used, /*CompilerUsed*/ false);
Expected<std::unique_ptr<ModuleSummaryIndex>> SummaryOrErr = BM.getSummary();
if (!SummaryOrErr)
return SummaryOrErr.takeError();
@ -645,11 +599,15 @@ Error LTO::addThinLTO(BitcodeModule BM, Module &M,
for (const InputFile::Symbol &Sym : Syms) {
assert(ResI != ResE);
SymbolResolution Res = *ResI++;
addSymbolToGlobalRes(Used, Sym, Res, ThinLTO.ModuleMap.size() + 1);
addSymbolToGlobalRes(Sym, Res, ThinLTO.ModuleMap.size() + 1);
if (Res.Prevailing && Sym.isGV())
ThinLTO.PrevailingModuleForGUID[Sym.getGV()->getGUID()] =
BM.getModuleIdentifier();
if (Res.Prevailing) {
if (!Sym.getIRName().empty()) {
auto GUID = GlobalValue::getGUID(GlobalValue::getGlobalIdentifier(
Sym.getIRName(), GlobalValue::ExternalLinkage, ""));
ThinLTO.PrevailingModuleForGUID[GUID] = BM.getModuleIdentifier();
}
}
}
if (!ThinLTO.ModuleMap.insert({BM.getModuleIdentifier(), BM}).second)

View File

@ -8,6 +8,7 @@ add_llvm_library(LLVMObject
ELFObjectFile.cpp
Error.cpp
IRObjectFile.cpp
IRSymtab.cpp
MachOObjectFile.cpp
MachOUniversal.cpp
ModuleSummaryIndexObjectFile.cpp

View File

@ -0,0 +1,228 @@
//===- IRSymtab.cpp - implementation of IR symbol tables --------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "llvm/Object/IRSymtab.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/StringTableBuilder.h"
#include "llvm/Object/ModuleSymbolTable.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/StringSaver.h"
using namespace llvm;
using namespace irsymtab;
namespace {
/// Stores the temporary state that is required to build an IR symbol table.
struct Builder {
// Output buffers supplied by the caller; the builder only holds references.
SmallVector<char, 0> &Symtab;
SmallVector<char, 0> &Strtab;
Builder(SmallVector<char, 0> &Symtab, SmallVector<char, 0> &Strtab)
: Symtab(Symtab), Strtab(Strtab) {}
// Collects every string handed to setStr().
StringTableBuilder StrtabBuilder{StringTableBuilder::ELF};
// Alloc must be declared before Saver, which is initialized from it.
BumpPtrAllocator Alloc;
StringSaver Saver{Alloc};
DenseMap<const Comdat *, unsigned> ComdatMap;
// Accumulates the symbols of all modules passed to addModule().
ModuleSymbolTable Msymtab;
// Filled by collectUsedGlobalVariables() in addModule().
SmallPtrSet<GlobalValue *, 8> Used;
Mangler Mang;
// addModule() only gathers COFF linker options when this is a COFF triple.
Triple TT;
// Tables that are accumulated in memory while building.
std::vector<storage::Comdat> Comdats;
std::vector<storage::Module> Mods;
std::vector<storage::Symbol> Syms;
std::vector<storage::Uncommon> Uncommons;
// COFFLinkerOpts must be declared before the stream that writes into it.
std::string COFFLinkerOpts;
raw_string_ostream COFFLinkerOptsOS{COFFLinkerOpts};
// Adds Value to the string table builder and stores the returned offset in S.
void setStr(storage::Str &S, StringRef Value) {
S.Offset = StrtabBuilder.add(Value);
}
// Appends the raw bytes of Objs to the end of Symtab and records in R the
// byte offset at which they start and the number of objects written.
template <typename T>
void writeRange(storage::Range<T> &R, const std::vector<T> &Objs) {
R.Offset = Symtab.size();
R.Size = Objs.size();
Symtab.insert(Symtab.end(), reinterpret_cast<const char *>(Objs.data()),
reinterpret_cast<const char *>(Objs.data() + Objs.size()));
}
Error addModule(Module *M);
Error addSymbol(ModuleSymbolTable::Symbol Sym);
Error build(ArrayRef<Module *> Mods);
};
/// Registers module \p M with the builder.
///
/// Collects the module's used globals into Used, adds its symbols to Msymtab
/// and records the Msymtab index range they occupy. For COFF targets the
/// module's "Linker Options" flag, if present, is appended to the linker
/// option string.
///
/// \returns the error produced by materializing the module's metadata, or
/// success.
Error Builder::addModule(Module *M) {
  collectUsedGlobalVariables(*M, Used, /*CompilerUsed*/ false);

  // Begin/End are the Msymtab symbol counts before and after adding M.
  storage::Module Entry;
  Entry.Begin = Msymtab.symbols().size();
  Msymtab.addModule(M);
  Entry.End = Msymtab.symbols().size();
  Mods.push_back(Entry);

  // Linker options are only gathered for COFF.
  if (!TT.isOSBinFormatCOFF())
    return Error::success();

  if (auto Err = M->materializeMetadata())
    return Err;

  Metadata *Flag = M->getModuleFlag("Linker Options");
  if (!Flag)
    return Error::success();

  // The flag is a list of option groups; each option is emitted with a
  // leading space.
  for (const MDOperand &Group : cast<MDNode>(Flag)->operands())
    for (const MDOperand &Opt : cast<MDNode>(Group)->operands())
      COFFLinkerOptsOS << " " << cast<MDString>(Opt)->getString();

  return Error::success();
}
/// Appends a storage::Symbol describing \p Msym to Syms.
///
/// The symbol's printed name, its flags and, for symbols backed by a
/// GlobalValue, its IR name, visibility, comdat index and common/COFF
/// specific data are recorded. Rarely needed data lives in a separate
/// storage::Uncommon entry that is only created on demand.
///
/// \returns success, or a StringError if the comdat of an alias cannot be
/// determined or if a COFF weak external's aliasee is not a global value.
Error Builder::addSymbol(ModuleSymbolTable::Symbol Msym) {
  Syms.emplace_back();
  storage::Symbol &Sym = Syms.back();
  Sym = {};
  Sym.UncommonIndex = -1;

  // Lazily creates the Uncommon entry for this symbol the first time it is
  // requested and returns the same entry on later calls.
  storage::Uncommon *Unc = nullptr;
  auto Uncommon = [&]() -> storage::Uncommon & {
    if (Unc)
      return *Unc;
    Sym.UncommonIndex = Uncommons.size();
    Uncommons.emplace_back();
    Unc = &Uncommons.back();
    *Unc = {};
    setStr(Unc->COFFWeakExternFallbackName, "");
    return *Unc;
  };

  SmallString<64> Name;
  {
    raw_svector_ostream OS(Name);
    Msymtab.printSymbolName(OS, Msym);
  }
  // Name is a local buffer, so hand the string table a saved copy.
  setStr(Sym.Name, Saver.save(StringRef(Name)));

  // Translate the object-level symbol flags into storage flag bits.
  auto Flags = Msymtab.getSymbolFlags(Msym);
  if (Flags & object::BasicSymbolRef::SF_Undefined)
    Sym.Flags |= 1 << storage::Symbol::FB_undefined;
  if (Flags & object::BasicSymbolRef::SF_Weak)
    Sym.Flags |= 1 << storage::Symbol::FB_weak;
  if (Flags & object::BasicSymbolRef::SF_Common)
    Sym.Flags |= 1 << storage::Symbol::FB_common;
  if (Flags & object::BasicSymbolRef::SF_Indirect)
    Sym.Flags |= 1 << storage::Symbol::FB_indirect;
  if (Flags & object::BasicSymbolRef::SF_Global)
    Sym.Flags |= 1 << storage::Symbol::FB_global;
  if (Flags & object::BasicSymbolRef::SF_FormatSpecific)
    Sym.Flags |= 1 << storage::Symbol::FB_format_specific;

  Sym.ComdatIndex = -1;
  auto *GV = Msym.dyn_cast<GlobalValue *>();
  if (!GV) {
    // Not backed by a GlobalValue: there is no IR-level information to add.
    setStr(Sym.IRName, "");
    return Error::success();
  }

  setStr(Sym.IRName, GV->getName());

  if (Used.count(GV))
    Sym.Flags |= 1 << storage::Symbol::FB_used;
  if (GV->isThreadLocal())
    Sym.Flags |= 1 << storage::Symbol::FB_tls;
  if (GV->hasGlobalUnnamedAddr())
    Sym.Flags |= 1 << storage::Symbol::FB_unnamed_addr;
  if (canBeOmittedFromSymbolTable(GV))
    Sym.Flags |= 1 << storage::Symbol::FB_may_omit;
  Sym.Flags |= unsigned(GV->getVisibility()) << storage::Symbol::FB_visibility;

  if (Flags & object::BasicSymbolRef::SF_Common) {
    Uncommon().CommonSize = GV->getParent()->getDataLayout().getTypeAllocSize(
        GV->getType()->getElementType());
    Uncommon().CommonAlign = GV->getAlignment();
  }

  const GlobalObject *Base = GV->getBaseObject();
  if (!Base)
    return make_error<StringError>("Unable to determine comdat of alias!",
                                   inconvertibleErrorCode());
  if (const Comdat *C = Base->getComdat()) {
    // Reuse the index of a comdat seen before, otherwise append a new entry.
    auto P = ComdatMap.insert(std::make_pair(C, Comdats.size()));
    Sym.ComdatIndex = P.first->second;

    if (P.second) {
      storage::Comdat Comdat;
      setStr(Comdat.Name, C->getName());
      Comdats.push_back(Comdat);
    }
  }

  if (TT.isOSBinFormatCOFF()) {
    emitLinkerFlagsForGlobalCOFF(COFFLinkerOptsOS, GV, TT, Mang);

    if ((Flags & object::BasicSymbolRef::SF_Weak) &&
        (Flags & object::BasicSymbolRef::SF_Indirect)) {
      // An alias may point at a constant expression that, even with pointer
      // casts stripped, is not a global value. Report that as an error
      // instead of asserting on it.
      auto *Fallback = dyn_cast<GlobalValue>(
          cast<GlobalAlias>(GV)->getAliasee()->stripPointerCasts());
      if (!Fallback)
        return make_error<StringError>("Invalid weak external",
                                       inconvertibleErrorCode());

      std::string FallbackName;
      raw_string_ostream OS(FallbackName);
      Msymtab.printSymbolName(OS, Fallback);
      OS.flush();
      setStr(Uncommon().COFFWeakExternFallbackName, Saver.save(FallbackName));
    }
  }

  return Error::success();
}
/// Builds the symbol table for \p IRMods, writing the header and storage
/// arrays to Symtab and the string table to Strtab.
///
/// \p IRMods must not be empty; the source file name and target triple are
/// taken from its first element.
///
/// \returns the first error reported by addModule() or addSymbol(), or
/// success.
Error Builder::build(ArrayRef<Module *> IRMods) {
  assert(!IRMods.empty());
  Module *FirstMod = IRMods[0];

  storage::Header Hdr;
  setStr(Hdr.SourceFileName, FirstMod->getSourceFileName());
  TT = Triple(FirstMod->getTargetTriple());

  // Populate Msymtab with the symbols of every module.
  for (Module *M : IRMods) {
    Error Err = addModule(M);
    if (Err)
      return Err;
  }

  // Then describe each collected symbol.
  for (ModuleSymbolTable::Symbol S : Msymtab.symbols()) {
    Error Err = addSymbol(S);
    if (Err)
      return Err;
  }

  COFFLinkerOptsOS.flush();
  setStr(Hdr.COFFLinkerOpts, COFFLinkerOpts);

  // The header's range fields are only known once the arrays have been
  // written, so leave room for the header now and fill it in at the end.
  Symtab.resize(sizeof(storage::Header));
  writeRange(Hdr.Modules, Mods);
  writeRange(Hdr.Comdats, Comdats);
  writeRange(Hdr.Symbols, Syms);
  writeRange(Hdr.Uncommons, Uncommons);
  auto *HdrSlot = reinterpret_cast<storage::Header *>(Symtab.data());
  *HdrSlot = Hdr;

  // Emit the string table now that every string has been added.
  StrtabBuilder.finalizeInOrder();
  raw_svector_ostream StrtabOS(Strtab);
  StrtabBuilder.write(StrtabOS);

  return Error::success();
}
} // anonymous namespace
/// Builds an IR symbol table for \p Mods, storing the table in \p Symtab and
/// its strings in \p Strtab. Returns any error reported by the builder.
Error irsymtab::build(ArrayRef<Module *> Mods, SmallVector<char, 0> &Symtab,
                      SmallVector<char, 0> &Strtab) {
  Builder B(Symtab, Strtab);
  return B.build(Mods);
}

View File

@ -465,7 +465,7 @@ static ld_plugin_status claim_file_hook(const ld_plugin_input_file *file,
EC == object::object_error::bitcode_section_not_found)
*claimed = 0;
else
message(LDPL_ERROR,
message(LDPL_FATAL,
"LLVM gold plugin has failed to create LTO module: %s",
EI.message().c_str());
});
@ -536,7 +536,7 @@ static ld_plugin_status claim_file_hook(const ld_plugin_input_file *file,
sym.size = 0;
sym.comdat_key = nullptr;
int CI = check(Sym.getComdatIndex());
int CI = Sym.getComdatIndex();
if (CI != -1) {
StringRef C = Obj->getComdatTable()[CI];
sym.comdat_key = strdup(C.str().c_str());