Introduce StringRefZ class to represent null-terminated strings.

StringRefZ is a class to represent a null-terminated string. The string
length is computed lazily, so it is more efficient than StringRef for
representing strings in a string table.

The motivation for defining this new class is to merge functions
that differ only in their string types; we have many constructors that take
`const char *` or `StringRef`. With StringRefZ, we can merge them.

Differential Revision: https://reviews.llvm.org/D27037

llvm-svn: 288172
This commit is contained in:
Rui Ueyama 2016-11-29 18:05:04 +00:00
parent c62b64a9e8
commit a13efc2a73
8 changed files with 95 additions and 84 deletions

View File

@ -438,6 +438,11 @@ SymbolBody *elf::ObjectFile<ELFT>::createSymbolBody(const Elf_Sym *Sym) {
int Binding = Sym->getBinding();
InputSectionBase<ELFT> *Sec = getSection(*Sym);
uint8_t StOther = Sym->st_other;
uint8_t Type = Sym->getType();
uintX_t Value = Sym->st_value;
uintX_t Size = Sym->st_size;
if (Binding == STB_LOCAL) {
if (Sym->getType() == STT_FILE)
SourceFile = check(Sym->getName(this->StringTable));
@ -447,20 +452,19 @@ SymbolBody *elf::ObjectFile<ELFT>::createSymbolBody(const Elf_Sym *Sym) {
const char *Name = this->StringTable.data() + Sym->st_name;
if (Sym->st_shndx == SHN_UNDEF)
return new (BAlloc) Undefined(Name, Sym->st_other, Sym->getType(), this);
return new (BAlloc) DefinedRegular<ELFT>(Name, *Sym, Sec);
return new (BAlloc)
Undefined(Name, /*IsLocal=*/true, StOther, Type, this);
return new (BAlloc) DefinedRegular<ELFT>(Name, /*IsLocal=*/true, StOther,
Type, Value, Size, Sec, this);
}
StringRef Name = check(Sym->getName(this->StringTable));
uint8_t StOther = Sym->st_other;
uint8_t Type = Sym->getType();
uintX_t Value = Sym->st_value;
uintX_t Size = Sym->st_size;
switch (Sym->st_shndx) {
case SHN_UNDEF:
return elf::Symtab<ELFT>::X
->addUndefined(Name, Binding, StOther, Type,
->addUndefined(Name, /*IsLocal=*/false, Binding, StOther, Type,
/*CanOmitFromDynSym=*/false, this)
->body();
case SHN_COMMON:
@ -480,7 +484,7 @@ SymbolBody *elf::ObjectFile<ELFT>::createSymbolBody(const Elf_Sym *Sym) {
case STB_GNU_UNIQUE:
if (Sec == &InputSection<ELFT>::Discarded)
return elf::Symtab<ELFT>::X
->addUndefined(Name, Binding, StOther, Type,
->addUndefined(Name, /*IsLocal=*/false, Binding, StOther, Type,
/*CanOmitFromDynSym=*/false, this)
->body();
return elf::Symtab<ELFT>::X
@ -723,12 +727,14 @@ static Symbol *createBitcodeSymbol(const std::vector<bool> &KeptComdats,
int C = check(ObjSym.getComdatIndex());
if (C != -1 && !KeptComdats[C])
return Symtab<ELFT>::X->addUndefined(NameRef, Binding, Visibility, Type,
CanOmitFromDynSym, F);
return Symtab<ELFT>::X->addUndefined(NameRef, /*IsLocal=*/false, Binding,
Visibility, Type, CanOmitFromDynSym,
F);
if (Flags & BasicSymbolRef::SF_Undefined)
return Symtab<ELFT>::X->addUndefined(NameRef, Binding, Visibility, Type,
CanOmitFromDynSym, F);
return Symtab<ELFT>::X->addUndefined(NameRef, /*IsLocal=*/false, Binding,
Visibility, Type, CanOmitFromDynSym,
F);
if (Flags & BasicSymbolRef::SF_Common)
return Symtab<ELFT>::X->addCommon(NameRef, ObjSym.getCommonSize(),

View File

@ -97,8 +97,8 @@ BitcodeCompiler::BitcodeCompiler() : LTOObj(createLTO()) {}
BitcodeCompiler::~BitcodeCompiler() = default;
static void undefine(Symbol *S) {
replaceBody<Undefined>(S, S->body()->getName(), STV_DEFAULT, S->body()->Type,
nullptr);
replaceBody<Undefined>(S, S->body()->getName(), /*IsLocal=*/false,
STV_DEFAULT, S->body()->Type, nullptr);
}
void BitcodeCompiler::add(BitcodeFile &F) {

View File

@ -16,6 +16,7 @@
#include "llvm/Config/config.h"
#include "llvm/Demangle/Demangle.h"
#include <algorithm>
#include <cstring>
using namespace llvm;
using namespace lld;

View File

@ -26,6 +26,36 @@ std::vector<uint8_t> parseHex(StringRef S);
bool isValidCIdentifier(StringRef S);
StringRef unquote(StringRef S);
// StringRefZ is a StringRef whose length may be left uncomputed until it
// is actually requested. Constructing one from a bare `const char *` does
// not scan the string, which makes it cheaper to create than a StringRef
// when the size is not known up front.
//
// ELF string tables contain a lot of null-terminated strings. Most of
// them are names of local symbols, which the linker does not use for
// name resolution, so their lengths are usually never needed. We use
// this class to represent strings read from string tables.
//
// A Size of (size_t)-1 marks "length not computed yet"; the conversion
// to llvm::StringRef fills it in with strlen() and keeps the result.
class StringRefZ {
public:
  // Empty string: null pointer, known length of zero.
  StringRefZ() : Start(nullptr), Size(0) {}

  // Pointer plus an already-known length; no scan is ever performed
  // unless Size happens to be the (size_t)-1 marker.
  StringRefZ(const char *S, size_t Size) : Start(S), Size(Size) {}

  // Null-terminated string of unknown length; measured on first use.
  /*implicit*/ StringRefZ(const char *S)
      : Start(S), Size(static_cast<size_t>(-1)) {}

  // Adopts both the pointer and the length of an existing StringRef.
  /*implicit*/ StringRefZ(llvm::StringRef S)
      : Start(S.data()), Size(S.size()) {}

  // Yields the string as a StringRef, computing and caching the length
  // first if it has not been determined yet.
  operator llvm::StringRef() const {
    const bool LengthKnown = Size != static_cast<size_t>(-1);
    if (!LengthKnown)
      Size = strlen(Start);
    return llvm::StringRef(Start, Size);
  }

private:
  const char *Start;
  // Mutable so the const conversion operator can cache the length.
  mutable size_t Size;
};
// This class represents a glob pattern. Supported metacharacters
// are "*", "?", "[<chars>]" and "[^<chars>]".
class GlobPattern {

View File

@ -236,14 +236,15 @@ SymbolTable<ELFT>::insert(StringRef Name, uint8_t Type, uint8_t Visibility,
}
template <class ELFT> Symbol *SymbolTable<ELFT>::addUndefined(StringRef Name) {
return addUndefined(Name, STB_GLOBAL, STV_DEFAULT, /*Type*/ 0,
return addUndefined(Name, /*IsLocal=*/false, STB_GLOBAL, STV_DEFAULT,
/*Type*/ 0,
/*CanOmitFromDynSym*/ false, /*File*/ nullptr);
}
template <class ELFT>
Symbol *SymbolTable<ELFT>::addUndefined(StringRef Name, uint8_t Binding,
uint8_t StOther, uint8_t Type,
bool CanOmitFromDynSym,
Symbol *SymbolTable<ELFT>::addUndefined(StringRef Name, bool IsLocal,
uint8_t Binding, uint8_t StOther,
uint8_t Type, bool CanOmitFromDynSym,
InputFile *File) {
Symbol *S;
bool WasInserted;
@ -251,7 +252,7 @@ Symbol *SymbolTable<ELFT>::addUndefined(StringRef Name, uint8_t Binding,
insert(Name, Type, StOther & 3, CanOmitFromDynSym, File);
if (WasInserted) {
S->Binding = Binding;
replaceBody<Undefined>(S, Name, StOther, Type, File);
replaceBody<Undefined>(S, Name, IsLocal, StOther, Type, File);
return S;
}
if (Binding != STB_WEAK) {
@ -378,8 +379,8 @@ Symbol *SymbolTable<ELFT>::addRegular(StringRef Name, uint8_t StOther,
/*CanOmitFromDynSym*/ false, File);
int Cmp = compareDefinedNonCommon(S, WasInserted, Binding);
if (Cmp > 0)
replaceBody<DefinedRegular<ELFT>>(S, Name, StOther, Type, Value, Size,
Section, File);
replaceBody<DefinedRegular<ELFT>>(S, Name, /*IsLocal=*/false, StOther, Type,
Value, Size, Section, File);
else if (Cmp == 0)
reportDuplicate(S->body(), Section, Value);
return S;
@ -432,7 +433,8 @@ Symbol *SymbolTable<ELFT>::addBitcode(StringRef Name, uint8_t Binding,
insert(Name, Type, StOther & 3, CanOmitFromDynSym, F);
int Cmp = compareDefinedNonCommon(S, WasInserted, Binding);
if (Cmp > 0)
replaceBody<DefinedRegular<ELFT>>(S, Name, StOther, Type, 0, 0, nullptr, F);
replaceBody<DefinedRegular<ELFT>>(S, Name, /*IsLocal=*/false, StOther, Type,
0, 0, nullptr, F);
else if (Cmp == 0)
reportDuplicate(S->body(), F);
return S;

View File

@ -55,8 +55,9 @@ public:
uint8_t Visibility = llvm::ELF::STV_HIDDEN);
Symbol *addUndefined(StringRef Name);
Symbol *addUndefined(StringRef Name, uint8_t Binding, uint8_t StOther,
uint8_t Type, bool CanOmitFromDynSym, InputFile *File);
Symbol *addUndefined(StringRef Name, bool IsLocal, uint8_t Binding,
uint8_t StOther, uint8_t Type, bool CanOmitFromDynSym,
InputFile *File);
Symbol *addRegular(StringRef Name, uint8_t StOther, uint8_t Type,
uintX_t Value, uintX_t Size, uint8_t Binding,

View File

@ -92,22 +92,12 @@ static typename ELFT::uint getSymVA(const SymbolBody &Body,
llvm_unreachable("invalid symbol kind");
}
SymbolBody::SymbolBody(Kind K, const char *Name, uint8_t StOther, uint8_t Type)
: SymbolKind(K), NeedsCopyOrPltAddr(false), IsLocal(true),
SymbolBody::SymbolBody(Kind K, StringRefZ Name, bool IsLocal, uint8_t StOther,
uint8_t Type)
: SymbolKind(K), NeedsCopyOrPltAddr(false), IsLocal(IsLocal),
IsInGlobalMipsGot(false), Is32BitMipsGot(false), Type(Type),
StOther(StOther), Name(Name) {}
SymbolBody::SymbolBody(Kind K, StringRef Name, uint8_t StOther, uint8_t Type)
: SymbolKind(K), NeedsCopyOrPltAddr(false), IsLocal(false),
IsInGlobalMipsGot(false), Is32BitMipsGot(false), Type(Type),
StOther(StOther), NameLen(Name.size()), Name(Name.data()) {}
StringRef SymbolBody::getName() const {
if (NameLen == (uint32_t)-1)
NameLen = strlen(Name);
return StringRef(Name, NameLen);
}
// Returns true if a symbol can be replaced at load-time by a symbol
// with the same name defined in other ELF executable or DSO.
bool SymbolBody::isPreemptible() const {
@ -203,7 +193,7 @@ void SymbolBody::parseSymbolVersion() {
return;
// Truncate the symbol name so that it doesn't include the version string.
NameLen = Pos;
Name = {S.data(), Pos};
// '@@' in a symbol name means the default version.
// It is usually the most recent one.
@ -226,11 +216,9 @@ void SymbolBody::parseSymbolVersion() {
error("symbol " + S + " has undefined version " + Verstr);
}
Defined::Defined(Kind K, StringRef Name, uint8_t StOther, uint8_t Type)
: SymbolBody(K, Name, StOther, Type) {}
Defined::Defined(Kind K, const char *Name, uint8_t StOther, uint8_t Type)
: SymbolBody(K, Name, StOther, Type) {}
Defined::Defined(Kind K, StringRefZ Name, bool IsLocal, uint8_t StOther,
uint8_t Type)
: SymbolBody(K, Name, IsLocal, StOther, Type) {}
template <class ELFT> bool DefinedRegular<ELFT>::isMipsPIC() const {
if (!Section || !isFunc())
@ -239,27 +227,23 @@ template <class ELFT> bool DefinedRegular<ELFT>::isMipsPIC() const {
(Section->getFile()->getObj().getHeader()->e_flags & EF_MIPS_PIC);
}
Undefined::Undefined(StringRef Name, uint8_t StOther, uint8_t Type,
InputFile *File)
: SymbolBody(SymbolBody::UndefinedKind, Name, StOther, Type) {
this->File = File;
}
Undefined::Undefined(const char *Name, uint8_t StOther, uint8_t Type,
InputFile *File)
: SymbolBody(SymbolBody::UndefinedKind, Name, StOther, Type) {
Undefined::Undefined(StringRefZ Name, bool IsLocal, uint8_t StOther,
uint8_t Type, InputFile *File)
: SymbolBody(SymbolBody::UndefinedKind, Name, IsLocal, StOther, Type) {
this->File = File;
}
template <typename ELFT>
DefinedSynthetic<ELFT>::DefinedSynthetic(StringRef N, uintX_t Value,
DefinedSynthetic<ELFT>::DefinedSynthetic(StringRef Name, uintX_t Value,
const OutputSectionBase *Section)
: Defined(SymbolBody::DefinedSyntheticKind, N, STV_HIDDEN, 0 /* Type */),
: Defined(SymbolBody::DefinedSyntheticKind, Name, /*IsLocal=*/false,
STV_HIDDEN, 0 /* Type */),
Value(Value), Section(Section) {}
DefinedCommon::DefinedCommon(StringRef N, uint64_t Size, uint64_t Alignment,
DefinedCommon::DefinedCommon(StringRef Name, uint64_t Size, uint64_t Alignment,
uint8_t StOther, uint8_t Type, InputFile *File)
: Defined(SymbolBody::DefinedCommonKind, N, StOther, Type),
: Defined(SymbolBody::DefinedCommonKind, Name, /*IsLocal=*/false, StOther,
Type),
Alignment(Alignment), Size(Size) {
this->File = File;
}

View File

@ -16,6 +16,7 @@
#define LLD_ELF_SYMBOLS_H
#include "InputSection.h"
#include "Strings.h"
#include "lld/Core/LLVM.h"
#include "llvm/Object/Archive.h"
@ -28,7 +29,6 @@ class ArchiveFile;
class BitcodeFile;
class InputFile;
class LazyObjectFile;
class SymbolBody;
template <class ELFT> class ObjectFile;
template <class ELFT> class OutputSection;
class OutputSectionBase;
@ -69,7 +69,7 @@ public:
bool isShared() const { return SymbolKind == SharedKind; }
bool isLocal() const { return IsLocal; }
bool isPreemptible() const;
StringRef getName() const;
StringRef getName() const { return Name; }
uint8_t getVisibility() const { return StOther & 0x3; }
void parseSymbolVersion();
@ -98,8 +98,8 @@ public:
uint32_t GlobalDynIndex = -1;
protected:
SymbolBody(Kind K, StringRef Name, uint8_t StOther, uint8_t Type);
SymbolBody(Kind K, const char *Name, uint8_t StOther, uint8_t Type);
SymbolBody(Kind K, StringRefZ Name, bool IsLocal, uint8_t StOther,
uint8_t Type);
const unsigned SymbolKind : 8;
@ -136,17 +136,13 @@ public:
bool isFile() const { return Type == llvm::ELF::STT_FILE; }
protected:
// Local symbols are not inserted to the symbol table, so we usually
// don't need their names at all. We read symbol names lazily if possible.
mutable uint32_t NameLen = (uint32_t)-1;
const char *Name;
StringRefZ Name;
};
// The base class for any defined symbols.
class Defined : public SymbolBody {
public:
Defined(Kind K, StringRef Name, uint8_t StOther, uint8_t Type);
Defined(Kind K, const char *Name, uint8_t StOther, uint8_t Type);
Defined(Kind K, StringRefZ Name, bool IsLocal, uint8_t StOther, uint8_t Type);
static bool classof(const SymbolBody *S) { return S->isDefined(); }
};
@ -175,25 +171,15 @@ template <class ELFT> class DefinedRegular : public Defined {
typedef typename ELFT::uint uintX_t;
public:
DefinedRegular(StringRef Name, uint8_t StOther, uint8_t Type, uintX_t Value,
uintX_t Size, InputSectionBase<ELFT> *Section, InputFile *File)
: Defined(SymbolBody::DefinedRegularKind, Name, StOther, Type),
DefinedRegular(StringRefZ Name, bool IsLocal, uint8_t StOther, uint8_t Type,
uintX_t Value, uintX_t Size, InputSectionBase<ELFT> *Section,
InputFile *File)
: Defined(SymbolBody::DefinedRegularKind, Name, IsLocal, StOther, Type),
Value(Value), Size(Size),
Section(Section ? Section->Repl : NullInputSection) {
this->File = File;
}
DefinedRegular(const char *Name, const Elf_Sym &Sym,
InputSectionBase<ELFT> *Section)
: Defined(SymbolBody::DefinedRegularKind, Name, Sym.st_other,
Sym.getType()),
Value(Sym.st_value), Size(Sym.st_size),
Section(Section ? Section->Repl : NullInputSection) {
assert(isLocal());
if (Section)
this->File = Section->getFile();
}
// Return true if the symbol is a PIC function.
bool isMipsPIC() const;
@ -248,8 +234,8 @@ public:
class Undefined : public SymbolBody {
public:
Undefined(StringRef Name, uint8_t StOther, uint8_t Type, InputFile *F);
Undefined(const char *Name, uint8_t StOther, uint8_t Type, InputFile *F);
Undefined(StringRefZ Name, bool IsLocal, uint8_t StOther, uint8_t Type,
InputFile *F);
static bool classof(const SymbolBody *S) {
return S->kind() == UndefinedKind;
@ -270,7 +256,8 @@ public:
SharedSymbol(SharedFile<ELFT> *F, StringRef Name, const Elf_Sym &Sym,
const Elf_Verdef *Verdef)
: Defined(SymbolBody::SharedKind, Name, Sym.st_other, Sym.getType()),
: Defined(SymbolBody::SharedKind, Name, /*IsLocal=*/false, Sym.st_other,
Sym.getType()),
Sym(Sym), Verdef(Verdef) {
// IFuncs defined in DSOs are treated as functions by the static linker.
if (isGnuIFunc())
@ -309,7 +296,7 @@ public:
protected:
Lazy(SymbolBody::Kind K, StringRef Name, uint8_t Type)
: SymbolBody(K, Name, llvm::ELF::STV_DEFAULT, Type) {}
: SymbolBody(K, Name, /*IsLocal=*/false, llvm::ELF::STV_DEFAULT, Type) {}
};
// LazyArchive symbols represents symbols in archive files.