[elfabi] Add support for reading dynamic symbols from binaries

This patch adds initial support for reading dynamic symbols from ELF binaries. Currently, STT_NOTYPE, STT_OBJECT, STT_FUNC, and STT_TLS are explicitly supported. Other symbol types are mapped to ELFSymbolType::Unknown to improve signal/noise ratio.

Symbols must meet two criteria to be read into an ELFStub:

 - The symbol's binding must be STB_GLOBAL or STB_WEAK.
 - The symbol's visibility must be STV_DEFAULT or STV_PROTECTED.

This filters out symbols that aren't of interest during compile-time linking against a shared object.

This change uses DT_HASH and DT_GNU_HASH to determine the size of .dynsym. Using hash tables to determine the number of symbols in .dynsym allows llvm-elfabi to work on binaries without relying on section headers.

Differential Revision: https://reviews.llvm.org/D56031

llvm-svn: 352121
This commit is contained in:
Armando Montanez 2019-01-24 22:39:21 +00:00
parent ffcb2d5bf8
commit c38aa8169e
14 changed files with 248 additions and 20 deletions

Binary file not shown.

Binary file not shown.

View File

@ -18,11 +18,12 @@ Sections:
Flags: [ SHF_ALLOC ]
Address: 0x0008
AddressAlign: 8
Content: "0a0000000000000001000000000000000500000000000000000000000000000000000000000000000000000000000000"
Content: "0a000000000000000100000000000000050000000000000000000000000000000600000000000000000000000000000000000000000000000000000000000000"
# DT_STRSZ 1 (0x1)
# DT_STRTAB 0x0
# DT_SYMTAB 0x0
# DT_NULL 0x0
Size: 48
Size: 64
Link: .dynstr
ProgramHeaders:
- Type: PT_LOAD

View File

@ -18,11 +18,12 @@ Sections:
Flags: [ SHF_ALLOC ]
Address: 0x0008
AddressAlign: 8
Content: "0a0000000000000001000000000000000500000000000000000000000000000000000000000000000000000000000000"
Content: "0a000000000000000100000000000000050000000000000000000000000000000600000000000000000000000000000000000000000000000000000000000000"
# DT_STRSZ 1 (0x1)
# DT_STRTAB 0x0
# DT_SYMTAB 0x0
# DT_NULL 0x0
Size: 48
Size: 64
Link: .dynstr
ProgramHeaders:
- Type: PT_LOAD

View File

@ -18,12 +18,13 @@ Sections:
Flags: [ SHF_ALLOC ]
Address: 0x0008
AddressAlign: 8
Content: "0e000000000000000d000000000000000a0000000000000001000000000000000500000000000000000000000000000000000000000000000000000000000000"
Content: "0e000000000000000d000000000000000a000000000000000100000000000000050000000000000000000000000000000600000000000000000000000000000000000000000000000000000000000000"
# DT_SONAME 13 (0x0d)
# DT_STRSZ 1 (0x01)
# DT_STRTAB 0x0
# DT_SYMTAB 0x0
# DT_NULL 0x0
Size: 64
Size: 80
Link: .dynstr
ProgramHeaders:
- Type: PT_LOAD

View File

@ -18,12 +18,13 @@ Sections:
Flags: [ SHF_ALLOC ]
Address: 0x1008
AddressAlign: 8
Content: "0e0000000000000000000000000000000a0000000000000001000000000000000500000000000000600200000000000000000000000000000000000000000000"
Content: "0e0000000000000000000000000000000a000000000000000100000000000000050000000000000060020000000000000600000000000000001000000000000000000000000000000000000000000000"
# DT_SONAME 0
# DT_STRSZ 1
# DT_STRTAB 0x0260 # Bad vaddr (no PT_LOAD for 0x0000 to 0x0FFF)
# DT_SYMTAB 0x1000
# DT_NULL 0x0
Size: 64
Size: 80
Link: .dynstr
ProgramHeaders:
- Type: PT_LOAD

View File

@ -18,15 +18,16 @@ Sections:
Type: SHT_DYNAMIC
Flags: [ SHF_ALLOC ]
Address: 0x1024
Content: "010000000000000001000000000000000e0000000000000015000000000000000100000000000000ffff0000000000000a0000000000000024000000000000000500000000000000001000000000000000000000000000000000000000000000"
Content: "010000000000000001000000000000000e0000000000000015000000000000000100000000000000ffff0000000000000a000000000000002400000000000000050000000000000000100000000000000600000000000000001000000000000000000000000000000000000000000000"
# DT_NEEDED 1 (0x01)
# DT_SONAME 21 (0x15)
# Bad DT_NEEDED entry (offset outside string table):
# DT_NEEDED 65535 (0xffff)
# DT_STRSZ 36 (0x24)
# DT_STRTAB 0x1000
# DT_SYMTAB 0x1000
# DT_NULL 0x0
Size: 96
Size: 112
ProgramHeaders:
- Type: PT_LOAD
Flags: [ PF_R ]

View File

@ -18,14 +18,15 @@ Sections:
Type: SHT_DYNAMIC
Flags: [ SHF_ALLOC ]
Address: 0x1024
Content: "010000000000000001000000000000000e00000000000000150000000000000001000000000000000b000000000000000a0000000000000024000000000000000500000000000000001000000000000000000000000000000000000000000000"
Content: "010000000000000001000000000000000e00000000000000150000000000000001000000000000000b000000000000000a000000000000002400000000000000050000000000000000100000000000000600000000000000001000000000000000000000000000000000000000000000"
# DT_NEEDED 1 (0x01)
# DT_SONAME 21 (0x15)
# DT_NEEDED 11 (0x0b)
# DT_STRSZ 36 (0x24)
# DT_STRTAB 0x1000
# DT_SYMTAB 0x1000
# DT_NULL 0x0
Size: 96
Size: 112
ProgramHeaders:
- Type: PT_LOAD
Flags: [ PF_R ]

View File

@ -20,12 +20,13 @@ Sections:
Flags: [ SHF_ALLOC ]
Address: 0x1018
AddressAlign: 8
Content: "0e0000000000000005000000000000000a0000000000000014000000000000000500000000000000001000000000000000000000000000000000000000000000"
Content: "0e0000000000000005000000000000000a000000000000001400000000000000050000000000000000100000000000000600000000000000001000000000000000000000000000000000000000000000"
# DT_SONAME 5 (0x05)
# DT_STRSZ 20 (0x14)
# DT_STRTAB 0x1000
# DT_SYMTAB 0x1000
# DT_NULL 0x0
Size: 64
Size: 80
Link: .dynstr
ProgramHeaders:
- Type: PT_LOAD

View File

@ -19,12 +19,13 @@ Sections:
Flags: [ SHF_ALLOC ]
Address: 0x1018
AddressAlign: 8
Content: "0e0000000000000005000000000000000a000000000000000f000000000000000500000000000000001000000000000000000000000000000000000000000000"
Content: "0e0000000000000005000000000000000a000000000000000f00000000000000050000000000000000100000000000000600000000000000001000000000000000000000000000000000000000000000"
# DT_SONAME 5 (0x05)
# DT_STRSZ 15 (0x0F)
# DT_STRTAB 0x1000
# DT_SYMTAB 0x1000
# DT_NULL 0x0
Size: 64
Size: 80
Link: .dynstr
ProgramHeaders:
- Type: PT_LOAD

View File

@ -19,12 +19,13 @@ Sections:
Flags: [ SHF_ALLOC ]
Address: 0x1018
AddressAlign: 8
Content: "0e0000000000000005000000000000000a0000000000000014000000000000000500000000000000001000000000000000000000000000000000000000000000"
Content: "0e0000000000000005000000000000000a000000000000001400000000000000050000000000000000100000000000000600000000000000001000000000000000000000000000000000000000000000"
# DT_SONAME 5 (0x05)
# DT_STRSZ 20 (0x14)
# DT_STRTAB 0x1000
# DT_SYMTAB 0x1000
# DT_NULL 0x0
Size: 64
Size: 80
Link: .dynstr
ProgramHeaders:
- Type: PT_LOAD

View File

@ -0,0 +1,22 @@
# RUN: llvm-elfabi --elf %p/Inputs/gnu_hash.so --emit-tbe=- | FileCheck %s
# CHECK: --- !tapi-tbe
# CHECK-NEXT: TbeVersion: 1.0
# CHECK-NEXT: SoName: libsomething.so
# CHECK-NEXT: Arch: x86_64
# CHECK-NEXT: NeededLibs:
# CHECK-NEXT: - libm.so.6
# CHECK-NEXT: - libc.so.6
# CHECK-NEXT: - ld-linux-x86-64.so.2
# CHECK-NEXT: Symbols:
# CHECK-NEXT: AGlobalInteger: { Type: Object, Size: 4 }
# CHECK-NEXT: AThreadLocalLongInteger: { Type: TLS, Size: 8 }
# CHECK-NEXT: _ITM_deregisterTMCloneTable: { Type: NoType, Undefined: true, Weak: true }
# CHECK-NEXT: _ITM_registerTMCloneTable: { Type: NoType, Undefined: true, Weak: true }
# CHECK-NEXT: _Z11rotateArrayPii: { Type: Func }
# CHECK-NEXT: __cxa_finalize: { Type: Func, Undefined: true, Weak: true }
# CHECK-NEXT: __gmon_start__: { Type: NoType, Undefined: true, Weak: true }
# CHECK-NEXT: __tls_get_addr: { Type: Func, Undefined: true }
# CHECK-NEXT: _fini: { Type: Func }
# CHECK-NEXT: _init: { Type: Func }
# CHECK-NEXT: ...

View File

@ -0,0 +1,22 @@
# RUN: llvm-elfabi --elf %p/Inputs/sysv_hash.so --emit-tbe=- | FileCheck %s
# CHECK: --- !tapi-tbe
# CHECK-NEXT: TbeVersion: 1.0
# CHECK-NEXT: SoName: libsomething.so
# CHECK-NEXT: Arch: x86_64
# CHECK-NEXT: NeededLibs:
# CHECK-NEXT: - libm.so.6
# CHECK-NEXT: - libc.so.6
# CHECK-NEXT: - ld-linux-x86-64.so.2
# CHECK-NEXT: Symbols:
# CHECK-NEXT: AGlobalInteger: { Type: Object, Size: 4 }
# CHECK-NEXT: AThreadLocalLongInteger: { Type: TLS, Size: 8 }
# CHECK-NEXT: _ITM_deregisterTMCloneTable: { Type: NoType, Undefined: true, Weak: true }
# CHECK-NEXT: _ITM_registerTMCloneTable: { Type: NoType, Undefined: true, Weak: true }
# CHECK-NEXT: _Z11rotateArrayPii: { Type: Func }
# CHECK-NEXT: __cxa_finalize: { Type: Func, Undefined: true, Weak: true }
# CHECK-NEXT: __gmon_start__: { Type: NoType, Undefined: true, Weak: true }
# CHECK-NEXT: __tls_get_addr: { Type: Func, Undefined: true }
# CHECK-NEXT: _fini: { Type: Func }
# CHECK-NEXT: _init: { Type: Func }
# CHECK-NEXT: ...

View File

@ -31,6 +31,11 @@ struct DynamicEntries {
uint64_t StrSize = 0;
Optional<uint64_t> SONameOffset;
std::vector<uint64_t> NeededLibNames;
// Symbol table:
uint64_t DynSymAddr = 0;
// Hash tables:
Optional<uint64_t> ElfHash;
Optional<uint64_t> GnuHash;
};
/// This function behaves similarly to StringRef::substr(), but attempts to
@ -81,6 +86,7 @@ static Error populateDynamic(DynamicEntries &Dyn,
// Search .dynamic for relevant entries.
bool FoundDynStr = false;
bool FoundDynStrSz = false;
bool FoundDynSym = false;
for (auto &Entry : DynTable) {
switch (Entry.d_tag) {
case DT_SONAME:
@ -97,6 +103,15 @@ static Error populateDynamic(DynamicEntries &Dyn,
case DT_NEEDED:
Dyn.NeededLibNames.push_back(Entry.d_un.d_val);
break;
case DT_SYMTAB:
Dyn.DynSymAddr = Entry.d_un.d_ptr;
FoundDynSym = true;
break;
case DT_HASH:
Dyn.ElfHash = Entry.d_un.d_ptr;
break;
case DT_GNU_HASH:
Dyn.GnuHash = Entry.d_un.d_ptr;
}
}
@ -108,6 +123,10 @@ static Error populateDynamic(DynamicEntries &Dyn,
return createError(
"Couldn't determine dynamic string table size (no DT_STRSZ entry)");
}
if (!FoundDynSym) {
return createError(
"Couldn't locate dynamic symbol table (no DT_SYMTAB entry)");
}
if (Dyn.SONameOffset.hasValue() && *Dyn.SONameOffset >= Dyn.StrSize) {
return createStringError(
object_error::parse_failed,
@ -126,6 +145,142 @@ static Error populateDynamic(DynamicEntries &Dyn,
return Error::success();
}
/// Derives the number of entries in .dynsym from a GNU hash table.
///
/// The largest bucket value is taken as the index of the first symbol in the
/// last chain. Starting from that symbol's chain entry, entries are visited
/// until one with its lowest bit set is found; that entry belongs to the last
/// symbol. The walk itself performs no bounds checking.
///
/// @param Table The GNU hash table for .dynsym.
/// @return One past the index of the last symbol located through the table,
///         or `symndx + 1` when the table has no buckets.
template <class ELFT>
static uint64_t getDynSymtabSize(const typename ELFT::GnuHash &Table) {
  using Elf_Word = typename ELFT::Word;

  // Nothing to scan when the table has no buckets.
  if (Table.nbuckets == 0)
    return Table.symndx + 1;

  // The highest bucket value marks where the last chain begins.
  uint64_t HighestBucket = 0;
  for (Elf_Word Bucket : Table.buckets()) {
    const uint64_t Candidate = static_cast<uint64_t>(Bucket);
    if (Candidate > HighestBucket)
      HighestBucket = Candidate;
  }

  // Chain entry that corresponds to the first symbol of the last chain.
  const Elf_Word *ChainEntry =
      reinterpret_cast<const Elf_Word *>(Table.values(HighestBucket).end());

  // Advance through the chain; a set low bit terminates it.
  uint64_t FinalSymIdx = HighestBucket;
  for (; (*ChainEntry & 1) == 0; ++ChainEntry)
    ++FinalSymIdx;

  return FinalSymIdx + 1;
}
/// Works out how many dynamic symbols the binary has.
/// Section headers are not consulted; the count is recovered from whichever
/// dynamic hash table is available. The GNU hash table is preferred when both
/// are present, and 0 is returned when neither is.
///
/// @param Dyn Entries with the locations of hash tables.
/// @param ElfFile The ElfFile that the section contents reside in.
/// @return The symbol count, or the error produced while mapping a hash
///         table address into the file.
template <class ELFT>
static Expected<uint64_t> getNumSyms(DynamicEntries &Dyn,
                                     const ELFFile<ELFT> &ElfFile) {
  using Elf_Hash = typename ELFT::Hash;
  using Elf_GnuHash = typename ELFT::GnuHash;

  // Without any hash table there is nothing to derive a count from.
  if (!Dyn.GnuHash.hasValue() && !Dyn.ElfHash.hasValue())
    return 0;

  // GNU hash table: scan it to find the upper bound of dynsym.
  if (Dyn.GnuHash.hasValue()) {
    Expected<const uint8_t *> GnuTableAddr =
        ElfFile.toMappedAddr(*Dyn.GnuHash);
    if (!GnuTableAddr)
      return GnuTableAddr.takeError();
    const auto *GnuTable =
        reinterpret_cast<const Elf_GnuHash *>(*GnuTableAddr);
    return getDynSymtabSize<ELFT>(*GnuTable);
  }

  // SYSV hash table: its nchain field is used as the symbol count.
  Expected<const uint8_t *> SysVTableAddr = ElfFile.toMappedAddr(*Dyn.ElfHash);
  if (!SysVTableAddr)
    return SysVTableAddr.takeError();
  const auto *SysVTable = reinterpret_cast<const Elf_Hash *>(*SysVTableAddr);
  return SysVTable->nchain;
}
/// Translates the type bits of a symbol's st_info field into an
/// ELFSymbolType.
/// Only the low four bits of the value are examined. STT_NOTYPE, STT_OBJECT,
/// STT_FUNC, and STT_TLS have dedicated mappings; every other value yields
/// ELFSymbolType::Unknown.
///
/// @param Info Binary symbol st_info to extract symbol type from.
/// @return The ELFSymbolType matching the symbol's type bits.
static ELFSymbolType convertInfoToType(uint8_t Info) {
  // Mask off everything except the low nibble before comparing.
  const uint8_t TypeBits = Info & 0xf;

  if (TypeBits == ELF::STT_NOTYPE)
    return ELFSymbolType::NoType;
  if (TypeBits == ELF::STT_OBJECT)
    return ELFSymbolType::Object;
  if (TypeBits == ELF::STT_FUNC)
    return ELFSymbolType::Func;
  if (TypeBits == ELF::STT_TLS)
    return ELFSymbolType::TLS;

  return ELFSymbolType::Unknown;
}
/// Builds an ELFSymbol whose fields are filled in from a binary ELFT::Sym.
/// Weak is set only for STB_WEAK binding, Undefined mirrors the raw symbol,
/// and Type comes from st_info. Function symbols are given a size of 0; all
/// other symbols keep the raw st_size.
///
/// @param SymName The desired name of the ELFSymbol.
/// @param RawSym ELFT::Sym to extract symbol information from.
/// @return The populated ELFSymbol.
template <class ELFT>
static ELFSymbol createELFSym(StringRef SymName,
                              const typename ELFT::Sym &RawSym) {
  ELFSymbol Result(SymName);

  Result.Weak = (RawSym.getBinding() == STB_WEAK);
  Result.Undefined = RawSym.isUndefined();
  Result.Type = convertInfoToType(RawSym.st_info);

  // Size is recorded for everything except functions.
  if (Result.Type != ELFSymbolType::Func) {
    Result.Size = RawSym.st_size;
  } else {
    Result.Size = 0;
  }

  return Result;
}
/// This function populates an ELFStub with symbols using information read
/// from an ELF binary.
///
/// The first entry of DynSym (the NULL symbol) is always skipped. A symbol is
/// added only when its binding is STB_GLOBAL or STB_WEAK and its visibility
/// is STV_DEFAULT or STV_PROTECTED; all others are ignored.
///
/// @param TargetStub ELFStub to add symbols to.
/// @param DynSym Range of dynamic symbols to add to TargetStub.
/// @param DynStr StringRef to the dynamic string table.
/// @return Error::success() once every symbol has been processed, or the
///         error produced while looking up a symbol's name in DynStr.
template <class ELFT>
static Error populateSymbols(ELFStub &TargetStub,
                             const typename ELFT::SymRange DynSym,
                             StringRef DynStr) {
  // Skips the first symbol since it's the NULL symbol. Entries are bound by
  // const reference so the symbol record is not copied on each iteration.
  for (const auto &RawSym : DynSym.drop_front(1)) {
    // If a symbol does not have global or weak binding, ignore it.
    const uint8_t Binding = RawSym.getBinding();
    if (Binding != STB_GLOBAL && Binding != STB_WEAK)
      continue;

    // If a symbol doesn't have default or protected visibility, ignore it.
    const uint8_t Visibility = RawSym.getVisibility();
    if (Visibility != STV_DEFAULT && Visibility != STV_PROTECTED)
      continue;

    // Resolve the symbol's name from the dynamic string table.
    Expected<StringRef> SymName = terminatedSubstr(DynStr, RawSym.st_name);
    if (!SymName)
      return SymName.takeError();

    // Create an ELFSymbol from the symbol table entry and hand it to the stub.
    TargetStub.Symbols.insert(createELFSym<ELFT>(*SymName, RawSym));
    // TODO: Populate symbol warning.
  }

  return Error::success();
}
/// Returns a new ELFStub with all members populated from an ELFObjectFile.
/// @param ElfObj Source ELFObjectFile.
template <class ELFT>
@ -133,6 +288,8 @@ static Expected<std::unique_ptr<ELFStub>>
buildStub(const ELFObjectFile<ELFT> &ElfObj) {
using Elf_Dyn_Range = typename ELFT::DynRange;
using Elf_Phdr_Range = typename ELFT::PhdrRange;
using Elf_Sym_Range = typename ELFT::SymRange;
using Elf_Sym = typename ELFT::Sym;
std::unique_ptr<ELFStub> DestStub = make_unique<ELFStub>();
const ELFFile<ELFT> *ElfFile = ElfObj.getELFFile();
// Fetch .dynamic table.
@ -152,7 +309,7 @@ buildStub(const ELFObjectFile<ELFT> &ElfObj) {
if (Error Err = populateDynamic<ELFT>(DynEnt, *DynTable))
return std::move(Err);
// Convert .dynstr address to an offset.
// Get pointer to in-memory location of .dynstr section.
Expected<const uint8_t *> DynStrPtr =
ElfFile->toMappedAddr(DynEnt.StrTabAddr);
if (!DynStrPtr)
@ -185,7 +342,25 @@ buildStub(const ELFObjectFile<ELFT> &ElfObj) {
DestStub->NeededLibs.push_back(*LibNameOrErr);
}
// TODO: Populate Symbols from .dynsym table and linked string table.
// Populate Symbols from .dynsym table and dynamic string table.
Expected<uint64_t> SymCount = getNumSyms(DynEnt, *ElfFile);
if (!SymCount)
return SymCount.takeError();
if (*SymCount > 0) {
// Get pointer to in-memory location of .dynsym section.
Expected<const uint8_t *> DynSymPtr =
ElfFile->toMappedAddr(DynEnt.DynSymAddr);
if (!DynSymPtr)
return appendToError(DynSymPtr.takeError(),
"when locating .dynsym section contents");
Elf_Sym_Range DynSyms =
ArrayRef<Elf_Sym>(reinterpret_cast<const Elf_Sym *>(*DynSymPtr),
*SymCount);
Error SymReadError = populateSymbols<ELFT>(*DestStub, DynSyms, DynStr);
if (SymReadError)
return appendToError(std::move(SymReadError),
"when reading dynamic symbols");
}
return std::move(DestStub);
}