[llvm-elfabi] Support ELF file that lacks .gnu.hash section

Before this change, when reading an ELF file, elfabi determined the number
of entries in .dynsym by reading the .gnu.hash section. This change makes
elfabi read the section headers directly first, which allows elfabi to
work on ELF files that do not have a .gnu.hash section.

Differential Revision: https://reviews.llvm.org/D93362
This commit is contained in:
Haowei Wu 2021-01-26 11:34:51 -08:00
parent 4210b87020
commit 15313f64be
3 changed files with 211 additions and 57 deletions

View File

@ -217,6 +217,8 @@ public:
Expected<ArrayRef<Elf_Word>> getSHNDXTable(const Elf_Shdr &Section,
Elf_Shdr_Range Sections) const;
/// Returns the number of entries in the dynamic symbol table, or an error if
/// the file cannot be parsed. The result is 0 when no dynamic symbol table
/// can be located.
Expected<uint64_t> getDynSymtabSize() const;
StringRef getRelocationTypeName(uint32_t Type) const;
void getRelocationTypeName(uint32_t Type,
SmallVectorImpl<char> &Result) const;
@ -651,6 +653,99 @@ ELFFile<ELFT>::getSectionStringTable(Elf_Shdr_Range Sections,
return getStringTable(Sections[Index], WarnHandler);
}
/// This function finds the number of dynamic symbols using a GNU hash table.
///
/// The largest symbol index stored in any bucket is the first symbol of the
/// last chain. Walking that chain up to its terminator (a hash value whose
/// low bit is set) gives the index of the last dynamic symbol.
///
/// @param Table The GNU hash table for .dynsym.
/// @param BufEnd One past the last byte of the buffer that holds \p Table.
///        Chain entries are only read when they lie entirely before it.
/// @returns The number of dynamic symbols, or a parse_failed error when the
///          buffer ends before a chain terminator is found.
template <class ELFT>
static Expected<uint64_t>
getDynSymtabSizeFromGnuHash(const typename ELFT::GnuHash &Table,
                            const void *BufEnd) {
  using Elf_Word = typename ELFT::Word;
  if (Table.nbuckets == 0)
    return Table.symndx + 1;
  uint64_t LastSymIdx = 0;
  // Find the index of the first symbol in the last chain.
  for (Elf_Word Val : Table.buckets())
    LastSymIdx = std::max(LastSymIdx, (uint64_t)Val);
  // If no bucket refers to a hashed symbol (all of them are below symndx,
  // e.g. every bucket is empty) there is no chain to walk. Asking for
  // values(LastSymIdx) would then request a negative number of entries and
  // produce a wild pointer, so report the symbols that precede the hashed
  // range instead; this is the best lower bound the table can provide.
  const uint64_t FirstHashedSym = Table.symndx;
  if (LastSymIdx < FirstHashedSym)
    return FirstHashedSym;
  const auto *const End = static_cast<const uint8_t *>(BufEnd);
  const Elf_Word *It =
      reinterpret_cast<const Elf_Word *>(Table.values(LastSymIdx).end());
  // An entry may be dereferenced only when all of its bytes precede BufEnd;
  // checking just the first byte would allow a truncated trailing word to be
  // read past the end of the buffer.
  auto IsReadable = [End](const Elf_Word *Entry) {
    const auto *First = reinterpret_cast<const uint8_t *>(Entry);
    return First < End &&
           static_cast<uint64_t>(End - First) >= sizeof(Elf_Word);
  };
  // Locate the end of the chain to find the last symbol index.
  while (IsReadable(It) && (*It & 1) == 0) {
    ++LastSymIdx;
    ++It;
  }
  if (!IsReadable(It)) {
    return createStringError(
        object_error::parse_failed,
        "no terminator found for GNU hash section before buffer end");
  }
  return LastSymIdx + 1;
}
/// This function determines the number of dynamic symbols. It reads section
/// headers first. If section headers are not available, the number of
/// symbols will be inferred by parsing dynamic hash tables.
///
/// @returns The number of entries in .dynsym. Returns 0 when section headers
///          exist but none is SHT_DYNSYM, or when there are no section
///          headers and neither DT_GNU_HASH nor DT_HASH is present. Returns
///          an error when the section headers or dynamic entries cannot be
///          read, when the SHT_DYNSYM header has an sh_entsize of 0 or an
///          sh_size that is not a multiple of sh_entsize, or when a hash
///          table address cannot be mapped.
template <class ELFT>
Expected<uint64_t> ELFFile<ELFT>::getDynSymtabSize() const {
  // Read .dynsym section header first if available.
  Expected<Elf_Shdr_Range> SectionsOrError = sections();
  if (!SectionsOrError)
    return SectionsOrError.takeError();
  for (const Elf_Shdr &Sec : *SectionsOrError) {
    if (Sec.sh_type == ELF::SHT_DYNSYM) {
      // sh_entsize comes straight from the file. Reject 0 explicitly, as the
      // modulo and division below would otherwise divide by zero.
      if (Sec.sh_entsize == 0) {
        return createStringError(object_error::parse_failed,
                                 "SHT_DYNSYM section has sh_entsize of 0");
      }
      if (Sec.sh_size % Sec.sh_entsize != 0) {
        return createStringError(object_error::parse_failed,
                                 "SHT_DYNSYM section has sh_size (" +
                                     Twine(Sec.sh_size) + ") % sh_entsize (" +
                                     Twine(Sec.sh_entsize) + ") that is not 0");
      }
      return Sec.sh_size / Sec.sh_entsize;
    }
  }

  if (!SectionsOrError->empty()) {
    // Section headers are available but .dynsym header is not found.
    // Return 0 as .dynsym does not exist.
    return 0;
  }

  // Section headers do not exist. Falling back to infer
  // upper bound of .dynsym from .gnu.hash and .hash.
  Expected<Elf_Dyn_Range> DynTable = dynamicEntries();
  if (!DynTable)
    return DynTable.takeError();
  llvm::Optional<uint64_t> ElfHash;
  llvm::Optional<uint64_t> ElfGnuHash;
  // Remember the addresses of both hash tables; if a tag is repeated the
  // last occurrence wins.
  for (const Elf_Dyn &Entry : *DynTable) {
    switch (Entry.d_tag) {
    case ELF::DT_HASH:
      ElfHash = Entry.d_un.d_ptr;
      break;
    case ELF::DT_GNU_HASH:
      ElfGnuHash = Entry.d_un.d_ptr;
      break;
    }
  }

  // The GNU hash table takes precedence when both tables are present.
  if (ElfGnuHash) {
    Expected<const uint8_t *> TablePtr = toMappedAddr(*ElfGnuHash);
    if (!TablePtr)
      return TablePtr.takeError();
    const Elf_GnuHash *Table =
        reinterpret_cast<const Elf_GnuHash *>(TablePtr.get());
    return getDynSymtabSizeFromGnuHash<ELFT>(*Table, this->Buf.bytes_end());
  }

  // Search SYSV hash table to try to find the upper bound of dynsym.
  if (ElfHash) {
    Expected<const uint8_t *> TablePtr = toMappedAddr(*ElfHash);
    if (!TablePtr)
      return TablePtr.takeError();
    // NOTE(review): nchain is read without a visible check that the whole
    // table header lies inside the buffer; confirm toMappedAddr covers this.
    const Elf_Hash *Table = reinterpret_cast<const Elf_Hash *>(TablePtr.get());
    return Table->nchain;
  }
  return 0;
}
/// Constructs an ELFFile over \p Object; the StringRef is stored in Buf.
template <class ELFT> ELFFile<ELFT>::ELFFile(StringRef Object) : Buf(Object) {}
template <class ELFT>

View File

@ -440,62 +440,6 @@ static Error populateDynamic(DynamicEntries &Dyn,
return Error::success();
}
/// Computes how many dynamic symbols a GNU hash table describes.
///
/// The highest symbol index found in the buckets starts the last chain; the
/// chain is then followed until an entry with its low bit set is reached.
///
/// @param Table The GNU hash table for .dynsym.
template <class ELFT>
static uint64_t getDynSymtabSize(const typename ELFT::GnuHash &Table) {
  using Elf_Word = typename ELFT::Word;
  if (Table.nbuckets == 0)
    return Table.symndx + 1;

  // The largest bucket value is the first symbol of the last chain.
  uint64_t MaxBucket = 0;
  for (Elf_Word Bucket : Table.buckets()) {
    const uint64_t Candidate = static_cast<uint64_t>(Bucket);
    if (MaxBucket < Candidate)
      MaxBucket = Candidate;
  }

  const Elf_Word *ChainEntry =
      reinterpret_cast<const Elf_Word *>(Table.values(MaxBucket).end());
  // Step through the chain; each non-terminating entry is one more symbol.
  uint64_t SymIdx = MaxBucket;
  for (; (*ChainEntry & 1) == 0; ++ChainEntry)
    ++SymIdx;
  return SymIdx + 1;
}
/// Determines the number of dynamic symbols from the dynamic hash tables,
/// for use when section headers cannot be consulted.
///
/// The GNU hash table is used when present; otherwise the SYSV hash table's
/// nchain field is returned. If neither table is present the result is 0.
///
/// @param Dyn Entries with the locations of hash tables.
/// @param ElfFile The ElfFile that the section contents reside in.
template <class ELFT>
static Expected<uint64_t> getNumSyms(DynamicEntries &Dyn,
                                     const ELFFile<ELFT> &ElfFile) {
  // The GNU hash table is preferred for finding the upper bound of dynsym.
  if (Dyn.GnuHash.hasValue()) {
    Expected<const uint8_t *> GnuHashAddr = ElfFile.toMappedAddr(*Dyn.GnuHash);
    if (!GnuHashAddr)
      return GnuHashAddr.takeError();
    const auto &GnuTable =
        *reinterpret_cast<const typename ELFT::GnuHash *>(GnuHashAddr.get());
    return getDynSymtabSize<ELFT>(GnuTable);
  }

  // With no hash table at all there is nothing to infer the count from.
  if (!Dyn.ElfHash.hasValue())
    return 0;

  // Fall back to the SYSV hash table.
  Expected<const uint8_t *> SysvHashAddr = ElfFile.toMappedAddr(*Dyn.ElfHash);
  if (!SysvHashAddr)
    return SysvHashAddr.takeError();
  const auto *SysvTable =
      reinterpret_cast<const typename ELFT::Hash *>(SysvHashAddr.get());
  return SysvTable->nchain;
}
/// This function extracts symbol type from a symbol's st_info member and
/// maps it to an ELFSymbolType enum.
/// Currently, STT_NOTYPE, STT_OBJECT, STT_FUNC, and STT_TLS are supported.
@ -637,7 +581,7 @@ buildStub(const ELFObjectFile<ELFT> &ElfObj) {
}
// Populate Symbols from .dynsym table and dynamic string table.
Expected<uint64_t> SymCount = getNumSyms(DynEnt, ElfFile);
Expected<uint64_t> SymCount = ElfFile.getDynSymtabSize();
if (!SymCount)
return SymCount.takeError();
if (*SymCount > 0) {

View File

@ -0,0 +1,115 @@
## Test reading ELF with .dynsym under the following conditions:
## * Section headers are available.
## * Section headers are stripped but there is a DT_GNU_HASH dynamic tag.
## * Section headers are stripped but there is a DT_HASH dynamic tag.
## Test if llvm-elfabi reads DT_SYMTAB size through section headers by putting the wrong terminator in DT_GNU_HASH.
# RUN: yaml2obj %s -o %tfull -DGNUHASHVALUE="[0x9]" -DTAG1="DT_GNU_HASH" -DVAL1="0xC00"
# RUN: llvm-elfabi --elf %tfull --emit-tbe=- | FileCheck %s
## Test if llvm-elfabi fails to read DT_SYMTAB size through section headers when the value of sh_entsize is invalid.
# RUN: yaml2obj %s -o %tfull -DGNUHASHVALUE="[0x9]" -DTAG1="DT_GNU_HASH" -DVAL1="0xC00" -DENTSIZE="0x19"
# RUN: not llvm-elfabi --elf %tfull --emit-tbe=- 2>&1 | FileCheck %s --check-prefix=BADENTSIZE
## Test if llvm-elfabi reads DT_SYMTAB size through DT_GNU_HASH.
# RUN: yaml2obj %s -o %tw.gnu.hash -DGNUHASHVALUE="[0x8, 0x9]" -DTAG1="DT_GNU_HASH" -DVAL1="0xC00" -DNOHEADER="true"
# RUN: llvm-elfabi --elf %tw.gnu.hash --emit-tbe=- | FileCheck %s
## Test if llvm-elfabi fails to read DT_SYMTAB size through DT_GNU_HASH when there is no terminator.
# RUN: yaml2obj %s -o %tw.gnu.hash -DGNUHASHVALUE="[0x8, 0xA]" -DTAG1="DT_GNU_HASH" -DVAL1="0xC00" -DNOHEADER="true"
# RUN: not llvm-elfabi --elf %tw.gnu.hash --emit-tbe=- 2>&1 | FileCheck %s --check-prefix=NOTERMINATOR
# CHECK: --- !tapi-tbe
# CHECK-NEXT: TbeVersion: 1.0
# CHECK-NEXT: Arch: AArch64
# CHECK-NEXT: Symbols:
# CHECK-NEXT: bar: { Type: Object, Size: 0, Undefined: true }
# CHECK-NEXT: foo: { Type: Func, Undefined: true }
# CHECK-NEXT: ...
# BADENTSIZE: SHT_DYNSYM section has sh_size (72) % sh_entsize (25) that is not 0
# NOTERMINATOR: error: no terminator found for GNU hash section before buffer end
--- !ELF
FileHeader:
Class: ELFCLASS64
Data: ELFDATA2LSB
Type: ET_DYN
Machine: EM_AARCH64
Sections:
- Name: .text
Type: SHT_PROGBITS
- Name: .data
Type: SHT_PROGBITS
- Name: .strtab
Type: SHT_STRTAB
- Name: .shstrtab
Type: SHT_STRTAB
- Name: .dynsym
Type: SHT_DYNSYM
Flags: [ SHF_ALLOC ]
EntSize: [[ENTSIZE=0x18]]
Address: 0x400
AddressAlign: 0x400
- Name: .dynstr
Type: SHT_STRTAB
Flags: [ SHF_ALLOC ]
Address: 0x600
AddressAlign: 0x200
- Name: .dynamic
Type: SHT_DYNAMIC
Flags: [ SHF_ALLOC ]
Address: 0x800
AddressAlign: 0x200
Entries:
- Tag: DT_STRTAB
Value: 0x600
- Tag: DT_STRSZ
Value: 9
- Tag: DT_SYMTAB
Value: 0x400
- Tag: [[TAG1]]
Value: [[VAL1]]
- Tag: DT_NULL
Value: 0
- Name: .hash
Type: SHT_HASH
Flags: [ SHF_ALLOC ]
Address: 0xA00
AddressAlign: 0x200
Bucket: [ 1 ]
Chain: [ 1, 2, 3 ]
- Name: .gnu.hash
Type: SHT_GNU_HASH
Flags: [ SHF_ALLOC ]
Address: 0xC00
AddressAlign: 0x200
Header:
SymNdx: 0x1
Shift2: 0x2
MaskWords: 2
NBuckets: 2
BloomFilter: [0x3, 0x4]
HashBuckets: [0x0, 0x1]
HashValues: [[GNUHASHVALUE]]
DynamicSymbols:
- Name: foo
Type: STT_FUNC
Value: 0x100
Binding: 1
- Name: bar
Type: STT_OBJECT
Value: 0x200
Binding: 1
ProgramHeaders:
- Type: PT_LOAD
VAddr: 0x400
FirstSec: .dynsym
LastSec: .gnu.hash
- Type: PT_DYNAMIC
VAddr: 0x800
FirstSec: .dynamic
LastSec: .dynamic
SectionHeaderTable:
NoHeaders: [[NOHEADER=false]]