[BOLT] Support instrumentation hook via DT_FINI_ARRAY (#67348)

BOLT currently hooks its instrumentation finalization function via
`DT_FINI`. However, this method of calling finalization routines is not
supported anymore on newer ABIs like RISC-V. `DT_FINI_ARRAY` is
preferred there.

This patch adds support for hooking into `DT_FINI_ARRAY` instead if the
binary does not have a `DT_FINI` entry. If it does, `DT_FINI` takes
precedence so this patch should not change how the currently supported
instrumentation targets behave.

`DT_FINI_ARRAY` points to an array in memory of `DT_FINI_ARRAYSZ` bytes.
It consists of pointer-length entries that contain the addresses of
finalization functions. However, the addresses are only filled-in by the
dynamic linker at load time using relative relocations. This makes
hooking via `DT_FINI_ARRAY` a bit more complicated than via `DT_FINI`.

The implementation works as follows:
- While scanning the binary: find the section that `DT_FINI_ARRAY`
points to, read its first dynamic relocation and use its addend to find
the address of the fini function we will use to hook;
- While writing the output file: overwrite the addend of the dynamic
relocation with the address of the runtime library's fini function.

Updating the dynamic relocation required a bit of boilerplate: since
dynamic relocations are stored in a `std::multiset` which doesn't
support getting mutable references to its items, functions were added to
`BinarySection` to take an existing relocation and insert a new one.
This commit is contained in:
Job Noorman 2023-11-08 11:01:10 +00:00 committed by GitHub
parent 8d72079077
commit 96b5e092dc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 287 additions and 10 deletions

View File

@ -680,6 +680,15 @@ public:
/// the execution of the binary is completed. /// the execution of the binary is completed.
std::optional<uint64_t> FiniFunctionAddress; std::optional<uint64_t> FiniFunctionAddress;
/// DT_FINI.
std::optional<uint64_t> FiniAddress;
/// DT_FINI_ARRAY. Only used when DT_FINI is not set.
std::optional<uint64_t> FiniArrayAddress;
/// DT_FINI_ARRAYSZ. Only used when DT_FINI is not set.
std::optional<uint64_t> FiniArraySize;
/// Page alignment used for code layout. /// Page alignment used for code layout.
uint64_t PageAlign{HugePageSize}; uint64_t PageAlign{HugePageSize};

View File

@ -375,8 +375,12 @@ public:
/// Add a dynamic relocation at the given /p Offset. /// Add a dynamic relocation at the given /p Offset.
void addDynamicRelocation(uint64_t Offset, MCSymbol *Symbol, uint64_t Type, void addDynamicRelocation(uint64_t Offset, MCSymbol *Symbol, uint64_t Type,
uint64_t Addend, uint64_t Value = 0) { uint64_t Addend, uint64_t Value = 0) {
assert(Offset < getSize() && "offset not within section bounds"); addDynamicRelocation(Relocation{Offset, Symbol, Type, Addend, Value});
DynamicRelocations.emplace(Relocation{Offset, Symbol, Type, Addend, Value}); }
void addDynamicRelocation(const Relocation &Reloc) {
assert(Reloc.Offset < getSize() && "offset not within section bounds");
DynamicRelocations.emplace(Reloc);
} }
/// Add relocation against the original contents of this section. /// Add relocation against the original contents of this section.
@ -410,6 +414,18 @@ public:
return Itr != DynamicRelocations.end() ? &*Itr : nullptr; return Itr != DynamicRelocations.end() ? &*Itr : nullptr;
} }
std::optional<Relocation> takeDynamicRelocationAt(uint64_t Offset) {
Relocation Key{Offset, 0, 0, 0, 0};
auto Itr = DynamicRelocations.find(Key);
if (Itr == DynamicRelocations.end())
return std::nullopt;
Relocation Reloc = *Itr;
DynamicRelocations.erase(Itr);
return Reloc;
}
uint64_t hash(const BinaryData &BD) const { uint64_t hash(const BinaryData &BD) const {
std::map<const BinaryData *, uint64_t> Cache; std::map<const BinaryData *, uint64_t> Cache;
return hash(BD, Cache); return hash(BD, Cache);

View File

@ -95,6 +95,15 @@ private:
/// from meta data in the file. /// from meta data in the file.
void discoverFileObjects(); void discoverFileObjects();
/// Check whether we should use DT_FINI or DT_FINI_ARRAY for instrumentation.
/// DT_FINI is preferred; DT_FINI_ARRAY is only used when no DT_FINI entry was
/// found.
Error discoverRtFiniAddress();
/// If DT_FINI_ARRAY is used for instrumentation, update the relocation of its
/// first entry to point to the instrumentation library's fini address.
void updateRtFiniReloc();
/// Create and initialize metadata rewriters for this instance. /// Create and initialize metadata rewriters for this instance.
void initializeMetadataManager(); void initializeMetadataManager();

View File

@ -365,7 +365,9 @@ static uint64_t encodeValueAArch64(uint64_t Type, uint64_t Value, uint64_t PC) {
switch (Type) { switch (Type) {
default: default:
llvm_unreachable("unsupported relocation"); llvm_unreachable("unsupported relocation");
case ELF::R_AARCH64_ABS16:
case ELF::R_AARCH64_ABS32: case ELF::R_AARCH64_ABS32:
case ELF::R_AARCH64_ABS64:
break; break;
case ELF::R_AARCH64_PREL16: case ELF::R_AARCH64_PREL16:
case ELF::R_AARCH64_PREL32: case ELF::R_AARCH64_PREL32:

View File

@ -704,6 +704,10 @@ Error RewriteInstance::run() {
adjustCommandLineOptions(); adjustCommandLineOptions();
discoverFileObjects(); discoverFileObjects();
if (opts::Instrument && !BC->IsStaticExecutable)
if (Error E = discoverRtFiniAddress())
return E;
preprocessProfileData(); preprocessProfileData();
// Skip disassembling if we have a translation table and we are running an // Skip disassembling if we have a translation table and we are running an
@ -740,6 +744,9 @@ Error RewriteInstance::run() {
updateMetadata(); updateMetadata();
if (opts::Instrument && !BC->IsStaticExecutable)
updateRtFiniReloc();
if (opts::LinuxKernelMode) { if (opts::LinuxKernelMode) {
errs() << "BOLT-WARNING: not writing the output file for Linux Kernel\n"; errs() << "BOLT-WARNING: not writing the output file for Linux Kernel\n";
return Error::success(); return Error::success();
@ -1280,6 +1287,77 @@ void RewriteInstance::discoverFileObjects() {
registerFragments(); registerFragments();
} }
/// Determine the address of the finalization function that instrumentation
/// will hook and store it in BC->FiniFunctionAddress.
///
/// A recorded DT_FINI value is used directly. Otherwise the address is taken
/// from the relocation at offset 0 of the section containing DT_FINI_ARRAY:
/// the addend of a dynamic relocation if there is one, else the value of a
/// static relocation. An error is returned when none of these is available.
Error RewriteInstance::discoverRtFiniAddress() {
  // DT_FINI wins whenever the binary provides it.
  if (BC->FiniAddress) {
    BC->FiniFunctionAddress = BC->FiniAddress;
    return Error::success();
  }

  // Without DT_FINI, both DT_FINI_ARRAY and DT_FINI_ARRAYSZ are required.
  const bool HasFiniArray = BC->FiniArrayAddress && BC->FiniArraySize;
  if (!HasFiniArray)
    return createStringError(
        std::errc::not_supported,
        "Instrumentation needs either DT_FINI or DT_FINI_ARRAY");

  // The array must be big enough to contain one pointer-sized slot.
  const auto SlotSize = BC->AsmInfo->getCodePointerSize();
  if (*BC->FiniArraySize < SlotSize)
    return createStringError(std::errc::not_supported,
                             "Need at least 1 DT_FINI_ARRAY slot");

  ErrorOr<BinarySection &> ArraySection =
      BC->getSectionForAddress(*BC->FiniArrayAddress);
  if (auto LookupErr = ArraySection.getError())
    return errorCodeToError(LookupErr);

  // First choice: dynamic relocation, whose addend is the function address.
  if (const Relocation *DynRel = ArraySection->getDynamicRelocationAt(0)) {
    BC->FiniFunctionAddress = DynRel->Addend;
    return Error::success();
  }

  // Second choice: static relocation, whose value is the function address.
  if (const Relocation *StaticRel = ArraySection->getRelocationAt(0)) {
    BC->FiniFunctionAddress = StaticRel->Value;
    return Error::success();
  }

  return createStringError(std::errc::not_supported,
                           "No relocation for first DT_FINI_ARRAY slot");
}
void RewriteInstance::updateRtFiniReloc() {
// Updating DT_FINI is handled by patchELFDynamic.
if (BC->FiniAddress)
return;
const RuntimeLibrary *RT = BC->getRuntimeLibrary();
if (!RT || !RT->getRuntimeFiniAddress())
return;
assert(BC->FiniArrayAddress && BC->FiniArraySize &&
"inconsistent .fini_array state");
ErrorOr<BinarySection &> FiniArraySection =
BC->getSectionForAddress(*BC->FiniArrayAddress);
assert(FiniArraySection && ".fini_array removed");
if (std::optional<Relocation> Reloc =
FiniArraySection->takeDynamicRelocationAt(0)) {
assert(Reloc->Addend == BC->FiniFunctionAddress &&
"inconsistent .fini_array dynamic relocation");
Reloc->Addend = RT->getRuntimeFiniAddress();
FiniArraySection->addDynamicRelocation(*Reloc);
}
// Update the static relocation by adding a pending relocation which will get
// patched when flushPendingRelocations is called in rewriteFile. Note that
// flushPendingRelocations will calculate the value to patch as
// "Symbol + Addend". Since we don't have a symbol, just set the addend to the
// desired value.
FiniArraySection->addPendingRelocation(Relocation{
/*Offset*/ 0, /*Symbol*/ nullptr, /*Type*/ Relocation::getAbs64(),
/*Addend*/ RT->getRuntimeFiniAddress(), /*Value*/ 0});
}
void RewriteInstance::registerFragments() { void RewriteInstance::registerFragments() {
if (!BC->HasSplitFunctions) if (!BC->HasSplitFunctions)
return; return;
@ -5135,7 +5213,13 @@ Error RewriteInstance::readELFDynamic(ELFObjectFile<ELFT> *File) {
} }
break; break;
case ELF::DT_FINI: case ELF::DT_FINI:
BC->FiniFunctionAddress = Dyn.getPtr(); BC->FiniAddress = Dyn.getPtr();
break;
case ELF::DT_FINI_ARRAY:
BC->FiniArrayAddress = Dyn.getPtr();
break;
case ELF::DT_FINI_ARRAYSZ:
BC->FiniArraySize = Dyn.getPtr();
break; break;
case ELF::DT_RELA: case ELF::DT_RELA:
DynamicRelocationsAddress = Dyn.getPtr(); DynamicRelocationsAddress = Dyn.getPtr();

View File

@ -65,13 +65,6 @@ void InstrumentationRuntimeLibrary::adjustCommandLineOptions(
exit(1); exit(1);
} }
if (!BC.FiniFunctionAddress && !BC.IsStaticExecutable) {
errs() << "BOLT-ERROR: input binary lacks DT_FINI entry in the dynamic "
"section but instrumentation currently relies on patching "
"DT_FINI to write the profile\n";
exit(1);
}
if ((opts::InstrumentationWaitForks || opts::InstrumentationSleepTime) && if ((opts::InstrumentationWaitForks || opts::InstrumentationSleepTime) &&
opts::InstrumentationFileAppendPID) { opts::InstrumentationFileAppendPID) {
errs() errs()

View File

@ -0,0 +1,103 @@
## Test the different ways of hooking the fini function for instrumentation (via
## DT_FINI and via DT_FINI_ARRAY). We test the latter for both PIE and non-PIE
## binaries because of the different ways of handling relocations (static or
## dynamic).
## All tests perform the following steps:
## - Compile and link for the case to be tested
## - Some sanity-checks on the dynamic section and relocations in the binary to
## verify it has the shape we want for testing:
## - DT_FINI or DT_FINI_ARRAY in dynamic section
## - No relative relocations for non-PIE
## - Instrument
## - Verify generated binary
# REQUIRES: system-linux,bolt-runtime,target=aarch64{{.*}}
# RUN: %clang %cflags -pie %s -Wl,-q -o %t.exe
# RUN: llvm-readelf -d %t.exe | FileCheck --check-prefix=DYN-FINI %s
# RUN: llvm-readelf -r %t.exe | FileCheck --check-prefix=RELOC-PIE %s
# RUN: llvm-bolt %t.exe -o %t --instrument
# RUN: llvm-readelf -drs %t | FileCheck --check-prefix=CHECK-FINI %s
# RUN: %clang %cflags -pie %s -Wl,-q,-fini=0 -o %t-no-fini.exe
# RUN: llvm-readelf -d %t-no-fini.exe | FileCheck --check-prefix=DYN-NO-FINI %s
# RUN: llvm-readelf -r %t-no-fini.exe | FileCheck --check-prefix=RELOC-PIE %s
# RUN: llvm-bolt %t-no-fini.exe -o %t-no-fini --instrument
# RUN: llvm-readelf -drs %t-no-fini | FileCheck --check-prefix=CHECK-NO-FINI %s
# RUN: llvm-readelf -ds -x .fini_array %t-no-fini | FileCheck --check-prefix=CHECK-NO-FINI-RELOC %s
## Create a dummy shared library to link against to force creation of the dynamic section.
# RUN: %clang %cflags %p/../Inputs/stub.c -fPIC -shared -o %t-stub.so
# RUN: %clang %cflags %s -no-pie -Wl,-q,-fini=0 %t-stub.so -o %t-no-pie-no-fini.exe
# RUN: llvm-readelf -r %t-no-pie-no-fini.exe | FileCheck --check-prefix=RELOC-NO-PIE %s
# RUN: llvm-bolt %t-no-pie-no-fini.exe -o %t-no-pie-no-fini --instrument
# RUN: llvm-readelf -ds -x .fini_array %t-no-pie-no-fini | FileCheck --check-prefix=CHECK-NO-PIE-NO-FINI %s
## With fini: dynamic section should contain DT_FINI
# DYN-FINI: (FINI)
## Without fini: dynamic section should only contain DT_FINI_ARRAY
# DYN-NO-FINI-NOT: (FINI)
# DYN-NO-FINI: (FINI_ARRAY)
# DYN-NO-FINI: (FINI_ARRAYSZ)
## With PIE: binary should have relative relocations
# RELOC-PIE: R_AARCH64_RELATIVE
## Without PIE: binary should not have relative relocations
# RELOC-NO-PIE-NOT: R_AARCH64_RELATIVE
## Check that DT_FINI is set to __bolt_runtime_fini
# CHECK-FINI: Dynamic section at offset {{.*}} contains {{.*}} entries:
# CHECK-FINI-DAG: (FINI) 0x[[FINI:[[:xdigit:]]+]]
# CHECK-FINI-DAG: (FINI_ARRAY) 0x[[FINI_ARRAY:[[:xdigit:]]+]]
## Check that the dynamic relocation at .fini_array was not patched
# CHECK-FINI: Relocation section '.rela.dyn' at offset {{.*}} contains {{.*}} entries
# CHECK-FINI-NOT: {{0+}}[[FINI_ARRAY]] {{.*}} R_AARCH64_RELATIVE [[FINI]]
# CHECK-FINI: Symbol table '.symtab' contains {{.*}} entries:
# CHECK-FINI: {{0+}}[[FINI]] {{.*}} __bolt_runtime_fini
## Check that DT_FINI_ARRAY has a dynamic relocation for __bolt_runtime_fini
# CHECK-NO-FINI: Dynamic section at offset {{.*}} contains {{.*}} entries:
# CHECK-NO-FINI-NOT: (FINI)
# CHECK-NO-FINI: (FINI_ARRAY) 0x[[FINI_ARRAY:[[:xdigit:]]+]]
# CHECK-NO-FINI: Relocation section '.rela.dyn' at offset {{.*}} contains {{.*}} entries
# CHECK-NO-FINI: {{0+}}[[FINI_ARRAY]] {{.*}} R_AARCH64_RELATIVE [[FINI_ADDR:[[:xdigit:]]+]]
# CHECK-NO-FINI: Symbol table '.symtab' contains {{.*}} entries:
# CHECK-NO-FINI: {{0+}}[[FINI_ADDR]] {{.*}} __bolt_runtime_fini
## Check that the static relocation in .fini_array is patched even for PIE
# CHECK-NO-FINI-RELOC: Dynamic section at offset {{.*}} contains {{.*}} entries:
# CHECK-NO-FINI-RELOC: (FINI_ARRAY) 0x[[FINI_ARRAY:[[:xdigit:]]+]]
# CHECK-NO-FINI-RELOC: Symbol table '.symtab' contains {{.*}} entries:
## Read bytes separately so we can reverse them later
# CHECK-NO-FINI-RELOC: {{0+}}[[FINI_ADDR_B0:[[:xdigit:]]{2}]][[FINI_ADDR_B1:[[:xdigit:]]{2}]][[FINI_ADDR_B2:[[:xdigit:]]{2}]][[FINI_ADDR_B3:[[:xdigit:]]{2}]] {{.*}} __bolt_runtime_fini
# CHECK-NO-FINI-RELOC: Hex dump of section '.fini_array':
# CHECK-NO-FINI-RELOC: 0x{{0+}}[[FINI_ARRAY]] [[FINI_ADDR_B3]][[FINI_ADDR_B2]][[FINI_ADDR_B1]][[FINI_ADDR_B0]] 00000000
## Check that DT_FINI_ARRAY has static relocation applied for __bolt_runtime_fini
# CHECK-NO-PIE-NO-FINI: Dynamic section at offset {{.*}} contains {{.*}} entries:
# CHECK-NO-PIE-NO-FINI-NOT: (FINI)
# CHECK-NO-PIE-NO-FINI: (FINI_ARRAY) 0x[[FINI_ARRAY:[a-f0-9]+]]
# CHECK-NO-PIE-NO-FINI: Symbol table '.symtab' contains {{.*}} entries:
## Read address bytes separately so we can reverse them later
# CHECK-NO-PIE-NO-FINI: {{0+}}[[FINI_ADDR_B0:[[:xdigit:]]{2}]][[FINI_ADDR_B1:[[:xdigit:]]{2}]][[FINI_ADDR_B2:[[:xdigit:]]{2}]][[FINI_ADDR_B3:[[:xdigit:]]{2}]] {{.*}} __bolt_runtime_fini
# CHECK-NO-PIE-NO-FINI: Hex dump of section '.fini_array':
# CHECK-NO-PIE-NO-FINI: 0x{{0+}}[[FINI_ARRAY]] [[FINI_ADDR_B3]][[FINI_ADDR_B2]][[FINI_ADDR_B1]][[FINI_ADDR_B0]] 00000000
.globl _start
.type _start, %function
_start:
# Dummy relocation to force relocation mode.
.reloc 0, R_AARCH64_NONE
ret
.size _start, .-_start
.globl _fini
.type _fini, %function
_fini:
ret
.size _fini, .-_fini
.section .fini_array,"aw"
.align 3
.dword _fini

View File

@ -0,0 +1,61 @@
# Test the different ways of hooking the fini function for instrumentation (via
# DT_FINI and via DT_FINI_ARRAY). We test the latter for both PIE and non-PIE
# binaries because of the different ways of handling relocations (static or
# dynamic).
# All tests perform the following steps:
# - Compile and link for the case to be tested
# - Some sanity-checks on the dynamic section and relocations in the binary to
# verify it has the shape we want for testing:
# - DT_FINI or DT_FINI_ARRAY in dynamic section
# - No relative relocations for non-PIE
# - Instrument
# - Run instrumented binary
# - Verify generated profile
REQUIRES: system-linux,bolt-runtime
RUN: %clang %cflags -pie %p/Inputs/basic-instrumentation.s -Wl,-q -o %t.exe
RUN: llvm-readelf -d %t.exe | FileCheck --check-prefix=DYN-FINI %s
RUN: llvm-readelf -r %t.exe | FileCheck --check-prefix=RELOC-PIE %s
RUN: llvm-bolt %t.exe -o %t --instrument \
RUN: --instrumentation-file=%t \
RUN: --instrumentation-file-append-pid
RUN: rm -f %t.*.fdata
RUN: %t
RUN: cat %t.*.fdata | FileCheck %s
RUN: %clang %cflags -pie %p/Inputs/basic-instrumentation.s -Wl,-q,-fini=0 -o %t-no-fini.exe
RUN: llvm-readelf -d %t-no-fini.exe | FileCheck --check-prefix=DYN-NO-FINI %s
RUN: llvm-readelf -r %t-no-fini.exe | FileCheck --check-prefix=RELOC-PIE %s
RUN: llvm-bolt %t-no-fini.exe -o %t-no-fini --instrument \
RUN: --instrumentation-file=%t-no-fini \
RUN: --instrumentation-file-append-pid
RUN: rm -f %t-no-fini.*.fdata
RUN: %t-no-fini
RUN: cat %t-no-fini.*.fdata | FileCheck %s
RUN: %clang %cflags -no-pie %p/Inputs/basic-instrumentation.s -Wl,-q,-fini=0 -o %t-no-pie-no-fini.exe
RUN: llvm-readelf -d %t-no-pie-no-fini.exe | FileCheck --check-prefix=DYN-NO-FINI %s
RUN: llvm-readelf -r %t-no-pie-no-fini.exe | FileCheck --check-prefix=RELOC-NO-PIE %s
RUN: llvm-bolt %t-no-pie-no-fini.exe -o %t-no-pie-no-fini --instrument \
RUN: --instrumentation-file=%t-no-pie-no-fini \
RUN: --instrumentation-file-append-pid
RUN: rm -f %t-no-pie-no-fini.*.fdata
RUN: %t-no-pie-no-fini
RUN: cat %t-no-pie-no-fini.*.fdata | FileCheck %s
# With fini: dynamic section should contain DT_FINI
DYN-FINI: (FINI)
# Without fini: dynamic section should only contain DT_FINI_ARRAY
DYN-NO-FINI-NOT: (FINI)
DYN-NO-FINI: (FINI_ARRAY)
DYN-NO-FINI: (FINI_ARRAYSZ)
# With PIE: binary should have relative relocations
RELOC-PIE: R_AARCH64_RELATIVE
# Without PIE: binary should not have relative relocations
RELOC-NO-PIE-NOT: R_AARCH64_RELATIVE
# The instrumented profile should at least say main was called once
CHECK: main 0 0 1{{$}}