mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-01-09 17:43:57 +00:00
821480d27f
Summary: Avoid directly allocating string and description tables in binary's static data region, since they are not needed during runtime except when writing the profile at exit. Change the runtime library to open the tables on disk and read only when necessary. (cherry picked from FBD16626030)
286 lines
9.0 KiB
C++
286 lines
9.0 KiB
C++
//===-- instr.cpp -----------------------------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
// This file contains code that is linked to the final binary with a function
// that is called at program exit to dump instrumented data collected during
// execution.
//
//===----------------------------------------------------------------------===//
//
// BOLT runtime instrumentation library for x86 Linux.
//
//===----------------------------------------------------------------------===//
|
|
|
|
#include <cstdint>
|
|
#include <elf.h>
|
|
|
|
// All extern declarations here need to be defined by BOLT itself.

// Array of counters inserted by instrumentation. Each counter is incremented
// at runtime whenever its point of interest (location) in the program is
// reached.
extern uint64_t __bolt_instr_locations[];
// How many counters there are.
extern uint32_t __bolt_instr_num_locs;
// Name of the file the collected data is dumped to.
extern char __bolt_instr_filename[];
|
|
|
|
// A program point expressed as "function name + offset". The name itself is
// not stored here; it lives in the string table and is referenced by its
// position in that table.
struct Location {
  uint32_t FunctionName; // Position of the function name in the string table
  uint32_t Offset;       // Offset that, together with the name, forms the
                         // location
};
|
|
|
|
// An edge description defines an instrumented edge in the program, fully
|
|
// identified by where the jump is located and its destination.
|
|
struct EdgeDescription {
|
|
Location From;
|
|
Location To;
|
|
};
|
|
|
|
// These need to be read from disk. They are generated by BOLT and written to
|
|
// an ELF note section in the binary itself.
|
|
struct InstrumentationInfo {
|
|
EdgeDescription *Descriptions;
|
|
char *Strings; // String table with function names used in this binary
|
|
int FileDesc; // File descriptor for the file on disk backing this
|
|
// information in memory via mmap
|
|
uint8_t *MMapPtr; // The mmap ptr
|
|
int MMapSize; // The mmap size
|
|
};
|
|
|
|
// Declare some syscall wrappers we use throughout this code to avoid linking
|
|
// against system libc.
|
|
// Issue system call number 2 (open on x86-64 Linux) directly, without libc.
// pathname, flags and mode are passed in rdi, rsi and rdx respectively. The
// raw value the kernel leaves in rax is returned unmodified.
static uint64_t __open(const char *pathname, uint64_t flags, uint64_t mode) {
  uint64_t Result;
  // rcx and r11 are listed as clobbered because the syscall instruction
  // overwrites them.
  __asm__ __volatile__("movq $2, %%rax\n"
                       "syscall"
                       : "=a"(Result)
                       : "D"(pathname), "S"(flags), "d"(mode)
                       : "cc", "rcx", "r11", "memory");
  return Result;
}
|
|
|
|
// Issue system call number 1 (write on x86-64 Linux) directly, without libc.
// fd, buf and count are passed in rdi, rsi and rdx respectively. The raw value
// the kernel leaves in rax is returned unmodified.
static uint64_t __write(uint64_t fd, const void *buf, uint64_t count) {
  uint64_t Result;
  __asm__ __volatile__("movq $1, %%rax\n"
                       "syscall\n"
                       : "=a"(Result)
                       : "D"(fd), "S"(buf), "d"(count)
                       : "cc", "rcx", "r11", "memory");
  return Result;
}
|
|
|
|
// Issue system call number 8 (lseek on x86-64 Linux) directly, without libc.
// fd, pos and whence are passed in rdi, rsi and rdx respectively. The raw
// value the kernel leaves in rax is returned unmodified.
static uint64_t __lseek(uint64_t fd, uint64_t pos, uint64_t whence) {
  uint64_t Result;
  __asm__ __volatile__("movq $8, %%rax\n"
                       "syscall\n"
                       : "=a"(Result)
                       : "D"(fd), "S"(pos), "d"(whence)
                       : "cc", "rcx", "r11", "memory");
  return Result;
}
|
|
|
|
// Issue system call number 3 (close on x86-64 Linux) directly, without libc.
// fd is passed in rdi. The value the kernel leaves in rax is returned,
// narrowed to int.
static int __close(uint64_t fd) {
  uint64_t Result;
  __asm__ __volatile__("movq $3, %%rax\n"
                       "syscall\n"
                       : "=a"(Result)
                       : "D"(fd)
                       : "cc", "rcx", "r11", "memory");
  return Result;
}
|
|
|
|
// Issue system call number 9 (mmap on x86-64 Linux) directly, without libc.
// addr, size and prot are passed in rdi, rsi and rdx; flags, fd and offset are
// passed in r10, r8 and r9. The raw value the kernel leaves in rax is returned
// as a pointer.
static void *__mmap(uint64_t addr, uint64_t size, uint64_t prot,
                    uint64_t flags, uint64_t fd, uint64_t offset) {
  // r10, r8 and r9 have no dedicated constraint letter, so the values are
  // pinned to those registers through explicit register variables.
  register uint64_t FlagsReg __asm__("r10") = flags;
  register uint64_t FdReg __asm__("r8") = fd;
  register uint64_t OffsetReg __asm__("r9") = offset;
  void *Mapped;
  __asm__ __volatile__("movq $9, %%rax\n"
                       "syscall\n"
                       : "=a"(Mapped)
                       : "D"(addr), "S"(size), "d"(prot), "r"(FlagsReg),
                         "r"(FdReg), "r"(OffsetReg)
                       : "cc", "rcx", "r11", "memory");
  return Mapped;
}
|
|
|
|
// Issue system call number 11 (munmap on x86-64 Linux) directly, without libc.
// addr and size are passed in rdi and rsi. The raw value the kernel leaves in
// rax is returned unmodified.
static uint64_t __munmap(void *addr, uint64_t size) {
  uint64_t Result;
  __asm__ __volatile__("movq $11, %%rax\n"
                       "syscall\n"
                       : "=a"(Result)
                       : "D"(addr), "S"(size)
                       : "cc", "rcx", "r11", "memory");
  return Result;
}
|
|
|
|
// Issue system call number 231 (exit_group on x86-64 Linux) directly, without
// libc, passing code in rdi as the exit status. The call is not expected to
// come back; should it do so, the value found in rax is returned.
static uint64_t __exit(uint64_t code) {
  uint64_t Result;
  __asm__ __volatile__("movq $231, %%rax\n"
                       "syscall\n"
                       : "=a"(Result)
                       : "D"(code)
                       : "cc", "rcx", "r11", "memory");
  return Result;
}
|
|
|
|
// Helper functions for writing strings to the .fdata file
|
|
|
|
// Write number Num using Base to the buffer in OutBuf, returns a pointer to
// the end of the string (one past the last digit written). No terminating NUL
// is written. Digits above 9 are emitted as lowercase letters.
//
// Num is 64-bit wide so that full-width counters can be printed. Base must be
// in the range [2, 16]; any other value would divide by zero, never terminate
// or index past the digit table, so it is replaced by base 10.
static char *intToStr(char *OutBuf, uint64_t Num, uint32_t Base) {
  const char *Chars = "0123456789abcdef";
  // Worst case is a 64-bit value printed in base 2: 64 digits.
  char Buf[64];
  char *Ptr = Buf;
  if (Base < 2 || Base > 16)
    Base = 10;
  // Digits are produced least-significant first. The do/while guarantees that
  // zero still yields the single digit '0'.
  do {
    *Ptr++ = Chars[Num % Base];
    Num /= Base;
  } while (Num);
  // Emit them in reverse to get the conventional most-significant-first order.
  while (Ptr != Buf)
    *OutBuf++ = *--Ptr;
  return OutBuf;
}
|
|
|
|
// Append the characters of Str to OutBuf and return a pointer just past the
// last character written. The terminating NUL of Str is not copied.
static char *strCopy(char *OutBuf, const char *Str) {
  for (const char *Src = Str; *Src != '\0'; ++Src, ++OutBuf)
    *OutBuf = *Src;
  return OutBuf;
}
|
|
|
|
// Print Msg to STDERR and quits with error code 1.
|
|
static void reportError(const char *Msg, uint64_t Size) {
|
|
__write(2, Msg, Size);
|
|
__exit(1);
|
|
}
|
|
|
|
// Perform a string comparison and returns zero if Str1 matches Str2, one
// otherwise. Compares at most Size characters and stops early at a NUL found
// in both strings. When Size is zero or negative nothing is compared and the
// strings are reported as matching.
static int compareStr(const char *Str1, const char *Str2, int Size) {
  // Checking Size before touching the strings keeps the "at most Size
  // characters" promise even for Size <= 0.
  for (; Size > 0; --Size, ++Str1, ++Str2) {
    if (*Str1 != *Str2)
      return 1;
    // Both strings ended at the same position: full match.
    if (*Str1 == '\0')
      return 0;
  }
  return 0;
}
|
|
|
|
// Write as a string in OutBuf an identifier for the program point at function
|
|
// whose name is in the string table index FuncStrIndex plus Offset.
|
|
static char *serializeLoc(const InstrumentationInfo &Info, char *OutBuf,
|
|
const Location Loc) {
|
|
// fdata location format: Type Name Offset
|
|
// Type 1 - regular symbol
|
|
OutBuf = strCopy(OutBuf, "1 ");
|
|
const char *Str = Info.Strings + Loc.FunctionName;
|
|
while (*Str) {
|
|
*OutBuf++ = *Str++;
|
|
}
|
|
*OutBuf++ = ' ';
|
|
OutBuf = intToStr(OutBuf, Loc.Offset, 16);
|
|
*OutBuf++ = ' ';
|
|
return OutBuf;
|
|
}
|
|
|
|
// Read and map to memory the descriptions written by BOLT into the executable's
|
|
// notes section
|
|
static InstrumentationInfo readDescriptions() {
|
|
InstrumentationInfo Result;
|
|
uint64_t FD = __open("/proc/self/exe",
|
|
/*flags=*/0 /*O_RDONLY*/,
|
|
/*mode=*/0666);
|
|
Result.FileDesc = FD;
|
|
|
|
// mmap our binary to memory
|
|
uint64_t Size = __lseek(FD, 0, 2 /*SEEK_END*/);
|
|
uint8_t *BinContents = reinterpret_cast<uint8_t *>(
|
|
__mmap(0, Size, 0x1 /* PROT_READ*/, 0x2 /* MAP_PRIVATE*/, FD, 0));
|
|
Result.MMapPtr = BinContents;
|
|
Result.MMapSize = Size;
|
|
Elf64_Ehdr *Hdr = reinterpret_cast<Elf64_Ehdr *>(BinContents);
|
|
Elf64_Shdr *Shdr = reinterpret_cast<Elf64_Shdr *>(BinContents + Hdr->e_shoff);
|
|
Elf64_Shdr *StringTblHeader = reinterpret_cast<Elf64_Shdr *>(
|
|
BinContents + Hdr->e_shoff + Hdr->e_shstrndx * Hdr->e_shentsize);
|
|
|
|
// Find .bolt.instr.tables with the data we need and set pointers to it
|
|
for (int I = 0; I < Hdr->e_shnum; ++I) {
|
|
char *SecName = reinterpret_cast<char *>(
|
|
BinContents + StringTblHeader->sh_offset + Shdr->sh_name);
|
|
if (compareStr(SecName, ".bolt.instr.tables", 64) != 0) {
|
|
Shdr = reinterpret_cast<Elf64_Shdr *>(BinContents + Hdr->e_shoff +
|
|
(I + 1) * Hdr->e_shentsize);
|
|
continue;
|
|
}
|
|
// Actual contents of the ELF note start after offset 20 decimal:
|
|
// Offset 0: Producer name size (4 bytes)
|
|
// Offset 4: Contents size (4 bytes)
|
|
// Offset 8: Note type (4 bytes)
|
|
// Offset 12: Producer name (BOLT\0) (5 bytes + align to 4-byte boundary)
|
|
// Offset 20: Contents
|
|
Result.Descriptions =
|
|
reinterpret_cast<EdgeDescription *>(BinContents + Shdr->sh_offset + 20);
|
|
// String table is located after the full EdgeDescriptions table containing
|
|
// __bolt_instr_num_locs entries is finished
|
|
Result.Strings = reinterpret_cast<char *>(
|
|
BinContents + Shdr->sh_offset + 20 +
|
|
(__bolt_instr_num_locs * sizeof(EdgeDescription)));
|
|
return Result;
|
|
}
|
|
const char ErrMsg[] =
|
|
"BOLT instrumentation runtime error: could not find section "
|
|
".bolt.instr.tables\n";
|
|
reportError(ErrMsg, sizeof(ErrMsg));
|
|
return Result;
|
|
}
|
|
|
|
// This is the entry point called at program exit. BOLT patches the executable's
|
|
// FINI entry in the .dynamic section with the address of this function. Our
|
|
// goal here is to flush to disk all instrumentation data in memory, using
|
|
// BOLT's fdata format.
|
|
extern "C" void __bolt_instr_data_dump() {
|
|
const InstrumentationInfo Info = readDescriptions();
|
|
|
|
uint64_t FD = __open(__bolt_instr_filename,
|
|
/*flags=*/0x241 /*O_WRONLY|O_TRUNC|O_CREAT*/,
|
|
/*mode=*/0666);
|
|
|
|
for (int I = 0, E = __bolt_instr_num_locs; I < E; ++I) {
|
|
char LineBuf[2000];
|
|
char *Ptr = LineBuf;
|
|
uint32_t HitCount = __bolt_instr_locations[I];
|
|
if (!HitCount)
|
|
continue;
|
|
|
|
EdgeDescription *Desc = &Info.Descriptions[I];
|
|
Ptr = serializeLoc(Info, Ptr, Desc->From);
|
|
Ptr = serializeLoc(Info, Ptr, Desc->To);
|
|
Ptr = strCopy(Ptr, "0 ");
|
|
Ptr = intToStr(Ptr, HitCount, 10);
|
|
*Ptr++ = '\n';
|
|
__write(FD, LineBuf, Ptr - LineBuf);
|
|
}
|
|
__close(FD);
|
|
__munmap(Info.MMapPtr, Info.MMapSize);
|
|
__close(Info.FileDesc);
|
|
}
|