Reland "[BOLT][Instrumentation] Put Allocator itslef in shared memory by default"

The issue was caused by the absence of placement new definition. It
worked for clang and thus passed Phabricator checks, but broke when
compiled with GCC on buildbot.
Full problem description: https://reviews.llvm.org/D153771#4468239

Original patch description:
In absence of instrumentation-file-append-pid option,
global allocator uses shared pages for allocation. However, since it is a
global variable, it gets COW'd after fork if instrumentation-sleep-time
is used, or if a process forks by itself. This means it handles the same
pages to every process which causes hash table corruption. Thus, if we
want shared pages, we need to put the allocator itself in a shared page,
which we do in this commit in __bolt_instr_setup.
I also added a couple of assertions to sanity-check the hash table.

Reviewed By: rafauler, Amir
Differential Revision: https://reviews.llvm.org/D153771
This commit is contained in:
Denis Revunov 2023-06-20 15:00:36 +00:00 committed by Denis Revunov
parent 7dfcd4b7c9
commit 0cc19b564d

View File

@ -199,6 +199,9 @@ public:
__munmap(StackBase, MaxSize);
}
// Placement operator to construct allocator in possibly shared mmaped memory
static void *operator new(size_t, void *Ptr) { return Ptr; };
private:
static constexpr uint64_t Magic = 0x1122334455667788ull;
uint64_t MaxSize = 0xa00000;
@ -210,7 +213,12 @@ private:
/// Used for allocating indirect call instrumentation counters. Initialized by
/// __bolt_instr_setup, our initialization routine.
BumpPtrAllocator GlobalAlloc;
BumpPtrAllocator *GlobalAlloc;
// Storage for GlobalAlloc which can be shared if not using
// instrumentation-file-append-pid.
void *GlobalMetadataStorage;
} // anonymous namespace
// User-defined placement new operators. We only use those (as opposed to
@ -295,7 +303,17 @@ public:
/// Increment by 1 the value of \p Key. If it is not in this table, it will be
/// added to the table and its value set to 1.
void incrementVal(uint64_t Key, BumpPtrAllocator &Alloc) {
++get(Key, Alloc).Val;
if (!__bolt_instr_conservative) {
TryLock L(M);
if (!L.isLocked())
return;
auto &E = getOrAllocEntry(Key, Alloc);
++E.Val;
return;
}
Lock L(M);
auto &E = getOrAllocEntry(Key, Alloc);
++E.Val;
}
/// Basic member accessing interface. Here we pass the allocator explicitly to
@ -339,10 +357,10 @@ private:
if (Entry.Key == VacantMarker)
continue;
if (Entry.Key & FollowUpTableMarker) {
forEachElement(Callback, IncSize,
reinterpret_cast<MapEntry *>(Entry.Key &
~FollowUpTableMarker),
args...);
MapEntry *Next =
reinterpret_cast<MapEntry *>(Entry.Key & ~FollowUpTableMarker);
assert(Next != Entries, "Circular reference!");
forEachElement(Callback, IncSize, Next, args...);
continue;
}
Callback(Entry, args...);
@ -407,8 +425,11 @@ private:
}
MapEntry &getOrAllocEntry(uint64_t Key, BumpPtrAllocator &Alloc) {
if (TableRoot)
return getEntry(TableRoot, Key, Key, Alloc, 0);
if (TableRoot) {
MapEntry &E = getEntry(TableRoot, Key, Key, Alloc, 0);
assert(!(E.Key & FollowUpTableMarker), "Invalid entry!");
return E;
}
return firstAllocation(Key, Alloc);
}
};
@ -1606,13 +1627,19 @@ extern "C" void __attribute((force_align_arg_pointer)) __bolt_instr_setup() {
MAP_ANONYMOUS | MapPrivateOrShared | MAP_FIXED, -1, 0);
assert(Ret != MAP_FAILED, "__bolt_instr_setup: Failed to mmap counters!");
// Conservatively reserve 100MiB shared pages
GlobalAlloc.setMaxSize(0x6400000);
GlobalAlloc.setShared(Shared);
GlobalWriteProfileMutex = new (GlobalAlloc, 0) Mutex();
GlobalMetadataStorage = __mmap(0, 4096, PROT_READ | PROT_WRITE,
MapPrivateOrShared | MAP_ANONYMOUS, -1, 0);
assert(GlobalMetadataStorage != MAP_FAILED,
"__bolt_instr_setup: failed to mmap page for metadata!");
GlobalAlloc = new (GlobalMetadataStorage) BumpPtrAllocator;
// Conservatively reserve 100MiB
GlobalAlloc->setMaxSize(0x6400000);
GlobalAlloc->setShared(Shared);
GlobalWriteProfileMutex = new (*GlobalAlloc, 0) Mutex();
if (__bolt_instr_num_ind_calls > 0)
GlobalIndCallCounters =
new (GlobalAlloc, 0) IndirectCallHashTable[__bolt_instr_num_ind_calls];
new (*GlobalAlloc, 0) IndirectCallHashTable[__bolt_instr_num_ind_calls];
if (__bolt_instr_sleep_time != 0) {
// Separate instrumented process to the own process group
@ -1627,7 +1654,7 @@ extern "C" void __attribute((force_align_arg_pointer)) __bolt_instr_setup() {
extern "C" __attribute((force_align_arg_pointer)) void
instrumentIndirectCall(uint64_t Target, uint64_t IndCallID) {
GlobalIndCallCounters[IndCallID].incrementVal(Target, GlobalAlloc);
GlobalIndCallCounters[IndCallID].incrementVal(Target, *GlobalAlloc);
}
/// We receive as in-stack arguments the identifier of the indirect call site