Revert r348335 "[XRay] Move-only Allocator, FunctionCallTrie, and Array"
.. and also the follow-ups r348336 r348338. It broke stand-alone compiler-rt
builds with GCC 4.8:

In file included from /work/llvm/projects/compiler-rt/lib/xray/xray_function_call_trie.h:20:0,
                 from /work/llvm/projects/compiler-rt/lib/xray/xray_profile_collector.h:21,
                 from /work/llvm/projects/compiler-rt/lib/xray/xray_profile_collector.cc:15:
/work/llvm/projects/compiler-rt/lib/xray/xray_segmented_array.h: In instantiation of ‘T* __xray::Array<T>::AppendEmplace(Args&& ...) [with Args = {const __xray::FunctionCallTrie::mergeInto(__xray::FunctionCallTrie&) const::NodeAndTarget&}; T = __xray::FunctionCallTrie::mergeInto(__xray::FunctionCallTrie&) const::NodeAndTarget]’:
/work/llvm/projects/compiler-rt/lib/xray/xray_segmented_array.h:383:71:   required from ‘T* __xray::Array<T>::Append(const T&) [with T = __xray::FunctionCallTrie::mergeInto(__xray::FunctionCallTrie&) const::NodeAndTarget]’
/work/llvm/projects/compiler-rt/lib/xray/xray_function_call_trie.h:517:54:   required from here
/work/llvm/projects/compiler-rt/lib/xray/xray_segmented_array.h:378:5: error: could not convert ‘{std::forward<const __xray::FunctionCallTrie::mergeInto(__xray::FunctionCallTrie&) const::NodeAndTarget&>((* & args#0))}’ from ‘<brace-enclosed initializer list>’ to ‘__xray::FunctionCallTrie::mergeInto(__xray::FunctionCallTrie&) const::NodeAndTarget’
     new (AlignedOffset) T{std::forward<Args>(args)...};
     ^
/work/llvm/projects/compiler-rt/lib/xray/xray_segmented_array.h: In instantiation of ‘T* __xray::Array<T>::AppendEmplace(Args&& ...) [with Args = {const __xray::profileCollectorService::{anonymous}::ThreadTrie&}; T = __xray::profileCollectorService::{anonymous}::ThreadTrie]’:
/work/llvm/projects/compiler-rt/lib/xray/xray_segmented_array.h:383:71:   required from ‘T* __xray::Array<T>::Append(const T&) [with T = __xray::profileCollectorService::{anonymous}::ThreadTrie]’
/work/llvm/projects/compiler-rt/lib/xray/xray_profile_collector.cc:98:34:   required from here
/work/llvm/projects/compiler-rt/lib/xray/xray_segmented_array.h:378:5: error: could not convert ‘{std::forward<const __xray::profileCollectorService::{anonymous}::ThreadTrie&>((* & args#0))}’ from ‘<brace-enclosed initializer list>’ to ‘__xray::profileCollectorService::{anonymous}::ThreadTrie’
/work/llvm/projects/compiler-rt/lib/xray/xray_segmented_array.h: In instantiation of ‘T* __xray::Array<T>::AppendEmplace(Args&& ...) [with Args = {const __xray::profileCollectorService::{anonymous}::ProfileBuffer&}; T = __xray::profileCollectorService::{anonymous}::ProfileBuffer]’:
/work/llvm/projects/compiler-rt/lib/xray/xray_segmented_array.h:383:71:   required from ‘T* __xray::Array<T>::Append(const T&) [with T = __xray::profileCollectorService::{anonymous}::ProfileBuffer]’
/work/llvm/projects/compiler-rt/lib/xray/xray_profile_collector.cc:244:44:   required from here
/work/llvm/projects/compiler-rt/lib/xray/xray_segmented_array.h:378:5: error: could not convert ‘{std::forward<const __xray::profileCollectorService::{anonymous}::ProfileBuffer&>((* & args#0))}’ from ‘<brace-enclosed initializer list>’ to ‘__xray::profileCollectorService::{anonymous}::ProfileBuffer’
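For context, the diagnostics above come from the braced placement-new in Array<T>::AppendEmplace, which ends up copy-constructing an aggregate through a braced-init-list when reached via Append(const T&); GCC 4.8 predates the defect-report resolution that makes T{lvalue_of_T} a copy rather than aggregate initialization. The following is a minimal, self-contained sketch of that shape only; the type names and members are illustrative stand-ins, not the actual XRay code:

// Sketch only: reproduces the shape of the failing construct, not the real
// XRay segmented array. Old GCC treats T{e} for an aggregate T as aggregate
// initialization even when e is itself a T, and rejects the conversion.
#include <new>
#include <utility>

struct NodeAndTarget {  // illustrative aggregate, like the local struct in the trie
  int *Node;
  int *Target;
};

template <class T> struct MiniArray {
  alignas(T) unsigned char Storage[sizeof(T)];

  template <class... Args> T *AppendEmplace(Args &&... args) {
    // GCC 4.8: "could not convert '{...}' from '<brace-enclosed initializer
    // list>' to 'NodeAndTarget'" when the single argument is a const T&.
    return new (Storage) T{std::forward<Args>(args)...};
  }

  T *Append(const T &E) { return AppendEmplace(E); }
};

int main() {
  MiniArray<NodeAndTarget> A;
  NodeAndTarget E{nullptr, nullptr};
  A.Append(E);  // the braced copy newer compilers accept and GCC 4.8 rejects
  return 0;
}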
> Summary:
> This change makes the allocator and function call trie implementations
> move-aware and remove the FunctionCallTrie's reliance on a
> heap-allocated set of allocators.
>
> The change makes it possible to always have storage associated with
> Allocator instances, not necessarily having heap-allocated memory
> obtainable from these allocator instances. We also use thread-local
> uninitialised storage.
>
> We've also re-worked the segmented array implementation to have more
> precondition and post-condition checks when built in debug mode. This
> enables us to better implement some of the operations with surrounding
> documentation as well. The `trim` algorithm now has more documentation
> on the implementation, reducing the requirement to handle special
> conditions, and being more rigorous on the computations involved.
>
> In this change we also introduce an initialisation guard, through which
> we prevent an initialisation operation from racing with a cleanup
> operation.
>
> We also ensure that the ThreadTries array is not destroyed while copies
> into the elements are still being performed by other threads submitting
> profiles.
>
> Note that this change still has an issue with accessing thread-local
> storage from signal handlers that are instrumented with XRay. We also
> learn that with the testing of this patch, that there will be cases
> where calls to mmap(...) (through internal_mmap(...)) might be called in
> signal handlers, but are not async-signal-safe. Subsequent patches will
> address this, by re-using the `BufferQueue` type used in the FDR mode
> implementation for pre-allocated memory segments per active, tracing
> thread.
>
> We still want to land this change despite the known issues, with fixes
> forthcoming.
>
> Reviewers: mboerger, jfb
>
> Subscribers: jfb, llvm-commits
>
> Differential Revision: https://reviews.llvm.org/D54989

llvm-svn: 348346
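The "thread-local uninitialised storage" and the initialisation guard described in the quoted summary can be sketched roughly as follows. This is a simplified illustration under assumed names (TrieStorage, InitGuard, getThreadLocalTrie), not the actual xray_profiling implementation, which uses its own RecursionGuard and sanitizer atomics:

// Sketch only: per-thread aligned storage that is placement-new'ed lazily,
// with an atomic guard so initialisation cannot race a concurrent cleanup
// (for example one triggered from a signal handler).
#include <atomic>
#include <new>
#include <type_traits>

struct FunctionCallTrie {  // stand-in for the real trie type
  void enterFunction(int, unsigned long long, unsigned short) {}
  void exitFunction(int, unsigned long long, unsigned short) {}
};

namespace {

thread_local std::aligned_storage<sizeof(FunctionCallTrie),
                                  alignof(FunctionCallTrie)>::type TrieStorage;
thread_local bool TrieInitialised = false;
thread_local std::atomic<unsigned char> InitGuard{0};  // 1 while init/cleanup runs

FunctionCallTrie *getThreadLocalTrie() {
  unsigned char Expected = 0;
  // Only one of {initialisation, cleanup} may run on this thread at a time;
  // if the guard is already taken we back off instead of racing.
  if (!InitGuard.compare_exchange_strong(Expected, 1,
                                         std::memory_order_acq_rel))
    return nullptr;
  auto *T = reinterpret_cast<FunctionCallTrie *>(&TrieStorage);
  if (!TrieInitialised) {
    new (T) FunctionCallTrie();  // construct into the uninitialised storage
    TrieInitialised = true;
  }
  InitGuard.store(0, std::memory_order_release);
  return T;
}

void cleanupThreadLocalTrie() {
  unsigned char Expected = 0;
  if (!InitGuard.compare_exchange_strong(Expected, 1,
                                         std::memory_order_acq_rel))
    return;  // an initialisation (or another cleanup) is in flight; skip
  if (TrieInitialised) {
    reinterpret_cast<FunctionCallTrie *>(&TrieStorage)->~FunctionCallTrie();
    TrieInitialised = false;
  }
  InitGuard.store(0, std::memory_order_release);
}

} // namespace

int main() {
  if (auto *T = getThreadLocalTrie())
    T->enterFunction(1, 0, 0);
  cleanupThreadLocalTrie();
  return 0;
}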
parent 0e216854f9
commit 83ff22c297
@@ -309,36 +309,6 @@ TEST(FunctionCallTrieTest, MergeInto) {
  EXPECT_EQ(F2.Callees.size(), 0u);
}

TEST(FunctionCallTrieTest, PlacementNewOnAlignedStorage) {
  profilingFlags()->setDefaults();
  typename std::aligned_storage<sizeof(FunctionCallTrie::Allocators),
                                alignof(FunctionCallTrie::Allocators)>::type
      AllocatorsStorage;
  new (&AllocatorsStorage)
      FunctionCallTrie::Allocators(FunctionCallTrie::InitAllocators());
  auto *A =
      reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage);

  typename std::aligned_storage<sizeof(FunctionCallTrie),
                                alignof(FunctionCallTrie)>::type FCTStorage;
  new (&FCTStorage) FunctionCallTrie(*A);
  auto *T = reinterpret_cast<FunctionCallTrie *>(&FCTStorage);

  // Put some data into it.
  T->enterFunction(1, 0, 0);
  T->exitFunction(1, 1, 0);

  // Re-initialize the objects in storage.
  T->~FunctionCallTrie();
  A->~Allocators();
  new (A) FunctionCallTrie::Allocators(FunctionCallTrie::InitAllocators());
  new (T) FunctionCallTrie(*A);

  // Then put some data into it again.
  T->enterFunction(1, 0, 0);
  T->exitFunction(1, 1, 0);
}

} // namespace

} // namespace __xray
@@ -221,91 +221,5 @@ TEST(SegmentedArrayTest, SimulateStackBehaviour) {
  }
}

TEST(SegmentedArrayTest, PlacementNewOnAlignedStorage) {
  using AllocatorType = typename Array<ShadowStackEntry>::AllocatorType;
  typename std::aligned_storage<sizeof(AllocatorType),
                                alignof(AllocatorType)>::type AllocatorStorage;
  new (&AllocatorStorage) AllocatorType(1 << 10);
  auto *A = reinterpret_cast<AllocatorType *>(&AllocatorStorage);
  typename std::aligned_storage<sizeof(Array<ShadowStackEntry>),
                                alignof(Array<ShadowStackEntry>)>::type
      ArrayStorage;
  new (&ArrayStorage) Array<ShadowStackEntry>(*A);
  auto *Data = reinterpret_cast<Array<ShadowStackEntry> *>(&ArrayStorage);

  static uint64_t Dummy = 0;
  constexpr uint64_t Max = 9;

  for (uint64_t i = 0; i < Max; ++i) {
    auto P = Data->Append({i, &Dummy});
    ASSERT_NE(P, nullptr);
    ASSERT_EQ(P->NodePtr, &Dummy);
    auto &Back = Data->back();
    ASSERT_EQ(Back.NodePtr, &Dummy);
    ASSERT_EQ(Back.EntryTSC, i);
  }

  // Simulate a stack by checking the data from the end as we're trimming.
  auto Counter = Max;
  ASSERT_EQ(Data->size(), size_t(Max));
  while (!Data->empty()) {
    const auto &Top = Data->back();
    uint64_t *TopNode = Top.NodePtr;
    EXPECT_EQ(TopNode, &Dummy) << "Counter = " << Counter;
    Data->trim(1);
    --Counter;
    ASSERT_EQ(Data->size(), size_t(Counter));
  }

  // Once the stack is exhausted, we re-use the storage.
  for (uint64_t i = 0; i < Max; ++i) {
    auto P = Data->Append({i, &Dummy});
    ASSERT_NE(P, nullptr);
    ASSERT_EQ(P->NodePtr, &Dummy);
    auto &Back = Data->back();
    ASSERT_EQ(Back.NodePtr, &Dummy);
    ASSERT_EQ(Back.EntryTSC, i);
  }

  // We re-initialize the storage, by calling the destructor and
  // placement-new'ing again.
  Data->~Array();
  A->~AllocatorType();
  new (A) AllocatorType(1 << 10);
  new (Data) Array<ShadowStackEntry>(*A);

  // Then re-do the test.
  for (uint64_t i = 0; i < Max; ++i) {
    auto P = Data->Append({i, &Dummy});
    ASSERT_NE(P, nullptr);
    ASSERT_EQ(P->NodePtr, &Dummy);
    auto &Back = Data->back();
    ASSERT_EQ(Back.NodePtr, &Dummy);
    ASSERT_EQ(Back.EntryTSC, i);
  }

  // Simulate a stack by checking the data from the end as we're trimming.
  Counter = Max;
  ASSERT_EQ(Data->size(), size_t(Max));
  while (!Data->empty()) {
    const auto &Top = Data->back();
    uint64_t *TopNode = Top.NodePtr;
    EXPECT_EQ(TopNode, &Dummy) << "Counter = " << Counter;
    Data->trim(1);
    --Counter;
    ASSERT_EQ(Data->size(), size_t(Counter));
  }

  // Once the stack is exhausted, we re-use the storage.
  for (uint64_t i = 0; i < Max; ++i) {
    auto P = Data->Append({i, &Dummy});
    ASSERT_NE(P, nullptr);
    ASSERT_EQ(P->NodePtr, &Dummy);
    auto &Back = Data->back();
    ASSERT_EQ(Back.NodePtr, &Dummy);
    ASSERT_EQ(Back.EntryTSC, i);
  }
}

} // namespace
} // namespace __xray
@@ -21,8 +21,8 @@
#include "sanitizer_common/sanitizer_mutex.h"
#if SANITIZER_FUCHSIA
#include <zircon/process.h>
#include <zircon/status.h>
#include <zircon/syscalls.h>
#include <zircon/status.h>
#else
#include "sanitizer_common/sanitizer_posix.h"
#endif
@@ -50,20 +50,20 @@ template <class T> T *allocate() XRAY_NEVER_INSTRUMENT {
  }
  uintptr_t B;
  Status =
      _zx_vmar_map(_zx_vmar_root_self(), ZX_VM_PERM_READ | ZX_VM_PERM_WRITE, 0,
                   Vmo, 0, sizeof(T), &B);
  _zx_vmar_map(_zx_vmar_root_self(), ZX_VM_PERM_READ | ZX_VM_PERM_WRITE, 0,
               Vmo, 0, sizeof(T), &B);
  _zx_handle_close(Vmo);
  if (Status != ZX_OK) {
    if (Verbosity())
      Report("XRay Profiling: Failed to map VMAR of size %zu: %s\n", sizeof(T),
             _zx_status_get_string(Status));
      Report("XRay Profiling: Failed to map VMAR of size %zu: %s\n",
             sizeof(T), _zx_status_get_string(Status));
    return nullptr;
  }
  return reinterpret_cast<T *>(B);
#else
  uptr B = internal_mmap(NULL, RoundedSize, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  int ErrNo = 0;
  int ErrNo;
  if (UNLIKELY(internal_iserror(B, &ErrNo))) {
    if (Verbosity())
      Report(
@@ -80,8 +80,8 @@ template <class T> void deallocate(T *B) XRAY_NEVER_INSTRUMENT {
    return;
  uptr RoundedSize = RoundUpTo(sizeof(T), GetPageSizeCached());
#if SANITIZER_FUCHSIA
  _zx_vmar_unmap(_zx_vmar_root_self(), reinterpret_cast<uintptr_t>(B),
                 RoundedSize);
  _zx_vmar_unmap(_zx_vmar_root_self(),
                 reinterpret_cast<uintptr_t>(B), RoundedSize);
#else
  internal_munmap(B, RoundedSize);
#endif
@@ -95,24 +95,25 @@ T *allocateBuffer(size_t S) XRAY_NEVER_INSTRUMENT {
  zx_status_t Status = _zx_vmo_create(RoundedSize, 0, &Vmo);
  if (Status != ZX_OK) {
    if (Verbosity())
      Report("XRay Profiling: Failed to create VMO of size %zu: %s\n", S,
             _zx_status_get_string(Status));
      Report("XRay Profiling: Failed to create VMO of size %zu: %s\n",
             S, _zx_status_get_string(Status));
    return nullptr;
  }
  uintptr_t B;
  Status = _zx_vmar_map(_zx_vmar_root_self(),
                        ZX_VM_PERM_READ | ZX_VM_PERM_WRITE, 0, Vmo, 0, S, &B);
  Status =
      _zx_vmar_map(_zx_vmar_root_self(), ZX_VM_PERM_READ | ZX_VM_PERM_WRITE, 0,
                   Vmo, 0, S, &B);
  _zx_handle_close(Vmo);
  if (Status != ZX_OK) {
    if (Verbosity())
      Report("XRay Profiling: Failed to map VMAR of size %zu: %s\n", S,
             _zx_status_get_string(Status));
      Report("XRay Profiling: Failed to map VMAR of size %zu: %s\n",
             S, _zx_status_get_string(Status));
    return nullptr;
  }
#else
  uptr B = internal_mmap(NULL, RoundedSize, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  int ErrNo = 0;
  int ErrNo;
  if (UNLIKELY(internal_iserror(B, &ErrNo))) {
    if (Verbosity())
      Report(
@@ -129,8 +130,7 @@ template <class T> void deallocateBuffer(T *B, size_t S) XRAY_NEVER_INSTRUMENT {
    return;
  uptr RoundedSize = RoundUpTo(S * sizeof(T), GetPageSizeCached());
#if SANITIZER_FUCHSIA
  _zx_vmar_unmap(_zx_vmar_root_self(), reinterpret_cast<uintptr_t>(B),
                 RoundedSize);
  _zx_vmar_unmap(_zx_vmar_root_self(), reinterpret_cast<uintptr_t>(B), RoundedSize);
#else
  internal_munmap(B, RoundedSize);
#endif
@@ -171,7 +171,7 @@ template <size_t N> struct Allocator {
  };

private:
  size_t MaxMemory{0};
  const size_t MaxMemory{0};
  unsigned char *BackingStore = nullptr;
  unsigned char *AlignedNextBlock = nullptr;
  size_t AllocatedBlocks = 0;
@@ -223,43 +223,7 @@ private:

public:
  explicit Allocator(size_t M) XRAY_NEVER_INSTRUMENT
      : MaxMemory(RoundUpTo(M, kCacheLineSize)),
        BackingStore(nullptr),
        AlignedNextBlock(nullptr),
        AllocatedBlocks(0),
        Mutex() {}

  Allocator(const Allocator &) = delete;
  Allocator &operator=(const Allocator &) = delete;

  Allocator(Allocator &&O) XRAY_NEVER_INSTRUMENT {
    SpinMutexLock L0(&Mutex);
    SpinMutexLock L1(&O.Mutex);
    MaxMemory = O.MaxMemory;
    O.MaxMemory = 0;
    BackingStore = O.BackingStore;
    O.BackingStore = nullptr;
    AlignedNextBlock = O.AlignedNextBlock;
    O.AlignedNextBlock = nullptr;
    AllocatedBlocks = O.AllocatedBlocks;
    O.AllocatedBlocks = 0;
  }

  Allocator &operator=(Allocator &&O) XRAY_NEVER_INSTRUMENT {
    SpinMutexLock L0(&Mutex);
    SpinMutexLock L1(&O.Mutex);
    MaxMemory = O.MaxMemory;
    O.MaxMemory = 0;
    if (BackingStore != nullptr)
      deallocateBuffer(BackingStore, MaxMemory);
    BackingStore = O.BackingStore;
    O.BackingStore = nullptr;
    AlignedNextBlock = O.AlignedNextBlock;
    O.AlignedNextBlock = nullptr;
    AllocatedBlocks = O.AllocatedBlocks;
    O.AllocatedBlocks = 0;
    return *this;
  }
      : MaxMemory(RoundUpTo(M, kCacheLineSize)) {}

  Block Allocate() XRAY_NEVER_INSTRUMENT { return {Alloc()}; }
@@ -98,6 +98,9 @@ public:
  struct NodeIdPair {
    Node *NodePtr;
    int32_t FId;

    // Constructor for inplace-construction.
    NodeIdPair(Node *N, int32_t F) : NodePtr(N), FId(F) {}
  };

  using NodeIdPairArray = Array<NodeIdPair>;
@@ -115,6 +118,15 @@ public:
    uint64_t CumulativeLocalTime; // Typically in TSC deltas, not wall-time.
    int32_t FId;

    // We add a constructor here to allow us to inplace-construct through
    // Array<...>'s AppendEmplace.
    Node(Node *P, NodeIdPairAllocatorType &A, uint64_t CC, uint64_t CLT,
         int32_t F) XRAY_NEVER_INSTRUMENT : Parent(P),
                                            Callees(A),
                                            CallCount(CC),
                                            CumulativeLocalTime(CLT),
                                            FId(F) {}

    // TODO: Include the compact histogram.
  };
@@ -123,6 +135,13 @@ private:
    uint64_t EntryTSC;
    Node *NodePtr;
    uint16_t EntryCPU;

    // We add a constructor here to allow us to inplace-construct through
    // Array<...>'s AppendEmplace.
    ShadowStackEntry(uint64_t T, Node *N, uint16_t C) XRAY_NEVER_INSTRUMENT
        : EntryTSC{T},
          NodePtr{N},
          EntryCPU{C} {}
  };

  using NodeArray = Array<Node>;
@ -137,71 +156,20 @@ public:
|
||||
using RootAllocatorType = RootArray::AllocatorType;
|
||||
using ShadowStackAllocatorType = ShadowStackArray::AllocatorType;
|
||||
|
||||
// Use hosted aligned storage members to allow for trivial move and init.
|
||||
// This also allows us to sidestep the potential-failing allocation issue.
|
||||
typename std::aligned_storage<sizeof(NodeAllocatorType),
|
||||
alignof(NodeAllocatorType)>::type
|
||||
NodeAllocatorStorage;
|
||||
typename std::aligned_storage<sizeof(RootAllocatorType),
|
||||
alignof(RootAllocatorType)>::type
|
||||
RootAllocatorStorage;
|
||||
typename std::aligned_storage<sizeof(ShadowStackAllocatorType),
|
||||
alignof(ShadowStackAllocatorType)>::type
|
||||
ShadowStackAllocatorStorage;
|
||||
typename std::aligned_storage<sizeof(NodeIdPairAllocatorType),
|
||||
alignof(NodeIdPairAllocatorType)>::type
|
||||
NodeIdPairAllocatorStorage;
|
||||
|
||||
NodeAllocatorType *NodeAllocator = nullptr;
|
||||
RootAllocatorType *RootAllocator = nullptr;
|
||||
ShadowStackAllocatorType *ShadowStackAllocator = nullptr;
|
||||
NodeIdPairAllocatorType *NodeIdPairAllocator = nullptr;
|
||||
|
||||
Allocators() = default;
|
||||
Allocators() {}
|
||||
Allocators(const Allocators &) = delete;
|
||||
Allocators &operator=(const Allocators &) = delete;
|
||||
|
||||
explicit Allocators(uptr Max) XRAY_NEVER_INSTRUMENT {
|
||||
new (&NodeAllocatorStorage) NodeAllocatorType(Max);
|
||||
NodeAllocator =
|
||||
reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);
|
||||
|
||||
new (&RootAllocatorStorage) RootAllocatorType(Max);
|
||||
RootAllocator =
|
||||
reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);
|
||||
|
||||
new (&ShadowStackAllocatorStorage) ShadowStackAllocatorType(Max);
|
||||
ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
|
||||
&ShadowStackAllocatorStorage);
|
||||
|
||||
new (&NodeIdPairAllocatorStorage) NodeIdPairAllocatorType(Max);
|
||||
NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
|
||||
&NodeIdPairAllocatorStorage);
|
||||
}
|
||||
|
||||
Allocators(Allocators &&O) XRAY_NEVER_INSTRUMENT {
|
||||
// Here we rely on the safety of memcpy'ing contents of the storage
|
||||
// members, and then pointing the source pointers to nullptr.
|
||||
internal_memcpy(&NodeAllocatorStorage, &O.NodeAllocatorStorage,
|
||||
sizeof(NodeAllocatorType));
|
||||
internal_memcpy(&RootAllocatorStorage, &O.RootAllocatorStorage,
|
||||
sizeof(RootAllocatorType));
|
||||
internal_memcpy(&ShadowStackAllocatorStorage,
|
||||
&O.ShadowStackAllocatorStorage,
|
||||
sizeof(ShadowStackAllocatorType));
|
||||
internal_memcpy(&NodeIdPairAllocatorStorage,
|
||||
&O.NodeIdPairAllocatorStorage,
|
||||
sizeof(NodeIdPairAllocatorType));
|
||||
|
||||
NodeAllocator =
|
||||
reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);
|
||||
RootAllocator =
|
||||
reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);
|
||||
ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
|
||||
&ShadowStackAllocatorStorage);
|
||||
NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
|
||||
&NodeIdPairAllocatorStorage);
|
||||
|
||||
Allocators(Allocators &&O) XRAY_NEVER_INSTRUMENT
|
||||
: NodeAllocator(O.NodeAllocator),
|
||||
RootAllocator(O.RootAllocator),
|
||||
ShadowStackAllocator(O.ShadowStackAllocator),
|
||||
NodeIdPairAllocator(O.NodeIdPairAllocator) {
|
||||
O.NodeAllocator = nullptr;
|
||||
O.RootAllocator = nullptr;
|
||||
O.ShadowStackAllocator = nullptr;
|
||||
@ -209,77 +177,79 @@ public:
|
||||
}
|
||||
|
||||
Allocators &operator=(Allocators &&O) XRAY_NEVER_INSTRUMENT {
|
||||
// When moving into an existing instance, we ensure that we clean up the
|
||||
// current allocators.
|
||||
if (NodeAllocator)
|
||||
NodeAllocator->~NodeAllocatorType();
|
||||
if (O.NodeAllocator) {
|
||||
new (&NodeAllocatorStorage)
|
||||
NodeAllocatorType(std::move(*O.NodeAllocator));
|
||||
NodeAllocator =
|
||||
reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);
|
||||
O.NodeAllocator = nullptr;
|
||||
} else {
|
||||
NodeAllocator = nullptr;
|
||||
{
|
||||
auto Tmp = O.NodeAllocator;
|
||||
O.NodeAllocator = this->NodeAllocator;
|
||||
this->NodeAllocator = Tmp;
|
||||
}
|
||||
|
||||
if (RootAllocator)
|
||||
RootAllocator->~RootAllocatorType();
|
||||
if (O.RootAllocator) {
|
||||
new (&RootAllocatorStorage)
|
||||
RootAllocatorType(std::move(*O.RootAllocator));
|
||||
RootAllocator =
|
||||
reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);
|
||||
O.RootAllocator = nullptr;
|
||||
} else {
|
||||
RootAllocator = nullptr;
|
||||
{
|
||||
auto Tmp = O.RootAllocator;
|
||||
O.RootAllocator = this->RootAllocator;
|
||||
this->RootAllocator = Tmp;
|
||||
}
|
||||
|
||||
if (ShadowStackAllocator)
|
||||
ShadowStackAllocator->~ShadowStackAllocatorType();
|
||||
if (O.ShadowStackAllocator) {
|
||||
new (&ShadowStackAllocatorStorage)
|
||||
ShadowStackAllocatorType(std::move(*O.ShadowStackAllocator));
|
||||
ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
|
||||
&ShadowStackAllocatorStorage);
|
||||
O.ShadowStackAllocator = nullptr;
|
||||
} else {
|
||||
ShadowStackAllocator = nullptr;
|
||||
{
|
||||
auto Tmp = O.ShadowStackAllocator;
|
||||
O.ShadowStackAllocator = this->ShadowStackAllocator;
|
||||
this->ShadowStackAllocator = Tmp;
|
||||
}
|
||||
|
||||
if (NodeIdPairAllocator)
|
||||
NodeIdPairAllocator->~NodeIdPairAllocatorType();
|
||||
if (O.NodeIdPairAllocator) {
|
||||
new (&NodeIdPairAllocatorStorage)
|
||||
NodeIdPairAllocatorType(std::move(*O.NodeIdPairAllocator));
|
||||
NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
|
||||
&NodeIdPairAllocatorStorage);
|
||||
O.NodeIdPairAllocator = nullptr;
|
||||
} else {
|
||||
NodeIdPairAllocator = nullptr;
|
||||
{
|
||||
auto Tmp = O.NodeIdPairAllocator;
|
||||
O.NodeIdPairAllocator = this->NodeIdPairAllocator;
|
||||
this->NodeIdPairAllocator = Tmp;
|
||||
}
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
~Allocators() XRAY_NEVER_INSTRUMENT {
|
||||
if (NodeAllocator != nullptr)
|
||||
// Note that we cannot use delete on these pointers, as they need to be
|
||||
// returned to the sanitizer_common library's internal memory tracking
|
||||
// system.
|
||||
if (NodeAllocator != nullptr) {
|
||||
NodeAllocator->~NodeAllocatorType();
|
||||
if (RootAllocator != nullptr)
|
||||
deallocate(NodeAllocator);
|
||||
NodeAllocator = nullptr;
|
||||
}
|
||||
if (RootAllocator != nullptr) {
|
||||
RootAllocator->~RootAllocatorType();
|
||||
if (ShadowStackAllocator != nullptr)
|
||||
deallocate(RootAllocator);
|
||||
RootAllocator = nullptr;
|
||||
}
|
||||
if (ShadowStackAllocator != nullptr) {
|
||||
ShadowStackAllocator->~ShadowStackAllocatorType();
|
||||
if (NodeIdPairAllocator != nullptr)
|
||||
deallocate(ShadowStackAllocator);
|
||||
ShadowStackAllocator = nullptr;
|
||||
}
|
||||
if (NodeIdPairAllocator != nullptr) {
|
||||
NodeIdPairAllocator->~NodeIdPairAllocatorType();
|
||||
deallocate(NodeIdPairAllocator);
|
||||
NodeIdPairAllocator = nullptr;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// TODO: Support configuration of options through the arguments.
|
||||
static Allocators InitAllocators() XRAY_NEVER_INSTRUMENT {
|
||||
return InitAllocatorsCustom(profilingFlags()->per_thread_allocator_max);
|
||||
}
|
||||
|
||||
static Allocators InitAllocatorsCustom(uptr Max) XRAY_NEVER_INSTRUMENT {
|
||||
Allocators A(Max);
|
||||
Allocators A;
|
||||
auto NodeAllocator = allocate<Allocators::NodeAllocatorType>();
|
||||
new (NodeAllocator) Allocators::NodeAllocatorType(Max);
|
||||
A.NodeAllocator = NodeAllocator;
|
||||
|
||||
auto RootAllocator = allocate<Allocators::RootAllocatorType>();
|
||||
new (RootAllocator) Allocators::RootAllocatorType(Max);
|
||||
A.RootAllocator = RootAllocator;
|
||||
|
||||
auto ShadowStackAllocator =
|
||||
allocate<Allocators::ShadowStackAllocatorType>();
|
||||
new (ShadowStackAllocator) Allocators::ShadowStackAllocatorType(Max);
|
||||
A.ShadowStackAllocator = ShadowStackAllocator;
|
||||
|
||||
auto NodeIdPairAllocator = allocate<NodeIdPairAllocatorType>();
|
||||
new (NodeIdPairAllocator) NodeIdPairAllocatorType(Max);
|
||||
A.NodeIdPairAllocator = NodeIdPairAllocator;
|
||||
return A;
|
||||
}
|
||||
|
||||
@ -287,38 +257,14 @@ private:
|
||||
NodeArray Nodes;
|
||||
RootArray Roots;
|
||||
ShadowStackArray ShadowStack;
|
||||
NodeIdPairAllocatorType *NodeIdPairAllocator;
|
||||
uint32_t OverflowedFunctions;
|
||||
NodeIdPairAllocatorType *NodeIdPairAllocator = nullptr;
|
||||
|
||||
public:
|
||||
explicit FunctionCallTrie(const Allocators &A) XRAY_NEVER_INSTRUMENT
|
||||
: Nodes(*A.NodeAllocator),
|
||||
Roots(*A.RootAllocator),
|
||||
ShadowStack(*A.ShadowStackAllocator),
|
||||
NodeIdPairAllocator(A.NodeIdPairAllocator),
|
||||
OverflowedFunctions(0) {}
|
||||
|
||||
FunctionCallTrie() = delete;
|
||||
FunctionCallTrie(const FunctionCallTrie &) = delete;
|
||||
FunctionCallTrie &operator=(const FunctionCallTrie &) = delete;
|
||||
|
||||
FunctionCallTrie(FunctionCallTrie &&O) XRAY_NEVER_INSTRUMENT
|
||||
: Nodes(std::move(O.Nodes)),
|
||||
Roots(std::move(O.Roots)),
|
||||
ShadowStack(std::move(O.ShadowStack)),
|
||||
NodeIdPairAllocator(O.NodeIdPairAllocator),
|
||||
OverflowedFunctions(O.OverflowedFunctions) {}
|
||||
|
||||
FunctionCallTrie &operator=(FunctionCallTrie &&O) XRAY_NEVER_INSTRUMENT {
|
||||
Nodes = std::move(O.Nodes);
|
||||
Roots = std::move(O.Roots);
|
||||
ShadowStack = std::move(O.ShadowStack);
|
||||
NodeIdPairAllocator = O.NodeIdPairAllocator;
|
||||
OverflowedFunctions = O.OverflowedFunctions;
|
||||
return *this;
|
||||
}
|
||||
|
||||
~FunctionCallTrie() XRAY_NEVER_INSTRUMENT {}
|
||||
NodeIdPairAllocator(A.NodeIdPairAllocator) {}
|
||||
|
||||
void enterFunction(const int32_t FId, uint64_t TSC,
|
||||
uint16_t CPU) XRAY_NEVER_INSTRUMENT {
|
||||
@ -326,17 +272,12 @@ public:
|
||||
// This function primarily deals with ensuring that the ShadowStack is
|
||||
// consistent and ready for when an exit event is encountered.
|
||||
if (UNLIKELY(ShadowStack.empty())) {
|
||||
auto NewRoot = Nodes.AppendEmplace(
|
||||
nullptr, NodeIdPairArray{*NodeIdPairAllocator}, 0u, 0u, FId);
|
||||
auto NewRoot =
|
||||
Nodes.AppendEmplace(nullptr, *NodeIdPairAllocator, 0u, 0u, FId);
|
||||
if (UNLIKELY(NewRoot == nullptr))
|
||||
return;
|
||||
if (Roots.Append(NewRoot) == nullptr)
|
||||
return;
|
||||
if (ShadowStack.AppendEmplace(TSC, NewRoot, CPU) == nullptr) {
|
||||
Roots.trim(1);
|
||||
++OverflowedFunctions;
|
||||
return;
|
||||
}
|
||||
Roots.Append(NewRoot);
|
||||
ShadowStack.AppendEmplace(TSC, NewRoot, CPU);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -350,39 +291,29 @@ public:
|
||||
[FId](const NodeIdPair &NR) { return NR.FId == FId; });
|
||||
if (Callee != nullptr) {
|
||||
CHECK_NE(Callee->NodePtr, nullptr);
|
||||
if (ShadowStack.AppendEmplace(TSC, Callee->NodePtr, CPU) == nullptr)
|
||||
++OverflowedFunctions;
|
||||
ShadowStack.AppendEmplace(TSC, Callee->NodePtr, CPU);
|
||||
return;
|
||||
}
|
||||
|
||||
// This means we've never seen this stack before, create a new node here.
|
||||
auto NewNode = Nodes.AppendEmplace(
|
||||
TopNode, NodeIdPairArray(*NodeIdPairAllocator), 0u, 0u, FId);
|
||||
auto NewNode =
|
||||
Nodes.AppendEmplace(TopNode, *NodeIdPairAllocator, 0u, 0u, FId);
|
||||
if (UNLIKELY(NewNode == nullptr))
|
||||
return;
|
||||
DCHECK_NE(NewNode, nullptr);
|
||||
TopNode->Callees.AppendEmplace(NewNode, FId);
|
||||
if (ShadowStack.AppendEmplace(TSC, NewNode, CPU) == nullptr)
|
||||
++OverflowedFunctions;
|
||||
ShadowStack.AppendEmplace(TSC, NewNode, CPU);
|
||||
DCHECK_NE(ShadowStack.back().NodePtr, nullptr);
|
||||
return;
|
||||
}
|
||||
|
||||
void exitFunction(int32_t FId, uint64_t TSC,
|
||||
uint16_t CPU) XRAY_NEVER_INSTRUMENT {
|
||||
// If we're exiting functions that have "overflowed" or don't fit into the
|
||||
// stack due to allocator constraints, we then decrement that count first.
|
||||
if (OverflowedFunctions) {
|
||||
--OverflowedFunctions;
|
||||
return;
|
||||
}
|
||||
|
||||
// When we exit a function, we look up the ShadowStack to see whether we've
|
||||
// entered this function before. We do as little processing here as we can,
|
||||
// since most of the hard work would have already been done at function
|
||||
// entry.
|
||||
uint64_t CumulativeTreeTime = 0;
|
||||
|
||||
while (!ShadowStack.empty()) {
|
||||
const auto &Top = ShadowStack.back();
|
||||
auto TopNode = Top.NodePtr;
|
||||
@ -449,7 +380,7 @@ public:
|
||||
for (const auto Root : getRoots()) {
|
||||
// Add a node in O for this root.
|
||||
auto NewRoot = O.Nodes.AppendEmplace(
|
||||
nullptr, NodeIdPairArray(*O.NodeIdPairAllocator), Root->CallCount,
|
||||
nullptr, *O.NodeIdPairAllocator, Root->CallCount,
|
||||
Root->CumulativeLocalTime, Root->FId);
|
||||
|
||||
// Because we cannot allocate more memory we should bail out right away.
|
||||
@ -468,9 +399,8 @@ public:
|
||||
DFSStack.trim(1);
|
||||
for (const auto Callee : NP.Node->Callees) {
|
||||
auto NewNode = O.Nodes.AppendEmplace(
|
||||
NP.NewNode, NodeIdPairArray(*O.NodeIdPairAllocator),
|
||||
Callee.NodePtr->CallCount, Callee.NodePtr->CumulativeLocalTime,
|
||||
Callee.FId);
|
||||
NP.NewNode, *O.NodeIdPairAllocator, Callee.NodePtr->CallCount,
|
||||
Callee.NodePtr->CumulativeLocalTime, Callee.FId);
|
||||
if (UNLIKELY(NewNode == nullptr))
|
||||
return;
|
||||
NP.NewNode->Callees.AppendEmplace(NewNode, Callee.FId);
|
||||
@ -503,9 +433,8 @@ public:
|
||||
auto R = O.Roots.find_element(
|
||||
[&](const Node *Node) { return Node->FId == Root->FId; });
|
||||
if (R == nullptr) {
|
||||
TargetRoot = O.Nodes.AppendEmplace(
|
||||
nullptr, NodeIdPairArray(*O.NodeIdPairAllocator), 0u, 0u,
|
||||
Root->FId);
|
||||
TargetRoot = O.Nodes.AppendEmplace(nullptr, *O.NodeIdPairAllocator, 0u,
|
||||
0u, Root->FId);
|
||||
if (UNLIKELY(TargetRoot == nullptr))
|
||||
return;
|
||||
|
||||
@ -530,8 +459,7 @@ public:
|
||||
});
|
||||
if (TargetCallee == nullptr) {
|
||||
auto NewTargetNode = O.Nodes.AppendEmplace(
|
||||
NT.TargetNode, NodeIdPairArray(*O.NodeIdPairAllocator), 0u, 0u,
|
||||
Callee.FId);
|
||||
NT.TargetNode, *O.NodeIdPairAllocator, 0u, 0u, Callee.FId);
|
||||
|
||||
if (UNLIKELY(NewTargetNode == nullptr))
|
||||
return;
|
||||
|
@ -86,8 +86,7 @@ static FunctionCallTrie::Allocators *GlobalAllocators = nullptr;
|
||||
|
||||
void post(const FunctionCallTrie &T, tid_t TId) XRAY_NEVER_INSTRUMENT {
|
||||
static pthread_once_t Once = PTHREAD_ONCE_INIT;
|
||||
pthread_once(
|
||||
&Once, +[]() XRAY_NEVER_INSTRUMENT { reset(); });
|
||||
pthread_once(&Once, +[] { reset(); });
|
||||
|
||||
ThreadTrie *Item = nullptr;
|
||||
{
|
||||
@ -96,14 +95,13 @@ void post(const FunctionCallTrie &T, tid_t TId) XRAY_NEVER_INSTRUMENT {
|
||||
return;
|
||||
|
||||
Item = ThreadTries->Append({});
|
||||
if (Item == nullptr)
|
||||
return;
|
||||
|
||||
Item->TId = TId;
|
||||
auto Trie = reinterpret_cast<FunctionCallTrie *>(&Item->TrieStorage);
|
||||
new (Trie) FunctionCallTrie(*GlobalAllocators);
|
||||
T.deepCopyInto(*Trie);
|
||||
}
|
||||
|
||||
auto Trie = reinterpret_cast<FunctionCallTrie *>(&Item->TrieStorage);
|
||||
T.deepCopyInto(*Trie);
|
||||
}
|
||||
|
||||
// A PathArray represents the function id's representing a stack trace. In this
|
||||
@ -117,7 +115,13 @@ struct ProfileRecord {
|
||||
// The Path in this record is the function id's from the leaf to the root of
|
||||
// the function call stack as represented from a FunctionCallTrie.
|
||||
PathArray Path;
|
||||
const FunctionCallTrie::Node *Node;
|
||||
const FunctionCallTrie::Node *Node = nullptr;
|
||||
|
||||
// Constructor for in-place construction.
|
||||
ProfileRecord(PathAllocator &A,
|
||||
const FunctionCallTrie::Node *N) XRAY_NEVER_INSTRUMENT
|
||||
: Path(A),
|
||||
Node(N) {}
|
||||
};
|
||||
|
||||
namespace {
|
||||
@ -138,7 +142,7 @@ populateRecords(ProfileRecordArray &PRs, ProfileRecord::PathAllocator &PA,
|
||||
while (!DFSStack.empty()) {
|
||||
auto Node = DFSStack.back();
|
||||
DFSStack.trim(1);
|
||||
auto Record = PRs.AppendEmplace(PathArray{PA}, Node);
|
||||
auto Record = PRs.AppendEmplace(PA, Node);
|
||||
if (Record == nullptr)
|
||||
return;
|
||||
DCHECK_NE(Record, nullptr);
|
||||
@ -199,7 +203,7 @@ void serialize() XRAY_NEVER_INSTRUMENT {
|
||||
|
||||
// Clear out the global ProfileBuffers, if it's not empty.
|
||||
for (auto &B : *ProfileBuffers)
|
||||
deallocateBuffer(reinterpret_cast<unsigned char *>(B.Data), B.Size);
|
||||
deallocateBuffer(reinterpret_cast<uint8_t *>(B.Data), B.Size);
|
||||
ProfileBuffers->trim(ProfileBuffers->size());
|
||||
|
||||
if (ThreadTries->empty())
|
||||
@ -274,8 +278,8 @@ void reset() XRAY_NEVER_INSTRUMENT {
|
||||
|
||||
GlobalAllocators =
|
||||
reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorStorage);
|
||||
new (GlobalAllocators)
|
||||
FunctionCallTrie::Allocators(FunctionCallTrie::InitAllocators());
|
||||
new (GlobalAllocators) FunctionCallTrie::Allocators();
|
||||
*GlobalAllocators = FunctionCallTrie::InitAllocators();
|
||||
|
||||
if (ThreadTriesAllocator != nullptr)
|
||||
ThreadTriesAllocator->~ThreadTriesArrayAllocator();
|
||||
@ -308,10 +312,8 @@ XRayBuffer nextBuffer(XRayBuffer B) XRAY_NEVER_INSTRUMENT {
|
||||
static pthread_once_t Once = PTHREAD_ONCE_INIT;
|
||||
static typename std::aligned_storage<sizeof(XRayProfilingFileHeader)>::type
|
||||
FileHeaderStorage;
|
||||
pthread_once(
|
||||
&Once, +[]() XRAY_NEVER_INSTRUMENT {
|
||||
new (&FileHeaderStorage) XRayProfilingFileHeader{};
|
||||
});
|
||||
pthread_once(&Once,
|
||||
+[] { new (&FileHeaderStorage) XRayProfilingFileHeader{}; });
|
||||
|
||||
if (UNLIKELY(B.Data == nullptr)) {
|
||||
// The first buffer should always contain the file header information.
|
||||
|
@ -31,112 +31,67 @@ namespace __xray {
|
||||
|
||||
namespace {
|
||||
|
||||
static atomic_sint32_t ProfilerLogFlushStatus = {
|
||||
atomic_sint32_t ProfilerLogFlushStatus = {
|
||||
XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING};
|
||||
|
||||
static atomic_sint32_t ProfilerLogStatus = {
|
||||
XRayLogInitStatus::XRAY_LOG_UNINITIALIZED};
|
||||
atomic_sint32_t ProfilerLogStatus = {XRayLogInitStatus::XRAY_LOG_UNINITIALIZED};
|
||||
|
||||
static SpinMutex ProfilerOptionsMutex;
|
||||
SpinMutex ProfilerOptionsMutex;
|
||||
|
||||
struct ProfilingData {
|
||||
atomic_uintptr_t Allocators;
|
||||
atomic_uintptr_t FCT;
|
||||
struct alignas(64) ProfilingData {
|
||||
FunctionCallTrie::Allocators *Allocators;
|
||||
FunctionCallTrie *FCT;
|
||||
};
|
||||
|
||||
static pthread_key_t ProfilingKey;
|
||||
|
||||
thread_local std::aligned_storage<sizeof(FunctionCallTrie::Allocators),
|
||||
alignof(FunctionCallTrie::Allocators)>::type
|
||||
thread_local std::aligned_storage<sizeof(FunctionCallTrie::Allocators)>::type
|
||||
AllocatorsStorage;
|
||||
thread_local std::aligned_storage<sizeof(FunctionCallTrie),
|
||||
alignof(FunctionCallTrie)>::type
|
||||
thread_local std::aligned_storage<sizeof(FunctionCallTrie)>::type
|
||||
FunctionCallTrieStorage;
|
||||
thread_local ProfilingData TLD{{0}, {0}};
|
||||
thread_local atomic_uint8_t ReentranceGuard{0};
|
||||
thread_local std::aligned_storage<sizeof(ProfilingData)>::type ThreadStorage{};
|
||||
|
||||
// We use a separate guard for ensuring that for this thread, if we're already
|
||||
// cleaning up, that any signal handlers don't attempt to cleanup nor
|
||||
// initialise.
|
||||
thread_local atomic_uint8_t TLDInitGuard{0};
|
||||
|
||||
// We also use a separate latch to signal that the thread is exiting, and
|
||||
// non-essential work should be ignored (things like recording events, etc.).
|
||||
thread_local atomic_uint8_t ThreadExitingLatch{0};
|
||||
|
||||
static ProfilingData *getThreadLocalData() XRAY_NEVER_INSTRUMENT {
|
||||
thread_local auto ThreadOnce = []() XRAY_NEVER_INSTRUMENT {
|
||||
pthread_setspecific(ProfilingKey, &TLD);
|
||||
static ProfilingData &getThreadLocalData() XRAY_NEVER_INSTRUMENT {
|
||||
thread_local auto ThreadOnce = [] {
|
||||
new (&ThreadStorage) ProfilingData{};
|
||||
auto *Allocators =
|
||||
reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage);
|
||||
new (Allocators) FunctionCallTrie::Allocators();
|
||||
*Allocators = FunctionCallTrie::InitAllocators();
|
||||
auto *FCT = reinterpret_cast<FunctionCallTrie *>(&FunctionCallTrieStorage);
|
||||
new (FCT) FunctionCallTrie(*Allocators);
|
||||
auto &TLD = *reinterpret_cast<ProfilingData *>(&ThreadStorage);
|
||||
TLD.Allocators = Allocators;
|
||||
TLD.FCT = FCT;
|
||||
pthread_setspecific(ProfilingKey, &ThreadStorage);
|
||||
return false;
|
||||
}();
|
||||
(void)ThreadOnce;
|
||||
|
||||
RecursionGuard TLDInit(TLDInitGuard);
|
||||
if (!TLDInit)
|
||||
return nullptr;
|
||||
auto &TLD = *reinterpret_cast<ProfilingData *>(&ThreadStorage);
|
||||
|
||||
if (atomic_load_relaxed(&ThreadExitingLatch))
|
||||
return nullptr;
|
||||
|
||||
uptr Allocators = 0;
|
||||
if (atomic_compare_exchange_strong(&TLD.Allocators, &Allocators, 1,
|
||||
memory_order_acq_rel)) {
|
||||
new (&AllocatorsStorage)
|
||||
FunctionCallTrie::Allocators(FunctionCallTrie::InitAllocators());
|
||||
Allocators = reinterpret_cast<uptr>(
|
||||
reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage));
|
||||
atomic_store(&TLD.Allocators, Allocators, memory_order_release);
|
||||
if (UNLIKELY(TLD.Allocators == nullptr || TLD.FCT == nullptr)) {
|
||||
auto *Allocators =
|
||||
reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage);
|
||||
new (Allocators) FunctionCallTrie::Allocators();
|
||||
*Allocators = FunctionCallTrie::InitAllocators();
|
||||
auto *FCT = reinterpret_cast<FunctionCallTrie *>(&FunctionCallTrieStorage);
|
||||
new (FCT) FunctionCallTrie(*Allocators);
|
||||
TLD.Allocators = Allocators;
|
||||
TLD.FCT = FCT;
|
||||
}
|
||||
|
||||
uptr FCT = 0;
|
||||
if (atomic_compare_exchange_strong(&TLD.FCT, &FCT, 1, memory_order_acq_rel)) {
|
||||
new (&FunctionCallTrieStorage) FunctionCallTrie(
|
||||
*reinterpret_cast<FunctionCallTrie::Allocators *>(Allocators));
|
||||
FCT = reinterpret_cast<uptr>(
|
||||
reinterpret_cast<FunctionCallTrie *>(&FunctionCallTrieStorage));
|
||||
atomic_store(&TLD.FCT, FCT, memory_order_release);
|
||||
}
|
||||
|
||||
if (FCT == 1)
|
||||
return nullptr;
|
||||
|
||||
return &TLD;
|
||||
return *reinterpret_cast<ProfilingData *>(&ThreadStorage);
|
||||
}
|
||||
|
||||
static void cleanupTLD() XRAY_NEVER_INSTRUMENT {
|
||||
RecursionGuard TLDInit(TLDInitGuard);
|
||||
if (!TLDInit)
|
||||
return;
|
||||
|
||||
auto FCT = atomic_exchange(&TLD.FCT, 0, memory_order_acq_rel);
|
||||
if (FCT == reinterpret_cast<uptr>(reinterpret_cast<FunctionCallTrie *>(
|
||||
&FunctionCallTrieStorage)))
|
||||
reinterpret_cast<FunctionCallTrie *>(FCT)->~FunctionCallTrie();
|
||||
|
||||
auto Allocators = atomic_exchange(&TLD.Allocators, 0, memory_order_acq_rel);
|
||||
if (Allocators ==
|
||||
reinterpret_cast<uptr>(
|
||||
reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage)))
|
||||
reinterpret_cast<FunctionCallTrie::Allocators *>(Allocators)->~Allocators();
|
||||
}
|
||||
|
||||
static void postCurrentThreadFCT(ProfilingData &T) XRAY_NEVER_INSTRUMENT {
|
||||
RecursionGuard TLDInit(TLDInitGuard);
|
||||
if (!TLDInit)
|
||||
return;
|
||||
|
||||
uptr P = atomic_load(&T.FCT, memory_order_acquire);
|
||||
if (P != reinterpret_cast<uptr>(
|
||||
reinterpret_cast<FunctionCallTrie *>(&FunctionCallTrieStorage)))
|
||||
return;
|
||||
|
||||
auto FCT = reinterpret_cast<FunctionCallTrie *>(P);
|
||||
DCHECK_NE(FCT, nullptr);
|
||||
|
||||
if (!FCT->getRoots().empty())
|
||||
profileCollectorService::post(*FCT, GetTid());
|
||||
|
||||
cleanupTLD();
|
||||
auto &TLD = *reinterpret_cast<ProfilingData *>(&ThreadStorage);
|
||||
if (TLD.Allocators != nullptr && TLD.FCT != nullptr) {
|
||||
TLD.FCT->~FunctionCallTrie();
|
||||
TLD.Allocators->~Allocators();
|
||||
TLD.FCT = nullptr;
|
||||
TLD.Allocators = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
@ -149,6 +104,9 @@ const char *profilingCompilerDefinedFlags() XRAY_NEVER_INSTRUMENT {
|
||||
#endif
|
||||
}
|
||||
|
||||
atomic_sint32_t ProfileFlushStatus = {
|
||||
XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING};
|
||||
|
||||
XRayLogFlushStatus profilingFlush() XRAY_NEVER_INSTRUMENT {
|
||||
if (atomic_load(&ProfilerLogStatus, memory_order_acquire) !=
|
||||
XRayLogInitStatus::XRAY_LOG_FINALIZED) {
|
||||
@ -157,27 +115,14 @@ XRayLogFlushStatus profilingFlush() XRAY_NEVER_INSTRUMENT {
|
||||
return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
|
||||
}
|
||||
|
||||
RecursionGuard SignalGuard(ReentranceGuard);
|
||||
if (!SignalGuard) {
|
||||
s32 Result = XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
|
||||
if (!atomic_compare_exchange_strong(&ProfilerLogFlushStatus, &Result,
|
||||
XRayLogFlushStatus::XRAY_LOG_FLUSHING,
|
||||
memory_order_acq_rel)) {
|
||||
if (Verbosity())
|
||||
Report("Cannot finalize properly inside a signal handler!\n");
|
||||
atomic_store(&ProfilerLogFlushStatus,
|
||||
XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING,
|
||||
memory_order_release);
|
||||
return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
|
||||
Report("Not flushing profiles, implementation still finalizing.\n");
|
||||
}
|
||||
|
||||
s32 Previous = atomic_exchange(&ProfilerLogFlushStatus,
|
||||
XRayLogFlushStatus::XRAY_LOG_FLUSHING,
|
||||
memory_order_acq_rel);
|
||||
if (Previous == XRayLogFlushStatus::XRAY_LOG_FLUSHING) {
|
||||
if (Verbosity())
|
||||
Report("Not flushing profiles, implementation still flushing.\n");
|
||||
return XRayLogFlushStatus::XRAY_LOG_FLUSHING;
|
||||
}
|
||||
|
||||
postCurrentThreadFCT(TLD);
|
||||
|
||||
// At this point, we'll create the file that will contain the profile, but
|
||||
// only if the options say so.
|
||||
if (!profilingFlags()->no_flush) {
|
||||
@ -205,19 +150,33 @@ XRayLogFlushStatus profilingFlush() XRAY_NEVER_INSTRUMENT {
|
||||
}
|
||||
}
|
||||
|
||||
// Clean up the current thread's TLD information as well.
|
||||
cleanupTLD();
|
||||
|
||||
profileCollectorService::reset();
|
||||
|
||||
atomic_store(&ProfilerLogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED,
|
||||
memory_order_release);
|
||||
// Flush the current thread's local data structures as well.
|
||||
cleanupTLD();
|
||||
|
||||
atomic_store(&ProfilerLogStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED,
|
||||
memory_order_release);
|
||||
|
||||
return XRayLogFlushStatus::XRAY_LOG_FLUSHED;
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
thread_local atomic_uint8_t ReentranceGuard{0};
|
||||
|
||||
static void postCurrentThreadFCT(ProfilingData &TLD) XRAY_NEVER_INSTRUMENT {
|
||||
if (TLD.Allocators == nullptr || TLD.FCT == nullptr)
|
||||
return;
|
||||
|
||||
if (!TLD.FCT->getRoots().empty())
|
||||
profileCollectorService::post(*TLD.FCT, GetTid());
|
||||
|
||||
cleanupTLD();
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
void profilingHandleArg0(int32_t FuncId,
|
||||
XRayEntryType Entry) XRAY_NEVER_INSTRUMENT {
|
||||
unsigned char CPU;
|
||||
@ -227,29 +186,22 @@ void profilingHandleArg0(int32_t FuncId,
|
||||
return;
|
||||
|
||||
auto Status = atomic_load(&ProfilerLogStatus, memory_order_acquire);
|
||||
if (UNLIKELY(Status == XRayLogInitStatus::XRAY_LOG_UNINITIALIZED ||
|
||||
Status == XRayLogInitStatus::XRAY_LOG_INITIALIZING))
|
||||
return;
|
||||
|
||||
if (UNLIKELY(Status == XRayLogInitStatus::XRAY_LOG_FINALIZED ||
|
||||
Status == XRayLogInitStatus::XRAY_LOG_FINALIZING)) {
|
||||
auto &TLD = getThreadLocalData();
|
||||
postCurrentThreadFCT(TLD);
|
||||
return;
|
||||
}
|
||||
|
||||
auto T = getThreadLocalData();
|
||||
if (T == nullptr)
|
||||
return;
|
||||
|
||||
auto FCT = reinterpret_cast<FunctionCallTrie *>(atomic_load_relaxed(&T->FCT));
|
||||
auto &TLD = getThreadLocalData();
|
||||
switch (Entry) {
|
||||
case XRayEntryType::ENTRY:
|
||||
case XRayEntryType::LOG_ARGS_ENTRY:
|
||||
FCT->enterFunction(FuncId, TSC, CPU);
|
||||
TLD.FCT->enterFunction(FuncId, TSC, CPU);
|
||||
break;
|
||||
case XRayEntryType::EXIT:
|
||||
case XRayEntryType::TAIL:
|
||||
FCT->exitFunction(FuncId, TSC, CPU);
|
||||
TLD.FCT->exitFunction(FuncId, TSC, CPU);
|
||||
break;
|
||||
default:
|
||||
// FIXME: Handle bugs.
|
||||
@ -275,14 +227,15 @@ XRayLogInitStatus profilingFinalize() XRAY_NEVER_INSTRUMENT {
|
||||
// Wait a grace period to allow threads to see that we're finalizing.
|
||||
SleepForMillis(profilingFlags()->grace_period_ms);
|
||||
|
||||
// If we for some reason are entering this function from an instrumented
|
||||
// handler, we bail out.
|
||||
RecursionGuard G(ReentranceGuard);
|
||||
if (!G)
|
||||
return static_cast<XRayLogInitStatus>(CurrentStatus);
|
||||
|
||||
// Post the current thread's data if we have any.
|
||||
postCurrentThreadFCT(TLD);
|
||||
// We also want to make sure that the current thread's data is cleaned up, if
|
||||
// we have any. We need to ensure that the call to postCurrentThreadFCT() is
|
||||
// guarded by our recursion guard.
|
||||
auto &TLD = getThreadLocalData();
|
||||
{
|
||||
RecursionGuard G(ReentranceGuard);
|
||||
if (G)
|
||||
postCurrentThreadFCT(TLD);
|
||||
}
|
||||
|
||||
// Then we force serialize the log data.
|
||||
profileCollectorService::serialize();
|
||||
@ -295,10 +248,6 @@ XRayLogInitStatus profilingFinalize() XRAY_NEVER_INSTRUMENT {
|
||||
XRayLogInitStatus
|
||||
profilingLoggingInit(UNUSED size_t BufferSize, UNUSED size_t BufferMax,
|
||||
void *Options, size_t OptionsSize) XRAY_NEVER_INSTRUMENT {
|
||||
RecursionGuard G(ReentranceGuard);
|
||||
if (!G)
|
||||
return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
|
||||
|
||||
s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
|
||||
if (!atomic_compare_exchange_strong(&ProfilerLogStatus, &CurrentStatus,
|
||||
XRayLogInitStatus::XRAY_LOG_INITIALIZING,
|
||||
@ -333,51 +282,39 @@ profilingLoggingInit(UNUSED size_t BufferSize, UNUSED size_t BufferMax,
|
||||
|
||||
// We need to set up the exit handlers.
|
||||
static pthread_once_t Once = PTHREAD_ONCE_INIT;
|
||||
pthread_once(
|
||||
&Once, +[] {
|
||||
pthread_key_create(
|
||||
&ProfilingKey, +[](void *P) XRAY_NEVER_INSTRUMENT {
|
||||
if (atomic_exchange(&ThreadExitingLatch, 1, memory_order_acq_rel))
|
||||
return;
|
||||
pthread_once(&Once, +[] {
|
||||
pthread_key_create(&ProfilingKey, +[](void *P) {
|
||||
// This is the thread-exit handler.
|
||||
auto &TLD = *reinterpret_cast<ProfilingData *>(P);
|
||||
if (TLD.Allocators == nullptr && TLD.FCT == nullptr)
|
||||
return;
|
||||
|
||||
if (P == nullptr)
|
||||
return;
|
||||
{
|
||||
// If we're somehow executing this while inside a non-reentrant-friendly
|
||||
// context, we skip attempting to post the current thread's data.
|
||||
RecursionGuard G(ReentranceGuard);
|
||||
if (G)
|
||||
postCurrentThreadFCT(TLD);
|
||||
}
|
||||
});
|
||||
|
||||
auto T = reinterpret_cast<ProfilingData *>(P);
|
||||
if (atomic_load_relaxed(&T->Allocators) == 0)
|
||||
return;
|
||||
|
||||
{
|
||||
// If we're somehow executing this while inside a
|
||||
// non-reentrant-friendly context, we skip attempting to post
|
||||
// the current thread's data.
|
||||
RecursionGuard G(ReentranceGuard);
|
||||
if (!G)
|
||||
return;
|
||||
|
||||
postCurrentThreadFCT(*T);
|
||||
}
|
||||
});
|
||||
|
||||
// We also need to set up an exit handler, so that we can get the
|
||||
// profile information at exit time. We use the C API to do this, to not
|
||||
// rely on C++ ABI functions for registering exit handlers.
|
||||
Atexit(+[]() XRAY_NEVER_INSTRUMENT {
|
||||
if (atomic_exchange(&ThreadExitingLatch, 1, memory_order_acq_rel))
|
||||
return;
|
||||
|
||||
auto Cleanup =
|
||||
at_scope_exit([]() XRAY_NEVER_INSTRUMENT { cleanupTLD(); });
|
||||
|
||||
// Finalize and flush.
|
||||
if (profilingFinalize() != XRAY_LOG_FINALIZED ||
|
||||
profilingFlush() != XRAY_LOG_FLUSHED)
|
||||
return;
|
||||
|
||||
if (Verbosity())
|
||||
Report("XRay Profile flushed at exit.");
|
||||
});
|
||||
});
|
||||
// We also need to set up an exit handler, so that we can get the profile
|
||||
// information at exit time. We use the C API to do this, to not rely on C++
|
||||
// ABI functions for registering exit handlers.
|
||||
Atexit(+[] {
|
||||
// Finalize and flush.
|
||||
if (profilingFinalize() != XRAY_LOG_FINALIZED) {
|
||||
cleanupTLD();
|
||||
return;
|
||||
}
|
||||
if (profilingFlush() != XRAY_LOG_FLUSHED) {
|
||||
cleanupTLD();
|
||||
return;
|
||||
}
|
||||
if (Verbosity())
|
||||
Report("XRay Profile flushed at exit.");
|
||||
});
|
||||
});
|
||||
|
||||
__xray_log_set_buffer_iterator(profileCollectorService::nextBuffer);
|
||||
__xray_set_handler(profilingHandleArg0);
|
||||
|
@ -32,9 +32,14 @@ namespace __xray {
|
||||
/// is destroyed. When an Array is destroyed, it will destroy elements in the
|
||||
/// backing store but will not free the memory.
|
||||
template <class T> class Array {
|
||||
struct Segment {
|
||||
Segment *Prev;
|
||||
Segment *Next;
|
||||
struct SegmentBase {
|
||||
SegmentBase *Prev;
|
||||
SegmentBase *Next;
|
||||
};
|
||||
|
||||
// We want each segment of the array to be cache-line aligned, and elements of
|
||||
// the array be offset from the beginning of the segment.
|
||||
struct Segment : SegmentBase {
|
||||
char Data[1];
|
||||
};
|
||||
|
||||
@ -57,35 +62,91 @@ public:
|
||||
// kCacheLineSize-multiple segments, minus the size of two pointers.
|
||||
//
|
||||
// - Request cacheline-multiple sized elements from the allocator.
|
||||
static constexpr uint64_t AlignedElementStorageSize =
|
||||
static constexpr size_t AlignedElementStorageSize =
|
||||
sizeof(typename std::aligned_storage<sizeof(T), alignof(T)>::type);
|
||||
|
||||
static constexpr uint64_t SegmentControlBlockSize = sizeof(Segment *) * 2;
|
||||
|
||||
static constexpr uint64_t SegmentSize = nearest_boundary(
|
||||
SegmentControlBlockSize + next_pow2(sizeof(T)), kCacheLineSize);
|
||||
static constexpr size_t SegmentSize =
|
||||
nearest_boundary(sizeof(Segment) + next_pow2(sizeof(T)), kCacheLineSize);
|
||||
|
||||
using AllocatorType = Allocator<SegmentSize>;
|
||||
|
||||
static constexpr uint64_t ElementsPerSegment =
|
||||
(SegmentSize - SegmentControlBlockSize) / next_pow2(sizeof(T));
|
||||
static constexpr size_t ElementsPerSegment =
|
||||
(SegmentSize - sizeof(Segment)) / next_pow2(sizeof(T));
|
||||
|
||||
static_assert(ElementsPerSegment > 0,
|
||||
"Must have at least 1 element per segment.");
|
||||
|
||||
static Segment SentinelSegment;
|
||||
static SegmentBase SentinelSegment;
|
||||
|
||||
using size_type = uint64_t;
|
||||
using size_type = size_t;
|
||||
|
||||
private:
|
||||
AllocatorType *Alloc;
|
||||
SegmentBase *Head = &SentinelSegment;
|
||||
SegmentBase *Tail = &SentinelSegment;
|
||||
size_t Size = 0;
|
||||
|
||||
// Here we keep track of segments in the freelist, to allow us to re-use
|
||||
// segments when elements are trimmed off the end.
|
||||
SegmentBase *Freelist = &SentinelSegment;
|
||||
|
||||
Segment *NewSegment() XRAY_NEVER_INSTRUMENT {
|
||||
// We need to handle the case in which enough elements have been trimmed to
|
||||
// allow us to re-use segments we've allocated before. For this we look into
|
||||
// the Freelist, to see whether we need to actually allocate new blocks or
|
||||
// just re-use blocks we've already seen before.
|
||||
if (Freelist != &SentinelSegment) {
|
||||
auto *FreeSegment = Freelist;
|
||||
Freelist = FreeSegment->Next;
|
||||
FreeSegment->Next = &SentinelSegment;
|
||||
Freelist->Prev = &SentinelSegment;
|
||||
return static_cast<Segment *>(FreeSegment);
|
||||
}
|
||||
|
||||
auto SegmentBlock = Alloc->Allocate();
|
||||
if (SegmentBlock.Data == nullptr)
|
||||
return nullptr;
|
||||
|
||||
// Placement-new the Segment element at the beginning of the SegmentBlock.
|
||||
auto S = reinterpret_cast<Segment *>(SegmentBlock.Data);
|
||||
new (S) SegmentBase{&SentinelSegment, &SentinelSegment};
|
||||
return S;
|
||||
}
|
||||
|
||||
Segment *InitHeadAndTail() XRAY_NEVER_INSTRUMENT {
|
||||
DCHECK_EQ(Head, &SentinelSegment);
|
||||
DCHECK_EQ(Tail, &SentinelSegment);
|
||||
auto Segment = NewSegment();
|
||||
if (Segment == nullptr)
|
||||
return nullptr;
|
||||
DCHECK_EQ(Segment->Next, &SentinelSegment);
|
||||
DCHECK_EQ(Segment->Prev, &SentinelSegment);
|
||||
Head = Tail = static_cast<SegmentBase *>(Segment);
|
||||
return Segment;
|
||||
}
|
||||
|
||||
Segment *AppendNewSegment() XRAY_NEVER_INSTRUMENT {
|
||||
auto S = NewSegment();
|
||||
if (S == nullptr)
|
||||
return nullptr;
|
||||
DCHECK_NE(Tail, &SentinelSegment);
|
||||
DCHECK_EQ(Tail->Next, &SentinelSegment);
|
||||
DCHECK_EQ(S->Prev, &SentinelSegment);
|
||||
DCHECK_EQ(S->Next, &SentinelSegment);
|
||||
Tail->Next = S;
|
||||
S->Prev = Tail;
|
||||
Tail = S;
|
||||
return static_cast<Segment *>(Tail);
|
||||
}
|
||||
|
||||
// This Iterator models a BidirectionalIterator.
|
||||
template <class U> class Iterator {
|
||||
Segment *S = &SentinelSegment;
|
||||
uint64_t Offset = 0;
|
||||
uint64_t Size = 0;
|
||||
SegmentBase *S = &SentinelSegment;
|
||||
size_t Offset = 0;
|
||||
size_t Size = 0;
|
||||
|
||||
public:
|
||||
Iterator(Segment *IS, uint64_t Off, uint64_t S) XRAY_NEVER_INSTRUMENT
|
||||
Iterator(SegmentBase *IS, size_t Off, size_t S) XRAY_NEVER_INSTRUMENT
|
||||
: S(IS),
|
||||
Offset(Off),
|
||||
Size(S) {}
|
||||
@ -154,7 +215,7 @@ private:
|
||||
|
||||
// We need to compute the character-aligned pointer, offset from the
|
||||
// segment's Data location to get the element in the position of Offset.
|
||||
auto Base = &S->Data;
|
||||
auto Base = static_cast<Segment *>(S)->Data;
|
||||
auto AlignedOffset = Base + (RelOff * AlignedElementStorageSize);
|
||||
return *reinterpret_cast<U *>(AlignedOffset);
|
||||
}
|
||||
@ -162,183 +223,17 @@ private:
|
||||
U *operator->() const XRAY_NEVER_INSTRUMENT { return &(**this); }
|
||||
};
|
||||
|
||||
AllocatorType *Alloc;
|
||||
Segment *Head;
|
||||
Segment *Tail;
|
||||
|
||||
// Here we keep track of segments in the freelist, to allow us to re-use
|
||||
// segments when elements are trimmed off the end.
|
||||
Segment *Freelist;
|
||||
uint64_t Size;
|
||||
|
||||
// ===============================
|
||||
// In the following implementation, we work through the algorithms and the
|
||||
// list operations using the following notation:
|
||||
//
|
||||
// - pred(s) is the predecessor (previous node accessor) and succ(s) is
|
||||
// the successor (next node accessor).
|
||||
//
|
||||
// - S is a sentinel segment, which has the following property:
|
||||
//
|
||||
// pred(S) == succ(S) == S
|
||||
//
|
||||
// - @ is a loop operator, which can imply pred(s) == s if it appears on
|
||||
// the left of s, or succ(s) == S if it appears on the right of s.
|
||||
//
|
||||
// - sL <-> sR : means a bidirectional relation between sL and sR, which
|
||||
// means:
|
||||
//
|
||||
// succ(sL) == sR && pred(SR) == sL
|
||||
//
|
||||
// - sL -> sR : implies a unidirectional relation between sL and SR,
|
||||
// with the following properties:
|
||||
//
|
||||
// succ(sL) == sR
|
||||
//
|
||||
// sL <- sR : implies a unidirectional relation between sR and sL,
|
||||
// with the following properties:
|
||||
//
|
||||
// pred(sR) == sL
|
||||
//
|
||||
// ===============================

  Segment *NewSegment() XRAY_NEVER_INSTRUMENT {
    // We need to handle the case in which enough elements have been trimmed to
    // allow us to re-use segments we've allocated before. For this we look into
    // the Freelist, to see whether we need to actually allocate new blocks or
    // just re-use blocks we've already seen before.
    if (Freelist != &SentinelSegment) {
      // The current state of lists resemble something like this at this point:
      //
      //   Freelist: @S@<-f0->...<->fN->@S@
      //                 ^ Freelist
      //
      // We want to perform a splice of `f0` from Freelist to a temporary list,
      // which looks like:
      //
      //   Templist: @S@<-f0->@S@
      //                 ^ FreeSegment
      //
      // Our algorithm preconditions are:
      DCHECK_EQ(Freelist->Prev, &SentinelSegment);

      // Then the algorithm we implement is:
      //
      //   SFS = Freelist
      //   Freelist = succ(Freelist)
      //   if (Freelist != S)
      //     pred(Freelist) = S
      //   succ(SFS) = S
      //   pred(SFS) = S
      //
      auto *FreeSegment = Freelist;
      Freelist = Freelist->Next;

      // Note that we need to handle the case where Freelist is now pointing to
      // S, which we don't want to be overwriting.
      // TODO: Determine whether the cost of the branch is higher than the cost
      // of the blind assignment.
      if (Freelist != &SentinelSegment)
        Freelist->Prev = &SentinelSegment;

      FreeSegment->Next = &SentinelSegment;
      FreeSegment->Prev = &SentinelSegment;

      // Our postconditions are:
      DCHECK_EQ(Freelist->Prev, &SentinelSegment);
      DCHECK_NE(FreeSegment, &SentinelSegment);
      return FreeSegment;
    }

    auto SegmentBlock = Alloc->Allocate();
    if (SegmentBlock.Data == nullptr)
      return nullptr;

    // Placement-new the Segment element at the beginning of the SegmentBlock.
    new (SegmentBlock.Data) Segment{&SentinelSegment, &SentinelSegment, {0}};
    auto SB = reinterpret_cast<Segment *>(SegmentBlock.Data);
    return SB;
  }

  Segment *InitHeadAndTail() XRAY_NEVER_INSTRUMENT {
    DCHECK_EQ(Head, &SentinelSegment);
    DCHECK_EQ(Tail, &SentinelSegment);
    auto S = NewSegment();
    if (S == nullptr)
      return nullptr;
    DCHECK_EQ(S->Next, &SentinelSegment);
    DCHECK_EQ(S->Prev, &SentinelSegment);
    DCHECK_NE(S, &SentinelSegment);
    Head = S;
    Tail = S;
    DCHECK_EQ(Head, Tail);
    DCHECK_EQ(Tail->Next, &SentinelSegment);
    DCHECK_EQ(Tail->Prev, &SentinelSegment);
    return S;
  }

  Segment *AppendNewSegment() XRAY_NEVER_INSTRUMENT {
    auto S = NewSegment();
    if (S == nullptr)
      return nullptr;
    DCHECK_NE(Tail, &SentinelSegment);
    DCHECK_EQ(Tail->Next, &SentinelSegment);
    DCHECK_EQ(S->Prev, &SentinelSegment);
    DCHECK_EQ(S->Next, &SentinelSegment);
    S->Prev = Tail;
    Tail->Next = S;
    Tail = S;
    DCHECK_EQ(S, S->Prev->Next);
    DCHECK_EQ(Tail->Next, &SentinelSegment);
    return S;
  }

public:
  explicit Array(AllocatorType &A) XRAY_NEVER_INSTRUMENT
      : Alloc(&A),
        Head(&SentinelSegment),
        Tail(&SentinelSegment),
        Freelist(&SentinelSegment),
        Size(0) {}

  Array() XRAY_NEVER_INSTRUMENT : Alloc(nullptr),
                                  Head(&SentinelSegment),
                                  Tail(&SentinelSegment),
                                  Freelist(&SentinelSegment),
                                  Size(0) {}
  explicit Array(AllocatorType &A) XRAY_NEVER_INSTRUMENT : Alloc(&A) {}

  Array(const Array &) = delete;
  Array &operator=(const Array &) = delete;

  Array(Array &&O) XRAY_NEVER_INSTRUMENT : Alloc(O.Alloc),
                                           Head(O.Head),
                                           Tail(O.Tail),
                                           Freelist(O.Freelist),
                                           Size(O.Size) {
    O.Alloc = nullptr;
  Array(Array &&O) NOEXCEPT : Alloc(O.Alloc),
                              Head(O.Head),
                              Tail(O.Tail),
                              Size(O.Size) {
    O.Head = &SentinelSegment;
    O.Tail = &SentinelSegment;
    O.Size = 0;
    O.Freelist = &SentinelSegment;
  }

  Array &operator=(Array &&O) XRAY_NEVER_INSTRUMENT {
    Alloc = O.Alloc;
    O.Alloc = nullptr;
    Head = O.Head;
    O.Head = &SentinelSegment;
    Tail = O.Tail;
    O.Tail = &SentinelSegment;
    Freelist = O.Freelist;
    O.Freelist = &SentinelSegment;
    Size = O.Size;
    O.Size = 0;
    return *this;
  }

  ~Array() XRAY_NEVER_INSTRUMENT {
    for (auto &E : *this)
      (&E)->~T();
  }

  bool empty() const XRAY_NEVER_INSTRUMENT { return Size == 0; }
@@ -348,41 +243,52 @@ public:
    return *Alloc;
  }

  uint64_t size() const XRAY_NEVER_INSTRUMENT { return Size; }
  size_t size() const XRAY_NEVER_INSTRUMENT { return Size; }

  template <class... Args>
  T *AppendEmplace(Args &&... args) XRAY_NEVER_INSTRUMENT {
    DCHECK((Size == 0 && Head == &SentinelSegment && Head == Tail) ||
           (Size != 0 && Head != &SentinelSegment && Tail != &SentinelSegment));
    if (UNLIKELY(Head == &SentinelSegment)) {
      auto R = InitHeadAndTail();
      if (R == nullptr)
  T *Append(const T &E) XRAY_NEVER_INSTRUMENT {
    if (UNLIKELY(Head == &SentinelSegment))
      if (InitHeadAndTail() == nullptr)
        return nullptr;
    }

    DCHECK_NE(Head, &SentinelSegment);
    DCHECK_NE(Tail, &SentinelSegment);

    auto Offset = Size % ElementsPerSegment;
    if (UNLIKELY(Size != 0 && Offset == 0))
      if (AppendNewSegment() == nullptr)
        return nullptr;

    DCHECK_NE(Tail, &SentinelSegment);
    auto Base = &Tail->Data;
    auto Base = static_cast<Segment *>(Tail)->Data;
    auto AlignedOffset = Base + (Offset * AlignedElementStorageSize);
    DCHECK_LE(AlignedOffset + sizeof(T),
              reinterpret_cast<unsigned char *>(Tail) + SegmentSize);

    // In-place construct at Position.
    new (AlignedOffset) T{std::forward<Args>(args)...};
    auto Position = reinterpret_cast<T *>(AlignedOffset);
    *Position = E;
    ++Size;
    return reinterpret_cast<T *>(AlignedOffset);
    return Position;
  }

  T *Append(const T &E) XRAY_NEVER_INSTRUMENT { return AppendEmplace(E); }
  template <class... Args>
  T *AppendEmplace(Args &&... args) XRAY_NEVER_INSTRUMENT {
    if (UNLIKELY(Head == &SentinelSegment))
      if (InitHeadAndTail() == nullptr)
        return nullptr;

  T &operator[](uint64_t Offset) const XRAY_NEVER_INSTRUMENT {
    auto Offset = Size % ElementsPerSegment;
    auto *LatestSegment = Tail;
    if (UNLIKELY(Size != 0 && Offset == 0)) {
      LatestSegment = AppendNewSegment();
      if (LatestSegment == nullptr)
        return nullptr;
    }

    DCHECK_NE(Tail, &SentinelSegment);
    auto Base = static_cast<Segment *>(LatestSegment)->Data;
    auto AlignedOffset = Base + (Offset * AlignedElementStorageSize);
    auto Position = reinterpret_cast<T *>(AlignedOffset);

    // In-place construct at Position.
    new (Position) T{std::forward<Args>(args)...};
    ++Size;
    return reinterpret_cast<T *>(Position);
  }

  T &operator[](size_t Offset) const XRAY_NEVER_INSTRUMENT {
    DCHECK_LE(Offset, Size);
    // We need to traverse the array enough times to find the element at Offset.
    auto S = Head;
@@ -391,7 +297,7 @@ public:
      Offset -= ElementsPerSegment;
      DCHECK_NE(S, &SentinelSegment);
    }
    auto Base = &S->Data;
    auto Base = static_cast<Segment *>(S)->Data;
    auto AlignedOffset = Base + (Offset * AlignedElementStorageSize);
    auto Position = reinterpret_cast<T *>(AlignedOffset);
    return *reinterpret_cast<T *>(Position);
@@ -426,172 +332,41 @@ public:

  /// Remove N Elements from the end. This leaves the blocks behind, and does
  /// not require allocation of new blocks for new elements added after
  /// trimming.
  void trim(uint64_t Elements) XRAY_NEVER_INSTRUMENT {
  void trim(size_t Elements) XRAY_NEVER_INSTRUMENT {
    if (Elements == 0)
      return;

    auto OldSize = Size;
    Elements = Elements > Size ? Size : Elements;
    Elements = Elements >= Size ? Size : Elements;
    Size -= Elements;

    // We compute the number of segments we're going to return from the tail by
    // counting how many elements have been trimmed. Given the following:
    //
    //   - Each segment has N valid positions, where N > 0
    //   - The previous size > current size
    //
    // To compute the number of segments to return, we need to perform the
    // following calculations for the number of segments required given 'x'
    // elements:
    //
    //   f(x) = {
    //       x == 0       : 0
    //     , 0 < x <= N   : 1
    //     , N < x <= max : x / N + (x % N ? 1 : 0)
    //   }
    //
    // We can simplify this down to:
    //
    //   f(x) = {
    //       x == 0       : 0,
    //     , 0 < x <= max : x / N + (x < N || x % N ? 1 : 0)
    //   }
    //
    // And further down to:
    //
    //   f(x) = x ? x / N + (x < N || x % N ? 1 : 0) : 0
    //
    // We can then perform the following calculation `s` which counts the number
    // of segments we need to remove from the end of the data structure:
    //
    //   s(p, c) = f(p) - f(c)
    //
    // If we treat p = previous size, and c = current size, and given the
    // properties above, the possible range for s(...) is [0..max(typeof(p))/N]
    // given that typeof(p) == typeof(c).
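    //
    // For example, with N == 4 elements per segment, trimming from a previous
    // size p == 10 down to a current size c == 3 gives
    //
    //   f(10) == 10 / 4 + 1 == 3,  f(3) == 0 + 1 == 1,  s(10, 3) == 3 - 1 == 2
    //
    // so two segments are returned to the freelist. Assuming nearest_boundary
    // rounds its first argument up to the next multiple of its second, the
    // rounding-up form used below,
    //
    //   (nearest_boundary(10, 4) - nearest_boundary(3, 4)) / 4 == (12 - 4) / 4
    //
    // yields the same count.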
    auto F = [](uint64_t X) {
      return X ? (X / ElementsPerSegment) +
                     (X < ElementsPerSegment || X % ElementsPerSegment ? 1 : 0)
               : 0;
    };
    auto PS = F(OldSize);
    auto CS = F(Size);
    DCHECK_GE(PS, CS);
    auto SegmentsToTrim = PS - CS;
    for (auto I = 0uL; I < SegmentsToTrim; ++I) {
      // Here we place the current tail segment to the freelist. To do this
      // appropriately, we need to perform a splice operation on two
      // bidirectional linked-lists. In particular, we have the current state of
      // the doubly-linked list of segments:
      //
      //   @S@ <- s0 <-> s1 <-> ... <-> sT -> @S@
      //
      DCHECK_NE(Head, &SentinelSegment);
      DCHECK_NE(Tail, &SentinelSegment);
      DCHECK_NE(Head, &SentinelSegment);
      DCHECK_NE(Tail, &SentinelSegment);

    for (auto SegmentsToTrim = (nearest_boundary(OldSize, ElementsPerSegment) -
                                nearest_boundary(Size, ElementsPerSegment)) /
                               ElementsPerSegment;
         SegmentsToTrim > 0; --SegmentsToTrim) {

      // We want to short-circuit if the trace is already empty.
      if (Head == &SentinelSegment && Head == Tail)
        return;

      // Put the tail into the Freelist.
      auto *FreeSegment = Tail;
      Tail = Tail->Prev;
      if (Tail == &SentinelSegment)
        Head = Tail;
      else
        Tail->Next = &SentinelSegment;

      DCHECK_EQ(Tail->Next, &SentinelSegment);

      if (Freelist == &SentinelSegment) {
        // Our two lists at this point are in this configuration:
        //
        //   Freelist: (potentially) @S@
        //   Mainlist: @S@<-s0<->s1<->...<->sPT<->sT->@S@
        //                  ^ Head                ^ Tail
        //
        // The end state for us will be this configuration:
        //
        //   Freelist: @S@<-sT->@S@
        //   Mainlist: @S@<-s0<->s1<->...<->sPT->@S@
        //                  ^ Head           ^ Tail
        //
        // The first step for us is to hold a reference to the tail of Mainlist,
        // which in our notation is represented by sT. We call this our "free
        // segment" which is the segment we are placing on the Freelist.
        //
        //   sF = sT
        //
        // Then, we also hold a reference to the "pre-tail" element, which we
        // call sPT:
        //
        //   sPT = pred(sT)
        //
        // We want to splice sT into the beginning of the Freelist, which in
        // an empty Freelist means placing a segment whose predecessor and
        // successor is the sentinel segment.
        //
        // The splice operation then can be performed in the following
        // algorithm:
        //
        //   succ(sPT) = S
        //   pred(sT) = S
        //   succ(sT) = Freelist
        //   Freelist = sT
        //   Tail = sPT
        //
        auto SPT = Tail->Prev;
        SPT->Next = &SentinelSegment;
        Tail->Prev = &SentinelSegment;
        Tail->Next = Freelist;
        Freelist = Tail;
        Tail = SPT;

        // Our post-conditions here are:
        DCHECK_EQ(Tail->Next, &SentinelSegment);
        DCHECK_EQ(Freelist->Prev, &SentinelSegment);
      } else {
        // In the other case, where the Freelist is not empty, we perform the
        // following transformation instead:
        //
        // This transforms the current state:
        //
        //   Freelist: @S@<-f0->@S@
        //                 ^ Freelist
        //   Mainlist: @S@<-s0<->s1<->...<->sPT<->sT->@S@
        //                  ^ Head                ^ Tail
        //
        // Into the following:
        //
        //   Freelist: @S@<-sT<->f0->@S@
        //                 ^ Freelist
        //   Mainlist: @S@<-s0<->s1<->...<->sPT->@S@
        //                  ^ Head           ^ Tail
        //
        // The algorithm is:
        //
        //   sFH = Freelist
        //   sPT = pred(sT)
        //   pred(sFH) = sT
        //   succ(sT) = Freelist
        //   pred(sT) = S
        //   succ(sPT) = S
        //   Tail = sPT
        //   Freelist = sT
        //
        auto SFH = Freelist;
        auto SPT = Tail->Prev;
        auto ST = Tail;
        SFH->Prev = ST;
        ST->Next = Freelist;
        ST->Prev = &SentinelSegment;
        SPT->Next = &SentinelSegment;
        Tail = SPT;
        Freelist = ST;

        // Our post-conditions here are:
        DCHECK_EQ(Tail->Next, &SentinelSegment);
        DCHECK_EQ(Freelist->Prev, &SentinelSegment);
        DCHECK_EQ(Freelist->Next->Prev, Freelist);
      }
      FreeSegment->Next = Freelist;
      FreeSegment->Prev = &SentinelSegment;
      if (Freelist != &SentinelSegment)
        Freelist->Prev = FreeSegment;
      Freelist = FreeSegment;
    }

    // Now in case we've spliced all the segments in the end, we ensure that the
    // main list is "empty", or both the head and tail pointing to the sentinel
    // segment.
    if (Tail == &SentinelSegment)
      Head = Tail;

    DCHECK(
        (Size == 0 && Head == &SentinelSegment && Tail == &SentinelSegment) ||
        (Size != 0 && Head != &SentinelSegment && Tail != &SentinelSegment));
    DCHECK(
        (Freelist != &SentinelSegment && Freelist->Prev == &SentinelSegment) ||
        (Freelist == &SentinelSegment && Tail->Next == &SentinelSegment));
  }
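
  // A minimal usage sketch of the append/trim cycle above; illustrative only,
  // assuming AllocatorType can be constructed from a maximum byte budget:
  //
  //   Array<int>::AllocatorType A(1 << 20);
  //   Array<int> Ints(A);
  //   for (int i = 0; i < 100; ++i)
  //     Ints.Append(i);
  //   Ints.trim(50);  // whole segments crossed by the trim land on the
  //                   // Freelist instead of going back to the allocator
  //   Ints.Append(7); // when a later append needs a fresh segment,
  //                   // NewSegment() takes it from the Freelist first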

  // Provide iterators.
@@ -613,8 +388,8 @@ public:
// ensure that storage for the SentinelSegment is defined and has a single
// address.
template <class T>
typename Array<T>::Segment Array<T>::SentinelSegment{
    &Array<T>::SentinelSegment, &Array<T>::SentinelSegment, {'\0'}};
typename Array<T>::SegmentBase Array<T>::SentinelSegment{
    &Array<T>::SentinelSegment, &Array<T>::SentinelSegment};

} // namespace __xray