mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-12-18 21:38:10 +00:00
Cache uwnind frame headers as they are found.
Summary: This improves unwind performance quite substantially, and follows a somewhat similar approach used in libgcc_s as described in the thread here: https://gcc.gnu.org/ml/gcc/2005-02/msg00625.html On certain extremely exception heavy internal tests, the time drops from about 80 minutes to about five minutes. Subscribers: libcxx-commits Tags: #libc Differential Revision: https://reviews.llvm.org/D75954
This commit is contained in:
parent
30804d0a3f
commit
c53c2058ff
@ -452,6 +452,11 @@ struct _LIBUNWIND_HIDDEN dl_iterate_cb_data {
|
||||
#error "_LIBUNWIND_SUPPORT_DWARF_UNWIND requires _LIBUNWIND_SUPPORT_DWARF_INDEX on this platform."
|
||||
#endif
|
||||
|
||||
#include "FrameHeaderCache.hpp"
|
||||
|
||||
// There should be just one of these per process.
|
||||
static FrameHeaderCache ProcessFrameHeaderCache;
|
||||
|
||||
static bool checkAddrInSegment(const Elf_Phdr *phdr, size_t image_base,
|
||||
dl_iterate_cb_data *cbdata) {
|
||||
if (phdr->p_type == PT_LOAD) {
|
||||
@ -466,10 +471,13 @@ static bool checkAddrInSegment(const Elf_Phdr *phdr, size_t image_base,
|
||||
return false;
|
||||
}
|
||||
|
||||
int findUnwindSectionsByPhdr(struct dl_phdr_info *pinfo, size_t, void *data) {
|
||||
int findUnwindSectionsByPhdr(struct dl_phdr_info *pinfo, size_t pinfo_size,
|
||||
void *data) {
|
||||
auto cbdata = static_cast<dl_iterate_cb_data *>(data);
|
||||
if (pinfo->dlpi_phnum == 0 || cbdata->targetAddr < pinfo->dlpi_addr)
|
||||
return 0;
|
||||
if (ProcessFrameHeaderCache.find(pinfo, pinfo_size, data))
|
||||
return 1;
|
||||
|
||||
Elf_Addr image_base = calculateImageBase(pinfo);
|
||||
bool found_obj = false;
|
||||
@ -496,8 +504,10 @@ int findUnwindSectionsByPhdr(struct dl_phdr_info *pinfo, size_t, void *data) {
|
||||
} else if (!found_obj) {
|
||||
found_obj = checkAddrInSegment(phdr, image_base, cbdata);
|
||||
}
|
||||
if (found_obj && found_hdr)
|
||||
if (found_obj && found_hdr) {
|
||||
ProcessFrameHeaderCache.add(cbdata->sects);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
cbdata->sects->dwarf_section_length = 0;
|
||||
return 0;
|
||||
|
149
libunwind/src/FrameHeaderCache.hpp
Normal file
149
libunwind/src/FrameHeaderCache.hpp
Normal file
@ -0,0 +1,149 @@
|
||||
//===-FrameHeaderCache.hpp ------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
// Cache the elf program headers necessary to unwind the stack more efficiently
|
||||
// in the presence of many dsos.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef __FRAMEHEADER_CACHE_HPP__
|
||||
#define __FRAMEHEADER_CACHE_HPP__
|
||||
|
||||
#include "config.h"
|
||||
#include <limits.h>
|
||||
|
||||
#ifdef _LIBUNWIND_DEBUG_FRAMEHEADER_CACHE
|
||||
#define _LIBUNWIND_FRAMEHEADERCACHE_TRACE0(x) _LIBUNWIND_LOG0(x)
|
||||
#define _LIBUNWIND_FRAMEHEADERCACHE_TRACE(msg, ...) \
|
||||
_LIBUNWIND_LOG(msg, __VA_ARGS__)
|
||||
#else
|
||||
#define _LIBUNWIND_FRAMEHEADERCACHE_TRACE0(x)
|
||||
#define _LIBUNWIND_FRAMEHEADERCACHE_TRACE(msg, ...)
|
||||
#endif
|
||||
|
||||
// This cache should only be be used from within a dl_iterate_phdr callback.
|
||||
// dl_iterate_phdr does the necessary synchronization to prevent problems
|
||||
// with concurrent access via the libc load lock. Adding synchronization
|
||||
// for other uses is possible, but not currently done.
|
||||
|
||||
class _LIBUNWIND_HIDDEN FrameHeaderCache {
|
||||
struct CacheEntry {
|
||||
uintptr_t LowPC() { return Info.dso_base; };
|
||||
uintptr_t HighPC() { return Info.dso_base + Info.dwarf_section_length; };
|
||||
UnwindInfoSections Info;
|
||||
CacheEntry *Next;
|
||||
};
|
||||
|
||||
static const size_t kCacheEntryCount = 8;
|
||||
|
||||
// Can't depend on the C++ standard library in libunwind, so use an array to
|
||||
// allocate the entries, and two linked lists for ordering unused and recently
|
||||
// used entries. FIXME: Would the the extra memory for a doubly-linked list
|
||||
// be better than the runtime cost of traversing a very short singly-linked
|
||||
// list on a cache miss? The entries themselves are all small and consecutive,
|
||||
// so unlikely to cause page faults when following the pointers. The memory
|
||||
// spent on additional pointers could also be spent on more entries.
|
||||
|
||||
CacheEntry Entries[kCacheEntryCount];
|
||||
CacheEntry *MostRecentlyUsed;
|
||||
CacheEntry *Unused;
|
||||
|
||||
void resetCache() {
|
||||
_LIBUNWIND_FRAMEHEADERCACHE_TRACE0("FrameHeaderCache reset");
|
||||
MostRecentlyUsed = nullptr;
|
||||
Unused = &Entries[0];
|
||||
for (size_t i = 0; i < kCacheEntryCount - 1; i++) {
|
||||
Entries[i].Next = &Entries[i + 1];
|
||||
}
|
||||
Entries[kCacheEntryCount - 1].Next = nullptr;
|
||||
}
|
||||
|
||||
bool cacheNeedsReset(dl_phdr_info *PInfo) {
|
||||
// C libraries increment dl_phdr_info.adds and dl_phdr_info.subs when
|
||||
// loading and unloading shared libraries. If these values change between
|
||||
// iterations of dl_iterate_phdr, then invalidate the cache.
|
||||
|
||||
// These are static to avoid needing an initializer, and unsigned long long
|
||||
// because that is their type within the extended dl_phdr_info. Initialize
|
||||
// these to something extremely unlikely to be found upon the first call to
|
||||
// dl_iterate_phdr.
|
||||
static unsigned long long LastAdds = ULLONG_MAX;
|
||||
static unsigned long long LastSubs = ULLONG_MAX;
|
||||
if (PInfo->dlpi_adds != LastAdds || PInfo->dlpi_subs != LastSubs) {
|
||||
// Resetting the entire cache is a big hammer, but this path is rare--
|
||||
// usually just on the very first call, when the cache is empty anyway--so
|
||||
// added complexity doesn't buy much.
|
||||
LastAdds = PInfo->dlpi_adds;
|
||||
LastSubs = PInfo->dlpi_subs;
|
||||
resetCache();
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
public:
|
||||
bool find(dl_phdr_info *PInfo, size_t, void *data) {
|
||||
if (cacheNeedsReset(PInfo) || MostRecentlyUsed == nullptr)
|
||||
return false;
|
||||
|
||||
auto *CBData = static_cast<dl_iterate_cb_data *>(data);
|
||||
CacheEntry *Current = MostRecentlyUsed;
|
||||
CacheEntry *Previous = nullptr;
|
||||
while (Current != nullptr) {
|
||||
_LIBUNWIND_FRAMEHEADERCACHE_TRACE(
|
||||
"FrameHeaderCache check %lx in [%lx - %lx)", CBData->targetAddr,
|
||||
Current->LowPC(), Current->HighPC());
|
||||
if (Current->LowPC() <= CBData->targetAddr &&
|
||||
CBData->targetAddr < Current->HighPC()) {
|
||||
_LIBUNWIND_FRAMEHEADERCACHE_TRACE(
|
||||
"FrameHeaderCache hit %lx in [%lx - %lx)", CBData->targetAddr,
|
||||
Current->LowPC(), Current->HighPC());
|
||||
if (Previous) {
|
||||
// If there is no Previous, then Current is already the
|
||||
// MostRecentlyUsed, and no need to move it up.
|
||||
Previous->Next = Current->Next;
|
||||
Current->Next = MostRecentlyUsed;
|
||||
MostRecentlyUsed = Current;
|
||||
}
|
||||
*CBData->sects = Current->Info;
|
||||
return true;
|
||||
}
|
||||
Previous = Current;
|
||||
Current = Current->Next;
|
||||
}
|
||||
_LIBUNWIND_FRAMEHEADERCACHE_TRACE("FrameHeaderCache miss for address %lx",
|
||||
CBData->targetAddr);
|
||||
return false;
|
||||
}
|
||||
|
||||
void add(const UnwindInfoSections *UIS) {
|
||||
CacheEntry *Current = nullptr;
|
||||
|
||||
if (Unused != nullptr) {
|
||||
Current = Unused;
|
||||
Unused = Unused->Next;
|
||||
} else {
|
||||
Current = MostRecentlyUsed;
|
||||
CacheEntry *Previous = nullptr;
|
||||
while (Current->Next != nullptr) {
|
||||
Previous = Current;
|
||||
Current = Current->Next;
|
||||
}
|
||||
Previous->Next = nullptr;
|
||||
_LIBUNWIND_FRAMEHEADERCACHE_TRACE("FrameHeaderCache evict [%lx - %lx)",
|
||||
Current->LowPC(), Current->HighPC());
|
||||
}
|
||||
|
||||
Current->Info = *UIS;
|
||||
Current->Next = MostRecentlyUsed;
|
||||
MostRecentlyUsed = Current;
|
||||
_LIBUNWIND_FRAMEHEADERCACHE_TRACE("FrameHeaderCache add [%lx - %lx)",
|
||||
MostRecentlyUsed->LowPC(),
|
||||
MostRecentlyUsed->HighPC());
|
||||
}
|
||||
};
|
||||
|
||||
#endif // __FRAMEHEADER_CACHE_HPP__
|
82
libunwind/test/frameheadercache_test.pass.cpp
Normal file
82
libunwind/test/frameheadercache_test.pass.cpp
Normal file
@ -0,0 +1,82 @@
|
||||
// The other libunwind tests don't test internal interfaces, so the include path
|
||||
// is a little wonky.
|
||||
#include "../src/config.h"
|
||||
|
||||
// Only run this test under supported configurations.
|
||||
// This #if chain is ugly, but see the comments in AddressSpace.hpp for
|
||||
// the reasoning.
|
||||
|
||||
#ifdef __APPLE__
|
||||
int main() { return 0; }
|
||||
#elif defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) && defined(_LIBUNWIND_IS_BAREMETAL)
|
||||
int main() { return 0; }
|
||||
#elif defined(_LIBUNWIND_ARM_EHABI) && defined(_LIBUNWIND_IS_BAREMETAL)
|
||||
int main() { return 0; }
|
||||
#elif defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) && defined(_WIN32)
|
||||
int main() { return 0; }
|
||||
#elif defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) && defined(_WIN32)
|
||||
int main() { return 0; }
|
||||
#elif defined(_LIBUNWIND_ARM_EHABI) && defined(__BIONIC__)
|
||||
int main() { return 0; }
|
||||
#elif defined(_LIBUNWIND_ARM_EHABI) || defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND)
|
||||
|
||||
#include <link.h>
|
||||
#include <stdio.h>
|
||||
|
||||
// This file defines several of the data structures needed here,
|
||||
// and includes FrameHeaderCache.hpp as well.
|
||||
#include "../src/AddressSpace.hpp"
|
||||
|
||||
#define kBaseAddr 0xFFF000
|
||||
#define kDwarfSectionLength 0xFF
|
||||
|
||||
using namespace libunwind;
|
||||
|
||||
int main() {
|
||||
FrameHeaderCache FHC;
|
||||
struct dl_phdr_info PInfo;
|
||||
memset(&PInfo, 0, sizeof(PInfo));
|
||||
// The cache itself should only care about these two fields--they
|
||||
// tell the cache to invalidate or not; everything else is handled
|
||||
// by AddressSpace.hpp.
|
||||
PInfo.dlpi_adds = 6;
|
||||
PInfo.dlpi_subs = 7;
|
||||
|
||||
UnwindInfoSections UIS;
|
||||
UIS.dso_base = kBaseAddr;
|
||||
UIS.dwarf_section_length = kDwarfSectionLength;
|
||||
dl_iterate_cb_data CBData;
|
||||
// Unused by the cache.
|
||||
CBData.addressSpace = nullptr;
|
||||
CBData.sects = &UIS;
|
||||
CBData.targetAddr = kBaseAddr + 1;
|
||||
|
||||
// Nothing present, shouldn't find.
|
||||
if (FHC.find(&PInfo, 0, &CBData))
|
||||
abort();
|
||||
FHC.add(&UIS);
|
||||
// Just added. Should find.
|
||||
if (!FHC.find(&PInfo, 0, &CBData))
|
||||
abort();
|
||||
// Cache is invalid. Shouldn't find.
|
||||
PInfo.dlpi_adds++;
|
||||
if (FHC.find(&PInfo, 0, &CBData))
|
||||
abort();
|
||||
|
||||
FHC.add(&UIS);
|
||||
CBData.targetAddr = kBaseAddr - 1;
|
||||
// Shouldn't find something outside of the addresses.
|
||||
if (FHC.find(&PInfo, 0, &CBData))
|
||||
abort();
|
||||
// Add enough things to the cache that the entry is evicted.
|
||||
for (int i = 0; i < 9; i++) {
|
||||
UIS.dso_base = kBaseAddr + (kDwarfSectionLength * i);
|
||||
FHC.add(&UIS);
|
||||
}
|
||||
CBData.targetAddr = kBaseAddr;
|
||||
// Should have been evicted.
|
||||
if (FHC.find(&PInfo, 0, &CBData))
|
||||
abort();
|
||||
return 0;
|
||||
}
|
||||
#endif
|
Loading…
Reference in New Issue
Block a user