Cache uwnind frame headers as they are found.

Summary:
This improves unwind performance quite substantially, and follows
a somewhat similar approach used in libgcc_s as described in the
thread here:

https://gcc.gnu.org/ml/gcc/2005-02/msg00625.html

On certain extremely exception heavy internal tests, the time
drops from about 80 minutes to about five minutes.

Subscribers: libcxx-commits

Tags: #libc

Differential Revision: https://reviews.llvm.org/D75954
This commit is contained in:
Sterling Augustine 2020-03-10 12:03:24 -07:00
parent 30804d0a3f
commit c53c2058ff
3 changed files with 243 additions and 2 deletions

View File

@ -452,6 +452,11 @@ struct _LIBUNWIND_HIDDEN dl_iterate_cb_data {
#error "_LIBUNWIND_SUPPORT_DWARF_UNWIND requires _LIBUNWIND_SUPPORT_DWARF_INDEX on this platform."
#endif
#include "FrameHeaderCache.hpp"
// There should be just one of these per process.
static FrameHeaderCache ProcessFrameHeaderCache;
static bool checkAddrInSegment(const Elf_Phdr *phdr, size_t image_base,
dl_iterate_cb_data *cbdata) {
if (phdr->p_type == PT_LOAD) {
@ -466,10 +471,13 @@ static bool checkAddrInSegment(const Elf_Phdr *phdr, size_t image_base,
return false;
}
int findUnwindSectionsByPhdr(struct dl_phdr_info *pinfo, size_t, void *data) {
int findUnwindSectionsByPhdr(struct dl_phdr_info *pinfo, size_t pinfo_size,
void *data) {
auto cbdata = static_cast<dl_iterate_cb_data *>(data);
if (pinfo->dlpi_phnum == 0 || cbdata->targetAddr < pinfo->dlpi_addr)
return 0;
if (ProcessFrameHeaderCache.find(pinfo, pinfo_size, data))
return 1;
Elf_Addr image_base = calculateImageBase(pinfo);
bool found_obj = false;
@ -496,8 +504,10 @@ int findUnwindSectionsByPhdr(struct dl_phdr_info *pinfo, size_t, void *data) {
} else if (!found_obj) {
found_obj = checkAddrInSegment(phdr, image_base, cbdata);
}
if (found_obj && found_hdr)
if (found_obj && found_hdr) {
ProcessFrameHeaderCache.add(cbdata->sects);
return 1;
}
}
cbdata->sects->dwarf_section_length = 0;
return 0;

View File

@ -0,0 +1,149 @@
//===-FrameHeaderCache.hpp ------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// Cache the elf program headers necessary to unwind the stack more efficiently
// in the presence of many dsos.
//
//===----------------------------------------------------------------------===//
#ifndef __FRAMEHEADER_CACHE_HPP__
#define __FRAMEHEADER_CACHE_HPP__
#include "config.h"
#include <limits.h>
#ifdef _LIBUNWIND_DEBUG_FRAMEHEADER_CACHE
#define _LIBUNWIND_FRAMEHEADERCACHE_TRACE0(x) _LIBUNWIND_LOG0(x)
#define _LIBUNWIND_FRAMEHEADERCACHE_TRACE(msg, ...) \
_LIBUNWIND_LOG(msg, __VA_ARGS__)
#else
#define _LIBUNWIND_FRAMEHEADERCACHE_TRACE0(x)
#define _LIBUNWIND_FRAMEHEADERCACHE_TRACE(msg, ...)
#endif
// This cache should only be be used from within a dl_iterate_phdr callback.
// dl_iterate_phdr does the necessary synchronization to prevent problems
// with concurrent access via the libc load lock. Adding synchronization
// for other uses is possible, but not currently done.
class _LIBUNWIND_HIDDEN FrameHeaderCache {
struct CacheEntry {
uintptr_t LowPC() { return Info.dso_base; };
uintptr_t HighPC() { return Info.dso_base + Info.dwarf_section_length; };
UnwindInfoSections Info;
CacheEntry *Next;
};
static const size_t kCacheEntryCount = 8;
// Can't depend on the C++ standard library in libunwind, so use an array to
// allocate the entries, and two linked lists for ordering unused and recently
// used entries. FIXME: Would the the extra memory for a doubly-linked list
// be better than the runtime cost of traversing a very short singly-linked
// list on a cache miss? The entries themselves are all small and consecutive,
// so unlikely to cause page faults when following the pointers. The memory
// spent on additional pointers could also be spent on more entries.
CacheEntry Entries[kCacheEntryCount];
CacheEntry *MostRecentlyUsed;
CacheEntry *Unused;
void resetCache() {
_LIBUNWIND_FRAMEHEADERCACHE_TRACE0("FrameHeaderCache reset");
MostRecentlyUsed = nullptr;
Unused = &Entries[0];
for (size_t i = 0; i < kCacheEntryCount - 1; i++) {
Entries[i].Next = &Entries[i + 1];
}
Entries[kCacheEntryCount - 1].Next = nullptr;
}
bool cacheNeedsReset(dl_phdr_info *PInfo) {
// C libraries increment dl_phdr_info.adds and dl_phdr_info.subs when
// loading and unloading shared libraries. If these values change between
// iterations of dl_iterate_phdr, then invalidate the cache.
// These are static to avoid needing an initializer, and unsigned long long
// because that is their type within the extended dl_phdr_info. Initialize
// these to something extremely unlikely to be found upon the first call to
// dl_iterate_phdr.
static unsigned long long LastAdds = ULLONG_MAX;
static unsigned long long LastSubs = ULLONG_MAX;
if (PInfo->dlpi_adds != LastAdds || PInfo->dlpi_subs != LastSubs) {
// Resetting the entire cache is a big hammer, but this path is rare--
// usually just on the very first call, when the cache is empty anyway--so
// added complexity doesn't buy much.
LastAdds = PInfo->dlpi_adds;
LastSubs = PInfo->dlpi_subs;
resetCache();
return true;
}
return false;
}
public:
bool find(dl_phdr_info *PInfo, size_t, void *data) {
if (cacheNeedsReset(PInfo) || MostRecentlyUsed == nullptr)
return false;
auto *CBData = static_cast<dl_iterate_cb_data *>(data);
CacheEntry *Current = MostRecentlyUsed;
CacheEntry *Previous = nullptr;
while (Current != nullptr) {
_LIBUNWIND_FRAMEHEADERCACHE_TRACE(
"FrameHeaderCache check %lx in [%lx - %lx)", CBData->targetAddr,
Current->LowPC(), Current->HighPC());
if (Current->LowPC() <= CBData->targetAddr &&
CBData->targetAddr < Current->HighPC()) {
_LIBUNWIND_FRAMEHEADERCACHE_TRACE(
"FrameHeaderCache hit %lx in [%lx - %lx)", CBData->targetAddr,
Current->LowPC(), Current->HighPC());
if (Previous) {
// If there is no Previous, then Current is already the
// MostRecentlyUsed, and no need to move it up.
Previous->Next = Current->Next;
Current->Next = MostRecentlyUsed;
MostRecentlyUsed = Current;
}
*CBData->sects = Current->Info;
return true;
}
Previous = Current;
Current = Current->Next;
}
_LIBUNWIND_FRAMEHEADERCACHE_TRACE("FrameHeaderCache miss for address %lx",
CBData->targetAddr);
return false;
}
void add(const UnwindInfoSections *UIS) {
CacheEntry *Current = nullptr;
if (Unused != nullptr) {
Current = Unused;
Unused = Unused->Next;
} else {
Current = MostRecentlyUsed;
CacheEntry *Previous = nullptr;
while (Current->Next != nullptr) {
Previous = Current;
Current = Current->Next;
}
Previous->Next = nullptr;
_LIBUNWIND_FRAMEHEADERCACHE_TRACE("FrameHeaderCache evict [%lx - %lx)",
Current->LowPC(), Current->HighPC());
}
Current->Info = *UIS;
Current->Next = MostRecentlyUsed;
MostRecentlyUsed = Current;
_LIBUNWIND_FRAMEHEADERCACHE_TRACE("FrameHeaderCache add [%lx - %lx)",
MostRecentlyUsed->LowPC(),
MostRecentlyUsed->HighPC());
}
};
#endif // __FRAMEHEADER_CACHE_HPP__

View File

@ -0,0 +1,82 @@
// The other libunwind tests don't test internal interfaces, so the include path
// is a little wonky.
#include "../src/config.h"
// Only run this test under supported configurations.
// This #if chain is ugly, but see the comments in AddressSpace.hpp for
// the reasoning.
#ifdef __APPLE__
int main() { return 0; }
#elif defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) && defined(_LIBUNWIND_IS_BAREMETAL)
int main() { return 0; }
#elif defined(_LIBUNWIND_ARM_EHABI) && defined(_LIBUNWIND_IS_BAREMETAL)
int main() { return 0; }
#elif defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) && defined(_WIN32)
int main() { return 0; }
#elif defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) && defined(_WIN32)
int main() { return 0; }
#elif defined(_LIBUNWIND_ARM_EHABI) && defined(__BIONIC__)
int main() { return 0; }
#elif defined(_LIBUNWIND_ARM_EHABI) || defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND)
#include <link.h>
#include <stdio.h>
// This file defines several of the data structures needed here,
// and includes FrameHeaderCache.hpp as well.
#include "../src/AddressSpace.hpp"
#define kBaseAddr 0xFFF000
#define kDwarfSectionLength 0xFF
using namespace libunwind;
int main() {
FrameHeaderCache FHC;
struct dl_phdr_info PInfo;
memset(&PInfo, 0, sizeof(PInfo));
// The cache itself should only care about these two fields--they
// tell the cache to invalidate or not; everything else is handled
// by AddressSpace.hpp.
PInfo.dlpi_adds = 6;
PInfo.dlpi_subs = 7;
UnwindInfoSections UIS;
UIS.dso_base = kBaseAddr;
UIS.dwarf_section_length = kDwarfSectionLength;
dl_iterate_cb_data CBData;
// Unused by the cache.
CBData.addressSpace = nullptr;
CBData.sects = &UIS;
CBData.targetAddr = kBaseAddr + 1;
// Nothing present, shouldn't find.
if (FHC.find(&PInfo, 0, &CBData))
abort();
FHC.add(&UIS);
// Just added. Should find.
if (!FHC.find(&PInfo, 0, &CBData))
abort();
// Cache is invalid. Shouldn't find.
PInfo.dlpi_adds++;
if (FHC.find(&PInfo, 0, &CBData))
abort();
FHC.add(&UIS);
CBData.targetAddr = kBaseAddr - 1;
// Shouldn't find something outside of the addresses.
if (FHC.find(&PInfo, 0, &CBData))
abort();
// Add enough things to the cache that the entry is evicted.
for (int i = 0; i < 9; i++) {
UIS.dso_base = kBaseAddr + (kDwarfSectionLength * i);
FHC.add(&UIS);
}
CBData.targetAddr = kBaseAddr;
// Should have been evicted.
if (FHC.find(&PInfo, 0, &CBData))
abort();
return 0;
}
#endif