LookupCache: Add a third level cache that will always resolve all known blocks

Stefanos Kornilios Mitsis Poiitidis 2021-01-29 01:53:12 +02:00
parent 0c9e05ed8f
commit 791aa4c32c
5 changed files with 73 additions and 57 deletions
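
The idea, in brief: L1 (a small direct-mapped table) and L2 (the paged pointer table) remain lossy caches that may be dropped at any time, while the new third level is a plain std::map (BlockList) that records every block ever compiled, so a lookup can always resolve a known block even after the fast levels have been flushed. Below is a minimal, self-contained sketch of that lookup order; the ThreeLevelLookup type collapses L1/L2 into a single array and is only an illustration of the scheme, not the FEXCore implementation.

#include <array>
#include <cstdint>
#include <map>

// Sketch of the commit's scheme: lossy fast levels in front of an
// authoritative map that always knows every compiled block.
struct ThreeLevelLookup {
  struct Entry { uint64_t GuestCode; uintptr_t HostCode; };

  std::array<Entry, 1024> L1{};             // stands in for L1/L2; can be cleared at any time
  std::map<uint64_t, uintptr_t> BlockList;  // L3: every block ever added

  void AddBlockMapping(uint64_t Guest, uintptr_t Host) {
    // Only L3 is written; the fast levels fill in lazily on the next lookup.
    BlockList.emplace(Guest, Host);
  }

  uintptr_t FindBlock(uint64_t Guest) {
    auto &E = L1[Guest & (L1.size() - 1)];
    if (E.GuestCode == Guest && E.HostCode) {
      return E.HostCode;                    // fast-path hit
    }
    auto it = BlockList.find(Guest);        // slow but always correct
    if (it == BlockList.end()) {
      return 0;                             // block truly unknown, must be compiled
    }
    E = Entry{Guest, it->second};           // promote back into the fast level
    return it->second;
  }
};

The important property is the fall-through order: a miss in the fast levels is not authoritative; only a miss in BlockList means the block actually has to be compiled.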

View File

@@ -166,7 +166,7 @@ namespace FEXCore::Context {
void ExecutionThread(FEXCore::Core::InternalThreadState *Thread);
void NotifyPause();
uintptr_t AddBlockMapping(FEXCore::Core::InternalThreadState *Thread, uint64_t Address, void *Ptr);
void AddBlockMapping(FEXCore::Core::InternalThreadState *Thread, uint64_t Address, void *Ptr);
FEXCore::CodeLoader *LocalLoader{};

View File

@@ -150,11 +150,7 @@ namespace FEXCore {
ERROR_AND_DIE("Couldn't compile code for thread at RIP: 0x%lx", Item->RIP);
}
auto BlockMapPtr = CompileThreadData->LookupCache->AddBlockMapping(Item->RIP, CompiledCode);
if (BlockMapPtr == 0) {
// XXX: We currently have the expectation that the compiler service block cache will be significantly underutilized compared to a regular thread
ERROR_AND_DIE("Couldn't add code to block cache for thread at RIP: 0x%lx", Item->RIP);
}
CompileThreadData->LookupCache->AddBlockMapping(Item->RIP, CompiledCode);
Item->CodePtr = CompiledCode;
Item->IRList = CompileThreadData->IRLists.find(Item->RIP)->second.get();

View File

@@ -559,24 +559,8 @@ namespace FEXCore::Context {
return Thread;
}
uintptr_t Context::AddBlockMapping(FEXCore::Core::InternalThreadState *Thread, uint64_t Address, void *Ptr) {
auto BlockMapPtr = Thread->LookupCache->AddBlockMapping(Address, Ptr);
if (BlockMapPtr == 0) {
Thread->LookupCache->ClearCache();
// Pull out the current IR we added and store it back after we cleared the rest of the list
// Needed in the case that the block mapping has aliased
auto iter = Thread->IRLists.find(Address);
if (iter != Thread->IRLists.end()) {
auto IR = iter->second.release();
Thread->IRLists.clear();
Thread->IRLists.try_emplace(Address, IR);
}
BlockMapPtr = Thread->LookupCache->AddBlockMapping(Address, Ptr);
LogMan::Throw::A(BlockMapPtr, "Couldn't add mapping after clearing mapping cache");
}
return BlockMapPtr;
void Context::AddBlockMapping(FEXCore::Core::InternalThreadState *Thread, uint64_t Address, void *Ptr) {
Thread->LookupCache->AddBlockMapping(Address, Ptr);
}
void Context::ClearCodeCache(FEXCore::Core::InternalThreadState *Thread, uint64_t GuestRIP) {
@@ -815,6 +799,13 @@ namespace FEXCore::Context {
}
uintptr_t Context::CompileBlock(FEXCore::Core::InternalThreadState *Thread, uint64_t GuestRIP) {
// Is the code in the cache?
// The backends only check L1 and L2, not L3
if (auto HostCode = Thread->LookupCache->FindBlock(GuestRIP)) {
return HostCode;
}
void *CodePtr;
FEXCore::Core::DebugData *DebugData;
bool DecrementRefCount = false;
@@ -855,7 +846,9 @@ namespace FEXCore::Context {
if (DecrementRefCount)
--Thread->CompileBlockReentrantRefCount;
return AddBlockMapping(Thread, GuestRIP, CodePtr);
AddBlockMapping(Thread, GuestRIP, CodePtr);
return (uintptr_t)CodePtr;
}
if (DecrementRefCount)
@@ -869,12 +862,8 @@ namespace FEXCore::Context {
// This will most likely fail since regular code use won't be using a fallback core.
// It's mainly for testing new instruction encodings
void *CodePtr = Thread->FallbackBackend->CompileCode(nullptr, nullptr);
if (CodePtr) {
uintptr_t Ptr = reinterpret_cast<uintptr_t>(AddBlockMapping(Thread, GuestRIP, CodePtr));
return Ptr;
}
return 0;
AddBlockMapping(Thread, GuestRIP, CodePtr);
return (uintptr_t)CodePtr;
}
void Context::ExecutionThread(FEXCore::Core::InternalThreadState *Thread) {
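
Taken together, the Core.cpp changes simplify the compile path: AddBlockMapping can no longer fail (it is just an insert into the L3 map), so the cache-full recovery code disappears, and CompileBlock can return early whenever the block is already known. A rough, self-contained sketch of the new shape, with hypothetical stand-ins (FakeLookupCache, CompileCodeFor) replacing the real FEXCore types and the JIT:

#include <cstdint>

// Hypothetical stand-ins so the sketch compiles on its own; the real pieces
// are FEXCore's LookupCache, InternalThreadState, and the JIT backends.
struct FakeLookupCache {
  uintptr_t FindBlock(uint64_t) { return 0; }  // checks L1, L2, then the new L3
  void AddBlockMapping(uint64_t, void *) {}    // records the block in L3 only
};

static FakeLookupCache Cache;
static void *CompileCodeFor(uint64_t) { return nullptr; }  // placeholder for the compile step

uintptr_t CompileBlock(uint64_t GuestRIP) {
  if (auto HostCode = Cache.FindBlock(GuestRIP)) {
    return HostCode;                           // already compiled, reuse it
  }
  void *CodePtr = CompileCodeFor(GuestRIP);
  if (!CodePtr) {
    return 0;                                  // compilation failed
  }
  Cache.AddBlockMapping(GuestRIP, CodePtr);    // infallible: just an L3 map insert
  return reinterpret_cast<uintptr_t>(CodePtr);
}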

View File

@@ -50,14 +50,22 @@ void LookupCache::HintUsedRange(uint64_t Address, uint64_t Size) {
madvise(reinterpret_cast<void*>(PagePointer + Address), Size, MADV_WILLNEED);
}
void LookupCache::ClearCache() {
void LookupCache::ClearL2Cache() {
// Clear out the page memory
madvise(reinterpret_cast<void*>(PagePointer), ctx->Config.VirtualMemSize / 4096 * 8, MADV_DONTNEED);
madvise(reinterpret_cast<void*>(PageMemory), CODE_SIZE, MADV_DONTNEED);
madvise(reinterpret_cast<void*>(L1Pointer), L1_SIZE, MADV_DONTNEED);
AllocateOffset = 0;
}
void LookupCache::ClearCache() {
// Clear L1
madvise(reinterpret_cast<void*>(L1Pointer), L1_SIZE, MADV_DONTNEED);
// Clear L2
ClearL2Cache();
// All code is gone, remove links
BlockLinks.clear();
// All code is gone, clear the block list
BlockList.clear();
}
}

View File

@@ -18,7 +18,26 @@ public:
uintptr_t End() { return 0; }
uintptr_t FindBlock(uint64_t Address) {
return FindCodePointerForAddress(Address);
auto HostCode = FindCodePointerForAddress(Address);
if (HostCode) {
return HostCode;
} else {
auto HostCode = BlockList.find(Address);
if (HostCode != BlockList.end()) {
CacheBlockMapping(Address, HostCode->second);
return HostCode->second;
} else {
return 0;
}
}
}
void AddBlockMapping(uint64_t Address, void *HostCode) {
auto InsertPoint = BlockList.emplace(Address, (uintptr_t)HostCode);
LogMan::Throw::A(InsertPoint.second == true, "Duplicate block mapping added");
// no need to update L1 or L2, they will get updated on first lookup
}
void Erase(uint64_t Address) {
@@ -30,6 +49,9 @@ public:
it->second();
}
// Remove from BlockList
BlockList.erase(Address);
// Do L1
auto &L1Entry = reinterpret_cast<LookupCacheEntry*>(L1Pointer)[Address & L1_ENTRIES_MASK];
if (L1Entry.GuestCode == Address) {
@@ -54,7 +76,25 @@ public:
BlockPointers[PageOffset].HostCode = 0;
}
uintptr_t AddBlockMapping(uint64_t Address, void *Ptr) {
void AddBlockLink(uint64_t GuestDestination, uintptr_t HostLink, const std::function<void()> &delinker) {
BlockLinks.insert({{GuestDestination, HostLink}, delinker});
}
void ClearCache();
void ClearL2Cache();
void HintUsedRange(uint64_t Address, uint64_t Size);
uintptr_t GetL1Pointer() { return L1Pointer; }
uintptr_t GetPagePointer() { return PagePointer; }
uintptr_t GetVirtualMemorySize() const { return VirtualMemSize; }
constexpr static size_t L1_ENTRIES = 1 * 1024 * 1024; // Must be a power of 2
constexpr static size_t L1_ENTRIES_MASK = L1_ENTRIES - 1;
private:
void CacheBlockMapping(uint64_t Address, uintptr_t HostCode) {
// Do L1
auto &L1Entry = reinterpret_cast<LookupCacheEntry*>(L1Pointer)[Address & L1_ENTRIES_MASK];
if (L1Entry.GuestCode == Address) {
@@ -74,8 +114,9 @@ public:
// Allocate one now if we can
uintptr_t NewPageBacking = AllocateBackingForPage();
if (!NewPageBacking) {
// Couldn't allocate, return so the frontend can recover from this
return 0;
// Couldn't allocate, clear L2 and retry
ClearL2Cache();
CacheBlockMapping(Address, HostCode);
return;
}
Pointers[Address] = NewPageBacking;
LocalPagePointer = NewPageBacking;
@@ -83,31 +124,12 @@ public:
// Add the new pointer to the page block
auto BlockPointers = reinterpret_cast<LookupCacheEntry*>(LocalPagePointer);
uintptr_t CastPtr = reinterpret_cast<uintptr_t>(Ptr);
// This silently replaces existing mappings
BlockPointers[PageOffset].GuestCode = FullAddress;
BlockPointers[PageOffset].HostCode = CastPtr;
return CastPtr;
BlockPointers[PageOffset].HostCode = HostCode;
}
void AddBlockLink(uint64_t GuestDestination, uintptr_t HostLink, const std::function<void()> &delinker) {
BlockLinks.insert({{GuestDestination, HostLink}, delinker});
}
void ClearCache();
void HintUsedRange(uint64_t Address, uint64_t Size);
uintptr_t GetL1Pointer() { return L1Pointer; }
uintptr_t GetPagePointer() { return PagePointer; }
uintptr_t GetVirtualMemorySize() const { return VirtualMemSize; }
constexpr static size_t L1_ENTRIES = 1 * 1024 * 1024; // Must be a power of 2
constexpr static size_t L1_ENTRIES_MASK = L1_ENTRIES - 1;
private:
uintptr_t AllocateBackingForPage() {
uintptr_t NewBase = AllocateOffset;
uintptr_t NewEnd = AllocateOffset + SIZE_PER_PAGE;
@@ -173,6 +195,7 @@ private:
};
std::map<BlockLinkTag, std::function<void()>> BlockLinks;
std::map<uint64_t, uint64_t> BlockList;
constexpr static size_t CODE_SIZE = 128 * 1024 * 1024;
constexpr static size_t SIZE_PER_PAGE = 4096 * sizeof(LookupCacheEntry);
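
The subtle part of the header change is the private CacheBlockMapping path: filling an L2 slot may require a fresh backing page, and when the backing arena is exhausted the code no longer reports failure to the caller; it wipes L2 with ClearL2Cache() and retries, which is safe only because the L3 map still holds every mapping. A toy, self-contained version of that clear-and-retry pattern, with made-up sizes and names:

#include <array>
#include <cstddef>
#include <cstdint>

// Toy arena that mimics "allocate a backing slot, or clear the level and retry".
struct TinyL2 {
  static constexpr size_t SLOTS = 4;      // made-up capacity
  std::array<uint64_t, SLOTS> Slots{};
  size_t AllocateOffset = 0;

  void ClearL2Cache() {
    Slots.fill(0);                        // every cached entry in this level is discarded
    AllocateOffset = 0;
  }

  void Cache(uint64_t Address) {
    if (AllocateOffset == SLOTS) {
      // Arena exhausted: instead of failing, drop the whole lossy level and retry.
      // Nothing is lost because the authoritative L3 map is untouched.
      ClearL2Cache();
      Cache(Address);
      return;                             // don't fall through after the retry
    }
    Slots[AllocateOffset++] = Address;
  }
};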