LookupCache: Add a third level cache that will always resolve all known blocks

Stefanos Kornilios Mitsis Poiitidis 2021-01-29 01:53:12 +02:00
parent 0c9e05ed8f
commit 791aa4c32c
5 changed files with 73 additions and 57 deletions
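
The idea, in brief: L1 (a small direct-mapped table) and L2 (the paged pointer table) remain lossy caches that may be dropped at any time, while the new third level is a plain std::map (BlockList) that records every block ever compiled, so a lookup can always resolve a known block even after the fast levels have been flushed. Below is a minimal, self-contained sketch of that lookup order; the ThreeLevelLookup type collapses L1/L2 into a single array and is only an illustration of the scheme, not the FEXCore implementation.

#include <array>
#include <cstdint>
#include <map>

// Sketch of the commit's scheme: lossy fast levels in front of an
// authoritative map that always knows every compiled block.
struct ThreeLevelLookup {
  struct Entry { uint64_t GuestCode; uintptr_t HostCode; };

  std::array<Entry, 1024> L1{};             // stands in for L1/L2; can be cleared at any time
  std::map<uint64_t, uintptr_t> BlockList;  // L3: every block ever added

  void AddBlockMapping(uint64_t Guest, uintptr_t Host) {
    // Only L3 is written; the fast levels fill in lazily on the next lookup.
    BlockList.emplace(Guest, Host);
  }

  uintptr_t FindBlock(uint64_t Guest) {
    auto &E = L1[Guest & (L1.size() - 1)];
    if (E.GuestCode == Guest && E.HostCode) {
      return E.HostCode;                    // fast-path hit
    }
    auto it = BlockList.find(Guest);        // slow but always correct
    if (it == BlockList.end()) {
      return 0;                             // block truly unknown, must be compiled
    }
    E = Entry{Guest, it->second};           // promote back into the fast level
    return it->second;
  }
};

The important property is the fall-through order: a miss in the fast levels is not authoritative; only a miss in BlockList means the block actually has to be compiled.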

View File

@@ -166,7 +166,7 @@ namespace FEXCore::Context {
void ExecutionThread(FEXCore::Core::InternalThreadState *Thread);
void NotifyPause();
uintptr_t AddBlockMapping(FEXCore::Core::InternalThreadState *Thread, uint64_t Address, void *Ptr);
void AddBlockMapping(FEXCore::Core::InternalThreadState *Thread, uint64_t Address, void *Ptr);
FEXCore::CodeLoader *LocalLoader{};

View File

@@ -150,11 +150,7 @@ namespace FEXCore {
ERROR_AND_DIE("Couldn't compile code for thread at RIP: 0x%lx", Item->RIP);
}
auto BlockMapPtr = CompileThreadData->LookupCache->AddBlockMapping(Item->RIP, CompiledCode);
if (BlockMapPtr == 0) {
// XXX: We currently have the expectation that the compiler service block cache will be significantly underutilized compared to a regular thread
ERROR_AND_DIE("Couldn't add code to block cache for thread at RIP: 0x%lx", Item->RIP);
}
CompileThreadData->LookupCache->AddBlockMapping(Item->RIP, CompiledCode);
Item->CodePtr = CompiledCode;
Item->IRList = CompileThreadData->IRLists.find(Item->RIP)->second.get();

View File

@@ -559,24 +559,8 @@ namespace FEXCore::Context {
return Thread;
}
uintptr_t Context::AddBlockMapping(FEXCore::Core::InternalThreadState *Thread, uint64_t Address, void *Ptr) {
auto BlockMapPtr = Thread->LookupCache->AddBlockMapping(Address, Ptr);
if (BlockMapPtr == 0) {
Thread->LookupCache->ClearCache();
// Pull out the current IR we added and store it back after we cleared the rest of the list
// Needed in the case that the block mapping has aliased
auto iter = Thread->IRLists.find(Address);
if (iter != Thread->IRLists.end()) {
auto IR = iter->second.release();
Thread->IRLists.clear();
Thread->IRLists.try_emplace(Address, IR);
}
BlockMapPtr = Thread->LookupCache->AddBlockMapping(Address, Ptr);
LogMan::Throw::A(BlockMapPtr, "Couldn't add mapping after clearing mapping cache");
}
return BlockMapPtr;
void Context::AddBlockMapping(FEXCore::Core::InternalThreadState *Thread, uint64_t Address, void *Ptr) {
Thread->LookupCache->AddBlockMapping(Address, Ptr);
}
void Context::ClearCodeCache(FEXCore::Core::InternalThreadState *Thread, uint64_t GuestRIP) {
@@ -815,6 +799,13 @@ namespace FEXCore::Context {
}
uintptr_t Context::CompileBlock(FEXCore::Core::InternalThreadState *Thread, uint64_t GuestRIP) {
// Is the code in the cache?
// The backends only check L1 and L2, not L3
if (auto HostCode = Thread->LookupCache->FindBlock(GuestRIP)) {
return HostCode;
}
void *CodePtr;
FEXCore::Core::DebugData *DebugData;
bool DecrementRefCount = false;
@@ -855,7 +846,9 @@ namespace FEXCore::Context {
if (DecrementRefCount)
--Thread->CompileBlockReentrantRefCount;
return AddBlockMapping(Thread, GuestRIP, CodePtr);
AddBlockMapping(Thread, GuestRIP, CodePtr);
return (uintptr_t)CodePtr;
}
if (DecrementRefCount)
@@ -869,12 +862,8 @@ namespace FEXCore::Context {
// This will most likely fail since regular code use won't be using a fallback core.
// It's mainly for testing new instruction encodings
void *CodePtr = Thread->FallbackBackend->CompileCode(nullptr, nullptr);
if (CodePtr) {
uintptr_t Ptr = reinterpret_cast<uintptr_t>(AddBlockMapping(Thread, GuestRIP, CodePtr));
return Ptr;
}
return 0;
AddBlockMapping(Thread, GuestRIP, CodePtr);
return (uintptr_t)CodePtr;
}
void Context::ExecutionThread(FEXCore::Core::InternalThreadState *Thread) {
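
Taken together, the Core.cpp changes simplify the compile path: AddBlockMapping can no longer fail (it is just an insert into the L3 map), so the cache-full recovery code disappears, and CompileBlock can return early whenever the block is already known. A rough, self-contained sketch of the new shape, with hypothetical stand-ins (FakeLookupCache, CompileCodeFor) replacing the real FEXCore types and the JIT:

#include <cstdint>

// Hypothetical stand-ins so the sketch compiles on its own; the real pieces
// are FEXCore's LookupCache, InternalThreadState, and the JIT backends.
struct FakeLookupCache {
  uintptr_t FindBlock(uint64_t) { return 0; }  // checks L1, L2, then the new L3
  void AddBlockMapping(uint64_t, void *) {}    // records the block in L3 only
};

static FakeLookupCache Cache;
static void *CompileCodeFor(uint64_t) { return nullptr; }  // placeholder for the compile step

uintptr_t CompileBlock(uint64_t GuestRIP) {
  if (auto HostCode = Cache.FindBlock(GuestRIP)) {
    return HostCode;                           // already compiled, reuse it
  }
  void *CodePtr = CompileCodeFor(GuestRIP);
  if (!CodePtr) {
    return 0;                                  // compilation failed
  }
  Cache.AddBlockMapping(GuestRIP, CodePtr);    // infallible: just an L3 map insert
  return reinterpret_cast<uintptr_t>(CodePtr);
}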

View File

@@ -50,14 +50,22 @@ void LookupCache::HintUsedRange(uint64_t Address, uint64_t Size) {
madvise(reinterpret_cast<void*>(PagePointer + Address), Size, MADV_WILLNEED);
}
void LookupCache::ClearCache() {
void LookupCache::ClearL2Cache() {
// Clear out the page memory
madvise(reinterpret_cast<void*>(PagePointer), ctx->Config.VirtualMemSize / 4096 * 8, MADV_DONTNEED);
madvise(reinterpret_cast<void*>(PageMemory), CODE_SIZE, MADV_DONTNEED);
madvise(reinterpret_cast<void*>(L1Pointer), L1_SIZE, MADV_DONTNEED);
AllocateOffset = 0;
}
void LookupCache::ClearCache() {
// Clear L1
madvise(reinterpret_cast<void*>(L1Pointer), L1_SIZE, MADV_DONTNEED);
// Clear L2
ClearL2Cache();
// All code is gone, remove links
BlockLinks.clear();
// All code is gone, clear the block list
BlockList.clear();
}
}

View File

@@ -18,7 +18,26 @@ public:
uintptr_t End() { return 0; }
uintptr_t FindBlock(uint64_t Address) {
return FindCodePointerForAddress(Address);
auto HostCode = FindCodePointerForAddress(Address);
if (HostCode) {
return HostCode;
} else {
auto HostCode = BlockList.find(Address);
if (HostCode != BlockList.end()) {
CacheBlockMapping(Address, HostCode->second);
return HostCode->second;
} else {
return 0;
}
}
}
void AddBlockMapping(uint64_t Address, void *HostCode) {
auto InsertPoint = BlockList.emplace(Address, (uintptr_t)HostCode);
LogMan::Throw::A(InsertPoint.second == true, "Duplicate block mapping added");
// no need to update L1 or L2, they will get updated on first lookup
}
void Erase(uint64_t Address) {
@@ -30,6 +49,9 @@ public:
it->second();
}
// Remove from BlockList
BlockList.erase(Address);
// Do L1
auto &L1Entry = reinterpret_cast<LookupCacheEntry*>(L1Pointer)[Address & L1_ENTRIES_MASK];
if (L1Entry.GuestCode == Address) {
@@ -54,7 +76,25 @@ public:
BlockPointers[PageOffset].HostCode = 0;
}
uintptr_t AddBlockMapping(uint64_t Address, void *Ptr) {
void AddBlockLink(uint64_t GuestDestination, uintptr_t HostLink, const std::function<void()> &delinker) {
BlockLinks.insert({{GuestDestination, HostLink}, delinker});
}
void ClearCache();
void ClearL2Cache();
void HintUsedRange(uint64_t Address, uint64_t Size);
uintptr_t GetL1Pointer() { return L1Pointer; }
uintptr_t GetPagePointer() { return PagePointer; }
uintptr_t GetVirtualMemorySize() const { return VirtualMemSize; }
constexpr static size_t L1_ENTRIES = 1 * 1024 * 1024; // Must be a power of 2
constexpr static size_t L1_ENTRIES_MASK = L1_ENTRIES - 1;
private:
void CacheBlockMapping(uint64_t Address, uintptr_t HostCode) {
// Do L1
auto &L1Entry = reinterpret_cast<LookupCacheEntry*>(L1Pointer)[Address & L1_ENTRIES_MASK];
if (L1Entry.GuestCode == Address) {
@@ -74,8 +114,9 @@ public:
// Allocate one now if we can
uintptr_t NewPageBacking = AllocateBackingForPage();
if (!NewPageBacking) {
// Couldn't allocate, return so the frontend can recover from this
return 0;
// Couldn't allocate, clear L2 and retry
ClearL2Cache();
CacheBlockMapping(Address, HostCode);
return;
}
Pointers[Address] = NewPageBacking;
LocalPagePointer = NewPageBacking;
@@ -83,31 +124,12 @@ public:
// Add the new pointer to the page block
auto BlockPointers = reinterpret_cast<LookupCacheEntry*>(LocalPagePointer);
uintptr_t CastPtr = reinterpret_cast<uintptr_t>(Ptr);
// This silently replaces existing mappings
BlockPointers[PageOffset].GuestCode = FullAddress;
BlockPointers[PageOffset].HostCode = CastPtr;
return CastPtr;
BlockPointers[PageOffset].HostCode = HostCode;
}
void AddBlockLink(uint64_t GuestDestination, uintptr_t HostLink, const std::function<void()> &delinker) {
BlockLinks.insert({{GuestDestination, HostLink}, delinker});
}
void ClearCache();
void HintUsedRange(uint64_t Address, uint64_t Size);
uintptr_t GetL1Pointer() { return L1Pointer; }
uintptr_t GetPagePointer() { return PagePointer; }
uintptr_t GetVirtualMemorySize() const { return VirtualMemSize; }
constexpr static size_t L1_ENTRIES = 1 * 1024 * 1024; // Must be a power of 2
constexpr static size_t L1_ENTRIES_MASK = L1_ENTRIES - 1;
private:
uintptr_t AllocateBackingForPage() {
uintptr_t NewBase = AllocateOffset;
uintptr_t NewEnd = AllocateOffset + SIZE_PER_PAGE;
@@ -173,6 +195,7 @@ private:
};
std::map<BlockLinkTag, std::function<void()>> BlockLinks;
std::map<uint64_t, uint64_t> BlockList;
constexpr static size_t CODE_SIZE = 128 * 1024 * 1024;
constexpr static size_t SIZE_PER_PAGE = 4096 * sizeof(LookupCacheEntry);
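
The subtle part of the header change is the private CacheBlockMapping path: filling an L2 slot may require a fresh backing page, and when the backing arena is exhausted the code no longer reports failure to the caller; it wipes L2 with ClearL2Cache() and retries, which is safe only because the L3 map still holds every mapping. A toy, self-contained version of that clear-and-retry pattern, with made-up sizes and names:

#include <array>
#include <cstddef>
#include <cstdint>

// Toy arena that mimics "allocate a backing slot, or clear the level and retry".
struct TinyL2 {
  static constexpr size_t SLOTS = 4;      // made-up capacity
  std::array<uint64_t, SLOTS> Slots{};
  size_t AllocateOffset = 0;

  void ClearL2Cache() {
    Slots.fill(0);                        // every cached entry in this level is discarded
    AllocateOffset = 0;
  }

  void Cache(uint64_t Address) {
    if (AllocateOffset == SLOTS) {
      // Arena exhausted: instead of failing, drop the whole lossy level and retry.
      // Nothing is lost because the authoritative L3 map is untouched.
      ClearL2Cache();
      Cache(Address);
      return;                             // don't fall through after the retry
    }
    Slots[AllocateOffset++] = Address;
  }
};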