diff --git a/External/FEXCore/Source/Interface/Context/Context.cpp b/External/FEXCore/Source/Interface/Context/Context.cpp index b45aab007..f1842251d 100644 --- a/External/FEXCore/Source/Interface/Context/Context.cpp +++ b/External/FEXCore/Source/Interface/Context/Context.cpp @@ -151,6 +151,13 @@ namespace FEXCore::Context { return CTX->CPUID.RunFunction(Function); } + bool ReadAOT(FEXCore::Context::Context *CTX, std::istream& stream) { + return CTX->LoadAOTCache(stream); + } + void WriteAOT(FEXCore::Context::Context *CTX, std::ostream& stream) { + CTX->WriteAOTCache(stream); + } + namespace Debug { void CompileRIP(FEXCore::Context::Context *CTX, uint64_t RIP) { CTX->CompileRIP(CTX->ParentThread, RIP); diff --git a/External/FEXCore/Source/Interface/Context/Context.h b/External/FEXCore/Source/Interface/Context/Context.h index 675fcbc9d..17c2403f6 100644 --- a/External/FEXCore/Source/Interface/Context/Context.h +++ b/External/FEXCore/Source/Interface/Context/Context.h @@ -6,13 +6,18 @@ #include "Interface/Core/InternalThreadState.h" #include "Interface/Core/X86HelperGen.h" #include "Interface/IR/PassManager.h" +#include "Interface/IR/Passes/RegisterAllocationPass.h" #include #include #include #include #include +#include +#include #include +#include +#include namespace FEXCore { class ThunkHandler; @@ -30,6 +35,8 @@ class SyscallHandler; namespace FEXCore::IR { class RegisterAllocationPass; + class RegisterAllocationData; + class IRListView; namespace Validation { class IRValidation; } @@ -96,6 +103,14 @@ namespace FEXCore::Context { CustomCPUFactoryType FallbackCPUFactory; std::function CustomExitHandler; + struct AOTCacheEntry { + uint64_t start; + uint64_t len; + uint64_t crc; + IR::IRListView *IR; + IR::RegisterAllocationData *RAData; + }; + std::map AOTCache; #ifdef BLOCKSTATS std::unique_ptr BlockData; #endif @@ -142,11 +157,13 @@ namespace FEXCore::Context { FEXCore::Core::ThreadState *GetThreadState(); void LoadEntryList(); - std::tuple *, FEXCore::IR::RegisterAllocationData *, uint64_t, uint64_t> GenerateIR(FEXCore::Core::InternalThreadState *Thread, uint64_t GuestRIP); + std::tuple GenerateIR(FEXCore::Core::InternalThreadState *Thread, uint64_t GuestRIP); - std::tuple *, FEXCore::Core::DebugData *, FEXCore::IR::RegisterAllocationData *, bool> CompileCode(FEXCore::Core::InternalThreadState *Thread, uint64_t GuestRIP); + std::tuple CompileCode(FEXCore::Core::InternalThreadState *Thread, uint64_t GuestRIP); uintptr_t CompileBlock(FEXCore::Core::InternalThreadState *Thread, uint64_t GuestRIP); + bool LoadAOTCache(std::istream &stream); + void WriteAOTCache(std::ostream &stream); // Used for thread creation from syscalls void InitializeCompiler(FEXCore::Core::InternalThreadState* State, bool CompileThread); FEXCore::Core::InternalThreadState* CreateThread(FEXCore::Core::CPUState *NewThreadState, uint64_t ParentTID); diff --git a/External/FEXCore/Source/Interface/Core/CompileService.h b/External/FEXCore/Source/Interface/Core/CompileService.h index 321ac4326..ae5a54545 100644 --- a/External/FEXCore/Source/Interface/Core/CompileService.h +++ b/External/FEXCore/Source/Interface/Core/CompileService.h @@ -31,7 +31,7 @@ class CompileService final { // Outgoing void *CodePtr{}; - FEXCore::IR::IRListView *IRList{}; + FEXCore::IR::IRListView *IRList{}; FEXCore::IR::RegisterAllocationData *RAData{}; FEXCore::Core::DebugData *DebugData{}; diff --git a/External/FEXCore/Source/Interface/Core/Core.cpp b/External/FEXCore/Source/Interface/Core/Core.cpp index c6a31c2ab..6b306a2a6 100644 --- a/External/FEXCore/Source/Interface/Core/Core.cpp +++ b/External/FEXCore/Source/Interface/Core/Core.cpp @@ -35,6 +35,8 @@ namespace FEXCore::CPU { } } +static std::mutex AOTCacheLock; + namespace FEXCore::Core { struct ThreadLocalData { FEXCore::Core::InternalThreadState* Thread; @@ -111,7 +113,7 @@ namespace DefaultFallbackCore { void Initialize() override {} bool NeedsOpDispatch() override { return false; } - void *CompileCode(FEXCore::IR::IRListView const *IR, FEXCore::Core::DebugData *DebugData, FEXCore::IR::RegisterAllocationData *RAData) override { + void *CompileCode(FEXCore::IR::IRListView const *IR, FEXCore::Core::DebugData *DebugData, FEXCore::IR::RegisterAllocationData *RAData) override { LogMan::Msg::E("Fell back to default code handler at RIP: 0x%lx", ThreadState->State.State.rip); return nullptr; } @@ -572,7 +574,7 @@ namespace FEXCore::Context { } } - std::tuple *, FEXCore::IR::RegisterAllocationData *, uint64_t, uint64_t> Context::GenerateIR(FEXCore::Core::InternalThreadState *Thread, uint64_t GuestRIP) { + std::tuple Context::GenerateIR(FEXCore::Core::InternalThreadState *Thread, uint64_t GuestRIP) { uint8_t const *GuestCode{}; GuestCode = reinterpret_cast(GuestRIP); @@ -764,8 +766,8 @@ namespace FEXCore::Context { return {IRList, RAData.release(), TotalInstructions, TotalInstructionsLength}; } - std::tuple *, FEXCore::Core::DebugData *, FEXCore::IR::RegisterAllocationData *, bool> Context::CompileCode(FEXCore::Core::InternalThreadState *Thread, uint64_t GuestRIP) { - FEXCore::IR::IRListView *IRList {}; + std::tuple Context::CompileCode(FEXCore::Core::InternalThreadState *Thread, uint64_t GuestRIP) { + FEXCore::IR::IRListView *IRList {}; FEXCore::Core::DebugData *DebugData {}; FEXCore::IR::RegisterAllocationData *RAData {}; bool GeneratedIR {}; @@ -781,8 +783,21 @@ namespace FEXCore::Context { RAData = Thread->RALists.find(GuestRIP)->second.get(); GeneratedIR = false; - } else { + } + if (IRList == nullptr) { + std::lock_guard lk(AOTCacheLock); + auto AOTEntry = AOTCache.find(GuestRIP); + if (AOTEntry != AOTCache.end()) { + IRList = AOTEntry->second.IR; + RAData = AOTEntry->second.RAData; + DebugData = new FEXCore::Core::DebugData(); + + GeneratedIR = true; + } + } + + if (IRList == nullptr) { // Generate IR + Meta Info auto [IRCopy, RACopy, TotalInstructions, TotalInstructionsLength] = GenerateIR(Thread, GuestRIP); @@ -806,6 +821,76 @@ namespace FEXCore::Context { return { Thread->CPUBackend->CompileCode(IRList, DebugData, RAData), IRList, DebugData, RAData, GeneratedIR}; } + bool Context::LoadAOTCache(std::istream &stream) { + std::lock_guard lk(AOTCacheLock); + AOTCache.clear(); + uint64_t tag; + stream.read((char*)&tag, sizeof(tag)); + if (!stream || tag != 0xDEADBEEFC0D30000) + return false; + do { + uint64_t addr, start, crc, len; + stream.read((char*)&addr, sizeof(addr)); + if (!stream) + return true; + + stream.read((char*)&start, sizeof(start)); + if (!stream) + return false; + stream.read((char*)&len, sizeof(len)); + if (!stream) + return false; + stream.read((char*)&crc, sizeof(crc)); + if (!stream) + return false; + auto IR = new IR::IRListView(stream); + if (!stream) { + delete IR; + return false; + } + uint64_t RASize; + stream.read((char*)&RASize, sizeof(RASize)); + if (!stream) { + delete IR; + return false; + } + IR::RegisterAllocationData *RAData = (IR::RegisterAllocationData *)malloc(IR::RegisterAllocationData::Size(RASize)); + RAData->MapCount = RASize; + + stream.read((char*)&RAData->Map[0], sizeof(RAData->Map[0]) * RASize); + + if (!stream) { + delete IR; + return false; + } + stream.read((char*)&RAData->SpillSlotCount, sizeof(RAData->SpillSlotCount)); + if (!stream) { + delete IR; + return false; + } + AOTCache.insert({addr, {start, len, crc, IR, RAData}}); + } while(!stream.eof()); + return true; + } + + void Context::WriteAOTCache(std::ostream &stream) { + std::lock_guard lk(AOTCacheLock); + uint64_t tag = 0xDEADBEEFC0D30000; + stream.write((char*)&tag, sizeof(tag)); + for (auto entry: AOTCache) { + stream.write((char*)&entry.first, sizeof(entry.first)); + stream.write((char*)&entry.second.start, sizeof(entry.second.start)); + stream.write((char*)&entry.second.len, sizeof(entry.second.len)); + stream.write((char*)&entry.second.crc, sizeof(entry.second.crc)); + entry.second.IR->Serialize(stream); + uint64_t RASize = entry.second.RAData->MapCount; + stream.write((char*)&RASize, sizeof(RASize)); + stream.write((char*)&entry.second.RAData->Map[0], sizeof(entry.second.RAData->Map[0]) * RASize); + stream.write((char*)&entry.second.RAData->SpillSlotCount, sizeof(entry.second.RAData->SpillSlotCount)); + } + } + + uintptr_t Context::CompileBlock(FEXCore::Core::InternalThreadState *Thread, uint64_t GuestRIP) { // Is the code in the cache? @@ -815,7 +900,7 @@ namespace FEXCore::Context { } void *CodePtr {}; - FEXCore::IR::IRListView *IRList {}; + FEXCore::IR::IRListView *IRList {}; FEXCore::Core::DebugData *DebugData {}; FEXCore::IR::RegisterAllocationData *RAData {}; @@ -872,6 +957,11 @@ namespace FEXCore::Context { Thread->IRLists.emplace(GuestRIP, IRList); Thread->DebugData.emplace(GuestRIP, DebugData); Thread->RALists.emplace(GuestRIP, RAData); + + { + std::lock_guard lk(AOTCacheLock); + AOTCache.insert({GuestRIP, {0, 0, 0, IRList, RAData}}); + } } if (DecrementRefCount) diff --git a/External/FEXCore/Source/Interface/Core/Frontend.cpp b/External/FEXCore/Source/Interface/Core/Frontend.cpp index e5b23f676..198b88547 100644 --- a/External/FEXCore/Source/Interface/Core/Frontend.cpp +++ b/External/FEXCore/Source/Interface/Core/Frontend.cpp @@ -992,6 +992,9 @@ bool Decoder::DecodeInstructionsAtEntry(uint8_t const* _InstStream, uint64_t PC) SymbolMinAddress = EntryPoint; } + DecodedMinAddress = EntryPoint; + DecodedMaxAddress = EntryPoint; + // Entry is a jump target BlocksToDecode.emplace(PC); @@ -1024,6 +1027,9 @@ bool Decoder::DecodeInstructionsAtEntry(uint8_t const* _InstStream, uint64_t PC) break; } + DecodedMinAddress = std::min(DecodedMinAddress, PCOffset); + DecodedMaxAddress = std::max(DecodedMaxAddress, PCOffset + DecodeInst->InstSize); + ++TotalInstructions; ++BlockNumberOfInstructions; ++DecodedSize; diff --git a/External/FEXCore/Source/Interface/Core/Frontend.h b/External/FEXCore/Source/Interface/Core/Frontend.h index 2a9ee1f3a..723177dc6 100644 --- a/External/FEXCore/Source/Interface/Core/Frontend.h +++ b/External/FEXCore/Source/Interface/Core/Frontend.h @@ -30,6 +30,9 @@ public: return &Blocks; } + uint64_t DecodedMinAddress {}; + uint64_t DecodedMaxAddress {~0ULL}; + private: FEXCore::Context::Context *CTX; diff --git a/External/FEXCore/Source/Interface/Core/Interpreter/InterpreterClass.h b/External/FEXCore/Source/Interface/Core/Interpreter/InterpreterClass.h index 34f0478ff..528b78b62 100644 --- a/External/FEXCore/Source/Interface/Core/Interpreter/InterpreterClass.h +++ b/External/FEXCore/Source/Interface/Core/Interpreter/InterpreterClass.h @@ -22,7 +22,7 @@ public: explicit InterpreterCore(FEXCore::Context::Context *ctx, FEXCore::Core::InternalThreadState *Thread, bool CompileThread); ~InterpreterCore() override; std::string GetName() override { return "Interpreter"; } - void *CompileCode(FEXCore::IR::IRListView const *IR, FEXCore::Core::DebugData *DebugData, FEXCore::IR::RegisterAllocationData *RAData) override; + void *CompileCode(FEXCore::IR::IRListView const *IR, FEXCore::Core::DebugData *DebugData, FEXCore::IR::RegisterAllocationData *RAData) override; void *MapRegion(void* HostPtr, uint64_t, uint64_t) override { return HostPtr; } diff --git a/External/FEXCore/Source/Interface/Core/Interpreter/InterpreterCore.cpp b/External/FEXCore/Source/Interface/Core/Interpreter/InterpreterCore.cpp index 84f6f66b8..18b6897e9 100644 --- a/External/FEXCore/Source/Interface/Core/Interpreter/InterpreterCore.cpp +++ b/External/FEXCore/Source/Interface/Core/Interpreter/InterpreterCore.cpp @@ -111,7 +111,7 @@ InterpreterCore::~InterpreterCore() { } -void *InterpreterCore::CompileCode([[maybe_unused]] FEXCore::IR::IRListView const *IR, [[maybe_unused]] FEXCore::Core::DebugData *DebugData, FEXCore::IR::RegisterAllocationData *RAData) { +void *InterpreterCore::CompileCode([[maybe_unused]] FEXCore::IR::IRListView const *IR, [[maybe_unused]] FEXCore::Core::DebugData *DebugData, FEXCore::IR::RegisterAllocationData *RAData) { return reinterpret_cast(InterpreterExecution); } diff --git a/External/FEXCore/Source/Interface/Core/Interpreter/InterpreterOps.cpp b/External/FEXCore/Source/Interface/Core/Interpreter/InterpreterOps.cpp index 01e358f20..4e5c00aa6 100644 --- a/External/FEXCore/Source/Interface/Core/Interpreter/InterpreterOps.cpp +++ b/External/FEXCore/Source/Interface/Core/Interpreter/InterpreterOps.cpp @@ -922,7 +922,7 @@ bool InterpreterOps::GetFallbackHandler(IR::IROp_Header *IROp, FallbackInfo *Inf return false; } -void InterpreterOps::InterpretIR(FEXCore::Core::InternalThreadState *Thread, FEXCore::IR::IRListView *CurrentIR, FEXCore::Core::DebugData *DebugData) { +void InterpreterOps::InterpretIR(FEXCore::Core::InternalThreadState *Thread, FEXCore::IR::IRListView *CurrentIR, FEXCore::Core::DebugData *DebugData) { volatile void* stack = alloca(0); // Debug data is only passed in debug builds diff --git a/External/FEXCore/Source/Interface/Core/Interpreter/InterpreterOps.h b/External/FEXCore/Source/Interface/Core/Interpreter/InterpreterOps.h index 89492aabf..a543ab719 100644 --- a/External/FEXCore/Source/Interface/Core/Interpreter/InterpreterOps.h +++ b/External/FEXCore/Source/Interface/Core/Interpreter/InterpreterOps.h @@ -3,8 +3,7 @@ namespace FEXCore::Core { } namespace FEXCore::IR { - template - class IRListView; + class IRListView; } namespace FEXCore::Core{ @@ -37,7 +36,7 @@ namespace FEXCore::CPU { class InterpreterOps { public: - static void InterpretIR(FEXCore::Core::InternalThreadState *Thread, FEXCore::IR::IRListView *CurrentIR, FEXCore::Core::DebugData *DebugData); + static void InterpretIR(FEXCore::Core::InternalThreadState *Thread, FEXCore::IR::IRListView *CurrentIR, FEXCore::Core::DebugData *DebugData); static bool GetFallbackHandler(IR::IROp_Header *IROp, FallbackInfo *Info); }; }; \ No newline at end of file diff --git a/External/FEXCore/Source/Interface/Core/JIT/Arm64/JIT.cpp b/External/FEXCore/Source/Interface/Core/JIT/Arm64/JIT.cpp index d9ea891c8..78a9ce4c0 100644 --- a/External/FEXCore/Source/Interface/Core/JIT/Arm64/JIT.cpp +++ b/External/FEXCore/Source/Interface/Core/JIT/Arm64/JIT.cpp @@ -1033,7 +1033,7 @@ bool JITCore::IsGPR(uint32_t Node) { return Class == IR::GPRClass || Class == IR::GPRFixedClass; } -void *JITCore::CompileCode([[maybe_unused]] FEXCore::IR::IRListView const *IR, [[maybe_unused]] FEXCore::Core::DebugData *DebugData, FEXCore::IR::RegisterAllocationData *RAData) { +void *JITCore::CompileCode([[maybe_unused]] FEXCore::IR::IRListView const *IR, [[maybe_unused]] FEXCore::Core::DebugData *DebugData, FEXCore::IR::RegisterAllocationData *RAData) { using namespace aarch64; JumpTargets.clear(); uint32_t SSACount = IR->GetSSACount(); diff --git a/External/FEXCore/Source/Interface/Core/JIT/Arm64/JITClass.h b/External/FEXCore/Source/Interface/Core/JIT/Arm64/JITClass.h index 4a105c438..3d263e33d 100644 --- a/External/FEXCore/Source/Interface/Core/JIT/Arm64/JITClass.h +++ b/External/FEXCore/Source/Interface/Core/JIT/Arm64/JITClass.h @@ -78,7 +78,7 @@ public: ~JITCore() override; std::string GetName() override { return "JIT"; } - void *CompileCode(FEXCore::IR::IRListView const *IR, FEXCore::Core::DebugData *DebugData, FEXCore::IR::RegisterAllocationData *RAData) override; + void *CompileCode(FEXCore::IR::IRListView const *IR, FEXCore::Core::DebugData *DebugData, FEXCore::IR::RegisterAllocationData *RAData) override; void *MapRegion(void* HostPtr, uint64_t, uint64_t) override { return HostPtr; } @@ -100,7 +100,7 @@ private: Label *PendingTargetLabel; FEXCore::Context::Context *CTX; FEXCore::Core::InternalThreadState *State; - FEXCore::IR::IRListView const *IR; + FEXCore::IR::IRListView const *IR; std::map JumpTargets; diff --git a/External/FEXCore/Source/Interface/Core/JIT/x86_64/JIT.cpp b/External/FEXCore/Source/Interface/Core/JIT/x86_64/JIT.cpp index 6befec7c2..f915bb0d4 100644 --- a/External/FEXCore/Source/Interface/Core/JIT/x86_64/JIT.cpp +++ b/External/FEXCore/Source/Interface/Core/JIT/x86_64/JIT.cpp @@ -838,7 +838,7 @@ std::tuple JITCore::GetCC(IR::Con return { &CodeGenerator::sete , &CodeGenerator::cmove , &CodeGenerator::je }; } -void *JITCore::CompileCode([[maybe_unused]] FEXCore::IR::IRListView const *IR, [[maybe_unused]] FEXCore::Core::DebugData *DebugData, FEXCore::IR::RegisterAllocationData *RAData) { +void *JITCore::CompileCode([[maybe_unused]] FEXCore::IR::IRListView const *IR, [[maybe_unused]] FEXCore::Core::DebugData *DebugData, FEXCore::IR::RegisterAllocationData *RAData) { JumpTargets.clear(); uint32_t SSACount = IR->GetSSACount(); diff --git a/External/FEXCore/Source/Interface/Core/JIT/x86_64/JITClass.h b/External/FEXCore/Source/Interface/Core/JIT/x86_64/JITClass.h index ca182e74b..d0d938830 100644 --- a/External/FEXCore/Source/Interface/Core/JIT/x86_64/JITClass.h +++ b/External/FEXCore/Source/Interface/Core/JIT/x86_64/JITClass.h @@ -59,7 +59,7 @@ public: explicit JITCore(FEXCore::Context::Context *ctx, FEXCore::Core::InternalThreadState *Thread, CodeBuffer Buffer, bool CompileThread); ~JITCore() override; std::string GetName() override { return "JIT"; } - void *CompileCode(FEXCore::IR::IRListView const *IR, FEXCore::Core::DebugData *DebugData, FEXCore::IR::RegisterAllocationData *RAData) override; + void *CompileCode(FEXCore::IR::IRListView const *IR, FEXCore::Core::DebugData *DebugData, FEXCore::IR::RegisterAllocationData *RAData) override; void *MapRegion(void* HostPtr, uint64_t, uint64_t) override { return HostPtr; } @@ -79,7 +79,7 @@ private: Label* PendingTargetLabel{}; FEXCore::Context::Context *CTX; FEXCore::Core::InternalThreadState *ThreadState; - FEXCore::IR::IRListView const *IR; + FEXCore::IR::IRListView const *IR; std::unordered_map JumpTargets; Xbyak::util::Cpu Features{}; diff --git a/External/FEXCore/Source/Interface/IR/IRDumper.cpp b/External/FEXCore/Source/Interface/IR/IRDumper.cpp index 7a8bef5aa..664779126 100644 --- a/External/FEXCore/Source/Interface/IR/IRDumper.cpp +++ b/External/FEXCore/Source/Interface/IR/IRDumper.cpp @@ -12,15 +12,15 @@ namespace FEXCore::IR { #include -static void PrintArg(std::stringstream *out, [[maybe_unused]] IRListView const* IR, uint64_t Arg) { +static void PrintArg(std::stringstream *out, [[maybe_unused]] IRListView const* IR, uint64_t Arg) { *out << "#0x" << std::hex << Arg; } -static void PrintArg(std::stringstream *out, [[maybe_unused]] IRListView const* IR, const char* Arg) { +static void PrintArg(std::stringstream *out, [[maybe_unused]] IRListView const* IR, const char* Arg) { *out << Arg; } -static void PrintArg(std::stringstream *out, [[maybe_unused]] IRListView const* IR, CondClassType Arg) { +static void PrintArg(std::stringstream *out, [[maybe_unused]] IRListView const* IR, CondClassType Arg) { std::array CondNames = { "EQ", "NEQ", @@ -49,7 +49,7 @@ static void PrintArg(std::stringstream *out, [[maybe_unused]] IRListView *out << CondNames[Arg]; } -static void PrintArg(std::stringstream *out, [[maybe_unused]] IRListView const* IR, MemOffsetType Arg) { +static void PrintArg(std::stringstream *out, [[maybe_unused]] IRListView const* IR, MemOffsetType Arg) { std::array Names = { "SXTX", "UXTW", @@ -59,7 +59,7 @@ static void PrintArg(std::stringstream *out, [[maybe_unused]] IRListView *out << Names[Arg]; } -static void PrintArg(std::stringstream *out, [[maybe_unused]] IRListView const* IR, RegisterClassType Arg) { +static void PrintArg(std::stringstream *out, [[maybe_unused]] IRListView const* IR, RegisterClassType Arg) { if (Arg == GPRClass.Val) *out << "GPR"; else if (Arg == GPRFixedClass.Val) @@ -74,7 +74,7 @@ static void PrintArg(std::stringstream *out, [[maybe_unused]] IRListView *out << "Unknown Registerclass " << Arg; } -static void PrintArg(std::stringstream *out, IRListView const* IR, OrderedNodeWrapper Arg, IR::RegisterAllocationData *RAData) { +static void PrintArg(std::stringstream *out, IRListView const* IR, OrderedNodeWrapper Arg, IR::RegisterAllocationData *RAData) { auto [CodeNode, IROp] = IR->at(Arg)(); if (Arg.ID() == 0) { @@ -123,7 +123,7 @@ static void PrintArg(std::stringstream *out, IRListView const* IR, Ordere } } -static void PrintArg(std::stringstream *out, [[maybe_unused]] IRListView const* IR, FEXCore::IR::FenceType Arg) { +static void PrintArg(std::stringstream *out, [[maybe_unused]] IRListView const* IR, FEXCore::IR::FenceType Arg) { if (Arg == IR::Fence_Load) { *out << "Loads"; } @@ -138,7 +138,7 @@ static void PrintArg(std::stringstream *out, [[maybe_unused]] IRListView } } -void Dump(std::stringstream *out, IRListView const* IR, IR::RegisterAllocationData *RAData) { +void Dump(std::stringstream *out, IRListView const* IR, IR::RegisterAllocationData *RAData) { auto HeaderOp = IR->GetHeader(); int8_t CurrentIndent = 0; diff --git a/External/FEXCore/Source/Interface/IR/Passes/RegisterAllocationPass.cpp b/External/FEXCore/Source/Interface/IR/Passes/RegisterAllocationPass.cpp index fcb6d3fd5..64302ef5f 100644 --- a/External/FEXCore/Source/Interface/IR/Passes/RegisterAllocationPass.cpp +++ b/External/FEXCore/Source/Interface/IR/Passes/RegisterAllocationPass.cpp @@ -258,6 +258,7 @@ namespace { Graph->AllocData.reset(); Graph->AllocData.reset((FEXCore::IR::RegisterAllocationData*)malloc(FEXCore::IR::RegisterAllocationData::Size(NodeCount))); memset(&Graph->AllocData->Map[0], INVALID_REGCLASS.Raw, NodeCount); + Graph->AllocData->MapCount = NodeCount; Graph->NodeCount = NodeCount; } @@ -302,7 +303,7 @@ namespace { } #endif - FEXCore::IR::RegisterClassType GetRegClassFromNode(FEXCore::IR::IRListView *IR, FEXCore::IR::IROp_Header *IROp) { + FEXCore::IR::RegisterClassType GetRegClassFromNode(FEXCore::IR::IRListView *IR, FEXCore::IR::IROp_Header *IROp) { using namespace FEXCore; FEXCore::IR::RegisterClassType Class = IR::GetRegClass(IROp->Op); @@ -356,7 +357,7 @@ namespace { }; // Walk the IR and set the node classes - void FindNodeClasses(RegisterGraph *Graph, FEXCore::IR::IRListView *IR) { + void FindNodeClasses(RegisterGraph *Graph, FEXCore::IR::IRListView *IR) { for (auto [CodeNode, IROp] : IR->GetAllCode()) { // If the destination hasn't yet been set then set it now if (IROp->HasDest) { @@ -410,14 +411,14 @@ namespace FEXCore::IR { std::unordered_map LocalBlockInterferences; BlockInterferences GlobalBlockInterferences; - void CalculateLiveRange(FEXCore::IR::IRListView *IR); - void OptimizeStaticRegisters(FEXCore::IR::IRListView *IR); - void CalculateBlockInterferences(FEXCore::IR::IRListView *IR); - void CalculateBlockNodeInterference(FEXCore::IR::IRListView *IR); - void CalculateNodeInterference(FEXCore::IR::IRListView *IR); + void CalculateLiveRange(FEXCore::IR::IRListView *IR); + void OptimizeStaticRegisters(FEXCore::IR::IRListView *IR); + void CalculateBlockInterferences(FEXCore::IR::IRListView *IR); + void CalculateBlockNodeInterference(FEXCore::IR::IRListView *IR); + void CalculateNodeInterference(FEXCore::IR::IRListView *IR); void AllocateVirtualRegisters(); - void CalculatePredecessors(FEXCore::IR::IRListView *IR); - void RecursiveLiveRangeExpansion(FEXCore::IR::IRListView *IR, uint32_t Node, uint32_t DefiningBlockID, LiveRange *LiveRange, const std::unordered_set &Predecessors, std::unordered_set &VisitedPredecessors); + void CalculatePredecessors(FEXCore::IR::IRListView *IR); + void RecursiveLiveRangeExpansion(FEXCore::IR::IRListView *IR, uint32_t Node, uint32_t DefiningBlockID, LiveRange *LiveRange, const std::unordered_set &Predecessors, std::unordered_set &VisitedPredecessors); FEXCore::IR::AllNodesIterator FindFirstUse(FEXCore::IR::IREmitter *IREmit, FEXCore::IR::OrderedNode* Node, FEXCore::IR::AllNodesIterator Begin, FEXCore::IR::AllNodesIterator End); FEXCore::IR::AllNodesIterator FindLastUseBefore(FEXCore::IR::IREmitter *IREmit, FEXCore::IR::OrderedNode* Node, FEXCore::IR::AllNodesIterator Begin, FEXCore::IR::AllNodesIterator End); @@ -468,7 +469,7 @@ namespace FEXCore::IR { return std::move(Graph->AllocData); } - void ConstrainedRAPass::RecursiveLiveRangeExpansion(FEXCore::IR::IRListView *IR, uint32_t Node, uint32_t DefiningBlockID, LiveRange *LiveRange, const std::unordered_set &Predecessors, std::unordered_set &VisitedPredecessors) { + void ConstrainedRAPass::RecursiveLiveRangeExpansion(FEXCore::IR::IRListView *IR, uint32_t Node, uint32_t DefiningBlockID, LiveRange *LiveRange, const std::unordered_set &Predecessors, std::unordered_set &VisitedPredecessors) { for (auto PredecessorId: Predecessors) { if (DefiningBlockID != PredecessorId && !VisitedPredecessors.contains(PredecessorId)) { // do the magic @@ -491,7 +492,7 @@ namespace FEXCore::IR { } } - void ConstrainedRAPass::CalculateLiveRange(FEXCore::IR::IRListView *IR) { + void ConstrainedRAPass::CalculateLiveRange(FEXCore::IR::IRListView *IR) { using namespace FEXCore; size_t Nodes = IR->GetSSACount(); LiveRanges.clear(); @@ -579,7 +580,7 @@ namespace FEXCore::IR { } } - void ConstrainedRAPass::OptimizeStaticRegisters(FEXCore::IR::IRListView *IR) { + void ConstrainedRAPass::OptimizeStaticRegisters(FEXCore::IR::IRListView *IR) { // Helpers @@ -790,7 +791,7 @@ namespace FEXCore::IR { } } - void ConstrainedRAPass::CalculateBlockInterferences(FEXCore::IR::IRListView *IR) { + void ConstrainedRAPass::CalculateBlockInterferences(FEXCore::IR::IRListView *IR) { using namespace FEXCore; for (auto [BlockNode, BlockHeader] : IR->GetBlocks()) { @@ -818,7 +819,7 @@ namespace FEXCore::IR { } } - void ConstrainedRAPass::CalculateBlockNodeInterference(FEXCore::IR::IRListView *IR) { + void ConstrainedRAPass::CalculateBlockNodeInterference(FEXCore::IR::IRListView *IR) { #if 0 auto AddInterference = [&](uint32_t Node1, uint32_t Node2) { RegisterNode *Node = &Graph->Nodes[Node1]; @@ -877,7 +878,7 @@ namespace FEXCore::IR { #endif } - void ConstrainedRAPass::CalculateNodeInterference(FEXCore::IR::IRListView *IR) { + void ConstrainedRAPass::CalculateNodeInterference(FEXCore::IR::IRListView *IR) { auto AddInterference = [this](uint32_t Node1, uint32_t Node2) { RegisterNode *Node = &Graph->Nodes[Node1]; Node->Interferences.Append(Node2); @@ -1477,7 +1478,7 @@ namespace FEXCore::IR { } - void ConstrainedRAPass::CalculatePredecessors(FEXCore::IR::IRListView *IR) { + void ConstrainedRAPass::CalculatePredecessors(FEXCore::IR::IRListView *IR) { Graph->BlockPredecessors.clear(); for (auto [BlockNode, BlockIROp] : IR->GetBlocks()) { diff --git a/External/FEXCore/Source/Interface/IR/Passes/RegisterAllocationPass.h b/External/FEXCore/Source/Interface/IR/Passes/RegisterAllocationPass.h index 86f21e4aa..5f431c6a4 100644 --- a/External/FEXCore/Source/Interface/IR/Passes/RegisterAllocationPass.h +++ b/External/FEXCore/Source/Interface/IR/Passes/RegisterAllocationPass.h @@ -4,7 +4,6 @@ #include namespace FEXCore::IR { -template class IRListView; class RegisterAllocationPass : public FEXCore::IR::Pass { diff --git a/External/FEXCore/docs/IR.md b/External/FEXCore/docs/IR.md index 16c2b1bae..1276857f7 100644 --- a/External/FEXCore/docs/IR.md +++ b/External/FEXCore/docs/IR.md @@ -79,10 +79,10 @@ This is an intrusive allocator that is used by the `OpDispatchBuilder` for stori ### OpDispatchBuilder OpDispatchBuilder provides two routines for handling the IR outside of the class -* `IRListView ViewIR();` +* `IRListView ViewIR();` * Returns a wrapper container class the allows you to view the IR. This doesn't take ownership of the IR data. * If the OpDispatcherBuilder changes its IR then changes are also visible to this class -* `IRListView *CreateIRCopy()` +* `IRListView *CreateIRCopy()` * As the name says, it creates a new copy of the IR that is in the OpDispatchBuilder * Copying the IR only copies the memory used and doesn't have any free space for optimizations after this copy operation * Useful for tiered recompilers, AOT, and offline analysis diff --git a/External/FEXCore/include/FEXCore/Core/CPUBackend.h b/External/FEXCore/include/FEXCore/Core/CPUBackend.h index 4b488008c..d247c7005 100644 --- a/External/FEXCore/include/FEXCore/Core/CPUBackend.h +++ b/External/FEXCore/include/FEXCore/Core/CPUBackend.h @@ -5,7 +5,6 @@ namespace FEXCore { namespace IR { - template class IRListView; class RegisterAllocationData; } @@ -45,7 +44,7 @@ class LLVMCore; * @return An executable function pointer that is theoretically compiled from this point. * Is actually a function pointer of type `void (FEXCore::Core::ThreadState *Thread) */ - virtual void *CompileCode(FEXCore::IR::IRListView const *IR, FEXCore::Core::DebugData *DebugData, FEXCore::IR::RegisterAllocationData *RAData) = 0; + virtual void *CompileCode(FEXCore::IR::IRListView const *IR, FEXCore::Core::DebugData *DebugData, FEXCore::IR::RegisterAllocationData *RAData) = 0; /** * @brief Function for mapping memory in to the CPUBackend's visible space. Allows setting up virtual mappings if required diff --git a/External/FEXCore/include/FEXCore/Core/Context.h b/External/FEXCore/include/FEXCore/Core/Context.h index eecdef535..7b9a10aea 100644 --- a/External/FEXCore/include/FEXCore/Core/Context.h +++ b/External/FEXCore/include/FEXCore/Core/Context.h @@ -6,6 +6,9 @@ #include #include +#include +#include + namespace FEXCore { class CodeLoader; } @@ -229,4 +232,7 @@ namespace FEXCore::Context { void SetSignalDelegator(FEXCore::Context::Context *CTX, FEXCore::SignalDelegator *SignalDelegation); void SetSyscallHandler(FEXCore::Context::Context *CTX, FEXCore::HLE::SyscallHandler *Handler); FEXCore::CPUID::FunctionResults RunCPUIDFunction(FEXCore::Context::Context *CTX, uint32_t Function, uint32_t Leaf); + + bool ReadAOT(FEXCore::Context::Context *CTX, std::istream& stream); + void WriteAOT(FEXCore::Context::Context *CTX, std::ostream& stream); } diff --git a/External/FEXCore/include/FEXCore/Debug/InternalThreadState.h b/External/FEXCore/include/FEXCore/Debug/InternalThreadState.h index 792a98f0e..9cf064b25 100644 --- a/External/FEXCore/include/FEXCore/Debug/InternalThreadState.h +++ b/External/FEXCore/include/FEXCore/Debug/InternalThreadState.h @@ -76,7 +76,7 @@ namespace FEXCore::Core { std::unique_ptr CPUBackend; std::unique_ptr LookupCache; - std::unordered_map>> IRLists; + std::unordered_map> IRLists; std::unordered_map> RALists; std::unordered_map> DebugData; diff --git a/External/FEXCore/include/FEXCore/IR/IR.h b/External/FEXCore/include/FEXCore/IR/IR.h index 2543b7be6..07d94f1b7 100644 --- a/External/FEXCore/include/FEXCore/IR/IR.h +++ b/External/FEXCore/include/FEXCore/IR/IR.h @@ -456,11 +456,10 @@ public: } }; -template class IRListView; class IREmitter; -void Dump(std::stringstream *out, IRListView const* IR, IR::RegisterAllocationData *RAData); +void Dump(std::stringstream *out, IRListView const* IR, IR::RegisterAllocationData *RAData); IREmitter* Parse(std::istream *in); template diff --git a/External/FEXCore/include/FEXCore/IR/IREmitter.h b/External/FEXCore/include/FEXCore/IR/IREmitter.h index 1512b3edf..1bd5c6672 100644 --- a/External/FEXCore/include/FEXCore/IR/IREmitter.h +++ b/External/FEXCore/include/FEXCore/IR/IREmitter.h @@ -21,8 +21,8 @@ friend class FEXCore::IR::PassManager; ResetWorkingList(); } - IRListView ViewIR() { return IRListView(&Data, &ListData); } - IRListView *CreateIRCopy() { return new IRListView(&Data, &ListData); } + IRListView ViewIR() { return IRListView(&Data, &ListData, false); } + IRListView *CreateIRCopy() { return new IRListView(&Data, &ListData, true); } void ResetWorkingList(); /** diff --git a/External/FEXCore/include/FEXCore/IR/IntrusiveIRList.h b/External/FEXCore/include/FEXCore/IR/IntrusiveIRList.h index d61151f34..510281d4a 100644 --- a/External/FEXCore/include/FEXCore/IR/IntrusiveIRList.h +++ b/External/FEXCore/include/FEXCore/IR/IntrusiveIRList.h @@ -8,6 +8,8 @@ #include #include #include +#include +#include namespace FEXCore::IR { /** @@ -62,17 +64,16 @@ class IntrusiveAllocator final { uintptr_t Data; }; -template class IRListView final { public: IRListView() = delete; - IRListView(IRListView &&) = delete; + IRListView(IRListView &&) = delete; - IRListView(IntrusiveAllocator *Data, IntrusiveAllocator *List) { + IRListView(IntrusiveAllocator *Data, IntrusiveAllocator *List, bool _IsCopy) : IsCopy(_IsCopy) { DataSize = Data->Size(); ListSize = List->Size(); - if (Copy) { + if (IsCopy) { IRData = malloc(DataSize + ListSize); ListData = reinterpret_cast(reinterpret_cast(IRData) + DataSize); memcpy(IRData, reinterpret_cast(Data->Begin()), DataSize); @@ -85,24 +86,46 @@ public: } } - IRListView(IRListView *Old) { + IRListView(IRListView *Old, bool _IsCopy) : IsCopy(_IsCopy) { DataSize = Old->DataSize; ListSize = Old->ListSize; + if (IsCopy) { + IRData = malloc(DataSize + ListSize); + ListData = reinterpret_cast(reinterpret_cast(IRData) + DataSize); + memcpy(IRData, Old->IRData, DataSize); + memcpy(ListData, Old->ListData, ListSize); + } else { + IRData = Old->IRData; + ListData = Old->ListData; + } + } + + IRListView(std::istream& stream) : IsCopy(true) { + stream.read((char*)&DataSize, sizeof(DataSize)); + stream.read((char*)&ListSize, sizeof(ListSize)); + IRData = malloc(DataSize + ListSize); ListData = reinterpret_cast(reinterpret_cast(IRData) + DataSize); - memcpy(IRData, Old->IRData, DataSize); - memcpy(ListData, Old->ListData, ListSize); + stream.read((char*)IRData, DataSize); + stream.read((char*)ListData, ListSize); } ~IRListView() { - if (Copy) { + if (IsCopy) { free (IRData); // ListData is just offset from IRData } } - IRListView *CreateCopy() { - return new IRListView(this); + void Serialize(std::ostream& stream) { + stream.write((char*)&DataSize, sizeof(DataSize)); + stream.write((char*)&ListSize, sizeof(ListSize)); + stream.write((char*)IRData, DataSize); + stream.write((char*)ListData, ListSize); + } + + IRListView *CreateCopy() { + return new IRListView(this, true); } uintptr_t const GetData() const { return reinterpret_cast(IRData); } @@ -259,6 +282,7 @@ private: void *ListData; size_t DataSize; size_t ListSize; + bool IsCopy; }; } diff --git a/External/FEXCore/include/FEXCore/IR/RegisterAllocationData.h b/External/FEXCore/include/FEXCore/IR/RegisterAllocationData.h index 112574813..6844c6977 100644 --- a/External/FEXCore/include/FEXCore/IR/RegisterAllocationData.h +++ b/External/FEXCore/include/FEXCore/IR/RegisterAllocationData.h @@ -37,6 +37,7 @@ struct RegisterAllocationDataDeleter { class RegisterAllocationData { public: uint32_t SpillSlotCount {}; + uint32_t MapCount {}; PhysicalRegister Map[0]; PhysicalRegister GetNodeRegister(uint32_t Node) const { diff --git a/Source/CommonCore/HostFactory.cpp b/Source/CommonCore/HostFactory.cpp index ec643d33c..63ca949db 100644 --- a/Source/CommonCore/HostFactory.cpp +++ b/Source/CommonCore/HostFactory.cpp @@ -30,7 +30,7 @@ namespace HostFactory { explicit HostCore(FEXCore::Context::Context* CTX, FEXCore::Core::ThreadState *Thread, bool Fallback); ~HostCore() override; std::string GetName() override { return "Host Core"; } - void* CompileCode(FEXCore::IR::IRListView const *IR, FEXCore::Core::DebugData *DebugData, FEXCore::IR::RegisterAllocationData *RAData) override; + void* CompileCode(FEXCore::IR::IRListView const *IR, FEXCore::Core::DebugData *DebugData, FEXCore::IR::RegisterAllocationData *RAData) override; void *MapRegion(void *HostPtr, uint64_t VirtualGuestPtr, uint64_t Size) override { return HostPtr; @@ -170,7 +170,7 @@ namespace HostFactory { ready(); } - void* HostCore::CompileCode([[maybe_unused]] FEXCore::IR::IRListView const *IR, FEXCore::Core::DebugData *DebugData, FEXCore::IR::RegisterAllocationData *RAData) { + void* HostCore::CompileCode([[maybe_unused]] FEXCore::IR::IRListView const *IR, FEXCore::Core::DebugData *DebugData, FEXCore::IR::RegisterAllocationData *RAData) { return nullptr; } diff --git a/Source/Tests/ELFLoader.cpp b/Source/Tests/ELFLoader.cpp index be54f1f16..bd1dccd24 100644 --- a/Source/Tests/ELFLoader.cpp +++ b/Source/Tests/ELFLoader.cpp @@ -16,6 +16,8 @@ #include #include #include +#include +#include namespace { static bool SilentLog; @@ -287,8 +289,31 @@ int main(int argc, char **argv, char **const envp) { }); } + std::string base_filename = Program.substr(Program.find_last_of("/\\") + 1) + ".fex-emu.aot"; + + { + std::ifstream AOTRead(base_filename, std::ios::in | std::ios::binary); + + if (AOTRead) { + if (FEXCore::Context::ReadAOT(CTX, AOTRead)) { + LogMan::Msg::I("AOT Cache Loaded\n"); + } + } + } + FEXCore::Context::RunUntilExit(CTX); + { + std::ofstream AOTWrite(base_filename, std::ios::out | std::ios::binary ); + + if (AOTWrite) { + std::filesystem::resize_file(base_filename, 0); + AOTWrite.seekp(0); + FEXCore::Context::WriteAOT(CTX, AOTWrite); + LogMan::Msg::I("AOT Cache Stored\n"); + } + } + auto ProgramStatus = FEXCore::Context::GetProgramStatus(CTX); SyscallHandler.reset();