From b3055523b4d4f26296922141e5e0a123ac7e3996 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Wed, 8 Nov 2023 08:51:17 -0400 Subject: [PATCH] IR: Switch to dedicated NZCV load/store Semantics differ markedly from the non-NZCV flags; splitting this out makes it a lot easier to do things correctly imho. This gets the dest/src size correct (important for spilling) and makes our existing opt passes skip this, which is needed for correctness at the moment anyway. Signed-off-by: Alyssa Rosenzweig --- .../Interface/Core/JIT/Arm64/MemoryOps.cpp | 22 ++++++++++++------- .../Source/Interface/Core/OpcodeDispatcher.h | 2 +- .../Interface/Core/OpcodeDispatcher/Flags.cpp | 4 ++-- FEXCore/Source/Interface/IR/IR.json | 11 ++++++++++ 4 files changed, 28 insertions(+), 11 deletions(-) diff --git a/FEXCore/Source/Interface/Core/JIT/Arm64/MemoryOps.cpp b/FEXCore/Source/Interface/Core/JIT/Arm64/MemoryOps.cpp index edca63eb6..eebc2cedc 100644 --- a/FEXCore/Source/Interface/Core/JIT/Arm64/MemoryOps.cpp +++ b/FEXCore/Source/Interface/Core/JIT/Arm64/MemoryOps.cpp @@ -1031,23 +1031,29 @@ DEF_OP(FillRegister) { } } +DEF_OP(LoadNZCV) { + auto Dst = GetReg(Node); + + ldr(Dst.W(), STATE, offsetof(FEXCore::Core::CPUState, flags[0]) + 24); +} + +DEF_OP(StoreNZCV) { + auto Op = IROp->C(); + + str(GetReg(Op->Value.ID()).W(), STATE, offsetof(FEXCore::Core::CPUState, flags[0]) + 24); +} + DEF_OP(LoadFlag) { auto Op = IROp->C(); auto Dst = GetReg(Node); - if (Op->Flag == 24 /* NZCV */) - ldr(Dst.W(), STATE, offsetof(FEXCore::Core::CPUState, flags[0]) + Op->Flag); - else - ldrb(Dst, STATE, offsetof(FEXCore::Core::CPUState, flags[0]) + Op->Flag); + ldrb(Dst, STATE, offsetof(FEXCore::Core::CPUState, flags[0]) + Op->Flag); } DEF_OP(StoreFlag) { auto Op = IROp->C(); - if (Op->Flag == 24 /* NZCV */) - str(GetReg(Op->Value.ID()).W(), STATE, offsetof(FEXCore::Core::CPUState, flags[0]) + Op->Flag); - else - strb(GetReg(Op->Value.ID()), STATE, offsetof(FEXCore::Core::CPUState, flags[0]) + Op->Flag); + 
strb(GetReg(Op->Value.ID()), STATE, offsetof(FEXCore::Core::CPUState, flags[0]) + Op->Flag); } FEXCore::ARMEmitter::ExtendedMemOperand Arm64JITCore::GenerateMemOperand(uint8_t AccessSize, diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h index c83e81af1..506b233fc 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h @@ -1243,7 +1243,7 @@ private: OrderedNode *GetNZCV() { if (!CachedNZCV) { - CachedNZCV = _LoadFlag(FEXCore::X86State::RFLAG_NZCV_LOC); + CachedNZCV = _LoadNZCV(); // We don't know what's set PossiblySetNZCVBits = ~0; diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Flags.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Flags.cpp index 8f490fd51..850636a7b 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Flags.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Flags.cpp @@ -313,7 +313,7 @@ void OpDispatchBuilder::CalculateDeferredFlags(uint32_t FlagsToCalculateMask) { if (CurrentDeferredFlags.Type == FlagsGenerationType::TYPE_NONE) { // Nothing to do if (NZCVDirty && CachedNZCV) - _StoreFlag(CachedNZCV, FEXCore::X86State::RFLAG_NZCV_LOC); + _StoreNZCV(CachedNZCV); CachedNZCV = nullptr; NZCVDirty = false; @@ -501,7 +501,7 @@ void OpDispatchBuilder::CalculateDeferredFlags(uint32_t FlagsToCalculateMask) { CurrentDeferredFlags.Type = FlagsGenerationType::TYPE_NONE; if (NZCVDirty && CachedNZCV) - _StoreFlag(CachedNZCV, FEXCore::X86State::RFLAG_NZCV_LOC); + _StoreNZCV(CachedNZCV); CachedNZCV = nullptr; NZCVDirty = false; diff --git a/FEXCore/Source/Interface/IR/IR.json b/FEXCore/Source/Interface/IR/IR.json index 064e02a64..d9cb18c5c 100644 --- a/FEXCore/Source/Interface/IR/IR.json +++ b/FEXCore/Source/Interface/IR/IR.json @@ -447,6 +447,17 @@ ] }, + "GPR = LoadNZCV": { + "Desc": ["Loads value of NZCV register"], + "DestSize": "4" + }, + + "StoreNZCV GPR:$Value": { + "HasSideEffects": true, + "Desc": 
["Stores value to NZCV register"], + "DestSize": "4" + }, + "GPR = LoadFlag u32:$Flag": { "Desc": ["Loads an x86-64 flag from the context object", "Specialized to allow flexible implementation of flag handling"