mirror of
https://github.com/FEX-Emu/FEX.git
synced 2025-02-12 18:39:18 +00:00
JIT: switch DF representation
Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
This commit is contained in:
parent
e3ee65e491
commit
c99cbe6d0a
@ -167,6 +167,7 @@ namespace FEXCore::Context {
|
||||
case X86State::RFLAG_ZF_RAW_LOC:
|
||||
case X86State::RFLAG_SF_RAW_LOC:
|
||||
case X86State::RFLAG_OF_RAW_LOC:
|
||||
case X86State::RFLAG_DF_RAW_LOC:
|
||||
// Intentionally do nothing.
|
||||
// These contain multiple bits which can corrupt other members when compacted.
|
||||
break;
|
||||
@ -215,6 +216,11 @@ namespace FEXCore::Context {
|
||||
uint32_t AF = ((Frame->State.af_raw ^ PFByte) & (1 << 4)) ? 1 : 0;
|
||||
EFLAGS |= AF << X86State::RFLAG_AF_RAW_LOC;
|
||||
|
||||
// DF is pretransformed, undo the transform from 1/-1 back to 0/1
|
||||
uint8_t DFByte = Frame->State.flags[X86State::RFLAG_DF_RAW_LOC];
|
||||
if (DFByte & 0x80)
|
||||
EFLAGS |= 1 << X86State::RFLAG_DF_RAW_LOC;
|
||||
|
||||
return EFLAGS;
|
||||
}
|
||||
|
||||
@ -238,6 +244,10 @@ namespace FEXCore::Context {
|
||||
// PF is inverted in our internal representation.
|
||||
Frame->State.pf_raw = (EFLAGS & (1U << i)) ? 0 : 1;
|
||||
break;
|
||||
case X86State::RFLAG_DF_RAW_LOC:
|
||||
// DF is encoded as 1/-1
|
||||
Frame->State.flags[i] = (EFLAGS & (1U << i)) ? 0xff : 1;
|
||||
break;
|
||||
default:
|
||||
Frame->State.flags[i] = (EFLAGS & (1U << i)) ? 1 : 0;
|
||||
break;
|
||||
|
@ -888,6 +888,14 @@ DEF_OP(StoreNZCV) {
|
||||
msr(ARMEmitter::SystemRegister::NZCV, GetReg(Op->Value.ID()));
|
||||
}
|
||||
|
||||
DEF_OP(LoadDF) {
  // The DF slot in the CPU state is a single byte holding 0x1 or 0xFF.
  // A sign-extending byte load widens that into 1 / -1 in the X register.
  auto DFSlot = X86State::RFLAG_DF_RAW_LOC;
  auto Result = GetReg(Node);

  ldrsb(Result.X(), STATE, offsetof(FEXCore::Core::CPUState, flags[DFSlot]));
}
|
||||
|
||||
DEF_OP(LoadFlag) {
|
||||
auto Op = IROp->C<IR::IROp_LoadFlag>();
|
||||
auto Dst = GetReg(Node);
|
||||
|
@ -1379,10 +1379,10 @@ void OpDispatchBuilder::FLAGControlOp(OpcodeArgs) {
|
||||
SetRFLAG(_Constant(1), FEXCore::X86State::RFLAG_CF_RAW_LOC);
|
||||
break;
|
||||
case 0xFC: // CLD
|
||||
SetRFLAG(_Constant(0), FEXCore::X86State::RFLAG_DF_LOC);
|
||||
SetRFLAG(_Constant(0), FEXCore::X86State::RFLAG_DF_RAW_LOC);
|
||||
break;
|
||||
case 0xFD: // STD
|
||||
SetRFLAG(_Constant(1), FEXCore::X86State::RFLAG_DF_LOC);
|
||||
SetRFLAG(_Constant(1), FEXCore::X86State::RFLAG_DF_RAW_LOC);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -1401,6 +1401,11 @@ private:
|
||||
if (ValueOffset || MustMask)
|
||||
Value = _Bfe(OpSize::i32Bit, 1, ValueOffset, Value);
|
||||
|
||||
// For DF, we need to transform 0/1 into 1/-1
|
||||
if (BitOffset == FEXCore::X86State::RFLAG_DF_RAW_LOC) {
|
||||
Value = _SubShift(OpSize::i64Bit, _Constant(1), Value, ShiftType::LSL, 1);
|
||||
}
|
||||
|
||||
_StoreFlag(Value, BitOffset);
|
||||
}
|
||||
}
|
||||
@ -1453,6 +1458,9 @@ private:
|
||||
return _LoadRegister(false, offsetof(FEXCore::Core::CPUState, pf_raw), GPRClass, GPRFixedClass, CTX->GetGPRSize());
|
||||
} else if (BitOffset == FEXCore::X86State::RFLAG_AF_RAW_LOC) {
|
||||
return _LoadRegister(false, offsetof(FEXCore::Core::CPUState, af_raw), GPRClass, GPRFixedClass, CTX->GetGPRSize());
|
||||
} else if (BitOffset == FEXCore::X86State::RFLAG_DF_RAW_LOC) {
|
||||
// Recover the sign bit, it is the logical DF value
|
||||
return _Lshr(OpSize::i64Bit, _LoadDF(), _Constant(63));
|
||||
} else {
|
||||
return _LoadFlag(BitOffset);
|
||||
}
|
||||
@ -1460,9 +1468,13 @@ private:
|
||||
|
||||
// Returns (DF ? -Size : Size)
|
||||
OrderedNode *LoadDir(const unsigned Size) {
  // _LoadDF hands back the direction already encoded as 1 / -1, so scaling it
  // to +/-Size is a left shift by log2(Size).
  // NOTE(review): this assumes Size is a power of two - confirm against callers.
  auto Dir = _LoadDF();
  const auto Shift = FEXCore::ilog2(Size);

  // A shift amount of zero needs no scaling; return the raw direction.
  if (!Shift) {
    return Dir;
  }

  return _Lshl(IR::SizeToOpSize(CTX->GetGPRSize()), Dir, _Constant(Shift));
}
|
||||
|
||||
// Returns DF ? (X - Size) : (X + Size)
|
||||
|
@ -27,7 +27,7 @@ constexpr std::array<uint32_t, 17> FlagOffsets = {
|
||||
FEXCore::X86State::RFLAG_SF_RAW_LOC,
|
||||
FEXCore::X86State::RFLAG_TF_LOC,
|
||||
FEXCore::X86State::RFLAG_IF_LOC,
|
||||
FEXCore::X86State::RFLAG_DF_LOC,
|
||||
FEXCore::X86State::RFLAG_DF_RAW_LOC,
|
||||
FEXCore::X86State::RFLAG_OF_RAW_LOC,
|
||||
FEXCore::X86State::RFLAG_IOPL_LOC,
|
||||
FEXCore::X86State::RFLAG_NT_LOC,
|
||||
|
@ -456,6 +456,13 @@
|
||||
"DestSize": "4"
|
||||
},
|
||||
|
||||
"GPR = LoadDF": {
|
||||
"Desc": ["Loads the direction flag from the context object in -1/1",
|
||||
"representation for easy consumption"
|
||||
],
|
||||
"DestSize": "8"
|
||||
},
|
||||
|
||||
"GPR = LoadFlag u32:$Flag": {
|
||||
"Desc": ["Loads an x86-64 flag from the context object",
|
||||
"Specialized to allow flexible implementation of flag handling"
|
||||
|
@ -11,6 +11,7 @@ $end_info$
|
||||
#include "Interface/IR/PassManager.h"
|
||||
|
||||
#include <FEXCore/Core/CoreState.h>
|
||||
#include <FEXCore/Core/X86Enums.h>
|
||||
#include <FEXCore/IR/IR.h>
|
||||
#include <FEXCore/IR/IntrusiveIRList.h>
|
||||
#include <FEXCore/Utils/EnumOperators.h>
|
||||
@ -483,6 +484,8 @@ private:
|
||||
ContextMemberInfo *RecordAccess(ContextMemberInfo *Info, FEXCore::IR::RegisterClassType RegClass, uint32_t Offset, uint8_t Size, LastAccessType AccessType, FEXCore::IR::OrderedNode *Node, FEXCore::IR::OrderedNode *StoreNode = nullptr);
|
||||
ContextMemberInfo *RecordAccess(ContextInfo *ClassifiedInfo, FEXCore::IR::RegisterClassType RegClass, uint32_t Offset, uint8_t Size, LastAccessType AccessType, FEXCore::IR::OrderedNode *Node, FEXCore::IR::OrderedNode *StoreNode = nullptr);
|
||||
|
||||
bool HandleLoadFlag(FEXCore::IR::IREmitter *IREmit, ContextInfo *LocalInfo, FEXCore::IR::OrderedNode *CodeNode, unsigned Flag);
|
||||
|
||||
// Classify context loads and stores.
|
||||
bool ClassifyContextLoad(FEXCore::IR::IREmitter *IREmit, ContextInfo *LocalInfo, FEXCore::IR::RegisterClassType Class, uint32_t Offset, uint8_t Size, FEXCore::IR::OrderedNode *CodeNode, FEXCore::IR::NodeIterator BlockEnd);
|
||||
bool ClassifyContextStore(FEXCore::IR::IREmitter *IREmit, ContextInfo *LocalInfo, FEXCore::IR::RegisterClassType Class, uint32_t Offset, uint8_t Size, FEXCore::IR::OrderedNode *CodeNode, FEXCore::IR::OrderedNode *ValueNode);
|
||||
@ -551,6 +554,28 @@ bool RCLSE::ClassifyContextStore(FEXCore::IR::IREmitter *IREmit, ContextInfo *Lo
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
 * @brief Tries to satisfy a one-byte flag load from the last recorded access to that flag.
 *
 * Looks up the tracking entry for flags[Flag]. If the entry was last written or
 * last read, every use of CodeNode is redirected to the recorded value node and
 * the entry is re-recorded as a READ of that value.
 *
 * @param IREmit    Emitter used to move the write cursor and rewrite uses.
 * @param LocalInfo Per-block context tracking state to look the flag up in.
 * @param CodeNode  The flag load node being examined.
 * @param Flag      Index into CPUState::flags of the flag being loaded.
 *
 * @return true if the uses of CodeNode were replaced, false if nothing changed.
 */
bool RCLSE::HandleLoadFlag(FEXCore::IR::IREmitter *IREmit, ContextInfo *LocalInfo, FEXCore::IR::OrderedNode *CodeNode, unsigned Flag) {
  // Flag is a runtime value, so take the constant offset of the array base and
  // add the index rather than relying on a non-constant subscript in offsetof.
  const auto FlagOffset = offsetof(FEXCore::Core::CPUState, flags[0]) + Flag;
  auto Info = FindMemberInfo(LocalInfo, FlagOffset, 1);
  const LastAccessType LastAccess = Info->Accessed;
  auto LastValueNode = Info->ValueNode;

  const bool WasWritten = IsWriteAccess(LastAccess); // 1 byte so always a full write
  if (!WasWritten && !IsReadAccess(LastAccess)) {
    // No usable prior access recorded for this flag.
    return false;
  }

  if (WasWritten) {
    // If the last store matches this load value then we can replace the loaded value with the previous valid one
    IREmit->SetWriteCursor(CodeNode);
  }

  IREmit->ReplaceAllUsesWith(CodeNode, LastValueNode);
  RecordAccess(Info, FEXCore::IR::GPRClass, FlagOffset, 1, LastAccessType::READ, LastValueNode);
  return true;
}
|
||||
|
||||
/**
|
||||
* @brief This pass removes redundant pairs of storecontext and loadcontext ops
|
||||
*
|
||||
@ -660,23 +685,11 @@ bool RCLSE::RedundantStoreLoadElimination(FEXCore::IR::IREmitter *IREmit) {
|
||||
}
|
||||
else if (IROp->Op == OP_LOADFLAG) {
|
||||
const auto Op = IROp->CW<IR::IROp_LoadFlag>();
|
||||
const auto FlagOffset = offsetof(FEXCore::Core::CPUState, flags[0]) + Op->Flag;
|
||||
auto Info = FindMemberInfo(&LocalInfo, FlagOffset, 1);
|
||||
LastAccessType LastAccess = Info->Accessed;
|
||||
OrderedNode *LastValueNode = Info->ValueNode;
|
||||
|
||||
if (IsWriteAccess(LastAccess)) { // 1 byte so always a full write
|
||||
// If the last store matches this load value then we can replace the loaded value with the previous valid one
|
||||
IREmit->SetWriteCursor(CodeNode);
|
||||
IREmit->ReplaceAllUsesWith(CodeNode, LastValueNode);
|
||||
RecordAccess(Info, FEXCore::IR::GPRClass, FlagOffset, 1, LastAccessType::READ, LastValueNode);
|
||||
Changed = true;
|
||||
}
|
||||
else if (IsReadAccess(LastAccess)) {
|
||||
IREmit->ReplaceAllUsesWith(CodeNode, LastValueNode);
|
||||
RecordAccess(Info, FEXCore::IR::GPRClass, FlagOffset, 1, LastAccessType::READ, LastValueNode);
|
||||
Changed = true;
|
||||
}
|
||||
Changed |= HandleLoadFlag(IREmit, &LocalInfo, CodeNode, Op->Flag);
|
||||
}
|
||||
else if (IROp->Op == OP_LOADDF) {
|
||||
Changed |= HandleLoadFlag(IREmit, &LocalInfo, CodeNode, X86State::RFLAG_DF_RAW_LOC);
|
||||
}
|
||||
else if (IROp->Op == OP_SYSCALL ||
|
||||
IROp->Op == OP_INLINESYSCALL) {
|
||||
|
@ -10,6 +10,7 @@ $end_info$
|
||||
#include "Interface/IR/PassManager.h"
|
||||
|
||||
#include <FEXCore/Core/CoreState.h>
|
||||
#include <FEXCore/Core/X86Enums.h>
|
||||
#include <FEXCore/IR/IR.h>
|
||||
#include <FEXCore/IR/IntrusiveIRList.h>
|
||||
#include <FEXCore/Utils/LogManager.h>
|
||||
@ -211,6 +212,10 @@ bool DeadStoreElimination::Run(IREmitter *IREmit) {
|
||||
auto& BlockInfo = InfoMap[BlockNode];
|
||||
|
||||
BlockInfo.flag.reads |= 1UL << Op->Flag;
|
||||
} else if (IROp->Op == OP_LOADDF) {
|
||||
auto& BlockInfo = InfoMap[BlockNode];
|
||||
|
||||
BlockInfo.flag.reads |= 1UL << X86State::RFLAG_DF_RAW_LOC;
|
||||
} else if (IROp->Op == OP_STOREREGISTER) {
|
||||
auto Op = IROp->C<IR::IROp_StoreRegister>();
|
||||
|
||||
|
@ -150,6 +150,11 @@ namespace FEXCore::Core {
|
||||
|
||||
flags[X86State::RFLAG_RESERVED_LOC] = 1; ///< Reserved - Always 1.
|
||||
flags[X86State::RFLAG_IF_LOC] = 1; ///< Interrupt flag - Always 1.
|
||||
|
||||
// DF needs to be initialized to 0 to comply with the Linux ABI. However,
|
||||
// we encode DF as 1/-1 within the JIT, so we have to write 0x1 here to
|
||||
// zero DF.
|
||||
flags[X86State::RFLAG_DF_RAW_LOC] = 0x1;
|
||||
}
|
||||
};
|
||||
static_assert(std::is_trivially_copyable_v<CPUState>, "Needs to be trivial");
|
||||
|
@ -64,7 +64,7 @@ enum X86RegLocation : uint32_t {
|
||||
RFLAG_SF_RAW_LOC = 7, // Not used directly, needs to be reconstructed using `ReconstructCompactedEFLAGS`
|
||||
RFLAG_TF_LOC = 8,
|
||||
RFLAG_IF_LOC = 9,
|
||||
RFLAG_DF_LOC = 10,
|
||||
RFLAG_DF_RAW_LOC = 10, // Contains multiple bits, needs to be reconstructed using `ReconstructCompactedEFLAGS`
|
||||
RFLAG_OF_RAW_LOC = 11, // Not used directly, needs to be reconstructed using `ReconstructCompactedEFLAGS`
|
||||
RFLAG_IOPL_LOC = 12,
|
||||
RFLAG_NT_LOC = 14,
|
||||
|
Loading…
x
Reference in New Issue
Block a user