JIT: switch DF representation

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
This commit is contained in:
Alyssa Rosenzweig 2024-02-27 10:16:15 -04:00
parent e3ee65e491
commit c99cbe6d0a
10 changed files with 83 additions and 23 deletions

View File

@ -167,6 +167,7 @@ namespace FEXCore::Context {
case X86State::RFLAG_ZF_RAW_LOC:
case X86State::RFLAG_SF_RAW_LOC:
case X86State::RFLAG_OF_RAW_LOC:
case X86State::RFLAG_DF_RAW_LOC:
// Intentionally do nothing.
// These contain multiple bits which can corrupt other members when compacted.
break;
@ -215,6 +216,11 @@ namespace FEXCore::Context {
uint32_t AF = ((Frame->State.af_raw ^ PFByte) & (1 << 4)) ? 1 : 0;
EFLAGS |= AF << X86State::RFLAG_AF_RAW_LOC;
// DF is pretransformed, undo the transform from 1/-1 back to 0/1
uint8_t DFByte = Frame->State.flags[X86State::RFLAG_DF_RAW_LOC];
if (DFByte & 0x80)
EFLAGS |= 1 << X86State::RFLAG_DF_RAW_LOC;
return EFLAGS;
}
@ -238,6 +244,10 @@ namespace FEXCore::Context {
// PF is inverted in our internal representation.
Frame->State.pf_raw = (EFLAGS & (1U << i)) ? 0 : 1;
break;
case X86State::RFLAG_DF_RAW_LOC:
// DF is encoded as 1/-1
Frame->State.flags[i] = (EFLAGS & (1U << i)) ? 0xff : 1;
break;
default:
Frame->State.flags[i] = (EFLAGS & (1U << i)) ? 1 : 0;
break;

View File

@ -888,6 +888,14 @@ DEF_OP(StoreNZCV) {
msr(ARMEmitter::SystemRegister::NZCV, GetReg(Op->Value.ID()));
}
DEF_OP(LoadDF) {
  // The DF slot is kept as the byte 0x01 or 0xFF; loading it with a
  // sign-extending byte load widens that to 1 or -1 in the destination.
  const auto DFOffset = offsetof(FEXCore::Core::CPUState, flags[X86State::RFLAG_DF_RAW_LOC]);
  ldrsb(GetReg(Node).X(), STATE, DFOffset);
}
DEF_OP(LoadFlag) {
auto Op = IROp->C<IR::IROp_LoadFlag>();
auto Dst = GetReg(Node);

View File

@ -1379,10 +1379,10 @@ void OpDispatchBuilder::FLAGControlOp(OpcodeArgs) {
SetRFLAG(_Constant(1), FEXCore::X86State::RFLAG_CF_RAW_LOC);
break;
case 0xFC: // CLD
SetRFLAG(_Constant(0), FEXCore::X86State::RFLAG_DF_LOC);
SetRFLAG(_Constant(0), FEXCore::X86State::RFLAG_DF_RAW_LOC);
break;
case 0xFD: // STD
SetRFLAG(_Constant(1), FEXCore::X86State::RFLAG_DF_LOC);
SetRFLAG(_Constant(1), FEXCore::X86State::RFLAG_DF_RAW_LOC);
break;
}
}

View File

@ -1401,6 +1401,11 @@ private:
if (ValueOffset || MustMask)
Value = _Bfe(OpSize::i32Bit, 1, ValueOffset, Value);
// For DF, we need to transform 0/1 into 1/-1
if (BitOffset == FEXCore::X86State::RFLAG_DF_RAW_LOC) {
Value = _SubShift(OpSize::i64Bit, _Constant(1), Value, ShiftType::LSL, 1);
}
_StoreFlag(Value, BitOffset);
}
}
@ -1453,6 +1458,9 @@ private:
return _LoadRegister(false, offsetof(FEXCore::Core::CPUState, pf_raw), GPRClass, GPRFixedClass, CTX->GetGPRSize());
} else if (BitOffset == FEXCore::X86State::RFLAG_AF_RAW_LOC) {
return _LoadRegister(false, offsetof(FEXCore::Core::CPUState, af_raw), GPRClass, GPRFixedClass, CTX->GetGPRSize());
} else if (BitOffset == FEXCore::X86State::RFLAG_DF_RAW_LOC) {
// Recover the sign bit, it is the logical DF value
return _Lshr(OpSize::i64Bit, _LoadDF(), _Constant(63));
} else {
return _LoadFlag(BitOffset);
}
@ -1460,9 +1468,13 @@ private:
// Returns (DF ? -Size : Size)
// Only ilog2(Size) feeds the shift amounts below, so this assumes Size is a
// power of two -- TODO confirm against callers.
OrderedNode *LoadDir(const unsigned Size) {
// NOTE(review): this listing appears to contain two alternative bodies back
// to back (a 0/1-based one followed by a 1/-1-based one). As written, nothing
// after the first `return` is reachable -- confirm against the real file.
// 0/1 form: Size - (DF << (ilog2(Size) + 1)), i.e. Size - 2 * Size * DF.
auto DF = GetRFLAG(FEXCore::X86State::RFLAG_DF_LOC);
auto SizeConst = _Constant(Size);
return _SubShift(IR::SizeToOpSize(CTX->GetGPRSize()), SizeConst, DF, ShiftType::LSL, FEXCore::ilog2(Size) + 1);
// 1/-1 form: the loaded direction value only needs scaling by Size.
auto Dir = _LoadDF();
auto Shift = FEXCore::ilog2(Size);
// A shift of zero means Size == 1, so the loaded value is already the result.
if (Shift)
return _Lshl(IR::SizeToOpSize(CTX->GetGPRSize()), Dir, _Constant(Shift));
else
return Dir;
}
// Returns DF ? (X - Size) : (X + Size)

View File

@ -27,7 +27,7 @@ constexpr std::array<uint32_t, 17> FlagOffsets = {
FEXCore::X86State::RFLAG_SF_RAW_LOC,
FEXCore::X86State::RFLAG_TF_LOC,
FEXCore::X86State::RFLAG_IF_LOC,
FEXCore::X86State::RFLAG_DF_LOC,
FEXCore::X86State::RFLAG_DF_RAW_LOC,
FEXCore::X86State::RFLAG_OF_RAW_LOC,
FEXCore::X86State::RFLAG_IOPL_LOC,
FEXCore::X86State::RFLAG_NT_LOC,

View File

@ -456,6 +456,13 @@
"DestSize": "4"
},
"GPR = LoadDF": {
"Desc": ["Loads the direction flag from the context object in -1/1",
"representation for easy consumption"
],
"DestSize": "8"
},
"GPR = LoadFlag u32:$Flag": {
"Desc": ["Loads an x86-64 flag from the context object",
"Specialized to allow flexible implementation of flag handling"

View File

@ -11,6 +11,7 @@ $end_info$
#include "Interface/IR/PassManager.h"
#include <FEXCore/Core/CoreState.h>
#include <FEXCore/Core/X86Enums.h>
#include <FEXCore/IR/IR.h>
#include <FEXCore/IR/IntrusiveIRList.h>
#include <FEXCore/Utils/EnumOperators.h>
@ -483,6 +484,8 @@ private:
ContextMemberInfo *RecordAccess(ContextMemberInfo *Info, FEXCore::IR::RegisterClassType RegClass, uint32_t Offset, uint8_t Size, LastAccessType AccessType, FEXCore::IR::OrderedNode *Node, FEXCore::IR::OrderedNode *StoreNode = nullptr);
ContextMemberInfo *RecordAccess(ContextInfo *ClassifiedInfo, FEXCore::IR::RegisterClassType RegClass, uint32_t Offset, uint8_t Size, LastAccessType AccessType, FEXCore::IR::OrderedNode *Node, FEXCore::IR::OrderedNode *StoreNode = nullptr);
bool HandleLoadFlag(FEXCore::IR::IREmitter *IREmit, ContextInfo *LocalInfo, FEXCore::IR::OrderedNode *CodeNode, unsigned Flag);
// Classify context loads and stores.
bool ClassifyContextLoad(FEXCore::IR::IREmitter *IREmit, ContextInfo *LocalInfo, FEXCore::IR::RegisterClassType Class, uint32_t Offset, uint8_t Size, FEXCore::IR::OrderedNode *CodeNode, FEXCore::IR::NodeIterator BlockEnd);
bool ClassifyContextStore(FEXCore::IR::IREmitter *IREmit, ContextInfo *LocalInfo, FEXCore::IR::RegisterClassType Class, uint32_t Offset, uint8_t Size, FEXCore::IR::OrderedNode *CodeNode, FEXCore::IR::OrderedNode *ValueNode);
@ -551,6 +554,28 @@ bool RCLSE::ClassifyContextStore(FEXCore::IR::IREmitter *IREmit, ContextInfo *Lo
return false;
}
/**
 * @brief Attempts to satisfy a flag load from the value already tracked for that flag.
 *
 * Looks up the tracking entry for the one-byte slot `flags[Flag]`. If that slot was
 * last written or last read, every use of CodeNode is redirected to the tracked
 * value node and the slot is re-recorded as read with that same value.
 *
 * @param IREmit    Emitter used to rewrite uses of the load
 * @param LocalInfo Per-block context tracking state to search
 * @param CodeNode  The load node being considered for replacement
 * @param Flag      Index into `CPUState::flags` of the flag being loaded
 *
 * @return true if the load's uses were replaced, false if the slot had no prior read or write
 */
bool RCLSE::HandleLoadFlag(FEXCore::IR::IREmitter *IREmit, ContextInfo *LocalInfo, FEXCore::IR::OrderedNode *CodeNode, unsigned Flag) {
  const auto FlagOffset = offsetof(FEXCore::Core::CPUState, flags[Flag]);
  auto Member = FindMemberInfo(LocalInfo, FlagOffset, 1);
  LastAccessType PriorAccess = Member->Accessed;
  auto PriorValue = Member->ValueNode;

  // The slot is a single byte, so a prior write always covered all of it.
  const bool PriorWasWrite = IsWriteAccess(PriorAccess);
  if (!PriorWasWrite && !IsReadAccess(PriorAccess)) {
    // Nothing tracked for this flag yet; the load has to stay.
    return false;
  }

  // Only the store-to-load forwarding path repositions the write cursor.
  if (PriorWasWrite) {
    IREmit->SetWriteCursor(CodeNode);
  }

  IREmit->ReplaceAllUsesWith(CodeNode, PriorValue);
  RecordAccess(Member, FEXCore::IR::GPRClass, FlagOffset, 1, LastAccessType::READ, PriorValue);
  return true;
}
/**
* @brief This pass removes redundant pairs of storecontext and loadcontext ops
*
@ -660,23 +685,11 @@ bool RCLSE::RedundantStoreLoadElimination(FEXCore::IR::IREmitter *IREmit) {
}
else if (IROp->Op == OP_LOADFLAG) {
const auto Op = IROp->CW<IR::IROp_LoadFlag>();
const auto FlagOffset = offsetof(FEXCore::Core::CPUState, flags[0]) + Op->Flag;
auto Info = FindMemberInfo(&LocalInfo, FlagOffset, 1);
LastAccessType LastAccess = Info->Accessed;
OrderedNode *LastValueNode = Info->ValueNode;
if (IsWriteAccess(LastAccess)) { // 1 byte so always a full write
// If the last store matches this load value then we can replace the loaded value with the previous valid one
IREmit->SetWriteCursor(CodeNode);
IREmit->ReplaceAllUsesWith(CodeNode, LastValueNode);
RecordAccess(Info, FEXCore::IR::GPRClass, FlagOffset, 1, LastAccessType::READ, LastValueNode);
Changed = true;
}
else if (IsReadAccess(LastAccess)) {
IREmit->ReplaceAllUsesWith(CodeNode, LastValueNode);
RecordAccess(Info, FEXCore::IR::GPRClass, FlagOffset, 1, LastAccessType::READ, LastValueNode);
Changed = true;
}
Changed |= HandleLoadFlag(IREmit, &LocalInfo, CodeNode, Op->Flag);
}
else if (IROp->Op == OP_LOADDF) {
Changed |= HandleLoadFlag(IREmit, &LocalInfo, CodeNode, X86State::RFLAG_DF_RAW_LOC);
}
else if (IROp->Op == OP_SYSCALL ||
IROp->Op == OP_INLINESYSCALL) {

View File

@ -10,6 +10,7 @@ $end_info$
#include "Interface/IR/PassManager.h"
#include <FEXCore/Core/CoreState.h>
#include <FEXCore/Core/X86Enums.h>
#include <FEXCore/IR/IR.h>
#include <FEXCore/IR/IntrusiveIRList.h>
#include <FEXCore/Utils/LogManager.h>
@ -211,6 +212,10 @@ bool DeadStoreElimination::Run(IREmitter *IREmit) {
auto& BlockInfo = InfoMap[BlockNode];
BlockInfo.flag.reads |= 1UL << Op->Flag;
} else if (IROp->Op == OP_LOADDF) {
auto& BlockInfo = InfoMap[BlockNode];
BlockInfo.flag.reads |= 1UL << X86State::RFLAG_DF_RAW_LOC;
} else if (IROp->Op == OP_STOREREGISTER) {
auto Op = IROp->C<IR::IROp_StoreRegister>();

View File

@ -150,6 +150,11 @@ namespace FEXCore::Core {
flags[X86State::RFLAG_RESERVED_LOC] = 1; ///< Reserved - Always 1.
flags[X86State::RFLAG_IF_LOC] = 1; ///< Interrupt flag - Always 1.
// DF needs to be initialized to 0 to comply with the Linux ABI. However,
// we encode DF as 1/-1 within the JIT, so we have to write 0x1 here to
// zero DF.
flags[X86State::RFLAG_DF_RAW_LOC] = 0x1;
}
};
static_assert(std::is_trivially_copyable_v<CPUState>, "Needs to be trivial");

View File

@ -64,7 +64,7 @@ enum X86RegLocation : uint32_t {
RFLAG_SF_RAW_LOC = 7, // Not used directly, needs to be reconstructed using `ReconstructCompactedEFLAGS`
RFLAG_TF_LOC = 8,
RFLAG_IF_LOC = 9,
RFLAG_DF_LOC = 10,
RFLAG_DF_RAW_LOC = 10, // Contains multiple bits, needs to be reconstructed using `ReconstructCompactedEFLAGS`
RFLAG_OF_RAW_LOC = 11, // Not used directly, needs to be reconstructed using `ReconstructCompactedEFLAGS`
RFLAG_IOPL_LOC = 12,
RFLAG_NT_LOC = 14,