mirror of
https://github.com/FEX-Emu/FEX.git
synced 2025-01-24 23:46:53 +00:00
Merge pull request #2330 from Sonicadvance1/implement_flushes
OpDispatcher: Adds support for CLWB and CLFLUSHOPT
This commit is contained in:
commit
7be2e1ad34
@ -655,8 +655,8 @@ FEXCore::CPUID::FunctionResults CPUIDEmu::Function_07h(uint32_t Leaf) {
|
||||
(0 << 20) | // SMAP Supervisor mode access prevention and CLAC/STAC instructions
|
||||
(0 << 21) | // Reserved
|
||||
(0 << 22) | // Reserved
|
||||
(0 << 23) | // CLFLUSHOPT instruction
|
||||
(0 << 24) | // CLWB instruction
|
||||
(1 << 23) | // CLFLUSHOPT instruction
|
||||
(CTX->HostFeatures.SupportsCLWB << 24) | // CLWB instruction
|
||||
(0 << 25) | // Intel processor trace
|
||||
(0 << 26) | // Reserved
|
||||
(0 << 27) | // Reserved
|
||||
|
@ -79,6 +79,7 @@ HostFeatures::HostFeatures() {
|
||||
SupportsSHA = true;
|
||||
SupportsBMI1 = true;
|
||||
SupportsBMI2 = true;
|
||||
SupportsCLWB = true;
|
||||
|
||||
if (!SupportsAtomics) {
|
||||
WARN_ONCE_FMT("Host CPU doesn't support atomics. Expect bad performance");
|
||||
@ -128,6 +129,7 @@ HostFeatures::HostFeatures() {
|
||||
SupportsSHA = Features.has(Xbyak::util::Cpu::tSHA);
|
||||
SupportsBMI1 = Features.has(Xbyak::util::Cpu::tBMI1);
|
||||
SupportsBMI2 = Features.has(Xbyak::util::Cpu::tBMI2);
|
||||
SupportsBMI2 = Features.has(Xbyak::util::Cpu::tCLWB);
|
||||
SupportsPMULL_128Bit = Features.has(Xbyak::util::Cpu::tPCLMULQDQ);
|
||||
|
||||
// xbyak doesn't know how to check for CLZero
|
||||
|
@ -155,6 +155,7 @@ constexpr OpHandlerArray InterpreterOpHandlers = [] {
|
||||
REGISTER_OP(LOADMEMTSO, LoadMem);
|
||||
REGISTER_OP(STOREMEMTSO, StoreMem);
|
||||
REGISTER_OP(CACHELINECLEAR, CacheLineClear);
|
||||
REGISTER_OP(CACHELINECLEAN, CacheLineClean);
|
||||
REGISTER_OP(CACHELINEZERO, CacheLineZero);
|
||||
|
||||
// Misc ops
|
||||
|
@ -182,6 +182,7 @@ namespace FEXCore::CPU {
|
||||
DEF_OP(LoadMem);
|
||||
DEF_OP(StoreMem);
|
||||
DEF_OP(CacheLineClear);
|
||||
DEF_OP(CacheLineClean);
|
||||
DEF_OP(CacheLineZero);
|
||||
|
||||
///< Misc ops
|
||||
|
@ -23,6 +23,22 @@ static inline void CacheLineFlush(char *Addr) {
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
 * Cleans the data cache line that contains Addr using the host's native
 * instruction: `clwb` on x86-64 hosts, `dc cvac` on AArch64 hosts.
 *
 * @param Addr Address inside the cache line to clean.
 */
static inline void CacheLineClean(char *Addr) {
#if defined(_M_X86_64)
  // The "memory" clobber stops the compiler from reordering memory accesses
  // across the clean.
  __asm volatile (
    "clwb (%[Addr]);"
    :
    : [Addr] "r" (Addr)
    : "memory");
#elif _M_ARM_64
  __asm volatile (
    "dc cvac, %[Addr]"
    :
    : [Addr] "r" (Addr)
    : "memory");
#else
  // No known cache-clean instruction for this host.
  LOGMAN_THROW_A_FMT("Unsupported architecture with cacheline clean");
#endif
}
|
||||
|
||||
#define DEF_OP(x) void InterpreterOps::Op_##x(IR::IROp_Header *IROp, IROpData *Data, IR::NodeID Node)
|
||||
DEF_OP(LoadContext) {
|
||||
const auto Op = IROp->C<IR::IROp_LoadContext>();
|
||||
@ -281,6 +297,15 @@ DEF_OP(CacheLineClear) {
|
||||
CacheLineFlush(MemData);
|
||||
}
|
||||
|
||||
// Interpreter handler for the CacheLineClean IR op.
DEF_OP(CacheLineClean) {
  const auto Op = IROp->C<IR::IROp_CacheLineClean>();

  // The address operand is read out of the SSA data as a host pointer.
  char *const Line = *GetSrc<char **>(Data->SSAData, Op->Addr);

  // Clean (not clear) the cache line containing that address.
  CacheLineClean(Line);
}
|
||||
|
||||
DEF_OP(CacheLineZero) {
|
||||
auto Op = IROp->C<IR::IROp_CacheLineZero>();
|
||||
|
||||
|
@ -888,6 +888,7 @@ void *Arm64JITCore::CompileCode(uint64_t Entry,
|
||||
}
|
||||
break;
|
||||
REGISTER_OP(CACHELINECLEAR, CacheLineClear);
|
||||
REGISTER_OP(CACHELINECLEAN, CacheLineClean);
|
||||
REGISTER_OP(CACHELINEZERO, CacheLineZero);
|
||||
|
||||
// Misc ops
|
||||
|
@ -356,6 +356,7 @@ private:
|
||||
DEF_OP(ParanoidLoadMemTSO);
|
||||
DEF_OP(ParanoidStoreMemTSO);
|
||||
DEF_OP(CacheLineClear);
|
||||
DEF_OP(CacheLineClean);
|
||||
DEF_OP(CacheLineZero);
|
||||
|
||||
///< Misc ops
|
||||
|
@ -1496,8 +1496,25 @@ DEF_OP(CacheLineClear) {
|
||||
dc(ARMEmitter::DataCacheOperation::CIVAC, TMP1);
|
||||
add(ARMEmitter::Size::i64Bit, TMP1, TMP1, CTX->HostFeatures.DCacheLineSize);
|
||||
}
|
||||
|
||||
if (Op->Serialize) {
|
||||
// If requested, serialize all of the data cache operations.
|
||||
dsb(FEXCore::ARMEmitter::BarrierScope::ISH);
|
||||
}
|
||||
}
|
||||
|
||||
// Arm64 JIT handler for the CacheLineClean IR op.
// Emits `dc cvac` for every host data-cache line needed to cover one 64-byte
// guest cache line starting at the address operand.
DEF_OP(CacheLineClean) {
  auto Op = IROp->C<IR::IROp_CacheLineClean>();

  auto MemReg = GetReg(Op->Addr.ID());

  // The guest (x86) cache line is 64 bytes. A host line of 64 bytes or more
  // covers it with a single operation; a smaller host line needs
  // 64 / HostLineSize operations. Dividing the other way round would
  // under-clean on small lines and touch memory past the guest line on big ones.
  constexpr uint32_t GuestLineSize = 64;
  const uint32_t HostLineSize = CTX->HostFeatures.DCacheLineSize;
  const uint32_t NumHostLines =
    (HostLineSize != 0 && HostLineSize < GuestLineSize) ? GuestLineSize / HostLineSize : 1;

  // Clean dcache only
  // NOTE(review): the address is not aligned down to the guest line first, so with
  // sub-64-byte host lines an unaligned address may step past the guest line - confirm.
  mov(TMP1, MemReg.X());
  for (uint32_t i = 0; i < NumHostLines; ++i) {
    if (i != 0) {
      // Advance to the next host cache line inside the guest line.
      add(ARMEmitter::Size::i64Bit, TMP1, TMP1, HostLineSize);
    }
    dc(ARMEmitter::DataCacheOperation::CVAC, TMP1);
  }
}
|
||||
|
||||
DEF_OP(CacheLineZero) {
|
||||
auto Op = IROp->C<IR::IROp_CacheLineZero>();
|
||||
|
@ -348,6 +348,7 @@ private:
|
||||
DEF_OP(LoadMem);
|
||||
DEF_OP(StoreMem);
|
||||
DEF_OP(CacheLineClear);
|
||||
DEF_OP(CacheLineClean);
|
||||
DEF_OP(CacheLineZero);
|
||||
|
||||
///< Misc ops
|
||||
|
@ -771,8 +771,20 @@ DEF_OP(CacheLineClear) {
|
||||
|
||||
Xbyak::Reg MemReg = GetSrc<RA_64>(Op->Addr.ID());
|
||||
|
||||
if (Op->Serialize) {
|
||||
clflush(ptr [MemReg]);
|
||||
}
|
||||
else {
|
||||
clflushopt(ptr [MemReg]);
|
||||
}
|
||||
}
|
||||
|
||||
// x86-64 JIT handler for the CacheLineClean IR op: emits a host `clwb` on the
// address held in the operand's register.
DEF_OP(CacheLineClean) {
  const auto Op = IROp->C<IR::IROp_CacheLineClean>();

  const Xbyak::Reg LineAddr = GetSrc<RA_64>(Op->Addr.ID());
  clwb(ptr [LineAddr]);
}
|
||||
|
||||
DEF_OP(CacheLineZero) {
|
||||
auto Op = IROp->C<IR::IROp_CacheLineZero>();
|
||||
@ -809,6 +821,7 @@ void X86JITCore::RegisterMemoryHandlers() {
|
||||
REGISTER_OP(LOADMEMTSO, LoadMem);
|
||||
REGISTER_OP(STOREMEMTSO, StoreMem);
|
||||
REGISTER_OP(CACHELINECLEAR, CacheLineClear);
|
||||
REGISTER_OP(CACHELINECLEAN, CacheLineClean);
|
||||
REGISTER_OP(CACHELINEZERO, CacheLineZero);
|
||||
#undef REGISTER_OP
|
||||
}
|
||||
|
@ -5618,6 +5618,29 @@ void OpDispatchBuilder::FenceOp(OpcodeArgs) {
|
||||
_Fence({FenceType});
|
||||
}
|
||||
|
||||
// Dispatches the guest CLWB instruction to a CacheLineClean IR op on the
// destination operand's address (with any segment offset applied).
void OpDispatchBuilder::CLWB(OpcodeArgs) {
  // NOTE(review): the trailing `false` presumably asks LoadSource for the address
  // rather than the loaded value - confirm against LoadSource's declaration.
  OrderedNode *const Base = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, -1, false);
  OrderedNode *const LineAddr = AppendSegmentOffset(Base, Op->Flags);
  _CacheLineClean(LineAddr);
}
|
||||
|
||||
// Dispatches the guest CLFLUSHOPT instruction to a CacheLineClear IR op on the
// destination operand's address, with serialization turned off.
void OpDispatchBuilder::CLFLUSHOPT(OpcodeArgs) {
  // NOTE(review): the trailing `false` presumably asks LoadSource for the address
  // rather than the loaded value - confirm against LoadSource's declaration.
  OrderedNode *const Base = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, -1, false);
  OrderedNode *const LineAddr = AppendSegmentOffset(Base, Op->Flags);

  // Second argument is the op's Serialize flag; CLFLUSHOPT does not request it.
  _CacheLineClear(LineAddr, false);
}
|
||||
|
||||
// Handles the GROUP 15 /6 encoding without a prefix, which is shared between
// MFENCE (register form) and XSAVEOPT (memory form).
void OpDispatchBuilder::MemFenceOrXSAVEOPT(OpcodeArgs) {
  // MFENCE is the register form (ModRM.mod == 0b11). The processor ignores the
  // r/m field for it, so every 0F AE F0..F7 encoding is an MFENCE, not just 0xF0.
  const bool IsRegisterForm = (Op->ModRM >> 6) == 0b11;

  if (IsRegisterForm) {
    _Fence(FEXCore::IR::Fence_LoadStore);
    return;
  }

  // The memory form is XSAVEOPT, which is not implemented.
  LogMan::Msg::EFmt("Application tried using XSAVEOPT");
  UnimplementedOp(Op);
}
|
||||
|
||||
void OpDispatchBuilder::StoreFenceOrCLFlush(OpcodeArgs) {
|
||||
if (Op->ModRM == 0xF8) {
|
||||
// 0xF8 is SFENCE
|
||||
@ -5627,7 +5650,7 @@ void OpDispatchBuilder::StoreFenceOrCLFlush(OpcodeArgs) {
|
||||
// This is a CLFlush
|
||||
OrderedNode *DestMem = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, -1, false);
|
||||
DestMem = AppendSegmentOffset(DestMem, Op->Flags);
|
||||
_CacheLineClear(DestMem);
|
||||
_CacheLineClear(DestMem, true);
|
||||
}
|
||||
}
|
||||
|
||||
@ -6765,12 +6788,15 @@ constexpr uint16_t PF_F2 = 3;
|
||||
{OPD(FEXCore::X86Tables::TYPE_GROUP_15, PF_NONE, 2), 1, &OpDispatchBuilder::LDMXCSR},
|
||||
{OPD(FEXCore::X86Tables::TYPE_GROUP_15, PF_NONE, 3), 1, &OpDispatchBuilder::STMXCSR},
|
||||
{OPD(FEXCore::X86Tables::TYPE_GROUP_15, PF_NONE, 5), 1, &OpDispatchBuilder::FenceOp<FEXCore::IR::Fence_Load.Val>}, //LFENCE
|
||||
{OPD(FEXCore::X86Tables::TYPE_GROUP_15, PF_NONE, 6), 1, &OpDispatchBuilder::FenceOp<FEXCore::IR::Fence_LoadStore.Val>}, //MFENCE
|
||||
{OPD(FEXCore::X86Tables::TYPE_GROUP_15, PF_NONE, 6), 1, &OpDispatchBuilder::MemFenceOrXSAVEOPT}, //MFENCE
|
||||
{OPD(FEXCore::X86Tables::TYPE_GROUP_15, PF_NONE, 7), 1, &OpDispatchBuilder::StoreFenceOrCLFlush}, //SFENCE
|
||||
|
||||
{OPD(FEXCore::X86Tables::TYPE_GROUP_15, PF_F3, 5), 1, &OpDispatchBuilder::UnimplementedOp},
|
||||
{OPD(FEXCore::X86Tables::TYPE_GROUP_15, PF_F3, 6), 1, &OpDispatchBuilder::UnimplementedOp},
|
||||
|
||||
{OPD(FEXCore::X86Tables::TYPE_GROUP_15, PF_66, 6), 1, &OpDispatchBuilder::CLWB},
|
||||
{OPD(FEXCore::X86Tables::TYPE_GROUP_15, PF_66, 7), 1, &OpDispatchBuilder::CLFLUSHOPT},
|
||||
|
||||
// GROUP 16
|
||||
{OPD(FEXCore::X86Tables::TYPE_GROUP_16, PF_NONE, 0), 8, &OpDispatchBuilder::NOPOp},
|
||||
{OPD(FEXCore::X86Tables::TYPE_GROUP_16, PF_F3, 0), 8, &OpDispatchBuilder::NOPOp},
|
||||
|
@ -689,6 +689,9 @@ public:
|
||||
template<uint8_t FenceType>
|
||||
void FenceOp(OpcodeArgs);
|
||||
|
||||
void CLWB(OpcodeArgs);
|
||||
void CLFLUSHOPT(OpcodeArgs);
|
||||
void MemFenceOrXSAVEOPT(OpcodeArgs);
|
||||
void StoreFenceOrCLFlush(OpcodeArgs);
|
||||
void CLZeroOp(OpcodeArgs);
|
||||
void RDTSCPOp(OpcodeArgs);
|
||||
|
@ -338,7 +338,7 @@ void InitializeSecondaryGroupTables() {
|
||||
{OPD(TYPE_GROUP_15, PF_NONE, 3), 1, X86InstInfo{"STMXCSR", TYPE_INST, GenFlagsSameSize(SIZE_32BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_MEM_ONLY, 0, nullptr}},
|
||||
{OPD(TYPE_GROUP_15, PF_NONE, 4), 1, X86InstInfo{"XSAVE", TYPE_PRIV, FLAGS_NONE, 0, nullptr}},
|
||||
{OPD(TYPE_GROUP_15, PF_NONE, 5), 1, X86InstInfo{"LFENCE/XRSTOR", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}},
|
||||
{OPD(TYPE_GROUP_15, PF_NONE, 6), 1, X86InstInfo{"MFENCE/XSAVEOPT", TYPE_INST, FLAGS_MODRM, 0, nullptr}},
|
||||
{OPD(TYPE_GROUP_15, PF_NONE, 6), 1, X86InstInfo{"MFENCE/XSAVEOPT", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}},
|
||||
{OPD(TYPE_GROUP_15, PF_NONE, 7), 1, X86InstInfo{"SFENCE/CLFLUSH", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}},
|
||||
|
||||
{OPD(TYPE_GROUP_15, PF_F3, 0), 1, X86InstInfo{"RDFSBASE", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_REG_ONLY, 0, nullptr}},
|
||||
@ -356,8 +356,8 @@ void InitializeSecondaryGroupTables() {
|
||||
{OPD(TYPE_GROUP_15, PF_66, 3), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}},
|
||||
{OPD(TYPE_GROUP_15, PF_66, 4), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}},
|
||||
{OPD(TYPE_GROUP_15, PF_66, 5), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}},
|
||||
{OPD(TYPE_GROUP_15, PF_66, 6), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}},
|
||||
{OPD(TYPE_GROUP_15, PF_66, 7), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}},
|
||||
{OPD(TYPE_GROUP_15, PF_66, 6), 1, X86InstInfo{"CLWB", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}},
|
||||
{OPD(TYPE_GROUP_15, PF_66, 7), 1, X86InstInfo{"CLFLUSHOPT", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}},
|
||||
|
||||
{OPD(TYPE_GROUP_15, PF_F2, 0), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}},
|
||||
{OPD(TYPE_GROUP_15, PF_F2, 1), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}},
|
||||
|
12
External/FEXCore/Source/Interface/IR/IR.json
vendored
12
External/FEXCore/Source/Interface/IR/IR.json
vendored
@ -479,9 +479,17 @@
|
||||
]
|
||||
},
|
||||
|
||||
"CacheLineClear GPR:$Addr": {
|
||||
"CacheLineClear GPR:$Addr, i1:$Serialize": {
|
||||
"Desc": ["Does a 64 byte cacheline clear at the address specified",
|
||||
"Only clears the data cachelines. Doesn't do any zeroing"
|
||||
"Only clears the data cachelines. Doesn't do any zeroing",
|
||||
"Can skip serialization if requested."
|
||||
],
|
||||
"HasSideEffects": true
|
||||
},
|
||||
"CacheLineClean GPR:$Addr": {
|
||||
"Desc": ["Does a 64 byte cacheline clean at the address specified",
|
||||
"Only cleans the data cachelines. Doesn't do any zeroing",
|
||||
"Skips the invalidation step of the CacheLineClear operation"
|
||||
],
|
||||
"HasSideEffects": true
|
||||
},
|
||||
|
@ -27,6 +27,7 @@ class HostFeatures final {
|
||||
bool SupportsSHA{};
|
||||
bool SupportsBMI1{};
|
||||
bool SupportsBMI2{};
|
||||
bool SupportsCLWB{};
|
||||
bool SupportsPMULL_128Bit{};
|
||||
|
||||
// Float exception behaviour
|
||||
|
2
External/xbyak
vendored
2
External/xbyak
vendored
@ -1 +1 @@
|
||||
Subproject commit ea21d6e295ede3586ea5c62030bc1c50e2cb7e31
|
||||
Subproject commit b0f0c7805ad16d9abbac0f8101cc226669983b57
|
@ -72,6 +72,7 @@ class HostFeatures(Flag) :
|
||||
FEATURE_CLZERO = (1 << 5)
|
||||
FEATURE_BMI1 = (1 << 6)
|
||||
FEATURE_BMI2 = (1 << 7)
|
||||
FEATURE_CLWB = (1 << 8)
|
||||
|
||||
RegStringLookup = {
|
||||
"NONE": Regs.REG_NONE,
|
||||
@ -143,6 +144,7 @@ HostFeaturesLookup = {
|
||||
"CLZERO" : HostFeatures.FEATURE_CLZERO,
|
||||
"BMI1" : HostFeatures.FEATURE_BMI1,
|
||||
"BMI2" : HostFeatures.FEATURE_BMI2,
|
||||
"CLWB" : HostFeatures.FEATURE_CLWB,
|
||||
}
|
||||
|
||||
def parse_hexstring(s):
|
||||
|
@ -385,6 +385,7 @@ namespace FEX::HarnessHelper {
|
||||
FEATURE_CLZERO = (1 << 5),
|
||||
FEATURE_BMI1 = (1 << 6),
|
||||
FEATURE_BMI2 = (1 << 7),
|
||||
FEATURE_CLWB = (1 << 8),
|
||||
};
|
||||
|
||||
bool Requires3DNow() const { return BaseConfig.OptionHostFeatures & HostFeatures::FEATURE_3DNOW; }
|
||||
@ -395,6 +396,7 @@ namespace FEX::HarnessHelper {
|
||||
bool RequiresCLZERO() const { return BaseConfig.OptionHostFeatures & HostFeatures::FEATURE_CLZERO; }
|
||||
bool RequiresBMI1() const { return BaseConfig.OptionHostFeatures & HostFeatures::FEATURE_BMI1; }
|
||||
bool RequiresBMI2() const { return BaseConfig.OptionHostFeatures & HostFeatures::FEATURE_BMI2; }
|
||||
bool RequiresCLWB() const { return BaseConfig.OptionHostFeatures & HostFeatures::FEATURE_CLWB; }
|
||||
|
||||
private:
|
||||
FEX_CONFIG_OPT(ConfigDumpGPRs, DUMPGPRS);
|
||||
@ -534,6 +536,7 @@ namespace FEX::HarnessHelper {
|
||||
bool RequiresCLZERO() const { return Config.RequiresCLZERO(); }
|
||||
bool RequiresBMI1() const { return Config.RequiresBMI1(); }
|
||||
bool RequiresBMI2() const { return Config.RequiresBMI2(); }
|
||||
bool RequiresCLWB() const { return Config.RequiresCLWB(); }
|
||||
|
||||
private:
|
||||
constexpr static uint64_t STACK_SIZE = FHU::FEX_PAGE_SIZE;
|
||||
|
@ -178,7 +178,8 @@ int main(int argc, char **argv, char **const envp) {
|
||||
(!HostFeatures.SupportsSHA && Loader.RequiresSHA()) ||
|
||||
(!HostFeatures.SupportsCLZERO && Loader.RequiresCLZERO()) ||
|
||||
(!HostFeatures.SupportsBMI1 && Loader.RequiresBMI1()) ||
|
||||
(!HostFeatures.SupportsBMI2 && Loader.RequiresBMI2());
|
||||
(!HostFeatures.SupportsBMI2 && Loader.RequiresBMI2()) ||
|
||||
(!HostFeatures.SupportsCLWB && Loader.RequiresCLWB());
|
||||
|
||||
if (TestUnsupported) {
|
||||
FEXCore::Context::DestroyContext(CTX);
|
||||
|
@ -87,7 +87,7 @@ public:
|
||||
Label Gate{};
|
||||
// Patch gate entry point
|
||||
// mov(dword[rip + Gate], edi)
|
||||
jmpf(ptr[rip + Gate]);
|
||||
jmp(qword [rip + Gate], LabelType::T_FAR);
|
||||
|
||||
L(Gate);
|
||||
dd(0x1'0000); // This is a 32-bit offset from the start of the gate. We start at 0x1'0000 + 0
|
||||
|
14
unittests/ASM/Secondary/CLFLUSHOPT.asm
Normal file
14
unittests/ASM/Secondary/CLFLUSHOPT.asm
Normal file
@ -0,0 +1,14 @@
|
||||
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  }
}
%endif

; Smoke test: CLFLUSHOPT has no register-visible result, so this only checks
; that the instruction executes and that the code after it still runs.
mov rdx, 0xe0000000
clflushopt [rdx]

; RAX == 1 is the value the RegData block above expects.
mov rax, 1
hlt
|
15
unittests/ASM/Secondary/CLWB.asm
Normal file
15
unittests/ASM/Secondary/CLWB.asm
Normal file
@ -0,0 +1,15 @@
|
||||
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  },
  "HostFeatures": ["CLWB"]
}
%endif

; Smoke test: CLWB has no register-visible result, so this only checks
; that the instruction executes and that the code after it still runs.
mov rdx, 0xe0000000
clwb [rdx]

; RAX == 1 is the value the RegData block above expects.
mov rax, 1
hlt
|
Loading…
x
Reference in New Issue
Block a user