Merge pull request #2330 from Sonicadvance1/implement_flushes

OpDispatcher: Adds support for CLWB and CLFLUSHOPT
This commit is contained in:
Mai 2023-01-31 04:01:26 +00:00 committed by GitHub
commit 7be2e1ad34
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
22 changed files with 149 additions and 14 deletions

View File

@ -655,8 +655,8 @@ FEXCore::CPUID::FunctionResults CPUIDEmu::Function_07h(uint32_t Leaf) {
(0 << 20) | // SMAP Supervisor mode access prevention and CLAC/STAC instructions
(0 << 21) | // Reserved
(0 << 22) | // Reserved
(0 << 23) | // CLFLUSHOPT instruction
(0 << 24) | // CLWB instruction
(1 << 23) | // CLFLUSHOPT instruction
(CTX->HostFeatures.SupportsCLWB << 24) | // CLWB instruction
(0 << 25) | // Intel processor trace
(0 << 26) | // Reserved
(0 << 27) | // Reserved

View File

@ -79,6 +79,7 @@ HostFeatures::HostFeatures() {
SupportsSHA = true;
SupportsBMI1 = true;
SupportsBMI2 = true;
SupportsCLWB = true;
if (!SupportsAtomics) {
WARN_ONCE_FMT("Host CPU doesn't support atomics. Expect bad performance");
@ -128,6 +129,7 @@ HostFeatures::HostFeatures() {
SupportsSHA = Features.has(Xbyak::util::Cpu::tSHA);
SupportsBMI1 = Features.has(Xbyak::util::Cpu::tBMI1);
SupportsBMI2 = Features.has(Xbyak::util::Cpu::tBMI2);
SupportsBMI2 = Features.has(Xbyak::util::Cpu::tCLWB);
SupportsPMULL_128Bit = Features.has(Xbyak::util::Cpu::tPCLMULQDQ);
// xbyak doesn't know how to check for CLZero

View File

@ -155,6 +155,7 @@ constexpr OpHandlerArray InterpreterOpHandlers = [] {
REGISTER_OP(LOADMEMTSO, LoadMem);
REGISTER_OP(STOREMEMTSO, StoreMem);
REGISTER_OP(CACHELINECLEAR, CacheLineClear);
REGISTER_OP(CACHELINECLEAN, CacheLineClean);
REGISTER_OP(CACHELINEZERO, CacheLineZero);
// Misc ops

View File

@ -182,6 +182,7 @@ namespace FEXCore::CPU {
DEF_OP(LoadMem);
DEF_OP(StoreMem);
DEF_OP(CacheLineClear);
DEF_OP(CacheLineClean);
DEF_OP(CacheLineZero);
///< Misc ops

View File

@ -23,6 +23,22 @@ static inline void CacheLineFlush(char *Addr) {
#endif
}
/**
 * Issues the host's cache-line clean instruction for the line containing Addr:
 * `clwb` when built for x86-64, `dc cvac` when built for AArch64.
 *
 * The "memory" clobber keeps the compiler from reordering surrounding memory
 * accesses across the instruction.
 *
 * @param Addr Address inside the cache line to clean.
 */
static inline void CacheLineClean(char *Addr) {
  // Use defined() for both checks so an undefined architecture macro is never
  // evaluated as an expression.
#if defined(_M_X86_64)
  __asm volatile (
  "clwb (%[Addr]);"
  :: [Addr] "r" (Addr)
  : "memory");
#elif defined(_M_ARM_64)
  __asm volatile (
  "dc cvac, %[Addr]"
  :: [Addr] "r" (Addr)
  : "memory");
#else
  LOGMAN_THROW_A_FMT("Unsupported architecture with cacheline clean");
#endif
}
#define DEF_OP(x) void InterpreterOps::Op_##x(IR::IROp_Header *IROp, IROpData *Data, IR::NodeID Node)
DEF_OP(LoadContext) {
const auto Op = IROp->C<IR::IROp_LoadContext>();
@ -281,6 +297,15 @@ DEF_OP(CacheLineClear) {
CacheLineFlush(MemData);
}
DEF_OP(CacheLineClean) {
  // Interpreter handler for the CacheLineClean IR op: fetch the address held
  // in the op's Addr source and hand it to the host cache-line clean helper.
  auto CleanOp = IROp->C<IR::IROp_CacheLineClean>();
  char *LineAddr = *GetSrc<char **>(Data->SSAData, CleanOp->Addr);

  // Cache line clean (not a clear) of the line at LineAddr.
  CacheLineClean(LineAddr);
}
DEF_OP(CacheLineZero) {
auto Op = IROp->C<IR::IROp_CacheLineZero>();

View File

@ -888,6 +888,7 @@ void *Arm64JITCore::CompileCode(uint64_t Entry,
}
break;
REGISTER_OP(CACHELINECLEAR, CacheLineClear);
REGISTER_OP(CACHELINECLEAN, CacheLineClean);
REGISTER_OP(CACHELINEZERO, CacheLineZero);
// Misc ops

View File

@ -356,6 +356,7 @@ private:
DEF_OP(ParanoidLoadMemTSO);
DEF_OP(ParanoidStoreMemTSO);
DEF_OP(CacheLineClear);
DEF_OP(CacheLineClean);
DEF_OP(CacheLineZero);
///< Misc ops

View File

@ -1496,7 +1496,24 @@ DEF_OP(CacheLineClear) {
dc(ARMEmitter::DataCacheOperation::CIVAC, TMP1);
add(ARMEmitter::Size::i64Bit, TMP1, TMP1, CTX->HostFeatures.DCacheLineSize);
}
dsb(FEXCore::ARMEmitter::BarrierScope::ISH);
if (Op->Serialize) {
// If requested, serialized all of the data cache operations.
dsb(FEXCore::ARMEmitter::BarrierScope::ISH);
}
}
DEF_OP(CacheLineClean) {
  // Arm64 JIT handler for the CacheLineClean IR op: emits `dc cvac` on the
  // address held in the op's Addr register. Only the data cache is cleaned.
  auto CleanOp = IROp->C<IR::IROp_CacheLineClean>();
  auto AddrReg = GetReg(CleanOp->Addr.ID());

  // Work on a scratch copy so the source register is left untouched.
  mov(TMP1, AddrReg.X());

  // NOTE(review): the count is DCacheLineSize / 64 (minimum 1) and each step
  // advances by a full host line. On hosts whose lines are larger than 64 bytes
  // this emits more than one clean - confirm that is intended.
  const size_t NumCleans = std::max(1U, CTX->HostFeatures.DCacheLineSize / 64U);
  for (size_t Emitted = 0; Emitted != NumCleans; ++Emitted) {
    dc(ARMEmitter::DataCacheOperation::CVAC, TMP1);
    add(ARMEmitter::Size::i64Bit, TMP1, TMP1, CTX->HostFeatures.DCacheLineSize);
  }
}
DEF_OP(CacheLineZero) {

View File

@ -348,6 +348,7 @@ private:
DEF_OP(LoadMem);
DEF_OP(StoreMem);
DEF_OP(CacheLineClear);
DEF_OP(CacheLineClean);
DEF_OP(CacheLineZero);
///< Misc ops

View File

@ -771,7 +771,19 @@ DEF_OP(CacheLineClear) {
Xbyak::Reg MemReg = GetSrc<RA_64>(Op->Addr.ID());
clflush(ptr [MemReg]);
if (Op->Serialize) {
clflush(ptr [MemReg]);
}
else {
clflushopt(ptr [MemReg]);
}
}
DEF_OP(CacheLineClean) {
  // x86 JIT handler for the CacheLineClean IR op: emits a single `clwb` on the
  // address held in the op's Addr register.
  auto CleanOp = IROp->C<IR::IROp_CacheLineClean>();
  Xbyak::Reg AddrReg = GetSrc<RA_64>(CleanOp->Addr.ID());

  clwb(ptr [AddrReg]);
}
DEF_OP(CacheLineZero) {
@ -809,6 +821,7 @@ void X86JITCore::RegisterMemoryHandlers() {
REGISTER_OP(LOADMEMTSO, LoadMem);
REGISTER_OP(STOREMEMTSO, StoreMem);
REGISTER_OP(CACHELINECLEAR, CacheLineClear);
REGISTER_OP(CACHELINECLEAN, CacheLineClean);
REGISTER_OP(CACHELINEZERO, CacheLineZero);
#undef REGISTER_OP
}

View File

@ -5618,6 +5618,29 @@ void OpDispatchBuilder::FenceOp(OpcodeArgs) {
_Fence({FenceType});
}
void OpDispatchBuilder::CLWB(OpcodeArgs) {
  // Translate guest CLWB into a CacheLineClean IR op on the operand's address.
  // NOTE(review): the trailing `false` passed to LoadSource presumably asks for
  // the operand's address rather than its loaded value - confirm against LoadSource.
  OrderedNode *LineAddr = AppendSegmentOffset(
    LoadSource(GPRClass, Op, Op->Dest, Op->Flags, -1, false),
    Op->Flags);

  _CacheLineClean(LineAddr);
}
void OpDispatchBuilder::CLFLUSHOPT(OpcodeArgs) {
  // Translate guest CLFLUSHOPT into a CacheLineClear IR op on the operand's
  // address, with the second argument (serialization) set to false.
  // NOTE(review): the trailing `false` passed to LoadSource presumably asks for
  // the operand's address rather than its loaded value - confirm against LoadSource.
  OrderedNode *LineAddr = AppendSegmentOffset(
    LoadSource(GPRClass, Op, Op->Dest, Op->Flags, -1, false),
    Op->Flags);

  _CacheLineClear(LineAddr, false);
}
void OpDispatchBuilder::MemFenceOrXSAVEOPT(OpcodeArgs) {
  // This opcode slot is shared: ModRM 0xF0 selects MFENCE, anything else is
  // treated as XSAVEOPT, which is reported and routed to UnimplementedOp.
  if (Op->ModRM != 0xF0) {
    LogMan::Msg::EFmt("Application tried using XSAVEOPT");
    UnimplementedOp(Op);
    return;
  }

  // 0xF0 is MFENCE
  _Fence(FEXCore::IR::Fence_LoadStore);
}
void OpDispatchBuilder::StoreFenceOrCLFlush(OpcodeArgs) {
if (Op->ModRM == 0xF8) {
// 0xF8 is SFENCE
@ -5627,7 +5650,7 @@ void OpDispatchBuilder::StoreFenceOrCLFlush(OpcodeArgs) {
// This is a CLFlush
OrderedNode *DestMem = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, -1, false);
DestMem = AppendSegmentOffset(DestMem, Op->Flags);
_CacheLineClear(DestMem);
_CacheLineClear(DestMem, true);
}
}
@ -6765,12 +6788,15 @@ constexpr uint16_t PF_F2 = 3;
{OPD(FEXCore::X86Tables::TYPE_GROUP_15, PF_NONE, 2), 1, &OpDispatchBuilder::LDMXCSR},
{OPD(FEXCore::X86Tables::TYPE_GROUP_15, PF_NONE, 3), 1, &OpDispatchBuilder::STMXCSR},
{OPD(FEXCore::X86Tables::TYPE_GROUP_15, PF_NONE, 5), 1, &OpDispatchBuilder::FenceOp<FEXCore::IR::Fence_Load.Val>}, //LFENCE
{OPD(FEXCore::X86Tables::TYPE_GROUP_15, PF_NONE, 6), 1, &OpDispatchBuilder::FenceOp<FEXCore::IR::Fence_LoadStore.Val>}, //MFENCE
{OPD(FEXCore::X86Tables::TYPE_GROUP_15, PF_NONE, 6), 1, &OpDispatchBuilder::MemFenceOrXSAVEOPT}, //MFENCE
{OPD(FEXCore::X86Tables::TYPE_GROUP_15, PF_NONE, 7), 1, &OpDispatchBuilder::StoreFenceOrCLFlush}, //SFENCE
{OPD(FEXCore::X86Tables::TYPE_GROUP_15, PF_F3, 5), 1, &OpDispatchBuilder::UnimplementedOp},
{OPD(FEXCore::X86Tables::TYPE_GROUP_15, PF_F3, 6), 1, &OpDispatchBuilder::UnimplementedOp},
{OPD(FEXCore::X86Tables::TYPE_GROUP_15, PF_66, 6), 1, &OpDispatchBuilder::CLWB},
{OPD(FEXCore::X86Tables::TYPE_GROUP_15, PF_66, 7), 1, &OpDispatchBuilder::CLFLUSHOPT},
// GROUP 16
{OPD(FEXCore::X86Tables::TYPE_GROUP_16, PF_NONE, 0), 8, &OpDispatchBuilder::NOPOp},
{OPD(FEXCore::X86Tables::TYPE_GROUP_16, PF_F3, 0), 8, &OpDispatchBuilder::NOPOp},

View File

@ -689,6 +689,9 @@ public:
template<uint8_t FenceType>
void FenceOp(OpcodeArgs);
void CLWB(OpcodeArgs);
void CLFLUSHOPT(OpcodeArgs);
void MemFenceOrXSAVEOPT(OpcodeArgs);
void StoreFenceOrCLFlush(OpcodeArgs);
void CLZeroOp(OpcodeArgs);
void RDTSCPOp(OpcodeArgs);

View File

@ -338,7 +338,7 @@ void InitializeSecondaryGroupTables() {
{OPD(TYPE_GROUP_15, PF_NONE, 3), 1, X86InstInfo{"STMXCSR", TYPE_INST, GenFlagsSameSize(SIZE_32BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_MEM_ONLY, 0, nullptr}},
{OPD(TYPE_GROUP_15, PF_NONE, 4), 1, X86InstInfo{"XSAVE", TYPE_PRIV, FLAGS_NONE, 0, nullptr}},
{OPD(TYPE_GROUP_15, PF_NONE, 5), 1, X86InstInfo{"LFENCE/XRSTOR", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}},
{OPD(TYPE_GROUP_15, PF_NONE, 6), 1, X86InstInfo{"MFENCE/XSAVEOPT", TYPE_INST, FLAGS_MODRM, 0, nullptr}},
{OPD(TYPE_GROUP_15, PF_NONE, 6), 1, X86InstInfo{"MFENCE/XSAVEOPT", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}},
{OPD(TYPE_GROUP_15, PF_NONE, 7), 1, X86InstInfo{"SFENCE/CLFLUSH", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}},
{OPD(TYPE_GROUP_15, PF_F3, 0), 1, X86InstInfo{"RDFSBASE", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_REG_ONLY, 0, nullptr}},
@ -356,8 +356,8 @@ void InitializeSecondaryGroupTables() {
{OPD(TYPE_GROUP_15, PF_66, 3), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}},
{OPD(TYPE_GROUP_15, PF_66, 4), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}},
{OPD(TYPE_GROUP_15, PF_66, 5), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}},
{OPD(TYPE_GROUP_15, PF_66, 6), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}},
{OPD(TYPE_GROUP_15, PF_66, 7), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}},
{OPD(TYPE_GROUP_15, PF_66, 6), 1, X86InstInfo{"CLWB", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}},
{OPD(TYPE_GROUP_15, PF_66, 7), 1, X86InstInfo{"CLFLUSHOPT", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, nullptr}},
{OPD(TYPE_GROUP_15, PF_F2, 0), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}},
{OPD(TYPE_GROUP_15, PF_F2, 1), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}},

View File

@ -479,9 +479,17 @@
]
},
"CacheLineClear GPR:$Addr": {
"CacheLineClear GPR:$Addr, i1:$Serialize": {
"Desc": ["Does a 64 byte cacheline clear at the address specified",
"Only clears the data cachelines. Doesn't do any zeroing"
"Only clears the data cachelines. Doesn't do any zeroing",
"Can skip serialization if requested."
],
"HasSideEffects": true
},
"CacheLineClean GPR:$Addr": {
"Desc": ["Does a 64 byte cacheline clean at the address specified",
"Only cleans the data cachelines. Doesn't do any zeroing",
"Skips the invalidation step of the CacheLineClear operation"
],
"HasSideEffects": true
},

View File

@ -27,6 +27,7 @@ class HostFeatures final {
bool SupportsSHA{};
bool SupportsBMI1{};
bool SupportsBMI2{};
bool SupportsCLWB{};
bool SupportsPMULL_128Bit{};
// Float exception behaviour

2
External/xbyak vendored

@ -1 +1 @@
Subproject commit ea21d6e295ede3586ea5c62030bc1c50e2cb7e31
Subproject commit b0f0c7805ad16d9abbac0f8101cc226669983b57

View File

@ -72,6 +72,7 @@ class HostFeatures(Flag) :
FEATURE_CLZERO = (1 << 5)
FEATURE_BMI1 = (1 << 6)
FEATURE_BMI2 = (1 << 7)
FEATURE_CLWB = (1 << 8)
RegStringLookup = {
"NONE": Regs.REG_NONE,
@ -143,6 +144,7 @@ HostFeaturesLookup = {
"CLZERO" : HostFeatures.FEATURE_CLZERO,
"BMI1" : HostFeatures.FEATURE_BMI1,
"BMI2" : HostFeatures.FEATURE_BMI2,
"CLWB" : HostFeatures.FEATURE_CLWB,
}
def parse_hexstring(s):

View File

@ -385,6 +385,7 @@ namespace FEX::HarnessHelper {
FEATURE_CLZERO = (1 << 5),
FEATURE_BMI1 = (1 << 6),
FEATURE_BMI2 = (1 << 7),
FEATURE_CLWB = (1 << 8),
};
bool Requires3DNow() const { return BaseConfig.OptionHostFeatures & HostFeatures::FEATURE_3DNOW; }
@ -395,6 +396,7 @@ namespace FEX::HarnessHelper {
bool RequiresCLZERO() const { return BaseConfig.OptionHostFeatures & HostFeatures::FEATURE_CLZERO; }
bool RequiresBMI1() const { return BaseConfig.OptionHostFeatures & HostFeatures::FEATURE_BMI1; }
bool RequiresBMI2() const { return BaseConfig.OptionHostFeatures & HostFeatures::FEATURE_BMI2; }
bool RequiresCLWB() const { return BaseConfig.OptionHostFeatures & HostFeatures::FEATURE_CLWB; }
private:
FEX_CONFIG_OPT(ConfigDumpGPRs, DUMPGPRS);
@ -534,6 +536,7 @@ namespace FEX::HarnessHelper {
bool RequiresCLZERO() const { return Config.RequiresCLZERO(); }
bool RequiresBMI1() const { return Config.RequiresBMI1(); }
bool RequiresBMI2() const { return Config.RequiresBMI2(); }
bool RequiresCLWB() const { return Config.RequiresCLWB(); }
private:
constexpr static uint64_t STACK_SIZE = FHU::FEX_PAGE_SIZE;

View File

@ -178,7 +178,8 @@ int main(int argc, char **argv, char **const envp) {
(!HostFeatures.SupportsSHA && Loader.RequiresSHA()) ||
(!HostFeatures.SupportsCLZERO && Loader.RequiresCLZERO()) ||
(!HostFeatures.SupportsBMI1 && Loader.RequiresBMI1()) ||
(!HostFeatures.SupportsBMI2 && Loader.RequiresBMI2());
(!HostFeatures.SupportsBMI2 && Loader.RequiresBMI2()) ||
(!HostFeatures.SupportsCLWB && Loader.RequiresCLWB());
if (TestUnsupported) {
FEXCore::Context::DestroyContext(CTX);

View File

@ -87,7 +87,7 @@ public:
Label Gate{};
// Patch gate entry point
// mov(dword[rip + Gate], edi)
jmpf(ptr[rip + Gate]);
jmp(qword [rip + Gate], LabelType::T_FAR);
L(Gate);
dd(0x1'0000); // This is a 32-bit offset from the start of the gate. We start at 0x1'0000 + 0

View File

@ -0,0 +1,14 @@
%ifdef CONFIG
{
"RegData": {
"RAX": "1"
}
}
%endif
; Smoke test for CLFLUSHOPT: run the instruction once, then set RAX to 1 so it
; matches the RegData entry in the CONFIG block above.
; NOTE(review): assumes the harness maps memory at 0xe0000000 - confirm.
mov rdx, 0xe0000000
; Just ensures the code is executed.
clflushopt [rdx]
; Reaching this point means the instruction executed; set RAX to the value in RegData.
mov rax, 1
hlt
View File

@ -0,0 +1,15 @@
%ifdef CONFIG
{
"RegData": {
"RAX": "1"
},
"HostFeatures": ["CLWB"]
}
%endif
; Smoke test for CLWB: run the instruction once, then set RAX to 1 so it
; matches the RegData entry in the CONFIG block above. The CONFIG block also
; lists CLWB under HostFeatures.
; NOTE(review): assumes the harness maps memory at 0xe0000000 - confirm.
mov rdx, 0xe0000000
; Just ensures the code is executed.
clwb [rdx]
; Reaching this point means the instruction executed; set RAX to the value in RegData.
mov rax, 1
hlt