mirror of
https://github.com/FEX-Emu/FEX.git
synced 2025-01-22 06:20:58 +00:00
FEXCore: Implements support for xgetbv
This returns the `XFEATURE_ENABLED_MASK` register which reports what features are enabled on the CPU. This behaves similarly to CPUID where it uses an index register in ecx. This is a prerequisite to enabling XSAVE/XRSTOR and AVX since applications will expect this to exist. xsetbv is a privileged instruction and doesn't need to be implemented.
This commit is contained in:
parent
de0f3984e9
commit
5646428640
@ -87,6 +87,10 @@ namespace FEXCore::Context {
|
||||
return CPUID.RunFunction(Function, Leaf);
|
||||
}
|
||||
|
||||
// Context-level entry point for XGETBV emulation.
// Hands the requested XCR index to the CPUID emulator owned by this context
// and returns whatever value it reports.
FEXCore::CPUID::XCRResults FEXCore::Context::ContextImpl::RunXCRFunction(uint32_t Function) {
  auto XCRValue = CPUID.RunXCRFunction(Function);
  return XCRValue;
}
|
||||
|
||||
// Context-level entry point for the CPUID "name" query.
// Passes the function, leaf and CPU index straight through to the CPUID
// emulator owned by this context and returns its result unchanged.
FEXCore::CPUID::FunctionResults FEXCore::Context::ContextImpl::RunCPUIDFunctionName(uint32_t Function, uint32_t Leaf, uint32_t CPU) {
  auto NameResult = CPUID.RunFunctionName(Function, Leaf, CPU);
  return NameResult;
}
|
||||
|
@ -159,6 +159,7 @@ namespace FEXCore::Context {
|
||||
void SetSyscallHandler(FEXCore::HLE::SyscallHandler *Handler) override;
|
||||
|
||||
FEXCore::CPUID::FunctionResults RunCPUIDFunction(uint32_t Function, uint32_t Leaf) override;
|
||||
FEXCore::CPUID::XCRResults RunXCRFunction(uint32_t Function) override;
|
||||
FEXCore::CPUID::FunctionResults RunCPUIDFunctionName(uint32_t Function, uint32_t Leaf, uint32_t CPU) override;
|
||||
|
||||
FEXCore::IR::AOTIRCacheEntry *LoadAOTIRCacheEntry(const fextl::string& Name) override;
|
||||
|
29
External/FEXCore/Source/Interface/Core/CPUID.cpp
vendored
29
External/FEXCore/Source/Interface/Core/CPUID.cpp
vendored
@ -76,10 +76,6 @@ static uint32_t GetCPUID() {
|
||||
return CPU;
|
||||
}
|
||||
|
||||
// TODO: Replace usages with CTX->HostFeatures.EnableAVX
|
||||
// when AVX implementations are further along.
|
||||
constexpr uint32_t SUPPORTS_AVX = 0;
|
||||
|
||||
#ifdef CPUID_AMD
|
||||
constexpr uint32_t FAMILY_IDENTIFIER =
|
||||
0 | // Stepping
|
||||
@ -441,7 +437,7 @@ FEXCore::CPUID::FunctionResults CPUIDEmu::Function_01h(uint32_t Leaf) {
|
||||
(CTX->HostFeatures.SupportsAES << 25) | // AES
|
||||
(0 << 26) | // XSAVE
|
||||
(0 << 27) | // OSXSAVE
|
||||
(SUPPORTS_AVX << 28) | // AVX
|
||||
(SupportsAVX() << 28) | // AVX
|
||||
(0 << 29) | // F16C
|
||||
(CTX->HostFeatures.SupportsRAND << 30) | // RDRAND
|
||||
(Hypervisor << 31);
|
||||
@ -736,13 +732,13 @@ FEXCore::CPUID::FunctionResults CPUIDEmu::Function_0Dh(uint32_t Leaf) {
|
||||
// Leaf 0
|
||||
FEXCore::CPUID::FunctionResults Res{};
|
||||
|
||||
uint32_t XFeatureSupportedSizeMax = SUPPORTS_AVX ? 0x0000'0340 : 0x0000'0240; // XFeatureEnabledSizeMax: Legacy Header + FPU/SSE + AVX
|
||||
uint32_t XFeatureSupportedSizeMax = SupportsAVX() ? 0x0000'0340 : 0x0000'0240; // XFeatureEnabledSizeMax: Legacy Header + FPU/SSE + AVX
|
||||
if (Leaf == 0) {
|
||||
// XFeatureSupportedMask[31:0]
|
||||
Res.eax =
|
||||
(1 << 0) | // X87 support
|
||||
(1 << 1) | // 128-bit SSE support
|
||||
(SUPPORTS_AVX << 2) | // 256-bit AVX support
|
||||
(SupportsAVX() << 2) | // 256-bit AVX support
|
||||
(0b00 << 3) | // MPX State
|
||||
(0b000 << 5) | // AVX-512 state
|
||||
(0 << 8) | // "Used for IA32_XSS" ... Used for what?
|
||||
@ -776,8 +772,8 @@ FEXCore::CPUID::FunctionResults CPUIDEmu::Function_0Dh(uint32_t Leaf) {
|
||||
Res.edx = 0;
|
||||
}
|
||||
else if (Leaf == 2) {
|
||||
Res.eax = SUPPORTS_AVX ? 0x0000'0100 : 0; // YmmSaveStateSize
|
||||
Res.ebx = SUPPORTS_AVX ? 0x0000'0240 : 0; // YmmSaveStateOffset
|
||||
Res.eax = SupportsAVX() ? 0x0000'0100 : 0; // YmmSaveStateSize
|
||||
Res.ebx = SupportsAVX() ? 0x0000'0240 : 0; // YmmSaveStateOffset
|
||||
|
||||
// Reserved
|
||||
Res.ecx = 0;
|
||||
@ -1212,11 +1208,26 @@ FEXCore::CPUID::FunctionResults CPUIDEmu::Function_Reserved(uint32_t Leaf) {
|
||||
return Res;
|
||||
}
|
||||
|
||||
// XCR function 0: reports the emulated XCR0 register.
// The 64-bit value is split the way XGETBV hands it back to the guest:
// bits [31:0] go in eax and bits [63:32] go in edx.
FEXCore::CPUID::XCRResults CPUIDEmu::XCRFunction_0h() {
  constexpr unsigned HighHalfShift = 32;

  FEXCore::CPUID::XCRResults Split{};
  Split.eax = static_cast<uint32_t>(XCR0);
  Split.edx = static_cast<uint32_t>(XCR0 >> HighHalfShift);
  return Split;
}
|
||||
|
||||
// Binds the emulator to its owning context and prepares cached state.
// AVX is never advertised in XCR0 for now: the gate below is hard-wired off
// until the AVX implementation is ready.
void CPUIDEmu::Init(FEXCore::Context::ContextImpl *ctx) {
  CTX = ctx;

  // Setup some state tracking
  SetupHostHybridFlag();

  // TODO: Enable once AVX is supported.
  constexpr bool AVXImplemented = false;
  const bool ExposeAVX = AVXImplemented && CTX->HostFeatures.SupportsAVX;
  if (ExposeAVX) {
    XCR0 |= XCR0_AVX;
  }
}
|
||||
}
|
||||
|
||||
|
41
External/FEXCore/Source/Interface/Core/CPUID.h
vendored
41
External/FEXCore/Source/Interface/Core/CPUID.h
vendored
@ -63,13 +63,52 @@ public:
|
||||
return Function_8000_0004h(Leaf, CPU % PerCPUData.size());
|
||||
}
|
||||
|
||||
// Dispatches an XGETBV request by XCR index.
// Only index 0 (XCR0) is implemented; every other index produces a
// value-initialized (all zero) result.
FEXCore::CPUID::XCRResults RunXCRFunction(uint32_t Function) {
  if (Function == 0) {
    return XCRFunction_0h();
  }

  // XCR function 1 and above are not yet supported.
  return {};
}
|
||||
|
||||
private:
|
||||
FEXCore::Context::ContextImpl *CTX;
|
||||
bool Hybrid{};
|
||||
FEX_CONFIG_OPT(Cores, THREADS);
|
||||
FEX_CONFIG_OPT(HideHypervisorBit, HIDEHYPERVISORBIT);
|
||||
|
||||
// XFEATURE_ENABLED_MASK
|
||||
// Mask that configures what features are enabled on the CPU.
|
||||
// Affects XSAVE and XRSTOR when modified.
|
||||
// Bit layout is as follows.
|
||||
// [0] - x87 enabled
|
||||
// [1] - SSE enabled
|
||||
// [2] - YMM enabled (256-bit SSE)
|
||||
// [8:3] - Reserved. MBZ.
|
||||
// [9] - MPK
|
||||
// [10] - Reserved. MBZ.
|
||||
// [11] - CET_U
|
||||
// [12] - CET_S
|
||||
// [61:13] - Reserved. MBZ.
|
||||
// [62] - LWP (Lightweight profiling)
|
||||
// [63] - Reserved for XCR bit vector expansion. MBZ.
|
||||
// Always enable x87 and SSE by default.
|
||||
// Masks for the individual XCR0 feature-enable bits.
static constexpr uint64_t XCR0_X87 = uint64_t{1} << 0;
static constexpr uint64_t XCR0_SSE = uint64_t{1} << 1;
static constexpr uint64_t XCR0_AVX = uint64_t{1} << 2;

// Emulated XCR0 value. Starts out with only x87 and SSE state enabled.
uint64_t XCR0 = XCR0_X87 | XCR0_SSE;
|
||||
|
||||
uint32_t SupportsAVX() const {
|
||||
return (XCR0 & XCR0_AVX) ? 1 : 0;
|
||||
}
|
||||
|
||||
using FunctionHandler = FEXCore::CPUID::FunctionResults (CPUIDEmu::*)(uint32_t Leaf);
|
||||
|
||||
struct CPUData {
|
||||
const char *ProductName{};
|
||||
#ifdef _M_ARM_64
|
||||
@ -109,6 +148,8 @@ private:
|
||||
FEXCore::CPUID::FunctionResults Function_8000_001Dh(uint32_t Leaf);
|
||||
FEXCore::CPUID::FunctionResults Function_Reserved(uint32_t Leaf);
|
||||
|
||||
FEXCore::CPUID::XCRResults XCRFunction_0h();
|
||||
|
||||
void SetupHostHybridFlag();
|
||||
static constexpr std::array<FunctionHandler, 27> Primary = {
|
||||
// 0: Highest function parameter and ID
|
||||
|
@ -143,6 +143,15 @@ DEF_OP(CPUID) {
|
||||
memcpy(DstPtr, &Results, sizeof(uint32_t) * 4);
|
||||
}
|
||||
|
||||
// Interpreter handler for the XGETBV IR op.
// Reads the XCR index from the op's source, asks the context for the emulated
// XCR value and writes it to the destination as two packed 32-bit words.
DEF_OP(XGETBV) {
  auto XGetBVOp = IROp->C<IR::IROp_XGetBV>();
  uint32_t *Destination = GetDest<uint32_t*>(Data->SSAData, Node);
  const uint32_t XCRIndex = *GetSrc<uint32_t*>(Data->SSAData, XGetBVOp->Function);

  // Only the first two 32-bit words of the result are copied out.
  auto XCRValue = Data->State->CTX->RunXCRFunction(XCRIndex);
  memcpy(Destination, &XCRValue, sizeof(uint32_t) * 2);
}
|
||||
|
||||
#undef DEF_OP
|
||||
|
||||
} // namespace FEXCore::CPU
|
||||
|
@ -118,6 +118,7 @@ constexpr OpHandlerArray InterpreterOpHandlers = [] {
|
||||
REGISTER_OP(VALIDATECODE, ValidateCode);
|
||||
REGISTER_OP(THREADREMOVECODEENTRY, ThreadRemoveCodeEntry);
|
||||
REGISTER_OP(CPUID, CPUID);
|
||||
REGISTER_OP(XGETBV, XGETBV);
|
||||
|
||||
// Conversion ops
|
||||
REGISTER_OP(VINSGPR, VInsGPR);
|
||||
|
@ -154,6 +154,7 @@ namespace FEXCore::CPU {
|
||||
DEF_OP(ValidateCode);
|
||||
DEF_OP(ThreadRemoveCodeEntry);
|
||||
DEF_OP(CPUID);
|
||||
DEF_OP(XGETBV);
|
||||
|
||||
///< Conversion ops
|
||||
DEF_OP(VInsGPR);
|
||||
|
@ -450,6 +450,34 @@ DEF_OP(CPUID) {
|
||||
mov(ARMEmitter::Size::i64Bit, Dst.second, ARMEmitter::Reg::r1);
|
||||
}
|
||||
|
||||
// Arm64 JIT handler for the XGETBV IR op.
// Emits a call to the XCR handler whose object and function pointers are stored
// in the CpuStateFrame, then splits the 64-bit value returned in x0 into the
// op's destination register pair.
DEF_OP(XGETBV) {
|
||||
auto Op = IROp->C<IR::IROp_XGetBV>();
|
||||
|
||||
// Preserve JIT register state across the call; restored in reverse order below.
PushDynamicRegsAndLR(TMP1);
|
||||
SpillStaticRegs();
|
||||
|
||||
// x0 = CPUID Handler
|
||||
// x1 = XCR Function
|
||||
// x2 = address of the XCR handler function (call target)
// NOTE(review): the function index is read after x0 and x2 have been overwritten;
// this assumes GetReg() never hands back x0 or x2 here - confirm.
ldr(ARMEmitter::XReg::x0, STATE, offsetof(FEXCore::Core::CpuStateFrame, Pointers.Common.CPUIDObj));
|
||||
ldr(ARMEmitter::XReg::x2, STATE, offsetof(FEXCore::Core::CpuStateFrame, Pointers.Common.XCRFunction));
|
||||
mov(ARMEmitter::Size::i32Bit, ARMEmitter::Reg::r1, GetReg(Op->Function.ID()));
|
||||
#ifdef VIXL_SIMULATOR
|
||||
GenerateIndirectRuntimeCall<uint64_t, void*, uint32_t>(ARMEmitter::Reg::r2);
|
||||
#else
|
||||
blr(ARMEmitter::Reg::r2);
|
||||
#endif
|
||||
|
||||
FillStaticRegs();
|
||||
|
||||
PopDynamicRegsAndLR();
|
||||
|
||||
// Results are in x0
|
||||
// Results want to be in a i32v2 vector
|
||||
auto Dst = GetRegPair(Node);
|
||||
// First pair element takes the low 32 bits of x0.
mov(ARMEmitter::Size::i32Bit, Dst.first, ARMEmitter::Reg::r0);
|
||||
// Second pair element takes the 32-bit field of x0 starting at bit 32.
ubfx(ARMEmitter::Size::i64Bit, Dst.second, ARMEmitter::Reg::r0, 32, 32);
|
||||
}
|
||||
|
||||
#undef DEF_OP
|
||||
}
|
||||
|
||||
|
@ -615,6 +615,11 @@ Arm64JITCore::Arm64JITCore(FEXCore::Context::ContextImpl *ctx, FEXCore::Core::In
|
||||
Common.CPUIDFunction = PMF.GetConvertedPointer();
|
||||
}
|
||||
|
||||
{
|
||||
FEXCore::Utils::MemberFunctionToPointerCast PMF(&FEXCore::CPUIDEmu::RunXCRFunction);
|
||||
Common.XCRFunction = PMF.GetConvertedPointer();
|
||||
}
|
||||
|
||||
Common.SyscallHandlerObj = reinterpret_cast<uint64_t>(CTX->SyscallHandler);
|
||||
Common.SyscallHandlerFunc = reinterpret_cast<uint64_t>(FEXCore::Context::HandleSyscall);
|
||||
Common.ExitFunctionLink = reinterpret_cast<uintptr_t>(&Context::ContextImpl::ThreadExitFunctionLink<Arm64JITCore_ExitFunctionLink>);
|
||||
@ -911,6 +916,7 @@ CPUBackend::CompiledCode Arm64JITCore::CompileCode(uint64_t Entry,
|
||||
REGISTER_OP(VALIDATECODE, ValidateCode);
|
||||
REGISTER_OP(THREADREMOVECODEENTRY, ThreadRemoveCodeEntry);
|
||||
REGISTER_OP(CPUID, CPUID);
|
||||
REGISTER_OP(XGETBV, XGETBV);
|
||||
|
||||
// Conversion ops
|
||||
REGISTER_OP(VINSGPR, VInsGPR);
|
||||
|
@ -307,6 +307,7 @@ private:
|
||||
DEF_OP(ValidateCode);
|
||||
DEF_OP(ThreadRemoveCodeEntry);
|
||||
DEF_OP(CPUID);
|
||||
DEF_OP(XGETBV);
|
||||
|
||||
///< Conversion ops
|
||||
DEF_OP(VInsGPR);
|
||||
|
@ -307,6 +307,42 @@ DEF_OP(CPUID) {
|
||||
mov(Dst.second, rdx);
|
||||
}
|
||||
|
||||
// x86-64 JIT handler for the XGETBV IR op.
// Emits a call to the XCR handler whose object and function pointers are stored
// in the CpuStateFrame, then splits the 64-bit value left in rax into the op's
// destination register pair.
DEF_OP(XGETBV) {
|
||||
auto Op = IROp->C<IR::IROp_XGetBV>();
|
||||
|
||||
// Save every register in RA64 around the call; popped in reverse order below.
for (auto &Reg : RA64)
|
||||
push(Reg);
|
||||
|
||||
// XCR handler ABI
|
||||
// this: rdi
|
||||
// Function: rsi
|
||||
//
|
||||
// Result: RAX. 2xi32 (low half, then high half)
|
||||
|
||||
// Load the function index into esi first, then the handler object into rdi
|
||||
mov (esi, GetSrc<RA_32>(Op->Function.ID()));
|
||||
mov (rdi, qword [STATE + offsetof(FEXCore::Core::CpuStateFrame, Pointers.Common.CPUIDObj)]);
|
||||
|
||||
auto NumPush = RA64.size();
|
||||
|
||||
// An odd number of 8-byte pushes gets 8 bytes of padding so the total
// rsp adjustment is a multiple of 16 bytes.
if (NumPush & 1)
|
||||
sub(rsp, 8); // Align
|
||||
|
||||
// {rdi, rsi}
|
||||
call(qword [STATE + offsetof(FEXCore::Core::CpuStateFrame, Pointers.Common.XCRFunction)]);
|
||||
|
||||
if (NumPush & 1)
|
||||
add(rsp, 8); // Align
|
||||
|
||||
for (uint32_t i = RA64.size(); i > 0; --i)
|
||||
pop(RA64[i - 1]);
|
||||
|
||||
// rax is read only after the pops above.
// NOTE(review): this relies on rax not being one of the RA64 registers - confirm.
auto Dst = GetSrcPair<RA_64>(Node);
|
||||
// First pair element takes the low 32 bits of the result.
mov(Dst.first.cvt32(), eax);
|
||||
// Second pair element takes the upper 32 bits, shifted down.
mov(Dst.second, rax);
|
||||
shr(Dst.second, 32);
|
||||
}
|
||||
|
||||
#undef DEF_OP
|
||||
void X86JITCore::RegisterBranchHandlers() {
|
||||
#define REGISTER_OP(op, x) OpHandlers[FEXCore::IR::IROps::OP_##op] = &X86JITCore::Op_##x
|
||||
@ -319,6 +355,7 @@ void X86JITCore::RegisterBranchHandlers() {
|
||||
REGISTER_OP(VALIDATECODE, ValidateCode);
|
||||
REGISTER_OP(THREADREMOVECODEENTRY, ThreadRemoveCodeEntry);
|
||||
REGISTER_OP(CPUID, CPUID);
|
||||
REGISTER_OP(XGETBV, XGETBV);
|
||||
#undef REGISTER_OP
|
||||
}
|
||||
}
|
||||
|
@ -444,6 +444,11 @@ X86JITCore::X86JITCore(FEXCore::Context::ContextImpl *ctx, FEXCore::Core::Intern
|
||||
Common.CPUIDFunction = PMF.GetConvertedPointer();
|
||||
}
|
||||
|
||||
{
|
||||
FEXCore::Utils::MemberFunctionToPointerCast PMF(&FEXCore::CPUIDEmu::RunXCRFunction);
|
||||
Common.XCRFunction = PMF.GetConvertedPointer();
|
||||
}
|
||||
|
||||
Common.SyscallHandlerObj = reinterpret_cast<uint64_t>(CTX->SyscallHandler);
|
||||
Common.SyscallHandlerFunc = reinterpret_cast<uint64_t>(FEXCore::Context::HandleSyscall);
|
||||
Common.ExitFunctionLink = reinterpret_cast<uintptr_t>(&Context::ContextImpl::ThreadExitFunctionLink<X86JITCore_ExitFunctionLink>);
|
||||
|
@ -313,6 +313,7 @@ private:
|
||||
DEF_OP(ValidateCode);
|
||||
DEF_OP(ThreadRemoveCodeEntry);
|
||||
DEF_OP(CPUID);
|
||||
DEF_OP(XGETBV);
|
||||
|
||||
///< Conversion ops
|
||||
DEF_OP(VInsGPR);
|
||||
|
@ -1779,6 +1779,18 @@ void OpDispatchBuilder::CPUIDOp(OpcodeArgs) {
|
||||
StoreGPRRegister(X86State::REG_RCX, _Bfe(32, 0, Result_Upper));
|
||||
}
|
||||
|
||||
// Translates the guest XGETBV instruction to IR.
// The XCR index is taken from the guest's RCX; the two halves of the value
// produced by the XGetBV IR op are written to the guest's RAX and RDX.
void OpDispatchBuilder::XGetBVOp(OpcodeArgs) {
  OrderedNode *XCRIndex = LoadGPRRegister(X86State::REG_RCX);
  auto XCRPair = _XGetBV(XCRIndex);

  // Pair element 0 is the low half, element 1 the high half.
  OrderedNode *LowHalf = _ExtractElementPair(XCRPair, 0);
  OrderedNode *HighHalf = _ExtractElementPair(XCRPair, 1);

  StoreGPRRegister(X86State::REG_RAX, LowHalf);
  StoreGPRRegister(X86State::REG_RDX, HighHalf);
}
|
||||
|
||||
template<bool SHL1Bit>
|
||||
void OpDispatchBuilder::SHLOp(OpcodeArgs) {
|
||||
OrderedNode *Src{};
|
||||
@ -6736,7 +6748,7 @@ constexpr uint16_t PF_F2 = 3;
|
||||
|
||||
constexpr std::tuple<uint8_t, uint8_t, FEXCore::X86Tables::OpDispatchPtr> SecondaryModRMExtensionOpTable[] = {
|
||||
// REG /2
|
||||
{((1 << 3) | 0), 1, &OpDispatchBuilder::UnimplementedOp},
|
||||
{((1 << 3) | 0), 1, &OpDispatchBuilder::XGetBVOp},
|
||||
|
||||
// REG /7
|
||||
{((3 << 3) | 1), 1, &OpDispatchBuilder::RDTSCPOp},
|
||||
|
@ -219,6 +219,7 @@ public:
|
||||
void MOVOffsetOp(OpcodeArgs);
|
||||
void CMOVOp(OpcodeArgs);
|
||||
void CPUIDOp(OpcodeArgs);
|
||||
void XGetBVOp(OpcodeArgs);
|
||||
template<bool SHL1Bit>
|
||||
void SHLOp(OpcodeArgs);
|
||||
void SHLImmediateOp(OpcodeArgs);
|
||||
|
7
External/FEXCore/Source/Interface/IR/IR.json
vendored
7
External/FEXCore/Source/Interface/IR/IR.json
vendored
@ -297,6 +297,13 @@
|
||||
],
|
||||
"DestSize": "16",
|
||||
"NumElements": "2"
|
||||
},
|
||||
"GPRPair = XGetBV GPR:$Function": {
|
||||
"Desc": ["Calls in to the XCR handler function to return emulated XCR",
|
||||
"Returns a 64bit GPR pair that fits emulated EAX, EDX respectively"
|
||||
],
|
||||
"DestSize": "8",
|
||||
"NumElements": "2"
|
||||
}
|
||||
},
|
||||
"Moves": {
|
||||
|
@ -5,5 +5,9 @@ namespace FEXCore::CPUID {
|
||||
// Register values produced by an emulated CPUID query, in the order
// eax, ebx, ecx, edx.
struct FunctionResults {
  uint32_t eax;
  uint32_t ebx;
  uint32_t ecx;
  uint32_t edx;
};

// The interpreter copies this struct out as exactly four 32-bit words, so the
// layout must stay tightly packed.
static_assert(sizeof(FunctionResults) == sizeof(uint32_t) * 4,
              "FunctionResults must be exactly four packed 32-bit registers");
|
||||
|
||||
// Register values produced by an emulated XGETBV query: eax holds the low
// 32 bits of the XCR and edx the high 32 bits.
struct XCRResults {
  uint32_t eax;
  uint32_t edx;
};

// The interpreter copies this struct out as exactly two 32-bit words and the
// JIT backends read the whole result from a single 64-bit return register,
// so the layout must stay at 8 tightly packed bytes.
static_assert(sizeof(XCRResults) == sizeof(uint32_t) * 2,
              "XCRResults must be exactly two packed 32-bit registers");
|
||||
}
|
||||
|
||||
|
@ -259,6 +259,7 @@ namespace FEXCore::Context {
|
||||
FEX_DEFAULT_VISIBILITY virtual void SetSyscallHandler(FEXCore::HLE::SyscallHandler *Handler) = 0;
|
||||
|
||||
FEX_DEFAULT_VISIBILITY virtual FEXCore::CPUID::FunctionResults RunCPUIDFunction(uint32_t Function, uint32_t Leaf) = 0;
|
||||
FEX_DEFAULT_VISIBILITY virtual FEXCore::CPUID::XCRResults RunXCRFunction(uint32_t Function) = 0;
|
||||
FEX_DEFAULT_VISIBILITY virtual FEXCore::CPUID::FunctionResults RunCPUIDFunctionName(uint32_t Function, uint32_t Leaf, uint32_t CPU) = 0;
|
||||
|
||||
FEX_DEFAULT_VISIBILITY virtual FEXCore::IR::AOTIRCacheEntry *LoadAOTIRCacheEntry(const fextl::string& Name) = 0;
|
||||
|
@ -144,6 +144,7 @@ namespace FEXCore::Core {
|
||||
uint64_t ThreadRemoveCodeEntryFromJIT{};
|
||||
uint64_t CPUIDObj{};
|
||||
uint64_t CPUIDFunction{};
|
||||
uint64_t XCRFunction{};
|
||||
uint64_t SyscallHandlerObj{};
|
||||
uint64_t SyscallHandlerFunc{};
|
||||
uint64_t ExitFunctionLink{};
|
||||
|
18
unittests/ASM/SecondaryModRM/Reg_2_0.asm
Normal file
18
unittests/ASM/SecondaryModRM/Reg_2_0.asm
Normal file
@ -0,0 +1,18 @@
|
||||
%ifdef CONFIG
|
||||
{
|
||||
"RegData": {
|
||||
"RAX": "0x3",
|
||||
"RDX": "0x0"
|
||||
}
|
||||
}
|
||||
%endif
|
||||
|
||||
; ECX selects which XCR to read; index 0 is XCR0.
mov ecx, 0
|
||||
; The result comes back in EDX:EAX.
xgetbv
|
||||
|
||||
; Mask only the lower two bits to get host and FEX runners to match.
|
||||
; This way we can test that we're getting data back.
|
||||
; Bit 0 and 1 refer to X87 and SSE respectively.
|
||||
; NOTE(review): EDX is not masked; the expected RDX of 0 assumes the upper
; 32 bits of XCR0 are zero on every runner - confirm.
and eax, 0x3
|
||||
|
||||
hlt
|
Loading…
x
Reference in New Issue
Block a user