FEXCore: Implements support for xgetbv

xgetbv reads the extended control register (XCR) selected by ecx and
returns it in edx:eax. XCR0 is the `XFEATURE_ENABLED_MASK` register, which
reports what features are enabled on the CPU.
Like CPUID, the instruction takes its index in the ecx register.

This is a prerequisite to enabling XSAVE/XRSTOR and AVX since
applications will expect this to exist.

xsetbv is a privileged instruction and doesn't need to be implemented.
This commit is contained in:
Ryan Houdek 2023-05-22 15:49:15 -07:00
parent de0f3984e9
commit 5646428640
20 changed files with 200 additions and 10 deletions

View File

@ -87,6 +87,10 @@ namespace FEXCore::Context {
return CPUID.RunFunction(Function, Leaf);
}
// Context-level entry point for reading an emulated extended control register.
// Forwards the request unchanged to the context's CPUID emulation object.
//
// Function: XCR index to read.
// Returns whatever the CPUID object's RunXCRFunction produces for that index.
FEXCore::CPUID::XCRResults FEXCore::Context::ContextImpl::RunXCRFunction(uint32_t Function) {
  const FEXCore::CPUID::XCRResults XCRValue = CPUID.RunXCRFunction(Function);
  return XCRValue;
}
// Context-level entry point that forwards a (Function, Leaf, CPU) query
// unchanged to the CPUID emulation object's RunFunctionName.
FEXCore::CPUID::FunctionResults FEXCore::Context::ContextImpl::RunCPUIDFunctionName(uint32_t Function, uint32_t Leaf, uint32_t CPU) {
  const FEXCore::CPUID::FunctionResults NameResult = CPUID.RunFunctionName(Function, Leaf, CPU);
  return NameResult;
}

View File

@ -159,6 +159,7 @@ namespace FEXCore::Context {
void SetSyscallHandler(FEXCore::HLE::SyscallHandler *Handler) override;
FEXCore::CPUID::FunctionResults RunCPUIDFunction(uint32_t Function, uint32_t Leaf) override;
FEXCore::CPUID::XCRResults RunXCRFunction(uint32_t Function) override;
FEXCore::CPUID::FunctionResults RunCPUIDFunctionName(uint32_t Function, uint32_t Leaf, uint32_t CPU) override;
FEXCore::IR::AOTIRCacheEntry *LoadAOTIRCacheEntry(const fextl::string& Name) override;

View File

@ -76,10 +76,6 @@ static uint32_t GetCPUID() {
return CPU;
}
// TODO: Replace usages with CTX->HostFeatures.EnableAVX
// when AVX implementations are further along.
constexpr uint32_t SUPPORTS_AVX = 0;
#ifdef CPUID_AMD
constexpr uint32_t FAMILY_IDENTIFIER =
0 | // Stepping
@ -441,7 +437,7 @@ FEXCore::CPUID::FunctionResults CPUIDEmu::Function_01h(uint32_t Leaf) {
(CTX->HostFeatures.SupportsAES << 25) | // AES
(0 << 26) | // XSAVE
(0 << 27) | // OSXSAVE
(SUPPORTS_AVX << 28) | // AVX
(SupportsAVX() << 28) | // AVX
(0 << 29) | // F16C
(CTX->HostFeatures.SupportsRAND << 30) | // RDRAND
(Hypervisor << 31);
@ -736,13 +732,13 @@ FEXCore::CPUID::FunctionResults CPUIDEmu::Function_0Dh(uint32_t Leaf) {
// Leaf 0
FEXCore::CPUID::FunctionResults Res{};
uint32_t XFeatureSupportedSizeMax = SUPPORTS_AVX ? 0x0000'0340 : 0x0000'0240; // XFeatureEnabledSizeMax: Legacy Header + FPU/SSE + AVX
uint32_t XFeatureSupportedSizeMax = SupportsAVX() ? 0x0000'0340 : 0x0000'0240; // XFeatureEnabledSizeMax: Legacy Header + FPU/SSE + AVX
if (Leaf == 0) {
// XFeatureSupportedMask[31:0]
Res.eax =
(1 << 0) | // X87 support
(1 << 1) | // 128-bit SSE support
(SUPPORTS_AVX << 2) | // 256-bit AVX support
(SupportsAVX() << 2) | // 256-bit AVX support
(0b00 << 3) | // MPX State
(0b000 << 5) | // AVX-512 state
(0 << 8) | // "Used for IA32_XSS" ... Used for what?
@ -776,8 +772,8 @@ FEXCore::CPUID::FunctionResults CPUIDEmu::Function_0Dh(uint32_t Leaf) {
Res.edx = 0;
}
else if (Leaf == 2) {
Res.eax = SUPPORTS_AVX ? 0x0000'0100 : 0; // YmmSaveStateSize
Res.ebx = SUPPORTS_AVX ? 0x0000'0240 : 0; // YmmSaveStateOffset
Res.eax = SupportsAVX() ? 0x0000'0100 : 0; // YmmSaveStateSize
Res.ebx = SupportsAVX() ? 0x0000'0240 : 0; // YmmSaveStateOffset
// Reserved
Res.ecx = 0;
@ -1212,11 +1208,26 @@ FEXCore::CPUID::FunctionResults CPUIDEmu::Function_Reserved(uint32_t Leaf) {
return Res;
}
// Handler for XCR index 0.
// Splits the 64-bit XCR0 member into two 32-bit halves:
// eax receives bits [31:0] and edx receives bits [63:32].
FEXCore::CPUID::XCRResults CPUIDEmu::XCRFunction_0h() {
  FEXCore::CPUID::XCRResults Halves{};
  Halves.eax = static_cast<uint32_t>(XCR0);
  Halves.edx = static_cast<uint32_t>(XCR0 >> 32);
  return Halves;
}
// Binds the emulator to its owning context and prepares the derived state.
//
// ctx: context stored in CTX; its HostFeatures are consulted for AVX.
void CPUIDEmu::Init(FEXCore::Context::ContextImpl *ctx) {
  CTX = ctx;

  // Setup some state tracking
  SetupHostHybridFlag();

  // TODO: Enable once AVX is supported.
  // While this gate is false the AVX bit is never added to XCR0, regardless
  // of what the host reports.
  constexpr bool AVXImplementationReady = false;
  const bool ExposeAVX = AVXImplementationReady && CTX->HostFeatures.SupportsAVX;
  if (ExposeAVX) {
    XCR0 |= XCR0_AVX;
  }
}
}

View File

@ -63,13 +63,52 @@ public:
return Function_8000_0004h(Leaf, CPU % PerCPUData.size());
}
// Reads the emulated extended control register selected by Function.
//
// Only index 0 (XCR0) is emulated. Any other index, including XCR function 1
// which is not yet supported, yields a zero-initialized result.
// NOTE(review): hardware raises #GP for an unsupported XCR index rather than
// returning zero - confirm this relaxed behaviour is intended.
FEXCore::CPUID::XCRResults RunXCRFunction(uint32_t Function) {
  if (Function == 0) {
    return XCRFunction_0h();
  }

  return {};
}
private:
FEXCore::Context::ContextImpl *CTX;
bool Hybrid{};
FEX_CONFIG_OPT(Cores, THREADS);
FEX_CONFIG_OPT(HideHypervisorBit, HIDEHYPERVISORBIT);
// XFEATURE_ENABLED_MASK
// Mask that configures what features are enabled on the CPU.
// Affects XSAVE and XRSTOR when modified.
// Bit layout is as follows.
// [0] - x87 enabled
// [1] - SSE enabled
// [2] - YMM enabled (256-bit SSE)
// [8:3] - Reserved. MBZ.
// [9] - MPK
// [10] - Reserved. MBZ.
// [11] - CET_U
// [12] - CET_S
// [61:13] - Reserved. MBZ.
// [62] - LWP (Lightweight profiling)
// [63] - Reserved for XCR bit vector expansion. MBZ.
// Always enable x87 and SSE by default.
constexpr static uint64_t XCR0_X87 = 1ULL << 0;
constexpr static uint64_t XCR0_SSE = 1ULL << 1;
constexpr static uint64_t XCR0_AVX = 1ULL << 2;
uint64_t XCR0 {
XCR0_X87 |
XCR0_SSE
};
// Reports whether the AVX bit is currently set in the emulated XCR0.
// Returns 1 when set and 0 otherwise, as an integer so callers can shift the
// value straight into a feature bitfield.
uint32_t SupportsAVX() const {
  const bool AVXEnabled = (XCR0 & XCR0_AVX) != 0;
  return AVXEnabled ? 1u : 0u;
}
using FunctionHandler = FEXCore::CPUID::FunctionResults (CPUIDEmu::*)(uint32_t Leaf);
struct CPUData {
const char *ProductName{};
#ifdef _M_ARM_64
@ -109,6 +148,8 @@ private:
FEXCore::CPUID::FunctionResults Function_8000_001Dh(uint32_t Leaf);
FEXCore::CPUID::FunctionResults Function_Reserved(uint32_t Leaf);
FEXCore::CPUID::XCRResults XCRFunction_0h();
void SetupHostHybridFlag();
static constexpr std::array<FunctionHandler, 27> Primary = {
// 0: Highest function parameter and ID

View File

@ -143,6 +143,15 @@ DEF_OP(CPUID) {
memcpy(DstPtr, &Results, sizeof(uint32_t) * 4);
}
// Interpreter handler for the XGetBV IR op.
// Reads the XCR index from the op's Function source, asks the context for the
// emulated register, and writes the two 32-bit result words to the destination.
DEF_OP(XGETBV) {
  auto Op = IROp->C<IR::IROp_XGetBV>();
  uint32_t *ResultSlot = GetDest<uint32_t*>(Data->SSAData, Node);

  const uint32_t XCRIndex = *GetSrc<uint32_t*>(Data->SSAData, Op->Function);
  auto XCRValue = Data->State->CTX->RunXCRFunction(XCRIndex);

  // Two 32-bit elements are copied: eax followed by edx.
  memcpy(ResultSlot, &XCRValue, 2 * sizeof(uint32_t));
}
#undef DEF_OP
} // namespace FEXCore::CPU

View File

@ -118,6 +118,7 @@ constexpr OpHandlerArray InterpreterOpHandlers = [] {
REGISTER_OP(VALIDATECODE, ValidateCode);
REGISTER_OP(THREADREMOVECODEENTRY, ThreadRemoveCodeEntry);
REGISTER_OP(CPUID, CPUID);
REGISTER_OP(XGETBV, XGETBV);
// Conversion ops
REGISTER_OP(VINSGPR, VInsGPR);

View File

@ -154,6 +154,7 @@ namespace FEXCore::CPU {
DEF_OP(ValidateCode);
DEF_OP(ThreadRemoveCodeEntry);
DEF_OP(CPUID);
DEF_OP(XGETBV);
///< Conversion ops
DEF_OP(VInsGPR);

View File

@ -450,6 +450,34 @@ DEF_OP(CPUID) {
mov(ARMEmitter::Size::i64Bit, Dst.second, ARMEmitter::Reg::r1);
}
// Arm64 JIT handler for the XGetBV IR op.
// Emits a call to the XCR handler stored in Pointers.Common.XCRFunction and
// unpacks the value returned in x0 into the destination register pair.
DEF_OP(XGETBV) {
auto Op = IROp->C<IR::IROp_XGetBV>();
PushDynamicRegsAndLR(TMP1);
SpillStaticRegs();
// x0 = CPUID Handler
// x1 = XCR Function
// x2 = Address of the XCR handler function that gets called
ldr(ARMEmitter::XReg::x0, STATE, offsetof(FEXCore::Core::CpuStateFrame, Pointers.Common.CPUIDObj));
ldr(ARMEmitter::XReg::x2, STATE, offsetof(FEXCore::Core::CpuStateFrame, Pointers.Common.XCRFunction));
mov(ARMEmitter::Size::i32Bit, ARMEmitter::Reg::r1, GetReg(Op->Function.ID()));
#ifdef VIXL_SIMULATOR
// Simulator builds go through GenerateIndirectRuntimeCall instead of a plain blr.
GenerateIndirectRuntimeCall<uint64_t, void*, uint32_t>(ARMEmitter::Reg::r2);
#else
blr(ARMEmitter::Reg::r2);
#endif
FillStaticRegs();
PopDynamicRegsAndLR();
// Results are in x0
// Results want to be in a i32v2 vector
auto Dst = GetRegPair(Node);
// First element: 32-bit move from x0.
mov(ARMEmitter::Size::i32Bit, Dst.first, ARMEmitter::Reg::r0);
// Second element: ubfx on x0 with arguments (32, 32) - presumably lsb 32 and
// width 32, i.e. the upper half of the returned value.
ubfx(ARMEmitter::Size::i64Bit, Dst.second, ARMEmitter::Reg::r0, 32, 32);
}
#undef DEF_OP
}

View File

@ -615,6 +615,11 @@ Arm64JITCore::Arm64JITCore(FEXCore::Context::ContextImpl *ctx, FEXCore::Core::In
Common.CPUIDFunction = PMF.GetConvertedPointer();
}
{
FEXCore::Utils::MemberFunctionToPointerCast PMF(&FEXCore::CPUIDEmu::RunXCRFunction);
Common.XCRFunction = PMF.GetConvertedPointer();
}
Common.SyscallHandlerObj = reinterpret_cast<uint64_t>(CTX->SyscallHandler);
Common.SyscallHandlerFunc = reinterpret_cast<uint64_t>(FEXCore::Context::HandleSyscall);
Common.ExitFunctionLink = reinterpret_cast<uintptr_t>(&Context::ContextImpl::ThreadExitFunctionLink<Arm64JITCore_ExitFunctionLink>);
@ -911,6 +916,7 @@ CPUBackend::CompiledCode Arm64JITCore::CompileCode(uint64_t Entry,
REGISTER_OP(VALIDATECODE, ValidateCode);
REGISTER_OP(THREADREMOVECODEENTRY, ThreadRemoveCodeEntry);
REGISTER_OP(CPUID, CPUID);
REGISTER_OP(XGETBV, XGETBV);
// Conversion ops
REGISTER_OP(VINSGPR, VInsGPR);

View File

@ -307,6 +307,7 @@ private:
DEF_OP(ValidateCode);
DEF_OP(ThreadRemoveCodeEntry);
DEF_OP(CPUID);
DEF_OP(XGETBV);
///< Conversion ops
DEF_OP(VInsGPR);

View File

@ -307,6 +307,42 @@ DEF_OP(CPUID) {
mov(Dst.second, rdx);
}
// x86-64 JIT handler for the XGetBV IR op.
// Emits a call to the XCR handler stored in Pointers.Common.XCRFunction and
// splits the value returned in rax into the destination register pair.
DEF_OP(XGETBV) {
auto Op = IROp->C<IR::IROp_XGetBV>();
// Push every register in RA64 so it can be restored after the call.
for (auto &Reg : RA64)
push(Reg);
// XCR function call ABI
// this: rdi
// Function: esi
//
// Result: RAX, holding two packed 32-bit values (low half first)
// The function index is written to esi before rdi is loaded with the object pointer.
mov (esi, GetSrc<RA_32>(Op->Function.ID()));
mov (rdi, qword [STATE + offsetof(FEXCore::Core::CpuStateFrame, Pointers.Common.CPUIDObj)]);
auto NumPush = RA64.size();
// An odd number of 8-byte pushes gets 8 more bytes of adjustment,
// presumably to keep the stack aligned for the call.
if (NumPush & 1)
sub(rsp, 8); // Align
// {rdi, rsi}
call(qword [STATE + offsetof(FEXCore::Core::CpuStateFrame, Pointers.Common.XCRFunction)]);
if (NumPush & 1)
add(rsp, 8); // Align
// Restore the saved registers in reverse push order.
for (uint32_t i = RA64.size(); i > 0; --i)
pop(RA64[i - 1]);
auto Dst = GetSrcPair<RA_64>(Node);
// First element: low 32 bits of the result.
mov(Dst.first.cvt32(), eax);
// Second element: full result shifted right by 32, leaving the high 32 bits.
mov(Dst.second, rax);
shr(Dst.second, 32);
}
#undef DEF_OP
void X86JITCore::RegisterBranchHandlers() {
#define REGISTER_OP(op, x) OpHandlers[FEXCore::IR::IROps::OP_##op] = &X86JITCore::Op_##x
@ -319,6 +355,7 @@ void X86JITCore::RegisterBranchHandlers() {
REGISTER_OP(VALIDATECODE, ValidateCode);
REGISTER_OP(THREADREMOVECODEENTRY, ThreadRemoveCodeEntry);
REGISTER_OP(CPUID, CPUID);
REGISTER_OP(XGETBV, XGETBV);
#undef REGISTER_OP
}
}

View File

@ -444,6 +444,11 @@ X86JITCore::X86JITCore(FEXCore::Context::ContextImpl *ctx, FEXCore::Core::Intern
Common.CPUIDFunction = PMF.GetConvertedPointer();
}
{
FEXCore::Utils::MemberFunctionToPointerCast PMF(&FEXCore::CPUIDEmu::RunXCRFunction);
Common.XCRFunction = PMF.GetConvertedPointer();
}
Common.SyscallHandlerObj = reinterpret_cast<uint64_t>(CTX->SyscallHandler);
Common.SyscallHandlerFunc = reinterpret_cast<uint64_t>(FEXCore::Context::HandleSyscall);
Common.ExitFunctionLink = reinterpret_cast<uintptr_t>(&Context::ContextImpl::ThreadExitFunctionLink<X86JITCore_ExitFunctionLink>);

View File

@ -313,6 +313,7 @@ private:
DEF_OP(ValidateCode);
DEF_OP(ThreadRemoveCodeEntry);
DEF_OP(CPUID);
DEF_OP(XGETBV);
///< Conversion ops
DEF_OP(VInsGPR);

View File

@ -1779,6 +1779,18 @@ void OpDispatchBuilder::CPUIDOp(OpcodeArgs) {
StoreGPRRegister(X86State::REG_RCX, _Bfe(32, 0, Result_Upper));
}
// Builds the IR for the guest xgetbv instruction.
// The XCR index is taken from RCX, the XGetBV IR op produces a two-element
// pair, and the elements are written back to RAX (element 0) and RDX (element 1).
void OpDispatchBuilder::XGetBVOp(OpcodeArgs) {
  OrderedNode *XCRIndex = LoadGPRRegister(X86State::REG_RCX);
  auto XCRPair = _XGetBV(XCRIndex);

  OrderedNode *LowHalf = _ExtractElementPair(XCRPair, 0);
  OrderedNode *HighHalf = _ExtractElementPair(XCRPair, 1);

  StoreGPRRegister(X86State::REG_RAX, LowHalf);
  StoreGPRRegister(X86State::REG_RDX, HighHalf);
}
template<bool SHL1Bit>
void OpDispatchBuilder::SHLOp(OpcodeArgs) {
OrderedNode *Src{};
@ -6736,7 +6748,7 @@ constexpr uint16_t PF_F2 = 3;
constexpr std::tuple<uint8_t, uint8_t, FEXCore::X86Tables::OpDispatchPtr> SecondaryModRMExtensionOpTable[] = {
// REG /2
{((1 << 3) | 0), 1, &OpDispatchBuilder::UnimplementedOp},
{((1 << 3) | 0), 1, &OpDispatchBuilder::XGetBVOp},
// REG /7
{((3 << 3) | 1), 1, &OpDispatchBuilder::RDTSCPOp},

View File

@ -219,6 +219,7 @@ public:
void MOVOffsetOp(OpcodeArgs);
void CMOVOp(OpcodeArgs);
void CPUIDOp(OpcodeArgs);
void XGetBVOp(OpcodeArgs);
template<bool SHL1Bit>
void SHLOp(OpcodeArgs);
void SHLImmediateOp(OpcodeArgs);

View File

@ -297,6 +297,13 @@
],
"DestSize": "16",
"NumElements": "2"
},
"GPRPair = XGetBV GPR:$Function": {
"Desc": ["Calls into the XCR handler function to return the emulated XCR",
"Returns a 64-bit result as a pair of 32-bit GPR elements holding the emulated EAX and EDX respectively"
],
"DestSize": "8",
"NumElements": "2"
}
},
"Moves": {

View File

@ -5,5 +5,9 @@ namespace FEXCore::CPUID {
struct FunctionResults {
uint32_t eax, ebx, ecx, edx;
};
// Result of reading an emulated extended control register.
// Member order is part of the layout: eax comes first, edx second.
struct XCRResults {
  uint32_t eax; // Lower 32 bits of the register value
  uint32_t edx; // Upper 32 bits of the register value
};
}

View File

@ -259,6 +259,7 @@ namespace FEXCore::Context {
FEX_DEFAULT_VISIBILITY virtual void SetSyscallHandler(FEXCore::HLE::SyscallHandler *Handler) = 0;
FEX_DEFAULT_VISIBILITY virtual FEXCore::CPUID::FunctionResults RunCPUIDFunction(uint32_t Function, uint32_t Leaf) = 0;
FEX_DEFAULT_VISIBILITY virtual FEXCore::CPUID::XCRResults RunXCRFunction(uint32_t Function) = 0;
FEX_DEFAULT_VISIBILITY virtual FEXCore::CPUID::FunctionResults RunCPUIDFunctionName(uint32_t Function, uint32_t Leaf, uint32_t CPU) = 0;
FEX_DEFAULT_VISIBILITY virtual FEXCore::IR::AOTIRCacheEntry *LoadAOTIRCacheEntry(const fextl::string& Name) = 0;

View File

@ -144,6 +144,7 @@ namespace FEXCore::Core {
uint64_t ThreadRemoveCodeEntryFromJIT{};
uint64_t CPUIDObj{};
uint64_t CPUIDFunction{};
uint64_t XCRFunction{};
uint64_t SyscallHandlerObj{};
uint64_t SyscallHandlerFunc{};
uint64_t ExitFunctionLink{};

View File

@ -0,0 +1,18 @@
%ifdef CONFIG
{
"RegData": {
"RAX": "0x3",
"RDX": "0x0"
}
}
%endif
; Select XCR index 0 in ecx, then read it with xgetbv.
mov ecx, 0
xgetbv
; Mask only the lower two bits to get host and FEX runners to match.
; This way we can test that we're getting data back.
; Bit 0 and 1 refer to X87 and SSE respectively.
; The config above expects RAX == 0x3 and RDX == 0x0 after the mask.
and eax, 0x3
hlt