Merge pull request #3935 from bylaws/win-feats

Windows: Support CPU feature detection from ID registry keys
This commit is contained in:
Ryan Houdek 2024-08-13 13:40:54 -07:00 committed by GitHub
commit 97c229d5eb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 190 additions and 40 deletions

View File

@ -2157,6 +2157,11 @@ DEF_OP(ParanoidStoreMemTSO) {
}
DEF_OP(CacheLineClear) {
if (!CTX->HostFeatures.SupportsCacheMaintenanceOps) {
dmb(ARMEmitter::BarrierScope::SY);
return;
}
auto Op = IROp->C<IR::IROp_CacheLineClear>();
auto MemReg = GetReg(Op->Addr.ID());
@ -2181,6 +2186,11 @@ DEF_OP(CacheLineClear) {
}
DEF_OP(CacheLineClean) {
if (!CTX->HostFeatures.SupportsCacheMaintenanceOps) {
dmb(ARMEmitter::BarrierScope::ST);
return;
}
auto Op = IROp->C<IR::IROp_CacheLineClean>();
auto MemReg = GetReg(Op->Addr.ID());

View File

@ -12,6 +12,7 @@ struct HostFeatures {
*/
uint32_t DCacheLineSize {};
uint32_t ICacheLineSize {};
bool SupportsCacheMaintenanceOps {};
bool SupportsAES {};
bool SupportsCRC {};
bool SupportsCLZERO {};

View File

@ -42,12 +42,6 @@ static void SetFPCR(uint64_t Value) {
__asm("msr FPCR, %[Value]" ::[Value] "r"(Value));
}
/// Reads the MIDR_EL1 system register and returns its low 32 bits.
static uint32_t GetMIDR() {
  uint64_t RawMIDR {};
  __asm("mrs %[Out], MIDR_EL1" : [Out] "=r"(RawMIDR));
  // The asm operand is 64 bits wide; the return type keeps only the lower half.
  return static_cast<uint32_t>(RawMIDR);
}
__attribute__((naked)) static uint64_t ReadSVEVectorLengthInBits() {
///< Can't use rdvl instruction directly because compilers will complain that sve/sme is required.
__asm(R"(
@ -131,24 +125,14 @@ static void OverrideFeatures(FEXCore::HostFeatures* Features, uint64_t ForceSVEW
Features->SupportsSVE256 = ForceSVEWidth && ForceSVEWidth >= 256;
}
FEXCore::HostFeatures FetchHostFeatures() {
FEXCore::HostFeatures FetchHostFeatures(vixl::CPUFeatures Features, bool SupportsCacheMaintenanceOps, uint64_t CTR, uint64_t MIDR) {
FEXCore::HostFeatures HostFeatures;
#ifdef VIXL_SIMULATOR
auto Features = vixl::CPUFeatures::All();
// Vixl simulator doesn't support AFP.
Features.Remove(vixl::CPUFeatures::Feature::kAFP);
// Vixl simulator doesn't support RPRES.
Features.Remove(vixl::CPUFeatures::Feature::kRPRES);
#elif !defined(_WIN32)
auto Features = vixl::CPUFeatures::InferFromOS();
#else
// Need to use ID registers in WINE.
auto Features = vixl::CPUFeatures::InferFromIDRegisters();
#endif
FEX_CONFIG_OPT(ForceSVEWidth, FORCESVEWIDTH);
FEX_CONFIG_OPT(Is64BitMode, IS64BIT_MODE);
HostFeatures.SupportsCacheMaintenanceOps = SupportsCacheMaintenanceOps;
HostFeatures.SupportsAES = Features.Has(vixl::CPUFeatures::Feature::kAES);
HostFeatures.SupportsCRC = Features.Has(vixl::CPUFeatures::Feature::kCRC32);
HostFeatures.SupportsSHA = Features.Has(vixl::CPUFeatures::Feature::kSHA1) && Features.Has(vixl::CPUFeatures::Feature::kSHA2);
@ -184,14 +168,6 @@ FEXCore::HostFeatures FetchHostFeatures() {
}
#ifdef _M_ARM_64
// We need to get the CPU's cache line size
// We expect sane targets that have correct cacheline sizes across clusters
uint64_t CTR;
__asm volatile("mrs %[ctr], ctr_el0" : [ctr] "=r"(CTR));
HostFeatures.DCacheLineSize = 4 << ((CTR >> 16) & 0xF);
HostFeatures.ICacheLineSize = 4 << (CTR & 0xF);
// Test if this CPU supports float exception trapping by attempting to enable
// On unsupported these bits are architecturally defined as RAZ/WI
constexpr uint32_t ExceptionEnableTraps = (1U << 8) | // Invalid Operation float exception trap enable
@ -211,7 +187,6 @@ FEXCore::HostFeatures FetchHostFeatures() {
SetFPCR(OriginalFPCR);
if (HostFeatures.SupportsRAND) {
const auto MIDR = GetMIDR();
constexpr uint32_t Implementer_QCOM = 0x51;
constexpr uint32_t PartNum_Oryon1 = 0x001;
const uint32_t MIDR_Implementer = (MIDR >> 24) & 0xFF;
@ -247,12 +222,14 @@ FEXCore::HostFeatures FetchHostFeatures() {
}
#endif
#if defined(_M_X86_64)
// Hardcoded cacheline size.
HostFeatures.DCacheLineSize = 64U;
HostFeatures.ICacheLineSize = 64U;
if (CTR) {
HostFeatures.DCacheLineSize = 4 << ((CTR >> 16) & 0xF);
HostFeatures.ICacheLineSize = 4 << (CTR & 0xF);
} else {
HostFeatures.DCacheLineSize = HostFeatures.ICacheLineSize = 64;
}
#if !defined(VIXL_SIMULATOR)
#if defined(_M_X86_64) && !defined(VIXL_SIMULATOR)
Xbyak::util::Cpu X86Features {};
HostFeatures.SupportsAES = X86Features.has(Xbyak::util::Cpu::tAESNI);
HostFeatures.SupportsCRC = X86Features.has(Xbyak::util::Cpu::tSSE42);
@ -277,7 +254,6 @@ FEXCore::HostFeatures FetchHostFeatures() {
HostFeatures.SupportsAFP = true;
HostFeatures.SupportsFloatExceptions = true;
#endif
#endif
HostFeatures.SupportsPreserveAllABI = FEX_HAS_PRESERVE_ALL_ATTR;
@ -294,4 +270,27 @@ FEXCore::HostFeatures FetchHostFeatures() {
OverrideFeatures(&HostFeatures, ForceSVEWidth());
return HostFeatures;
}
/// Collects the CPU feature set plus the CTR_EL0 and MIDR_EL1 register values for the
/// running host, then hands them to the parameterised FetchHostFeatures overload.
/// This path always reports cache maintenance operations as supported.
FEXCore::HostFeatures FetchHostFeatures() {
  // Both stay zero when the registers are not read (builds without _M_ARM_64).
  uint64_t CacheTypeReg {};
  uint64_t MainIDReg {};

#ifdef _M_ARM_64
  // We need to get the CPU's cache line size
  // We expect sane targets that have correct cacheline sizes across clusters
  __asm volatile("mrs %[Out], ctr_el0" : [Out] "=r"(CacheTypeReg));
  __asm volatile("mrs %[Out], midr_el1" : [Out] "=r"(MainIDReg));
#endif

#ifndef VIXL_SIMULATOR
  auto Features = vixl::CPUFeatures::InferFromOS();
#else
  auto Features = vixl::CPUFeatures::All();
  // Vixl simulator doesn't support AFP.
  Features.Remove(vixl::CPUFeatures::Feature::kAFP);
  // Vixl simulator doesn't support RPRES.
  Features.Remove(vixl::CPUFeatures::Feature::kRPRES);
#endif

  constexpr bool SupportsCacheMaintenanceOps = true;
  return FetchHostFeatures(Features, SupportsCacheMaintenanceOps, CacheTypeReg, MainIDReg);
}
} // namespace FEX

View File

@ -1,7 +1,9 @@
// SPDX-License-Identifier: MIT
#pragma once
#include <FEXCore/Core/HostFeatures.h>
#include "aarch64/cpu-aarch64.h"
namespace FEX {
FEXCore::HostFeatures FetchHostFeatures(vixl::CPUFeatures Features, bool SupportsCacheMaintenanceOps, uint64_t CTR, uint64_t MIDR);
FEXCore::HostFeatures FetchHostFeatures();
}

View File

@ -25,7 +25,6 @@ $end_info$
#include <FEXCore/Utils/TypeDefines.h>
#include "Common/Config.h"
#include "Common/HostFeatures.h"
#include "Common/InvalidationTracker.h"
#include "Common/TSOHandlerConfig.h"
#include "Common/CPUFeatures.h"
@ -532,8 +531,9 @@ NTSTATUS ProcessInit() {
SyscallHandler = fextl::make_unique<ECSyscallHandler>();
Exception::HandlerConfig.emplace();
const auto NtDll = GetModuleHandle("ntdll.dll");
{
auto HostFeatures = FEX::FetchHostFeatures();
auto HostFeatures = FEX::Windows::CPUFeatures::FetchHostFeatures(!!GetProcAddress(NtDll, "__wine_get_version"));
CTX = FEXCore::Context::Context::CreateNewContext(HostFeatures);
}
@ -548,7 +548,6 @@ NTSTATUS ProcessInit() {
FillNtDllLUTs();
PatchCallChecker();
const auto NtDll = GetModuleHandle("ntdll.dll");
const uintptr_t KiUserExceptionDispatcherFFS = reinterpret_cast<uintptr_t>(GetProcAddress(NtDll, "KiUserExceptionDispatcher"));
Exception::KiUserExceptionDispatcher = NtDllRedirectionLUT[KiUserExceptionDispatcherFFS - NtDllBase] + NtDllBase;
const auto WineSyscallDispatcherPtr = reinterpret_cast<void**>(GetProcAddress(NtDll, "__wine_syscall_dispatcher"));

View File

@ -1,9 +1,64 @@
// SPDX-License-Identifier: MIT
#include <FEXCore/Core/Context.h>
#include <FEXCore/Core/HostFeatures.h>
#include <FEXCore/fextl/fmt.h>
#include <windows.h>
#include "aarch64/cpu-aarch64.h"
#include "CPUFeatures.h"
namespace {
// Opens HKEY_LOCAL_MACHINE\Hardware\Description\System\CentralProcessor\<Idx> with KEY_READ access.
// Returns the opened key, or nullptr when the open call reports a non-zero result.
HKEY OpenProcessorKey(uint32_t Idx) {
  const auto KeyPath = fextl::fmt::format("Hardware\\Description\\System\\CentralProcessor\\{}", Idx);

  HKEY Handle;
  const auto OpenResult = RegOpenKeyExA(HKEY_LOCAL_MACHINE, KeyPath.c_str(), 0, KEY_READ, &Handle);
  if (OpenResult != 0) {
    return nullptr;
  }
  return Handle;
}
// Reads the registry value called Name under Key into a 64-bit integer.
// The result code of the query is intentionally not inspected here: the zero
// initialiser is what the caller receives if the query writes nothing.
uint64_t ReadRegU64(HKEY Key, const char* Name) {
  uint64_t Result {};
  DWORD ResultSize = sizeof(Result);
  RegGetValueA(Key, nullptr, Name, 0, nullptr, &Result, &ResultSize);
  return Result;
}
// Reads the registry value Name under Key, wraps the raw 64-bit value in RegType
// and merges the CPU features that RegType reports into Features.
template<typename RegType>
void AddRegFeatures(vixl::CPUFeatures& Features, HKEY Key, const char* Name) {
  const RegType Register(ReadRegU64(Key, Name));
  Features.Combine(Register.GetCPUFeatures());
}
} // namespace
namespace FEX::Windows {
/// Builds the host feature description from the values stored under the registry key of processor 0.
/// Aborts through ERROR_AND_DIE_FMT when that key cannot be opened.
/// @param IsWine Negated and forwarded as the SupportsCacheMaintenanceOps argument.
FEXCore::HostFeatures CPUFeatures::FetchHostFeatures(bool IsWine) {
  HKEY ProcessorKey = OpenProcessorKey(0);
  if (ProcessorKey == nullptr) {
    ERROR_AND_DIE_FMT("Couldn't detect CPU features");
  }

  // Each "CP xxxx" registry value is decoded by the vixl ID register class named alongside it.
  vixl::CPUFeatures Detected {};
  AddRegFeatures<vixl::aarch64::AA64PFR0>(Detected, ProcessorKey, "CP 4020");
  AddRegFeatures<vixl::aarch64::AA64PFR1>(Detected, ProcessorKey, "CP 4021");
  AddRegFeatures<vixl::aarch64::AA64ZFR0>(Detected, ProcessorKey, "CP 4024");
  AddRegFeatures<vixl::aarch64::AA64SMFR0>(Detected, ProcessorKey, "CP 4025");
  AddRegFeatures<vixl::aarch64::AA64ISAR0>(Detected, ProcessorKey, "CP 4030");
  AddRegFeatures<vixl::aarch64::AA64ISAR1>(Detected, ProcessorKey, "CP 4031");
  AddRegFeatures<vixl::aarch64::AA64ISAR2>(Detected, ProcessorKey, "CP 4032");
  AddRegFeatures<vixl::aarch64::AA64MMFR0>(Detected, ProcessorKey, "CP 4038");
  AddRegFeatures<vixl::aarch64::AA64MMFR1>(Detected, ProcessorKey, "CP 4039");
  AddRegFeatures<vixl::aarch64::AA64MMFR2>(Detected, ProcessorKey, "CP 403A");

  // Raw register values handed to the shared feature setup as CTR and MIDR.
  const uint64_t CTR = ReadRegU64(ProcessorKey, "CP 5801");
  const uint64_t MIDR = ReadRegU64(ProcessorKey, "CP 4000");

  // Everything needed has been read; release the key before building the result.
  RegCloseKey(ProcessorKey);

  const bool SupportsCacheMaintenanceOps = !IsWine;
  return FEX::FetchHostFeatures(Detected, SupportsCacheMaintenanceOps, CTR, MIDR);
}
CPUFeatures::CPUFeatures(FEXCore::Context::Context& CTX) {
#ifdef _M_ARM_64EC
// Report as a 64-bit host for ARM64EC.

View File

@ -4,6 +4,8 @@
#include <windef.h>
#include <winternl.h>
#include <Common/HostFeatures.h>
namespace FEXCore::Context {
class Context;
}
@ -14,6 +16,8 @@ class Context;
namespace FEX::Windows {
class CPUFeatures {
public:
static FEXCore::HostFeatures FetchHostFeatures(bool IsWine);
CPUFeatures(FEXCore::Context::Context& CTX);
/**

View File

@ -122,3 +122,65 @@ DLLEXPORT_FUNC(DWORD, GetLastError, ()) {
DLLEXPORT_FUNC(void, SetLastError, (DWORD dwErrCode)) {
GetCurrentTEB()->LastErrorValue = dwErrCode;
}
// Minimal RegOpenKeyExA built on NtOpenKeyEx. Only HKEY_LOCAL_MACHINE is handled as the parent key;
// any other parent goes to UNIMPLEMENTED(). The function first opens \Registry\Machine, then opens
// lpSubKey relative to that handle, and reports the outcome as a DOS error code.
DLLEXPORT_FUNC(LONG, RegOpenKeyExA, (HKEY hKey, LPCSTR lpSubKey, DWORD ulOptions, REGSAM samDesired, PHKEY phkResult)) {
  if (hKey != HKEY_LOCAL_MACHINE) {
    UNIMPLEMENTED();
  }

  // Step 1: open the machine hive root.
  ScopedUnicodeString MachinePath {"\\Registry\\Machine"};
  OBJECT_ATTRIBUTES RootAttrs;
  InitializeObjectAttributes(&RootAttrs, &*MachinePath, OBJ_CASE_INSENSITIVE, nullptr, nullptr);

  HKEY MachineRoot;
  const NTSTATUS RootStatus = NtOpenKeyEx(reinterpret_cast<HANDLE*>(&MachineRoot), MAXIMUM_ALLOWED, &RootAttrs, 0);
  if (RootStatus != 0) {
    return RtlNtStatusToDosError(RootStatus);
  }

  // Step 2: open the requested subkey with the root handle as its parent directory.
  ScopedUnicodeString RelativePath {lpSubKey};
  OBJECT_ATTRIBUTES SubKeyAttrs;
  InitializeObjectAttributes(&SubKeyAttrs, &*RelativePath, OBJ_CASE_INSENSITIVE, reinterpret_cast<HANDLE>(MachineRoot), nullptr);
  const NTSTATUS SubKeyStatus = NtOpenKeyEx(reinterpret_cast<HANDLE*>(phkResult), samDesired, &SubKeyAttrs, ulOptions);

  // The root handle was only needed for the relative open above.
  NtClose(MachineRoot);
  return RtlNtStatusToDosError(SubKeyStatus);
}
// Minimal RegGetValueA built on NtQueryValueKey.
//
// Only the form with lpSubKey == nullptr and dwFlags == 0 is handled; anything else goes to
// UNIMPLEMENTED(). The value is queried into a fixed 512-byte stack buffer.
//
//   pdwType - optional; receives the value's type.
//   pvData  - optional; receives the value's bytes. Requires pcbData.
//   pcbData - on input the capacity of pvData, on output the value's length in bytes.
//
// Returns ERROR_SUCCESS, ERROR_MORE_DATA when pvData is too small (with *pcbData set to the
// required length), ERROR_INVALID_PARAMETER when pvData is given without pcbData, or the DOS
// translation of the NtQueryValueKey failure status.
DLLEXPORT_FUNC(LONG, RegGetValueA, (HKEY hKey, LPCSTR lpSubKey, LPCSTR lpValue, DWORD dwFlags, LPDWORD pdwType, PVOID pvData, LPDWORD pcbData)) {
  if (lpSubKey || dwFlags) {
    UNIMPLEMENTED();
  }

  // A destination buffer without a size cannot be bounds-checked. Reject it here rather than
  // dereferencing a null pcbData further down.
  if (pvData && !pcbData) {
    return ERROR_INVALID_PARAMETER;
  }

  ScopedUnicodeString ValueName {lpValue};

  // The union gives the information header room for up to 512 bytes of header plus payload.
  union {
    KEY_VALUE_PARTIAL_INFORMATION Info;
    uint8_t Buf[512];
  } Data;

  ULONG OutSize;
  NTSTATUS Status = NtQueryValueKey(hKey, &*ValueName, KeyValuePartialInformation, &Data.Info, sizeof(Data), &OutSize);
  if (Status) {
    return RtlNtStatusToDosError(Status);
  }

  if (pdwType) {
    *pdwType = Data.Info.Type;
  }

  if (pvData) {
    // pcbData is known to be non-null here because of the parameter check above.
    if (*pcbData < Data.Info.DataLength) {
      *pcbData = Data.Info.DataLength;
      return ERROR_MORE_DATA;
    }

    memcpy(pvData, &Data.Info.Data, Data.Info.DataLength);
  }

  if (pcbData) {
    *pcbData = Data.Info.DataLength;
  }

  return ERROR_SUCCESS;
}
// Closes a key handle through NtClose and reports the outcome as a DOS error code.
DLLEXPORT_FUNC(LONG, RegCloseKey, (HKEY hKey)) {
  const NTSTATUS CloseStatus = NtClose(hKey);
  return RtlNtStatusToDosError(CloseStatus);
}

View File

@ -26,7 +26,6 @@ $end_info$
#include <FEXCore/Utils/TypeDefines.h>
#include "Common/Config.h"
#include "Common/HostFeatures.h"
#include "Common/TSOHandlerConfig.h"
#include "Common/InvalidationTracker.h"
#include "Common/CPUFeatures.h"
@ -435,9 +434,10 @@ void BTCpuProcessInit() {
SignalDelegator = fextl::make_unique<FEX::DummyHandlers::DummySignalDelegator>();
SyscallHandler = fextl::make_unique<WowSyscallHandler>();
Context::HandlerConfig.emplace();
const auto NtDll = GetModuleHandle("ntdll.dll");
{
auto HostFeatures = FEX::FetchHostFeatures();
auto HostFeatures = FEX::Windows::CPUFeatures::FetchHostFeatures(!!GetProcAddress(NtDll, "__wine_get_version"));
CTX = FEXCore::Context::Context::CreateNewContext(HostFeatures);
}
@ -455,7 +455,7 @@ void BTCpuProcessInit() {
BridgeInstrs::Syscall = Addr;
BridgeInstrs::UnixCall = reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(Addr) + 2);
const auto Sym = GetProcAddress(GetModuleHandle("ntdll.dll"), "__wine_unix_call_dispatcher");
const auto Sym = GetProcAddress(NtDll, "__wine_unix_call_dispatcher");
if (Sym) {
WineUnixCall = *reinterpret_cast<decltype(WineUnixCall)*>(Sym);
}

View File

@ -415,6 +415,22 @@ typedef enum _MEMORY_INFORMATION_CLASS {
#endif
} MEMORY_INFORMATION_CLASS;
// Information class selector taken by NtQueryValueKey.
// No enumerator has an explicit initialiser, so the values are consecutive starting at zero
// and the declaration order must not be changed.
typedef enum _KEY_VALUE_INFORMATION_CLASS {
KeyValueBasicInformation,
KeyValueFullInformation,
// Paired with KEY_VALUE_PARTIAL_INFORMATION as the output buffer layout.
KeyValuePartialInformation,
KeyValueFullInformationAlign64,
KeyValuePartialInformationAlign64,
KeyValueLayerInformation,
} KEY_VALUE_INFORMATION_CLASS;
// Output layout used with the KeyValuePartialInformation class of NtQueryValueKey.
typedef struct _KEY_VALUE_PARTIAL_INFORMATION {
// NOTE(review): not read by any code visible here; meaning unconfirmed.
ULONG TitleIndex;
// Type of the queried value.
ULONG Type;
// Number of payload bytes stored starting at Data.
ULONG DataLength;
// First byte of the payload. Callers provide a buffer larger than this struct so that
// DataLength bytes can follow; presumably a variable-length trailing array — confirm against the NT headers.
UCHAR Data[1];
} KEY_VALUE_PARTIAL_INFORMATION, *PKEY_VALUE_PARTIAL_INFORMATION;
NTSTATUS WINAPIV DbgPrint(LPCSTR fmt, ...);
NTSTATUS WINAPI LdrDisableThreadCalloutsForDll(HMODULE);
NTSTATUS WINAPI LdrGetDllHandle(LPCWSTR, ULONG, const UNICODE_STRING*, HMODULE*);
@ -426,8 +442,10 @@ NTSTATUS WINAPI NtFlushInstructionCache(HANDLE, LPCVOID, SIZE_T);
NTSTATUS WINAPI NtFreeVirtualMemory(HANDLE, PVOID*, SIZE_T*, ULONG);
NTSTATUS WINAPI NtGetContextThread(HANDLE, CONTEXT*);
ULONG WINAPI NtGetCurrentProcessorNumber(void);
NTSTATUS WINAPI NtOpenKeyEx(PHANDLE, ACCESS_MASK, const OBJECT_ATTRIBUTES*, ULONG);
NTSTATUS WINAPI NtProtectVirtualMemory(HANDLE, PVOID*, SIZE_T*, ULONG, ULONG*);
NTSTATUS WINAPI NtQueryAttributesFile(const OBJECT_ATTRIBUTES*, FILE_BASIC_INFORMATION*);
NTSTATUS WINAPI NtQueryValueKey(HANDLE, const UNICODE_STRING*, KEY_VALUE_INFORMATION_CLASS, void*, DWORD, DWORD*);
NTSTATUS WINAPI NtQueryVirtualMemory(HANDLE, LPCVOID, MEMORY_INFORMATION_CLASS, PVOID, SIZE_T, SIZE_T*);
NTSTATUS WINAPI NtReadFile(HANDLE, HANDLE, PIO_APC_ROUTINE, PVOID, PIO_STATUS_BLOCK, PVOID, ULONG, PLARGE_INTEGER, PULONG);
NTSTATUS WINAPI NtSetContextThread(HANDLE, const CONTEXT*);