mirror of
https://github.com/FEX-Emu/FEX.git
synced 2024-11-23 14:40:14 +00:00
Works around Clang failing to identify new Kryo CPUs
Some of the newer CPU cores in LLVM's source claim to be a Cortex-A73, which means they become limited to an ARMv8.0 feature set. This is what you get if you compile FEX with -mcpu=native. To work around this issue, we manually parse /proc/cpuinfo ourselves and pull out the CPU type to pass to clang directly. This also fixes the issue that we were using -march on AArch64, which no longer works on newer clang versions; we instead need to use -mcpu or -mtune. This should improve the performance of all atomic operations outside of the JITs, where they were turning into load/store exclusive pairs.
This commit is contained in:
parent
fa542a5b9d
commit
921867de7e
@ -143,6 +143,22 @@ if(COMPILER_SUPPORTS_MARCH_NATIVE)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native")
|
||||
endif()
|
||||
|
||||
if(_M_ARM_64)
  # Due to an oversight in LLVM, any reasonably new Kryo CPU is declared to be
  # ARMv8.0 only. Manually detect newer CPU revisions until clang and LLVM fix
  # their bug.
  # The helper script is expected to print either a supported CPU name or
  # 'native'.
  # Additionally, -march doesn't work under AArch64+Clang, so -mcpu or -mtune
  # has to be used instead.
  execute_process(
    COMMAND python3 "${PROJECT_SOURCE_DIR}/Scripts/aarch64_fit_native.py" "/proc/cpuinfo"
    OUTPUT_VARIABLE AARCH64_CPU
    RESULT_VARIABLE AARCH64_CPU_RESULT)

  # Quote the expansion: an empty output would otherwise drop the argument
  # entirely and make string(STRIP) fail with a configure-time error.
  string(STRIP "${AARCH64_CPU}" AARCH64_CPU)

  # If the script could not be run, failed, or printed nothing, fall back to
  # letting the compiler pick the CPU itself.
  if(NOT "${AARCH64_CPU_RESULT}" STREQUAL "0" OR "${AARCH64_CPU}" STREQUAL "")
    set(AARCH64_CPU "native")
  endif()

  # Only add the flag when the compiler actually accepts this CPU name.
  check_cxx_compiler_flag("-mcpu=${AARCH64_CPU}" COMPILER_SUPPORTS_CPU_TYPE)
  if(COMPILER_SUPPORTS_CPU_TYPE)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mcpu=${AARCH64_CPU}")
  endif()
endif()

add_compile_options(-Wall)

add_subdirectory(External/FEXCore)
|
||||
|
68
Scripts/aarch64_fit_native.py
Normal file
68
Scripts/aarch64_fit_native.py
Normal file
@ -0,0 +1,68 @@
|
||||
#!/usr/bin/python3
|
||||
import re
|
||||
import sys
|
||||
import subprocess
|
||||
|
||||
# Lookup tables keyed by the (CPU implementer, CPU part) pair read from the
# cpuinfo file; the value is the CPU name that gets printed for the compiler.
#
# Order this list from oldest to newest
# try not to list something newer than our minimum compiler supported version
BigCoreIDs = {
    # ARM
    (0x41, 0xd07): "cortex-a57",
    (0x41, 0xd08): "cortex-a72",
    (0x41, 0xd09): "cortex-a73",
    (0x41, 0xd0a): "cortex-a75",
    (0x41, 0xd0b): "cortex-a76",
    (0x41, 0xd0d): "cortex-a77",
    (0x41, 0xd41): "cortex-a78",
    (0x41, 0xd44): "cortex-x1",
    (0x41, 0xd0c): "neoverse-n1",
    (0x41, 0xd49): "neoverse-n2",
    # Nvidia
    (0x4e, 0x004): "carmel",  # Carmel
    # Qualcomm
    (0x51, 0x800): "cortex-a73",  # Kryo 2xx Gold
    (0x51, 0x802): "cortex-a75",  # Kryo 3xx Gold
    (0x51, 0x804): "cortex-a76",  # Kryo 4xx Gold
}

# Not consulted for the printed result; only the big core is reported.
LittleCoreIDs = {
    # ARM
    (0x41, 0xd04): "cortex-a35",
    (0x41, 0xd03): "cortex-a53",
    (0x41, 0xd05): "cortex-a55",

    # Qualcomm
    (0x51, 0x801): "cortex-a53",  # Kryo 2xx Silver
    (0x51, 0x803): "cortex-a55",  # Kryo 3xx Silver
    (0x51, 0x805): "cortex-a55",  # Kryo 4xx/5xx Silver
}

# Name printed whenever no better answer can be determined.
FALLBACK_CPU = "native"

# First hexadecimal literal (e.g. "0x41") on a cpuinfo line.
_HEX_VALUE = re.compile(r'0x[0-9A-F]+', re.I)


def _parse_hex_field(line):
    """Return the first 0x-prefixed number on `line` as an int, or None."""
    found = _HEX_VALUE.search(line)
    if found is None:
        return None
    return int(found.group(0), 16)


def read_core_ids(cpuinfo_path):
    """Collect one (implementer, part) tuple per "CPU part" line in the file.

    The implementer is whatever "CPU implementer" line was seen most recently
    (0 if none was seen yet). Lines without a parsable hex value are skipped.
    An unreadable file yields whatever was gathered so far (usually nothing)
    instead of raising, so the caller can still print a fallback.
    """
    cores = []
    implementer = 0
    try:
        with open(cpuinfo_path) as cpuinfo_file:
            for raw_line in cpuinfo_file:
                line = raw_line.strip()
                if "CPU implementer" in line:
                    value = _parse_hex_field(line)
                    if value is not None:
                        implementer = value
                if "CPU part" in line:
                    part = _parse_hex_field(line)
                    if part is not None:
                        cores.append((implementer, part))
    except OSError:
        # Missing or unreadable cpuinfo (e.g. not running on Linux).
        pass
    return cores


def select_core(core_ids, table):
    """Return the table entry for the last listed core that `table` knows.

    Falls back to FALLBACK_CPU when none of `core_ids` is in `table`.
    """
    chosen = FALLBACK_CPU
    for core in core_ids:
        name = table.get(core)
        if name:
            chosen = name
    return chosen


def main(argv):
    """Print the big-core CPU name for the cpuinfo file given in argv[1].

    Args: </proc/cpuinfo file>
    Always prints exactly one line; "native" is printed when the argument is
    missing, the file cannot be read, or no known big core is listed.
    """
    if len(argv) < 2:
        print(FALLBACK_CPU)
        return

    # We only want the big core output
    print(select_core(read_core_ids(argv[1]), BigCoreIDs))


if __name__ == "__main__":
    main(sys.argv)
|
Loading…
Reference in New Issue
Block a user