Works around Clang failing to identify new Kryo CPUs

Some of the newer CPU cores in LLVM's source claim to be a Cortex-A73,
which means they become limited to an ARMv8.0 feature set.

This is what you get if you compile FEX with -mcpu=native

To work around this issue, manually parse /proc/cpuinfo ourselves and
pull out the CPU type to pass to clang directly.
This also fixes the issue that we were using -march on AArch64, which no
longer works on newer clang versions. We instead need to use -mcpu or
-mtune.

Should improve the performance of all atomic ops outside of the JITs, where
they were turning into load/store-exclusive pairs.
This commit is contained in:
Ryan Houdek 2021-01-19 03:21:10 -08:00
parent fa542a5b9d
commit 921867de7e
2 changed files with 84 additions and 0 deletions

View File

@ -143,6 +143,22 @@ if(COMPILER_SUPPORTS_MARCH_NATIVE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native")
endif() endif()
if(_M_ARM_64)
  # Due to an oversight in LLVM, any reasonably new Kryo CPU is declared to be
  # ARMv8.0 only. Detect newer CPU revisions ourselves until clang and LLVM fix
  # the bug.
  # The helper script prints either a supported CPU name or 'native'.
  # Additionally, -march doesn't work under AArch64+Clang, so -mcpu (or -mtune)
  # has to be used instead.
  execute_process(
    COMMAND python3 "${PROJECT_SOURCE_DIR}/Scripts/aarch64_fit_native.py" "/proc/cpuinfo"
    OUTPUT_VARIABLE AARCH64_CPU
    RESULT_VARIABLE AARCH64_CPU_RESULT)

  # Quote the expansion: an empty result would otherwise collapse the argument
  # list of string(STRIP) and abort configuration.
  string(STRIP "${AARCH64_CPU}" AARCH64_CPU)

  # If the helper could not be run, failed, or printed nothing, fall back to
  # letting the compiler detect the CPU itself.
  if(NOT AARCH64_CPU_RESULT EQUAL 0 OR "${AARCH64_CPU}" STREQUAL "")
    set(AARCH64_CPU "native")
  endif()

  # Only add the flag when the compiler actually recognises this CPU name.
  check_cxx_compiler_flag("-mcpu=${AARCH64_CPU}" COMPILER_SUPPORTS_CPU_TYPE)
  if(COMPILER_SUPPORTS_CPU_TYPE)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mcpu=${AARCH64_CPU}")
  endif()
endif()
add_compile_options(-Wall) add_compile_options(-Wall)
add_subdirectory(External/FEXCore) add_subdirectory(External/FEXCore)

View File

@ -0,0 +1,68 @@
#!/usr/bin/python3
import re
import sys
import subprocess
# Maps (CPU implementer, CPU part) pairs read from /proc/cpuinfo to the CPU
# name handed to the compiler for "big" cores.
# Keep the entries ordered from oldest to newest, and try not to list anything
# newer than what our minimum supported compiler version knows about.
BigCoreIDs = {
    # ARM
    (0x41, 0xd07): "cortex-a57",
    (0x41, 0xd08): "cortex-a72",
    (0x41, 0xd09): "cortex-a73",
    (0x41, 0xd0a): "cortex-a75",
    (0x41, 0xd0b): "cortex-a76",
    (0x41, 0xd0d): "cortex-a77",
    (0x41, 0xd41): "cortex-a78",
    (0x41, 0xd44): "cortex-x1",
    (0x41, 0xd0c): "neoverse-n1",
    (0x41, 0xd49): "neoverse-n2",
    # Nvidia
    (0x4e, 0x004): "carmel",  # Carmel
    # Qualcomm
    (0x51, 0x800): "cortex-a73",  # Kryo 2xx Gold
    (0x51, 0x802): "cortex-a75",  # Kryo 3xx Gold
    (0x51, 0x804): "cortex-a76",  # Kryo 4xx Gold
}
# Maps (CPU implementer, CPU part) pairs read from /proc/cpuinfo to the CPU
# name handed to the compiler for "little" cores.
LittleCoreIDs = {
    # ARM
    (0x41, 0xd04): "cortex-a35",
    (0x41, 0xd03): "cortex-a53",
    (0x41, 0xd05): "cortex-a55",
    # Qualcomm
    (0x51, 0x801): "cortex-a53",  # Kryo 2xx Silver
    (0x51, 0x803): "cortex-a55",  # Kryo 3xx Silver
    (0x51, 0x805): "cortex-a55",  # Kryo 4xx/5xx Silver
}
# Args: </proc/cpuinfo file>
if len(sys.argv) < 2:
    sys.exit()

# Collect one (implementer, part) pair per "CPU part" line, in file order.
# The implementer seen most recently is paired with each part number.
cpuinfo = []
try:
    with open(sys.argv[1]) as cpuinfo_file:
        current_implementer = 0
        current_part = 0
        for line in cpuinfo_file:
            line = line.strip()
            is_implementer = "CPU implementer" in line
            is_part = "CPU part" in line
            if not (is_implementer or is_part):
                continue

            # Both fields carry their value as a hex literal; skip a malformed
            # line instead of crashing on a missing match.
            hex_fields = re.findall(r'0x[0-9A-F]+', line, re.I)
            if not hex_fields:
                continue
            value = int(hex_fields[0], 16)

            if is_implementer:
                current_implementer = value
            if is_part:
                current_part = value
                cpuinfo.append((current_implementer, current_part))
except OSError as err:
    # An unreadable file leaves the core list empty, so the fallback name is
    # reported rather than a traceback and no output.
    print("Couldn't read '{}': {}".format(sys.argv[1], err), file=sys.stderr)
# Walk the detected cores in file order. The last core found in each table
# wins; "native" is kept when no core is recognised.
largest_big = "native"
largest_little = "native"
for core in cpuinfo:
    big_name = BigCoreIDs.get(core)
    if big_name:
        largest_big = big_name

    little_name = LittleCoreIDs.get(core)
    if little_name:
        largest_little = little_name

# Only the big core name is emitted; the little core result is computed but
# intentionally not printed.
print(largest_big)