Works around Clang failing to identify new Kryo CPUs

Some of the newer CPU cores in LLVM's source claim to be a Cortex-A73, which means they become limited to an ARMv8.0 feature set. This is what you get if you compile FEX with -mcpu=native. To work around this issue, we manually parse /proc/cpuinfo ourselves and pull out the CPU type to pass to clang directly. This also fixes the issue that we were using -march on AArch64, which no longer works on newer clang versions; we instead need to use -mcpu or -mtune. This should improve all atomic op performance outside of the JITs, where atomics were turning into load/store exclusive pairs.
2024-11-23 14:40:14 +00:00 · 2021-01-19 03:21:10 -08:00 · 2021-01-19 03:21:10 -08:00 · 921867de7e
commit 921867de7e
parent fa542a5b9d
2 changed files with 84 additions and 0 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -143,6 +143,22 @@ if(COMPILER_SUPPORTS_MARCH_NATIVE)
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native")
 endif()
 if(_M_ARM_64)
  # Due to an oversight in llvm, it declares any reasonably new Kryo CPU to only be ARMv8.0
  # Manually detect newer CPU revisions until clang and llvm fixes their bug
  # This script will either provide a supported CPU or 'native'
  # Additionally -march doesn't work under AArch64+Clang, so you have to use -mcpu or -mtune
  execute_process(COMMAND python3 "${PROJECT_SOURCE_DIR}/Scripts/aarch64_fit_native.py" "/proc/cpuinfo"
    RESULT_VARIABLE AARCH64_CPU_RESULT
    OUTPUT_VARIABLE AARCH64_CPU)
  # Quoted on purpose: if python3 is missing or the script fails, the output is
  # empty and an unquoted expansion would drop the argument and abort configure.
  string(STRIP "${AARCH64_CPU}" AARCH64_CPU)
  if(NOT "${AARCH64_CPU_RESULT}" STREQUAL "0" OR "${AARCH64_CPU}" STREQUAL "")
    # Detection failed; fall back to letting the compiler identify the host CPU.
    set(AARCH64_CPU "native")
  endif()
  # Only add the flag when the compiler actually accepts this CPU name.
  check_cxx_compiler_flag("-mcpu=${AARCH64_CPU}" COMPILER_SUPPORTS_CPU_TYPE)
  if(COMPILER_SUPPORTS_CPU_TYPE)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mcpu=${AARCH64_CPU}")
  endif()
 endif()
 # Add -Wall to the compile options of this directory and the directories below it.
 add_compile_options(-Wall)
 # Pull the External/FEXCore directory into the build.
 add_subdirectory(External/FEXCore)
--- a/Scripts/aarch64_fit_native.py
+++ b/Scripts/aarch64_fit_native.py
@ -0,0 +1,68 @@
 #!/usr/bin/python3
 import re
 import sys
 import subprocess
 # Maps (CPU implementer, CPU part) pairs, as reported in cpuinfo, to the
 # clang CPU name to use for that big core.
 # Keep the entries ordered from oldest to newest, and avoid listing a CPU
 # name that is newer than what our minimum supported compiler understands.
 BigCoreIDs = {
        # ARM
        (0x41, 0xd07): "cortex-a57",
        (0x41, 0xd08): "cortex-a72",
        (0x41, 0xd09): "cortex-a73",
        (0x41, 0xd0a): "cortex-a75",
        (0x41, 0xd0b): "cortex-a76",
        (0x41, 0xd0d): "cortex-a77",
        (0x41, 0xd41): "cortex-a78",
        (0x41, 0xd44): "cortex-x1",
        (0x41, 0xd0c): "neoverse-n1",
        (0x41, 0xd49): "neoverse-n2",
        # Nvidia
        (0x4e, 0x004): "carmel",      # Carmel
        # Qualcomm (mapped to the ARM core name clang should target)
        (0x51, 0x800): "cortex-a73",  # Kryo 2xx Gold
        (0x51, 0x802): "cortex-a75",  # Kryo 3xx Gold
        (0x51, 0x804): "cortex-a76",  # Kryo 4xx Gold
 }
 # Maps (CPU implementer, CPU part) pairs, as reported in cpuinfo, to the
 # clang CPU name to use for that little core.
 LittleCoreIDs = {
        # ARM
        (0x41, 0xd04): "cortex-a35",
        (0x41, 0xd03): "cortex-a53",
        (0x41, 0xd05): "cortex-a55",
        # Qualcomm (mapped to the ARM core name clang should target)
        (0x51, 0x801): "cortex-a53",  # Kryo 2xx Silver
        (0x51, 0x803): "cortex-a55",  # Kryo 3xx Silver
        (0x51, 0x805): "cortex-a55",  # Kryo 4xx/5xx Silver
 }
 # Args: </proc/cpuinfo file>
 #
 # Prints the clang CPU name of the big core found in the given cpuinfo file.
 # 'native' is printed whenever no better answer is available (missing
 # argument, unreadable file, no recognised core), so stdout is never empty.
 if (len(sys.argv) < 2):
    print("native")
    sys.exit()

 # Matches the hexadecimal value on a "CPU implementer" / "CPU part" line
 hex_value = re.compile(r'0x[0-9A-F]+', re.I)

 # (implementer, part) pairs, one per "CPU part" line, in file order
 cpuinfo = []
 try:
    with open(sys.argv[1]) as cpuinfo_file:
        current_implementer = 0
        for line in cpuinfo_file:
            line = line.strip()
            if "CPU implementer" in line:
                found = hex_value.search(line)
                if found:
                    current_implementer = int(found.group(0), 16)
            if "CPU part" in line:
                found = hex_value.search(line)
                # Lines without a hex value are skipped instead of raising
                if found:
                    current_part = int(found.group(0), 16)
                    cpuinfo.append((current_implementer, current_part))
 except OSError:
    # File missing or unreadable: nothing was detected, 'native' is printed below
    cpuinfo = []

 largest_big = "native"
 largest_little = "native"
 # The last recognised core in file order wins for each category
 for core in cpuinfo:
    largest_big = BigCoreIDs.get(core, largest_big)
    largest_little = LittleCoreIDs.get(core, largest_little)

 # We only want the big core output
 print(largest_big)
 # print(largest_little)