[X86] Support -march=gracemont

gracemont has some different tuning features from alderlake.

Reviewed By: RKSimon

Differential Revision: https://reviews.llvm.org/D158046
This commit is contained in:
Freddy Ye 2023-08-21 08:48:45 +08:00
parent a695be7c28
commit 6acff5390d
14 changed files with 49 additions and 4 deletions

View File

@ -526,6 +526,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
case CK_Tremont:
defineCPUMacros(Builder, "tremont");
break;
// Gracemont and later atom-cores use P-core cpu macros.
case CK_Gracemont:
case CK_Nehalem:
case CK_Westmere:
case CK_SandyBridge:
@ -1420,6 +1422,7 @@ std::optional<unsigned> X86TargetInfo::getCPUCacheLineSize() const {
case CK_Goldmont:
case CK_GoldmontPlus:
case CK_Tremont:
case CK_Gracemont:
case CK_Westmere:
case CK_SandyBridge:

View File

@ -83,3 +83,4 @@ ATTR(cpu_specific(graniterapids_d)) void CPU34(void){}
ATTR(cpu_specific(arrowlake)) void CPU35(void){}
ATTR(cpu_specific(arrowlake_s)) void CPU36(void){}
ATTR(cpu_specific(lunarlake)) void CPU37(void){}
ATTR(cpu_specific(gracemont)) void CPU38(void){}

View File

@ -25,6 +25,7 @@ int __attribute__((target("arch=graniterapids-d"))) foo(void) {return 20;}
int __attribute__((target("arch=arrowlake"))) foo(void) {return 21;}
int __attribute__((target("arch=arrowlake-s"))) foo(void) {return 22;}
int __attribute__((target("arch=lunarlake"))) foo(void) {return 23;}
int __attribute__((target("arch=gracemont"))) foo(void) {return 24;}
int __attribute__((target("default"))) foo(void) { return 2; }
int bar(void) {
@ -179,6 +180,8 @@ void calls_pr50025c(void) { pr50025c(); }
// LINUX: ret i32 22
// LINUX: define{{.*}} i32 @foo.arch_lunarlake()
// LINUX: ret i32 23
// LINUX: define{{.*}} i32 @foo.arch_gracemont()
// LINUX: ret i32 24
// LINUX: define{{.*}} i32 @foo()
// LINUX: ret i32 2
// LINUX: define{{.*}} i32 @bar()
@ -230,6 +233,8 @@ void calls_pr50025c(void) { pr50025c(); }
// WINDOWS: ret i32 22
// WINDOWS: define dso_local i32 @foo.arch_lunarlake()
// WINDOWS: ret i32 23
// WINDOWS: define dso_local i32 @foo.arch_gracemont()
// WINDOWS: ret i32 24
// WINDOWS: define dso_local i32 @foo()
// WINDOWS: ret i32 2
// WINDOWS: define dso_local i32 @bar()

View File

@ -136,6 +136,7 @@ void verifycpustrings(void) {
(void)__builtin_cpu_is("tigerlake");
(void)__builtin_cpu_is("sapphirerapids");
(void)__builtin_cpu_is("tremont");
(void)__builtin_cpu_is("gracemont");
(void)__builtin_cpu_is("westmere");
(void)__builtin_cpu_is("znver1");
(void)__builtin_cpu_is("znver2");

View File

@ -108,6 +108,10 @@
// RUN: | FileCheck %s -check-prefix=lunarlake
// lunarlake: "-target-cpu" "lunarlake"
//
// RUN: %clang -target x86_64-unknown-unknown -c -### %s -march=gracemont 2>&1 \
// RUN: | FileCheck %s -check-prefix=gracemont
// gracemont: "-target-cpu" "gracemont"
//
// RUN: %clang -target x86_64-unknown-unknown -c -### %s -march=lakemont 2>&1 \
// RUN: | FileCheck %s -check-prefix=lakemont
// lakemont: "-target-cpu" "lakemont"

View File

@ -13,19 +13,19 @@
// RUN: not %clang_cc1 -triple i386--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix X86
// X86: error: unknown target CPU 'not-a-cpu'
// X86-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, arrowlake, arrowlake-s, lunarlake, sierraforest, grandridge, graniterapids, graniterapids-d, emeraldrapids, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, x86-64, x86-64-v2, x86-64-v3, x86-64-v4, geode{{$}}
// X86-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, arrowlake, arrowlake-s, lunarlake, gracemont, sierraforest, grandridge, graniterapids, graniterapids-d, emeraldrapids, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, x86-64, x86-64-v2, x86-64-v3, x86-64-v4, geode{{$}}
// RUN: not %clang_cc1 -triple x86_64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix X86_64
// X86_64: error: unknown target CPU 'not-a-cpu'
// X86_64-NEXT: note: valid target CPU values are: nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, arrowlake, arrowlake-s, lunarlake, sierraforest, grandridge, graniterapids, graniterapids-d, emeraldrapids, knl, knm, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, x86-64, x86-64-v2, x86-64-v3, x86-64-v4{{$}}
// X86_64-NEXT: note: valid target CPU values are: nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, arrowlake, arrowlake-s, lunarlake, gracemont, sierraforest, grandridge, graniterapids, graniterapids-d, emeraldrapids, knl, knm, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, x86-64, x86-64-v2, x86-64-v3, x86-64-v4{{$}}
// RUN: not %clang_cc1 -triple i386--- -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_X86
// TUNE_X86: error: unknown target CPU 'not-a-cpu'
// TUNE_X86-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, arrowlake, arrowlake-s, lunarlake, sierraforest, grandridge, graniterapids, graniterapids-d, emeraldrapids, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, x86-64, geode{{$}}
// TUNE_X86-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, arrowlake, arrowlake-s, lunarlake, gracemont, sierraforest, grandridge, graniterapids, graniterapids-d, emeraldrapids, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, x86-64, geode{{$}}
// RUN: not %clang_cc1 -triple x86_64--- -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_X86_64
// TUNE_X86_64: error: unknown target CPU 'not-a-cpu'
// TUNE_X86_64-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, arrowlake, arrowlake-s, lunarlake, sierraforest, grandridge, graniterapids, graniterapids-d, emeraldrapids, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, x86-64, geode{{$}}
// TUNE_X86_64-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, arrowlake, arrowlake-s, lunarlake, gracemont, sierraforest, grandridge, graniterapids, graniterapids-d, emeraldrapids, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, x86-64, geode{{$}}
// RUN: not %clang_cc1 -triple nvptx--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix NVPTX
// NVPTX: error: unknown target CPU 'not-a-cpu'

View File

@ -2003,6 +2003,9 @@
// RUN: %clang -march=meteorlake -m32 -E -dM %s -o - 2>&1 \
// RUN: -target i386-unknown-linux \
// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ADL_M32
// RUN: %clang -march=gracemont -m32 -E -dM %s -o - 2>&1 \
// RUN: -target i386-unknown-linux \
// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ADL_M32
// CHECK_ADL_M32: #define __ADX__ 1
// CHECK_ADL_M32: #define __AES__ 1
// CHECK_ADL_M32: #define __AVX2__ 1
@ -2071,6 +2074,9 @@
// RUN: %clang -march=meteorlake -m64 -E -dM %s -o - 2>&1 \
// RUN: -target i386-unknown-linux \
// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ADL_M64
// RUN: %clang -march=gracemont -m64 -E -dM %s -o - 2>&1 \
// RUN: -target i386-unknown-linux \
// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ADL_M64
// CHECK_ADL_M64: #define __ADX__ 1
// CHECK_ADL_M64: #define __AES__ 1
// CHECK_ADL_M64: #define __AVX2__ 1

View File

@ -456,6 +456,8 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
// Meteorlake:
case 0xaa:
case 0xac:
// Gracemont:
case 0xbe:
CPU = "alderlake";
*Type = INTEL_COREI7;
*Subtype = INTEL_COREI7_ALDERLAKE;

View File

@ -108,6 +108,7 @@ X86_CPU_SUBTYPE_ALIAS(INTEL_COREI7_ALDERLAKE, "raptorlake")
X86_CPU_SUBTYPE_ALIAS(INTEL_COREI7_ALDERLAKE, "meteorlake")
X86_CPU_SUBTYPE_ALIAS(INTEL_COREI7_SAPPHIRERAPIDS, "emeraldrapids")
X86_CPU_SUBTYPE_ALIAS(INTEL_COREI7_ARROWLAKE_S,"lunarlake")
X86_CPU_SUBTYPE_ALIAS(INTEL_COREI7_ALDERLAKE, "gracemont")
#undef X86_CPU_SUBTYPE_ALIAS
#undef X86_CPU_SUBTYPE

View File

@ -87,6 +87,7 @@ enum CPUKind {
CK_Goldmont,
CK_GoldmontPlus,
CK_Tremont,
CK_Gracemont,
CK_Nehalem,
CK_Westmere,
CK_SandyBridge,

View File

@ -1207,6 +1207,18 @@ def ProcessorFeatures {
list<SubtargetFeature> ADLFeatures =
!listconcat(TRMFeatures, ADLAdditionalFeatures);
// Gracemont
list<SubtargetFeature> GRTTuning = [TuningMacroFusion,
TuningSlow3OpsLEA,
TuningSlowDivide32,
TuningSlowDivide64,
TuningFastScalarFSQRT,
TuningFastVectorFSQRT,
TuningFast15ByteNOP,
TuningFastVariablePerLaneShuffle,
TuningPOPCNTFalseDeps,
TuningInsertVZEROUPPER];
// Sierraforest
list<SubtargetFeature> SRFAdditionalFeatures = [FeatureCMPCCXADD,
FeatureAVXIFMA,
@ -1728,6 +1740,9 @@ def : ProcModel<"sapphirerapids", SapphireRapidsModel,
ProcessorFeatures.SPRFeatures, ProcessorFeatures.SPRTuning>;
def : ProcModel<"alderlake", AlderlakePModel,
ProcessorFeatures.ADLFeatures, ProcessorFeatures.ADLTuning>;
// FIXME: Use Gracemont Schedule Model when it is ready.
def : ProcModel<"gracemont", AlderlakePModel,
ProcessorFeatures.ADLFeatures, ProcessorFeatures.GRTTuning>;
def : ProcModel<"raptorlake", AlderlakePModel,
ProcessorFeatures.ADLFeatures, ProcessorFeatures.ADLTuning>;
def : ProcModel<"meteorlake", AlderlakePModel,

View File

@ -822,6 +822,8 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
// Alderlake:
case 0x97:
case 0x9a:
// Gracemont
case 0xbe:
// Raptorlake:
case 0xb7:
case 0xba:

View File

@ -430,6 +430,8 @@ constexpr ProcInfo Processors[] = {
{ {"arrowlake_s"}, CK_ArrowlakeS, FEATURE_AVX2, FeaturesArrowlakeS, 'p', true },
// Lunarlake microarchitecture based processors.
{ {"lunarlake"}, CK_Lunarlake, FEATURE_AVX2, FeaturesArrowlakeS, 'p', false },
// Gracemont microarchitecture based processors.
{ {"gracemont"}, CK_Gracemont, FEATURE_AVX2, FeaturesAlderlake, 'p', false },
// Sierraforest microarchitecture based processors.
{ {"sierraforest"}, CK_Sierraforest, FEATURE_AVX2, FeaturesSierraforest, 'p', false },
// Grandridge microarchitecture based processors.

View File

@ -36,6 +36,7 @@
; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=arrowlake-s 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=arrowlake_s 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=lunarlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=gracemont 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=nocona 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=core2 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
@ -98,6 +99,7 @@
; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=arrowlake-s 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=arrowlake_s 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=lunarlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=gracemont 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
define void @foo() {
ret void