[AArch64][SVE2] Add SVE2 target features to backend and TargetParser

Summary:
This patch adds the following features defined by Arm SVE2 architecture
extension:

  sve2, sve2-aes, sve2-sm4, sve2-sha3, bitperm

For existing CPUs these features are declared as unsupported to prevent
scheduler errors.

The specification can be found here:
https://developer.arm.com/docs/ddi0602/latest

Reviewers: SjoerdMeijer, sdesmalen, ostannard, rovka

Reviewed By: SjoerdMeijer, rovka

Subscribers: rovka, javed.absar, tschuett, kristof.beyls, kristina, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D61513

llvm-svn: 360573
This commit is contained in:
Cullen Rhodes 2019-05-13 10:10:24 +00:00
parent 711582147b
commit f378d68edf
19 changed files with 124 additions and 36 deletions

View File

@ -50,30 +50,35 @@ AARCH64_ARCH("armv8.5-a", ARMV8_5A, "8.5-A", "v8.5a",
#define AARCH64_ARCH_EXT_NAME(NAME, ID, FEATURE, NEGFEATURE)
#endif
// FIXME: This would be nicer were it tablegen
AARCH64_ARCH_EXT_NAME("invalid", AArch64::AEK_INVALID, nullptr, nullptr)
AARCH64_ARCH_EXT_NAME("none", AArch64::AEK_NONE, nullptr, nullptr)
AARCH64_ARCH_EXT_NAME("crc", AArch64::AEK_CRC, "+crc", "-crc")
AARCH64_ARCH_EXT_NAME("lse", AArch64::AEK_LSE, "+lse", "-lse")
AARCH64_ARCH_EXT_NAME("rdm", AArch64::AEK_RDM, "+rdm", "-rdm")
AARCH64_ARCH_EXT_NAME("crypto", AArch64::AEK_CRYPTO, "+crypto","-crypto")
AARCH64_ARCH_EXT_NAME("sm4", AArch64::AEK_SM4, "+sm4", "-sm4")
AARCH64_ARCH_EXT_NAME("sha3", AArch64::AEK_SHA3, "+sha3", "-sha3")
AARCH64_ARCH_EXT_NAME("sha2", AArch64::AEK_SHA2, "+sha2", "-sha2")
AARCH64_ARCH_EXT_NAME("aes", AArch64::AEK_AES, "+aes", "-aes")
AARCH64_ARCH_EXT_NAME("dotprod", AArch64::AEK_DOTPROD, "+dotprod","-dotprod")
AARCH64_ARCH_EXT_NAME("fp", AArch64::AEK_FP, "+fp-armv8", "-fp-armv8")
AARCH64_ARCH_EXT_NAME("simd", AArch64::AEK_SIMD, "+neon", "-neon")
AARCH64_ARCH_EXT_NAME("fp16", AArch64::AEK_FP16, "+fullfp16", "-fullfp16")
AARCH64_ARCH_EXT_NAME("fp16fml", AArch64::AEK_FP16FML, "+fp16fml", "-fp16fml")
AARCH64_ARCH_EXT_NAME("profile", AArch64::AEK_PROFILE, "+spe", "-spe")
AARCH64_ARCH_EXT_NAME("ras", AArch64::AEK_RAS, "+ras", "-ras")
AARCH64_ARCH_EXT_NAME("sve", AArch64::AEK_SVE, "+sve", "-sve")
AARCH64_ARCH_EXT_NAME("rcpc", AArch64::AEK_RCPC, "+rcpc", "-rcpc")
AARCH64_ARCH_EXT_NAME("rng", AArch64::AEK_RAND, "+rand", "-rand")
AARCH64_ARCH_EXT_NAME("memtag", AArch64::AEK_MTE, "+mte", "-mte")
AARCH64_ARCH_EXT_NAME("ssbs", AArch64::AEK_SSBS, "+ssbs", "-ssbs")
AARCH64_ARCH_EXT_NAME("sb", AArch64::AEK_SB, "+sb", "-sb")
AARCH64_ARCH_EXT_NAME("predres", AArch64::AEK_PREDRES, "+predres", "-predres")
AARCH64_ARCH_EXT_NAME("invalid", AArch64::AEK_INVALID, nullptr, nullptr)
AARCH64_ARCH_EXT_NAME("none", AArch64::AEK_NONE, nullptr, nullptr)
AARCH64_ARCH_EXT_NAME("crc", AArch64::AEK_CRC, "+crc", "-crc")
AARCH64_ARCH_EXT_NAME("lse", AArch64::AEK_LSE, "+lse", "-lse")
AARCH64_ARCH_EXT_NAME("rdm", AArch64::AEK_RDM, "+rdm", "-rdm")
AARCH64_ARCH_EXT_NAME("crypto", AArch64::AEK_CRYPTO, "+crypto","-crypto")
AARCH64_ARCH_EXT_NAME("sm4", AArch64::AEK_SM4, "+sm4", "-sm4")
AARCH64_ARCH_EXT_NAME("sha3", AArch64::AEK_SHA3, "+sha3", "-sha3")
AARCH64_ARCH_EXT_NAME("sha2", AArch64::AEK_SHA2, "+sha2", "-sha2")
AARCH64_ARCH_EXT_NAME("aes", AArch64::AEK_AES, "+aes", "-aes")
AARCH64_ARCH_EXT_NAME("dotprod", AArch64::AEK_DOTPROD, "+dotprod","-dotprod")
AARCH64_ARCH_EXT_NAME("fp", AArch64::AEK_FP, "+fp-armv8", "-fp-armv8")
AARCH64_ARCH_EXT_NAME("simd", AArch64::AEK_SIMD, "+neon", "-neon")
AARCH64_ARCH_EXT_NAME("fp16", AArch64::AEK_FP16, "+fullfp16", "-fullfp16")
AARCH64_ARCH_EXT_NAME("fp16fml", AArch64::AEK_FP16FML, "+fp16fml", "-fp16fml")
AARCH64_ARCH_EXT_NAME("profile", AArch64::AEK_PROFILE, "+spe", "-spe")
AARCH64_ARCH_EXT_NAME("ras", AArch64::AEK_RAS, "+ras", "-ras")
AARCH64_ARCH_EXT_NAME("sve", AArch64::AEK_SVE, "+sve", "-sve")
AARCH64_ARCH_EXT_NAME("sve2", AArch64::AEK_SVE2, "+sve2", "-sve2")
AARCH64_ARCH_EXT_NAME("sve2-aes", AArch64::AEK_SVE2AES, "+sve2-aes", "-sve2-aes")
AARCH64_ARCH_EXT_NAME("sve2-sm4", AArch64::AEK_SVE2SM4, "+sve2-sm4", "-sve2-sm4")
AARCH64_ARCH_EXT_NAME("sve2-sha3", AArch64::AEK_SVE2SHA3, "+sve2-sha3", "-sve2-sha3")
AARCH64_ARCH_EXT_NAME("bitperm", AArch64::AEK_BITPERM, "+bitperm", "-bitperm")
AARCH64_ARCH_EXT_NAME("rcpc", AArch64::AEK_RCPC, "+rcpc", "-rcpc")
AARCH64_ARCH_EXT_NAME("rng", AArch64::AEK_RAND, "+rand", "-rand")
AARCH64_ARCH_EXT_NAME("memtag", AArch64::AEK_MTE, "+mte", "-mte")
AARCH64_ARCH_EXT_NAME("ssbs", AArch64::AEK_SSBS, "+ssbs", "-ssbs")
AARCH64_ARCH_EXT_NAME("sb", AArch64::AEK_SB, "+sb", "-sb")
AARCH64_ARCH_EXT_NAME("predres", AArch64::AEK_PREDRES, "+predres", "-predres")
#undef AARCH64_ARCH_EXT_NAME
#ifndef AARCH64_CPU_NAME

View File

@ -49,6 +49,11 @@ enum ArchExtKind : unsigned {
AEK_SSBS = 1 << 20,
AEK_SB = 1 << 21,
AEK_PREDRES = 1 << 22,
AEK_SVE2 = 1 << 23,
AEK_SVE2AES = 1 << 24,
AEK_SVE2SM4 = 1 << 25,
AEK_SVE2SHA3 = 1 << 26,
AEK_BITPERM = 1 << 27,
};
enum class ArchKind {

View File

@ -45,6 +45,11 @@ enum ArchExtKind : unsigned {
AEK_AES = 1 << 16,
AEK_FP16FML = 1 << 17,
AEK_SB = 1 << 18,
AEK_SVE2 = 1 << 19,
AEK_SVE2AES = 1 << 20,
AEK_SVE2SM4 = 1 << 21,
AEK_SVE2SHA3 = 1 << 22,
AEK_BITPERM = 1 << 23,
// Unsupported extensions.
AEK_OS = 0x8000000,
AEK_IWMMXT = 0x10000000,

View File

@ -88,6 +88,16 @@ bool AArch64::getExtensionFeatures(unsigned Extensions,
Features.push_back("+rdm");
if (Extensions & AEK_SVE)
Features.push_back("+sve");
if (Extensions & AEK_SVE2)
Features.push_back("+sve2");
if (Extensions & AEK_SVE2AES)
Features.push_back("+sve2-aes");
if (Extensions & AEK_SVE2SM4)
Features.push_back("+sve2-sm4");
if (Extensions & AEK_SVE2SHA3)
Features.push_back("+sve2-sha3");
if (Extensions & AEK_BITPERM)
Features.push_back("+bitperm");
if (Extensions & AEK_RCPC)
Features.push_back("+rcpc");

View File

@ -103,6 +103,21 @@ def FeatureCCPP : SubtargetFeature<"ccpp", "HasCCPP",
def FeatureSVE : SubtargetFeature<"sve", "HasSVE", "true",
"Enable Scalable Vector Extension (SVE) instructions">;
def FeatureSVE2 : SubtargetFeature<"sve2", "HasSVE2", "true",
"Enable Scalable Vector Extension 2 (SVE2) instructions", [FeatureSVE]>;
def FeatureSVE2AES : SubtargetFeature<"sve2-aes", "HasSVE2AES", "true",
"Enable AES SVE2 instructions", [FeatureSVE2, FeatureAES]>;
def FeatureSVE2SM4 : SubtargetFeature<"sve2-sm4", "HasSVE2SM4", "true",
"Enable SM4 SVE2 instructions", [FeatureSVE2, FeatureSM4]>;
def FeatureSVE2SHA3 : SubtargetFeature<"sve2-sha3", "HasSVE2SHA3", "true",
"Enable SHA3 SVE2 instructions", [FeatureSVE2, FeatureSHA3]>;
def FeatureSVE2BitPerm : SubtargetFeature<"bitperm", "HasSVE2BitPerm", "true",
"Enable bit permutation SVE2 instructions", [FeatureSVE2]>;
def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true",
"Has zero-cycle register moves">;
def FeatureZCZeroingGP : SubtargetFeature<"zcz-gp", "HasZeroCycleZeroingGP", "true",
@ -395,6 +410,18 @@ def FeatureUseEL#i#ForTP : SubtargetFeature<"tpidr-el"#i, "UseEL"#i#"ForTP",
//===----------------------------------------------------------------------===//
// AArch64 Processors supported.
//
//===----------------------------------------------------------------------===//
// Unsupported features to disable for scheduling models
//===----------------------------------------------------------------------===//
class AArch64Unsupported { list<Predicate> F; }
def SVEUnsupported : AArch64Unsupported {
let F = [HasSVE, HasSVE2, HasSVE2AES, HasSVE2SM4, HasSVE2SHA3,
HasSVE2BitPerm];
}
include "AArch64SchedA53.td"
include "AArch64SchedA57.td"
include "AArch64SchedCyclone.td"

View File

@ -107,6 +107,16 @@ def HasFuseAES : Predicate<"Subtarget->hasFuseAES()">,
"fuse-aes">;
def HasSVE : Predicate<"Subtarget->hasSVE()">,
AssemblerPredicate<"FeatureSVE", "sve">;
def HasSVE2 : Predicate<"Subtarget->hasSVE2()">,
AssemblerPredicate<"FeatureSVE2", "sve2">;
def HasSVE2AES : Predicate<"Subtarget->hasSVE2AES()">,
AssemblerPredicate<"FeatureSVE2AES", "sve2-aes">;
def HasSVE2SM4 : Predicate<"Subtarget->hasSVE2SM4()">,
AssemblerPredicate<"FeatureSVE2SM4", "sve2-sm4">;
def HasSVE2SHA3 : Predicate<"Subtarget->hasSVE2SHA3()">,
AssemblerPredicate<"FeatureSVE2SHA3", "sve2-sha3">;
def HasSVE2BitPerm : Predicate<"Subtarget->hasSVE2BitPerm()">,
AssemblerPredicate<"FeatureSVE2BitPerm", "bitperm">;
def HasRCPC : Predicate<"Subtarget->hasRCPC()">,
AssemblerPredicate<"FeatureRCPC", "rcpc">;
def HasAltNZCV : Predicate<"Subtarget->hasAlternativeNZCV()">,

View File

@ -26,7 +26,7 @@ def CortexA53Model : SchedMachineModel {
// v 1.0 Spreadsheet
let CompleteModel = 1;
list<Predicate> UnsupportedFeatures = [HasSVE];
list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
}

View File

@ -31,7 +31,7 @@ def CortexA57Model : SchedMachineModel {
let LoopMicroOpBufferSize = 16;
let CompleteModel = 1;
list<Predicate> UnsupportedFeatures = [HasSVE];
list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
}
//===----------------------------------------------------------------------===//

View File

@ -18,7 +18,7 @@ def CycloneModel : SchedMachineModel {
let MispredictPenalty = 16; // 14-19 cycles are typical.
let CompleteModel = 1;
list<Predicate> UnsupportedFeatures = [HasSVE];
list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
}
//===----------------------------------------------------------------------===//

View File

@ -24,7 +24,7 @@ def ExynosM1Model : SchedMachineModel {
let MispredictPenalty = 14; // Minimum branch misprediction penalty.
let CompleteModel = 1; // Use the default model otherwise.
list<Predicate> UnsupportedFeatures = [HasSVE];
list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
}
//===----------------------------------------------------------------------===//

View File

@ -24,7 +24,7 @@ def ExynosM3Model : SchedMachineModel {
let MispredictPenalty = 16; // Minimum branch misprediction penalty.
let CompleteModel = 1; // Use the default model otherwise.
list<Predicate> UnsupportedFeatures = [HasSVE];
list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
}
//===----------------------------------------------------------------------===//

View File

@ -24,7 +24,7 @@ def ExynosM4Model : SchedMachineModel {
let MispredictPenalty = 16; // Minimum branch misprediction penalty.
let CompleteModel = 1; // Use the default model otherwise.
list<Predicate> UnsupportedFeatures = [HasSVE];
list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
}
//===----------------------------------------------------------------------===//

View File

@ -23,7 +23,7 @@ def FalkorModel : SchedMachineModel {
let MispredictPenalty = 11; // Minimum branch misprediction penalty.
let CompleteModel = 1;
list<Predicate> UnsupportedFeatures = [HasSVE];
list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
// FIXME: Remove when all errors have been fixed.
let FullInstRWOverlapCheck = 0;

View File

@ -27,7 +27,7 @@ def KryoModel : SchedMachineModel {
let LoopMicroOpBufferSize = 16;
let CompleteModel = 1;
list<Predicate> UnsupportedFeatures = [HasSVE];
list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
// FIXME: Remove when all errors have been fixed.
let FullInstRWOverlapCheck = 0;

View File

@ -25,7 +25,7 @@ def ThunderXT8XModel : SchedMachineModel {
let PostRAScheduler = 1; // Use PostRA scheduler.
let CompleteModel = 1;
list<Predicate> UnsupportedFeatures = [HasSVE];
list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
// FIXME: Remove when all errors have been fixed.
let FullInstRWOverlapCheck = 0;

View File

@ -25,7 +25,7 @@ def ThunderX2T99Model : SchedMachineModel {
let PostRAScheduler = 1; // Using PostRA sched.
let CompleteModel = 1;
list<Predicate> UnsupportedFeatures = [HasSVE];
list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
// FIXME: Remove when all errors have been fixed.
let FullInstRWOverlapCheck = 0;

View File

@ -119,6 +119,7 @@ protected:
bool HasLSLFast = false;
bool HasSVE = false;
bool HasSVE2 = false;
bool HasRCPC = false;
bool HasAggressiveFMA = false;
@ -134,6 +135,12 @@ protected:
bool HasRandGen = false;
bool HasMTE = false;
// Arm SVE2 extensions
bool HasSVE2AES = false;
bool HasSVE2SM4 = false;
bool HasSVE2SHA3 = false;
bool HasSVE2BitPerm = false;
// HasZeroCycleRegMove - Has zero-cycle register mov instructions.
bool HasZeroCycleRegMove = false;
@ -360,6 +367,7 @@ public:
bool hasSPE() const { return HasSPE; }
bool hasLSLFast() const { return HasLSLFast; }
bool hasSVE() const { return HasSVE; }
bool hasSVE2() const { return HasSVE2; }
bool hasRCPC() const { return HasRCPC; }
bool hasAggressiveFMA() const { return HasAggressiveFMA; }
bool hasAlternativeNZCV() const { return HasAlternativeNZCV; }
@ -372,6 +380,11 @@ public:
bool hasBTI() const { return HasBTI; }
bool hasRandGen() const { return HasRandGen; }
bool hasMTE() const { return HasMTE; }
// Arm SVE2 extensions
bool hasSVE2AES() const { return HasSVE2AES; }
bool hasSVE2SM4() const { return HasSVE2SM4; }
bool hasSVE2SHA3() const { return HasSVE2SHA3; }
bool hasSVE2BitPerm() const { return HasSVE2BitPerm; }
bool isLittleEndian() const { return IsLittle; }

View File

@ -2835,6 +2835,11 @@ static const struct Extension {
{"pan-rwv", {AArch64::FeaturePAN_RWV}},
{"ccpp", {AArch64::FeatureCCPP}},
{"sve", {AArch64::FeatureSVE}},
{"sve2", {AArch64::FeatureSVE2}},
{"sve2-aes", {AArch64::FeatureSVE2AES}},
{"sve2-sm4", {AArch64::FeatureSVE2SM4}},
{"sve2-sha3", {AArch64::FeatureSVE2SHA3}},
{"bitperm", {AArch64::FeatureSVE2BitPerm}},
// FIXME: Unsupported extensions
{"pan", {}},
{"lor", {}},

View File

@ -1014,8 +1014,8 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) {
AArch64::AEK_FP16 | AArch64::AEK_PROFILE |
AArch64::AEK_RAS | AArch64::AEK_LSE |
AArch64::AEK_RDM | AArch64::AEK_SVE |
AArch64::AEK_DOTPROD | AArch64::AEK_RCPC |
AArch64::AEK_FP16FML;
AArch64::AEK_SVE2 | AArch64::AEK_DOTPROD |
AArch64::AEK_RCPC | AArch64::AEK_FP16FML;
for (unsigned i = 0; i <= Extensions; i++)
EXPECT_TRUE(i == 0 ? !AArch64::getExtensionFeatures(i, Features)
@ -1043,6 +1043,14 @@ TEST(TargetParserTest, AArch64ArchExtFeature) {
{"lse", "nolse", "+lse", "-lse"},
{"rdm", "nordm", "+rdm", "-rdm"},
{"sve", "nosve", "+sve", "-sve"},
{"sve2", "nosve2", "+sve2", "-sve2"},
{"sve2-aes", "nosve2-aes", "+sve2-aes",
"-sve2-aes"},
{"sve2-sm4", "nosve2-sm4", "+sve2-sm4",
"-sve2-sm4"},
{"sve2-sha3", "nosve2-sha3", "+sve2-sha3",
"-sve2-sha3"},
{"bitperm", "nobitperm", "+bitperm", "-bitperm"},
{"dotprod", "nodotprod", "+dotprod", "-dotprod"},
{"rcpc", "norcpc", "+rcpc", "-rcpc" },
{"rng", "norng", "+rand", "-rand"},