mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 04:19:43 +00:00
[AArch64][SVE2] Add SVE2 target features to backend and TargetParser
Summary: This patch adds the following features defined by the Arm SVE2 architecture extension: sve2, sve2-aes, sve2-sm4, sve2-sha3, bitperm. For existing CPUs these features are declared as unsupported to prevent scheduler errors. The specification can be found here: https://developer.arm.com/docs/ddi0602/latest Reviewers: SjoerdMeijer, sdesmalen, ostannard, rovka Reviewed By: SjoerdMeijer, rovka Subscribers: rovka, javed.absar, tschuett, kristof.beyls, kristina, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D61513 llvm-svn: 360573
This commit is contained in:
parent
711582147b
commit
f378d68edf
@ -50,30 +50,35 @@ AARCH64_ARCH("armv8.5-a", ARMV8_5A, "8.5-A", "v8.5a",
|
||||
#define AARCH64_ARCH_EXT_NAME(NAME, ID, FEATURE, NEGFEATURE)
|
||||
#endif
|
||||
// FIXME: This would be nicer were it tablegen
|
||||
AARCH64_ARCH_EXT_NAME("invalid", AArch64::AEK_INVALID, nullptr, nullptr)
|
||||
AARCH64_ARCH_EXT_NAME("none", AArch64::AEK_NONE, nullptr, nullptr)
|
||||
AARCH64_ARCH_EXT_NAME("crc", AArch64::AEK_CRC, "+crc", "-crc")
|
||||
AARCH64_ARCH_EXT_NAME("lse", AArch64::AEK_LSE, "+lse", "-lse")
|
||||
AARCH64_ARCH_EXT_NAME("rdm", AArch64::AEK_RDM, "+rdm", "-rdm")
|
||||
AARCH64_ARCH_EXT_NAME("crypto", AArch64::AEK_CRYPTO, "+crypto","-crypto")
|
||||
AARCH64_ARCH_EXT_NAME("sm4", AArch64::AEK_SM4, "+sm4", "-sm4")
|
||||
AARCH64_ARCH_EXT_NAME("sha3", AArch64::AEK_SHA3, "+sha3", "-sha3")
|
||||
AARCH64_ARCH_EXT_NAME("sha2", AArch64::AEK_SHA2, "+sha2", "-sha2")
|
||||
AARCH64_ARCH_EXT_NAME("aes", AArch64::AEK_AES, "+aes", "-aes")
|
||||
AARCH64_ARCH_EXT_NAME("dotprod", AArch64::AEK_DOTPROD, "+dotprod","-dotprod")
|
||||
AARCH64_ARCH_EXT_NAME("fp", AArch64::AEK_FP, "+fp-armv8", "-fp-armv8")
|
||||
AARCH64_ARCH_EXT_NAME("simd", AArch64::AEK_SIMD, "+neon", "-neon")
|
||||
AARCH64_ARCH_EXT_NAME("fp16", AArch64::AEK_FP16, "+fullfp16", "-fullfp16")
|
||||
AARCH64_ARCH_EXT_NAME("fp16fml", AArch64::AEK_FP16FML, "+fp16fml", "-fp16fml")
|
||||
AARCH64_ARCH_EXT_NAME("profile", AArch64::AEK_PROFILE, "+spe", "-spe")
|
||||
AARCH64_ARCH_EXT_NAME("ras", AArch64::AEK_RAS, "+ras", "-ras")
|
||||
AARCH64_ARCH_EXT_NAME("sve", AArch64::AEK_SVE, "+sve", "-sve")
|
||||
AARCH64_ARCH_EXT_NAME("rcpc", AArch64::AEK_RCPC, "+rcpc", "-rcpc")
|
||||
AARCH64_ARCH_EXT_NAME("rng", AArch64::AEK_RAND, "+rand", "-rand")
|
||||
AARCH64_ARCH_EXT_NAME("memtag", AArch64::AEK_MTE, "+mte", "-mte")
|
||||
AARCH64_ARCH_EXT_NAME("ssbs", AArch64::AEK_SSBS, "+ssbs", "-ssbs")
|
||||
AARCH64_ARCH_EXT_NAME("sb", AArch64::AEK_SB, "+sb", "-sb")
|
||||
AARCH64_ARCH_EXT_NAME("predres", AArch64::AEK_PREDRES, "+predres", "-predres")
|
||||
AARCH64_ARCH_EXT_NAME("invalid", AArch64::AEK_INVALID, nullptr, nullptr)
|
||||
AARCH64_ARCH_EXT_NAME("none", AArch64::AEK_NONE, nullptr, nullptr)
|
||||
AARCH64_ARCH_EXT_NAME("crc", AArch64::AEK_CRC, "+crc", "-crc")
|
||||
AARCH64_ARCH_EXT_NAME("lse", AArch64::AEK_LSE, "+lse", "-lse")
|
||||
AARCH64_ARCH_EXT_NAME("rdm", AArch64::AEK_RDM, "+rdm", "-rdm")
|
||||
AARCH64_ARCH_EXT_NAME("crypto", AArch64::AEK_CRYPTO, "+crypto","-crypto")
|
||||
AARCH64_ARCH_EXT_NAME("sm4", AArch64::AEK_SM4, "+sm4", "-sm4")
|
||||
AARCH64_ARCH_EXT_NAME("sha3", AArch64::AEK_SHA3, "+sha3", "-sha3")
|
||||
AARCH64_ARCH_EXT_NAME("sha2", AArch64::AEK_SHA2, "+sha2", "-sha2")
|
||||
AARCH64_ARCH_EXT_NAME("aes", AArch64::AEK_AES, "+aes", "-aes")
|
||||
AARCH64_ARCH_EXT_NAME("dotprod", AArch64::AEK_DOTPROD, "+dotprod","-dotprod")
|
||||
AARCH64_ARCH_EXT_NAME("fp", AArch64::AEK_FP, "+fp-armv8", "-fp-armv8")
|
||||
AARCH64_ARCH_EXT_NAME("simd", AArch64::AEK_SIMD, "+neon", "-neon")
|
||||
AARCH64_ARCH_EXT_NAME("fp16", AArch64::AEK_FP16, "+fullfp16", "-fullfp16")
|
||||
AARCH64_ARCH_EXT_NAME("fp16fml", AArch64::AEK_FP16FML, "+fp16fml", "-fp16fml")
|
||||
AARCH64_ARCH_EXT_NAME("profile", AArch64::AEK_PROFILE, "+spe", "-spe")
|
||||
AARCH64_ARCH_EXT_NAME("ras", AArch64::AEK_RAS, "+ras", "-ras")
|
||||
AARCH64_ARCH_EXT_NAME("sve", AArch64::AEK_SVE, "+sve", "-sve")
|
||||
AARCH64_ARCH_EXT_NAME("sve2", AArch64::AEK_SVE2, "+sve2", "-sve2")
|
||||
AARCH64_ARCH_EXT_NAME("sve2-aes", AArch64::AEK_SVE2AES, "+sve2-aes", "-sve2-aes")
|
||||
AARCH64_ARCH_EXT_NAME("sve2-sm4", AArch64::AEK_SVE2SM4, "+sve2-sm4", "-sve2-sm4")
|
||||
AARCH64_ARCH_EXT_NAME("sve2-sha3", AArch64::AEK_SVE2SHA3, "+sve2-sha3", "-sve2-sha3")
|
||||
AARCH64_ARCH_EXT_NAME("bitperm", AArch64::AEK_BITPERM, "+bitperm", "-bitperm")
|
||||
AARCH64_ARCH_EXT_NAME("rcpc", AArch64::AEK_RCPC, "+rcpc", "-rcpc")
|
||||
AARCH64_ARCH_EXT_NAME("rng", AArch64::AEK_RAND, "+rand", "-rand")
|
||||
AARCH64_ARCH_EXT_NAME("memtag", AArch64::AEK_MTE, "+mte", "-mte")
|
||||
AARCH64_ARCH_EXT_NAME("ssbs", AArch64::AEK_SSBS, "+ssbs", "-ssbs")
|
||||
AARCH64_ARCH_EXT_NAME("sb", AArch64::AEK_SB, "+sb", "-sb")
|
||||
AARCH64_ARCH_EXT_NAME("predres", AArch64::AEK_PREDRES, "+predres", "-predres")
|
||||
#undef AARCH64_ARCH_EXT_NAME
|
||||
|
||||
#ifndef AARCH64_CPU_NAME
|
||||
|
@ -49,6 +49,11 @@ enum ArchExtKind : unsigned {
|
||||
AEK_SSBS = 1 << 20,
|
||||
AEK_SB = 1 << 21,
|
||||
AEK_PREDRES = 1 << 22,
|
||||
AEK_SVE2 = 1 << 23,
|
||||
AEK_SVE2AES = 1 << 24,
|
||||
AEK_SVE2SM4 = 1 << 25,
|
||||
AEK_SVE2SHA3 = 1 << 26,
|
||||
AEK_BITPERM = 1 << 27,
|
||||
};
|
||||
|
||||
enum class ArchKind {
|
||||
|
@ -45,6 +45,11 @@ enum ArchExtKind : unsigned {
|
||||
AEK_AES = 1 << 16,
|
||||
AEK_FP16FML = 1 << 17,
|
||||
AEK_SB = 1 << 18,
|
||||
AEK_SVE2 = 1 << 19,
|
||||
AEK_SVE2AES = 1 << 20,
|
||||
AEK_SVE2SM4 = 1 << 21,
|
||||
AEK_SVE2SHA3 = 1 << 22,
|
||||
AEK_BITPERM = 1 << 23,
|
||||
// Unsupported extensions.
|
||||
AEK_OS = 0x8000000,
|
||||
AEK_IWMMXT = 0x10000000,
|
||||
|
@ -88,6 +88,16 @@ bool AArch64::getExtensionFeatures(unsigned Extensions,
|
||||
Features.push_back("+rdm");
|
||||
if (Extensions & AEK_SVE)
|
||||
Features.push_back("+sve");
|
||||
if (Extensions & AEK_SVE2)
|
||||
Features.push_back("+sve2");
|
||||
if (Extensions & AEK_SVE2AES)
|
||||
Features.push_back("+sve2-aes");
|
||||
if (Extensions & AEK_SVE2SM4)
|
||||
Features.push_back("+sve2-sm4");
|
||||
if (Extensions & AEK_SVE2SHA3)
|
||||
Features.push_back("+sve2-sha3");
|
||||
if (Extensions & AEK_BITPERM)
|
||||
Features.push_back("+bitperm");
|
||||
if (Extensions & AEK_RCPC)
|
||||
Features.push_back("+rcpc");
|
||||
|
||||
|
@ -103,6 +103,21 @@ def FeatureCCPP : SubtargetFeature<"ccpp", "HasCCPP",
|
||||
def FeatureSVE : SubtargetFeature<"sve", "HasSVE", "true",
|
||||
"Enable Scalable Vector Extension (SVE) instructions">;
|
||||
|
||||
// SVE2 subtarget features. Each def gives the feature string, the subtarget
// field it sets to "true", a human-readable description, and a trailing list
// of other features: sve2 lists FeatureSVE, the AES/SM4/SHA3 variants list
// FeatureSVE2 plus the matching base crypto feature, and bitperm lists
// FeatureSVE2.
// NOTE(review): the trailing list is presumably SubtargetFeature's
// implied-features operand -- confirm against Target.td.
def FeatureSVE2 : SubtargetFeature<"sve2", "HasSVE2", "true",
|
||||
"Enable Scalable Vector Extension 2 (SVE2) instructions", [FeatureSVE]>;
|
||||
|
||||
def FeatureSVE2AES : SubtargetFeature<"sve2-aes", "HasSVE2AES", "true",
|
||||
"Enable AES SVE2 instructions", [FeatureSVE2, FeatureAES]>;
|
||||
|
||||
def FeatureSVE2SM4 : SubtargetFeature<"sve2-sm4", "HasSVE2SM4", "true",
|
||||
"Enable SM4 SVE2 instructions", [FeatureSVE2, FeatureSM4]>;
|
||||
|
||||
def FeatureSVE2SHA3 : SubtargetFeature<"sve2-sha3", "HasSVE2SHA3", "true",
|
||||
"Enable SHA3 SVE2 instructions", [FeatureSVE2, FeatureSHA3]>;
|
||||
|
||||
def FeatureSVE2BitPerm : SubtargetFeature<"bitperm", "HasSVE2BitPerm", "true",
|
||||
"Enable bit permutation SVE2 instructions", [FeatureSVE2]>;
|
||||
|
||||
def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true",
|
||||
"Has zero-cycle register moves">;
|
||||
def FeatureZCZeroingGP : SubtargetFeature<"zcz-gp", "HasZeroCycleZeroingGP", "true",
|
||||
@ -395,6 +410,18 @@ def FeatureUseEL#i#ForTP : SubtargetFeature<"tpidr-el"#i, "UseEL"#i#"ForTP",
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AArch64 Processors supported.
|
||||
//
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Unsupported features to disable for scheduling models
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Holder for a list of predicates (F) naming features that a scheduling model
// does not support.
class AArch64Unsupported { list<Predicate> F; }
|
||||
|
||||
// The SVE predicate plus every SVE2 predicate, collected in one list so that
// scheduling models can assign SVEUnsupported.F to their UnsupportedFeatures.
def SVEUnsupported : AArch64Unsupported {
|
||||
let F = [HasSVE, HasSVE2, HasSVE2AES, HasSVE2SM4, HasSVE2SHA3,
|
||||
HasSVE2BitPerm];
|
||||
}
|
||||
|
||||
include "AArch64SchedA53.td"
|
||||
include "AArch64SchedA57.td"
|
||||
include "AArch64SchedCyclone.td"
|
||||
|
@ -107,6 +107,16 @@ def HasFuseAES : Predicate<"Subtarget->hasFuseAES()">,
|
||||
"fuse-aes">;
|
||||
def HasSVE : Predicate<"Subtarget->hasSVE()">,
|
||||
AssemblerPredicate<"FeatureSVE", "sve">;
|
||||
// Predicates for the SVE2 features. Each def pairs a Subtarget->hasX() check
// with an AssemblerPredicate on the matching FeatureX, tagged with that
// feature's name string ("sve2", "sve2-aes", "sve2-sm4", "sve2-sha3",
// "bitperm").
def HasSVE2 : Predicate<"Subtarget->hasSVE2()">,
|
||||
AssemblerPredicate<"FeatureSVE2", "sve2">;
|
||||
def HasSVE2AES : Predicate<"Subtarget->hasSVE2AES()">,
|
||||
AssemblerPredicate<"FeatureSVE2AES", "sve2-aes">;
|
||||
def HasSVE2SM4 : Predicate<"Subtarget->hasSVE2SM4()">,
|
||||
AssemblerPredicate<"FeatureSVE2SM4", "sve2-sm4">;
|
||||
def HasSVE2SHA3 : Predicate<"Subtarget->hasSVE2SHA3()">,
|
||||
AssemblerPredicate<"FeatureSVE2SHA3", "sve2-sha3">;
|
||||
def HasSVE2BitPerm : Predicate<"Subtarget->hasSVE2BitPerm()">,
|
||||
AssemblerPredicate<"FeatureSVE2BitPerm", "bitperm">;
|
||||
def HasRCPC : Predicate<"Subtarget->hasRCPC()">,
|
||||
AssemblerPredicate<"FeatureRCPC", "rcpc">;
|
||||
def HasAltNZCV : Predicate<"Subtarget->hasAlternativeNZCV()">,
|
||||
|
@ -26,7 +26,7 @@ def CortexA53Model : SchedMachineModel {
|
||||
// v 1.0 Spreadsheet
|
||||
let CompleteModel = 1;
|
||||
|
||||
list<Predicate> UnsupportedFeatures = [HasSVE];
|
||||
list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
|
||||
}
|
||||
|
||||
|
||||
|
@ -31,7 +31,7 @@ def CortexA57Model : SchedMachineModel {
|
||||
let LoopMicroOpBufferSize = 16;
|
||||
let CompleteModel = 1;
|
||||
|
||||
list<Predicate> UnsupportedFeatures = [HasSVE];
|
||||
list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -18,7 +18,7 @@ def CycloneModel : SchedMachineModel {
|
||||
let MispredictPenalty = 16; // 14-19 cycles are typical.
|
||||
let CompleteModel = 1;
|
||||
|
||||
list<Predicate> UnsupportedFeatures = [HasSVE];
|
||||
list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -24,7 +24,7 @@ def ExynosM1Model : SchedMachineModel {
|
||||
let MispredictPenalty = 14; // Minimum branch misprediction penalty.
|
||||
let CompleteModel = 1; // Use the default model otherwise.
|
||||
|
||||
list<Predicate> UnsupportedFeatures = [HasSVE];
|
||||
list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -24,7 +24,7 @@ def ExynosM3Model : SchedMachineModel {
|
||||
let MispredictPenalty = 16; // Minimum branch misprediction penalty.
|
||||
let CompleteModel = 1; // Use the default model otherwise.
|
||||
|
||||
list<Predicate> UnsupportedFeatures = [HasSVE];
|
||||
list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -24,7 +24,7 @@ def ExynosM4Model : SchedMachineModel {
|
||||
let MispredictPenalty = 16; // Minimum branch misprediction penalty.
|
||||
let CompleteModel = 1; // Use the default model otherwise.
|
||||
|
||||
list<Predicate> UnsupportedFeatures = [HasSVE];
|
||||
list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -23,7 +23,7 @@ def FalkorModel : SchedMachineModel {
|
||||
let MispredictPenalty = 11; // Minimum branch misprediction penalty.
|
||||
let CompleteModel = 1;
|
||||
|
||||
list<Predicate> UnsupportedFeatures = [HasSVE];
|
||||
list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
|
||||
|
||||
// FIXME: Remove when all errors have been fixed.
|
||||
let FullInstRWOverlapCheck = 0;
|
||||
|
@ -27,7 +27,7 @@ def KryoModel : SchedMachineModel {
|
||||
let LoopMicroOpBufferSize = 16;
|
||||
let CompleteModel = 1;
|
||||
|
||||
list<Predicate> UnsupportedFeatures = [HasSVE];
|
||||
list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
|
||||
|
||||
// FIXME: Remove when all errors have been fixed.
|
||||
let FullInstRWOverlapCheck = 0;
|
||||
|
@ -25,7 +25,7 @@ def ThunderXT8XModel : SchedMachineModel {
|
||||
let PostRAScheduler = 1; // Use PostRA scheduler.
|
||||
let CompleteModel = 1;
|
||||
|
||||
list<Predicate> UnsupportedFeatures = [HasSVE];
|
||||
list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
|
||||
|
||||
// FIXME: Remove when all errors have been fixed.
|
||||
let FullInstRWOverlapCheck = 0;
|
||||
|
@ -25,7 +25,7 @@ def ThunderX2T99Model : SchedMachineModel {
|
||||
let PostRAScheduler = 1; // Using PostRA sched.
|
||||
let CompleteModel = 1;
|
||||
|
||||
list<Predicate> UnsupportedFeatures = [HasSVE];
|
||||
list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
|
||||
|
||||
// FIXME: Remove when all errors have been fixed.
|
||||
let FullInstRWOverlapCheck = 0;
|
||||
|
@ -119,6 +119,7 @@ protected:
|
||||
|
||||
bool HasLSLFast = false;
|
||||
bool HasSVE = false;
|
||||
bool HasSVE2 = false;
|
||||
bool HasRCPC = false;
|
||||
bool HasAggressiveFMA = false;
|
||||
|
||||
@ -134,6 +135,12 @@ protected:
|
||||
bool HasRandGen = false;
|
||||
bool HasMTE = false;
|
||||
|
||||
// Arm SVE2 extensions
|
||||
bool HasSVE2AES = false;
|
||||
bool HasSVE2SM4 = false;
|
||||
bool HasSVE2SHA3 = false;
|
||||
bool HasSVE2BitPerm = false;
|
||||
|
||||
// HasZeroCycleRegMove - Has zero-cycle register mov instructions.
|
||||
bool HasZeroCycleRegMove = false;
|
||||
|
||||
@ -360,6 +367,7 @@ public:
|
||||
bool hasSPE() const { return HasSPE; }
|
||||
bool hasLSLFast() const { return HasLSLFast; }
|
||||
bool hasSVE() const { return HasSVE; }
|
||||
bool hasSVE2() const { return HasSVE2; }
|
||||
bool hasRCPC() const { return HasRCPC; }
|
||||
bool hasAggressiveFMA() const { return HasAggressiveFMA; }
|
||||
bool hasAlternativeNZCV() const { return HasAlternativeNZCV; }
|
||||
@ -372,6 +380,11 @@ public:
|
||||
bool hasBTI() const { return HasBTI; }
|
||||
bool hasRandGen() const { return HasRandGen; }
|
||||
bool hasMTE() const { return HasMTE; }
|
||||
// Arm SVE2 extensions
|
||||
bool hasSVE2AES() const { return HasSVE2AES; }
|
||||
bool hasSVE2SM4() const { return HasSVE2SM4; }
|
||||
bool hasSVE2SHA3() const { return HasSVE2SHA3; }
|
||||
bool hasSVE2BitPerm() const { return HasSVE2BitPerm; }
|
||||
|
||||
bool isLittleEndian() const { return IsLittle; }
|
||||
|
||||
|
@ -2835,6 +2835,11 @@ static const struct Extension {
|
||||
{"pan-rwv", {AArch64::FeaturePAN_RWV}},
|
||||
{"ccpp", {AArch64::FeatureCCPP}},
|
||||
{"sve", {AArch64::FeatureSVE}},
|
||||
{"sve2", {AArch64::FeatureSVE2}},
|
||||
{"sve2-aes", {AArch64::FeatureSVE2AES}},
|
||||
{"sve2-sm4", {AArch64::FeatureSVE2SM4}},
|
||||
{"sve2-sha3", {AArch64::FeatureSVE2SHA3}},
|
||||
{"bitperm", {AArch64::FeatureSVE2BitPerm}},
|
||||
// FIXME: Unsupported extensions
|
||||
{"pan", {}},
|
||||
{"lor", {}},
|
||||
|
@ -1014,8 +1014,8 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) {
|
||||
AArch64::AEK_FP16 | AArch64::AEK_PROFILE |
|
||||
AArch64::AEK_RAS | AArch64::AEK_LSE |
|
||||
AArch64::AEK_RDM | AArch64::AEK_SVE |
|
||||
AArch64::AEK_DOTPROD | AArch64::AEK_RCPC |
|
||||
AArch64::AEK_FP16FML;
|
||||
AArch64::AEK_SVE2 | AArch64::AEK_DOTPROD |
|
||||
AArch64::AEK_RCPC | AArch64::AEK_FP16FML;
|
||||
|
||||
for (unsigned i = 0; i <= Extensions; i++)
|
||||
EXPECT_TRUE(i == 0 ? !AArch64::getExtensionFeatures(i, Features)
|
||||
@ -1043,6 +1043,14 @@ TEST(TargetParserTest, AArch64ArchExtFeature) {
|
||||
{"lse", "nolse", "+lse", "-lse"},
|
||||
{"rdm", "nordm", "+rdm", "-rdm"},
|
||||
{"sve", "nosve", "+sve", "-sve"},
|
||||
{"sve2", "nosve2", "+sve2", "-sve2"},
|
||||
{"sve2-aes", "nosve2-aes", "+sve2-aes",
|
||||
"-sve2-aes"},
|
||||
{"sve2-sm4", "nosve2-sm4", "+sve2-sm4",
|
||||
"-sve2-sm4"},
|
||||
{"sve2-sha3", "nosve2-sha3", "+sve2-sha3",
|
||||
"-sve2-sha3"},
|
||||
{"bitperm", "nobitperm", "+bitperm", "-bitperm"},
|
||||
{"dotprod", "nodotprod", "+dotprod", "-dotprod"},
|
||||
{"rcpc", "norcpc", "+rcpc", "-rcpc" },
|
||||
{"rng", "norng", "+rand", "-rand"},
|
||||
|
Loading…
Reference in New Issue
Block a user