[ARM] Do not test for CPUs, use SubtargetFeatures (Part 3). NFCI
This is a follow-up for r273544 and r273853. The end goal is to get rid
of the isSwift / isCortexXY / isWhatever methods. This commit also marks
them as obsolete.

Differential Revision: http://reviews.llvm.org/D21796

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@274616 91177308-0d34-0410-b5e6-96231b3b80d8
commit 96303e05fa
parent f94a00c5b0
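The substance of the change is a mechanical substitution: code that asked which CPU it was compiled for (isSwift(), isLikeA9()) now asks for the specific capability it cares about. Below is a minimal standalone sketch of that pattern; the Subtarget struct and canMergeToLSMulti helper are simplified stand-ins for LLVM's ARMSubtarget and load/store optimizer, not the real classes.

#include <iostream>

// Simplified stand-in for ARMSubtarget: capabilities live in independent
// feature flags rather than being derived from a CPU-family enum.
struct Subtarget {
  bool SlowOddRegister = false; // what FeatureSlowOddRegister now expresses
  bool hasSlowOddRegister() const { return SlowOddRegister; }
};

// Before: if (STI->isSwift() && ...)            -- behavior tied to one CPU
// After:  if (STI->hasSlowOddRegister() && ...) -- any CPU can opt in
bool canMergeToLSMulti(const Subtarget &STI, unsigned PRegNum, bool isNotVFP) {
  if (STI.hasSlowOddRegister() && !isNotVFP && (PRegNum % 2) == 1)
    return false; // a VLDM/VSTM starting on an odd register costs extra uops
  return true;
}

int main() {
  Subtarget Swift;
  Swift.SlowOddRegister = true; // swift now gets this via its feature list
  std::cout << canMergeToLSMulti(Swift, /*PRegNum=*/1, /*isNotVFP=*/false)
            << '\n'; // prints 0: merging is suppressed
}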
lib/Target/ARM/ARM.td
@@ -132,6 +132,20 @@ def FeaturePreferVMOVSR : SubtargetFeature<"prefer-vmovsr", "PreferVMOVSR",
 def FeaturePrefISHSTBarrier : SubtargetFeature<"prefer-ishst", "PreferISHST",
                                                "true", "Prefer ISHST barriers">;
 
+// Some targets (e.g. Cortex-A9) have muxed AGU and NEON/FPU.
+def FeatureMuxedUnits : SubtargetFeature<"muxed-units", "HasMuxedUnits", "true",
+                                         "Has muxed AGU and NEON/FPU">;
+
+// On some targets, a VLDM/VSTM starting with an odd register number needs more
+// microops than single VLDRS.
+def FeatureSlowOddRegister : SubtargetFeature<"slow-odd-reg", "SlowOddRegister",
+                                 "true", "VLDM/VSTM starting with an odd register is slow">;
+
+// Some targets have a renaming dependency when loading into D subregisters.
+def FeatureSlowLoadDSubreg : SubtargetFeature<"slow-load-D-subreg",
+                                              "SlowLoadDSubregister", "true",
+                                              "Loading into D subregs is slow">;
+
 // Some targets (e.g. Cortex-A9) want to convert VMOVRS, VMOVSR and VMOVS from
 // VFP to NEON, as an execution domain optimization.
 def FeatureNEONForFPMovs : SubtargetFeature<"neon-fpmovs", "UseNEONForFPMovs",
@@ -578,6 +592,7 @@ def : ProcessorModel<"cortex-a9", CortexA9Model, [ARMv7a, ProcA9,
                                                    FeatureFP16,
                                                    FeatureAvoidPartialCPSR,
                                                    FeaturePreferVMOVSR,
+                                                   FeatureMuxedUnits,
                                                    FeatureNEONForFPMovs,
                                                    FeatureCheckVLDnAlign,
                                                    FeatureMP]>;
@@ -598,6 +613,7 @@ def : ProcessorModel<"cortex-a12", CortexA9Model, [ARMv7a, ProcA12,
 // FIXME: A15 has currently the same Schedule model as A9.
 def : ProcessorModel<"cortex-a15", CortexA9Model, [ARMv7a, ProcA15,
                                                    FeatureHasRetAddrStack,
+                                                   FeatureMuxedUnits,
                                                    FeatureTrustZone,
                                                    FeatureT2XtPk,
                                                    FeatureVFP4,
@@ -626,6 +642,7 @@ def : ProcessorModel<"cortex-a17", CortexA9Model, [ARMv7a, ProcA17,
 // division features.
 def : ProcessorModel<"krait", CortexA9Model, [ARMv7a, ProcKrait,
                                               FeatureHasRetAddrStack,
+                                              FeatureMuxedUnits,
                                               FeatureCheckVLDnAlign,
                                               FeatureVMLxForwarding,
                                               FeatureT2XtPk,
@@ -648,6 +665,8 @@ def : ProcessorModel<"swift", SwiftModel, [ARMv7a, ProcSwift,
                                            FeatureHasSlowFPVMLx,
                                            FeatureProfUnpredicate,
                                            FeaturePrefISHSTBarrier,
+                                           FeatureSlowOddRegister,
+                                           FeatureSlowLoadDSubreg,
                                            FeatureSlowVGETLNi32,
                                            FeatureSlowVDUP32]>;
 
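For context on the TableGen records above: each SubtargetFeature takes the command-line attribute name (e.g. "slow-odd-reg"), the ARMSubtarget member it initializes ("SlowOddRegister"), the value to assign ("true"), and a help string; processor models then simply list the features that apply. The hand-written toy below approximates the generated name-to-member lookup. The parsing logic is an illustration only, not LLVM's actual SubtargetFeatures machinery.

#include <iostream>
#include <map>
#include <sstream>
#include <string>

// Toy model of what TableGen generates from the records above: each
// SubtargetFeature maps an attribute name to a member to set to "true".
struct FeatureBits {
  bool HasMuxedUnits = false;
  bool SlowOddRegister = false;
  bool SlowLoadDSubregister = false;
};

void applyFeatureString(FeatureBits &Bits, const std::string &Attrs) {
  static const std::map<std::string, bool FeatureBits::*> Table = {
      {"muxed-units", &FeatureBits::HasMuxedUnits},
      {"slow-odd-reg", &FeatureBits::SlowOddRegister},
      {"slow-load-D-subreg", &FeatureBits::SlowLoadDSubregister},
  };
  std::istringstream SS(Attrs);
  std::string Tok;
  while (std::getline(SS, Tok, ',')) { // e.g. "+muxed-units,-slow-odd-reg"
    if (Tok.size() < 2)
      continue;
    auto It = Table.find(Tok.substr(1));
    if (It != Table.end())
      Bits.*(It->second) = (Tok[0] == '+');
  }
}

int main() {
  FeatureBits Bits;
  applyFeatureString(Bits, "+muxed-units,+slow-odd-reg");
  std::cout << Bits.HasMuxedUnits << Bits.SlowOddRegister
            << Bits.SlowLoadDSubregister << '\n'; // prints 110
}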
lib/Target/ARM/ARMHazardRecognizer.cpp
@@ -50,8 +50,7 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
 
       // Skip over one non-VFP / NEON instruction.
       if (!LastMI->isBarrier() &&
-          // On A9, AGU and NEON/FPU are muxed.
-          !(TII.getSubtarget().isLikeA9() && LastMI->mayLoadOrStore()) &&
+          !(TII.getSubtarget().hasMuxedUnits() && LastMI->mayLoadOrStore()) &&
           (LastMCID.TSFlags & ARMII::DomainMask) == ARMII::DomainGeneral) {
         MachineBasicBlock::iterator I = LastMI;
         if (I != LastMI->getParent()->begin()) {
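The hazard-recognizer guard above decides whether one intervening instruction can hide a VFP/NEON hazard. On a core whose AGU shares issue resources with the NEON/FPU pipes, a load or store is not "free" for this purpose, so it is excluded. A condensed restatement with plain bools in place of the MachineInstr queries, purely illustrative:

// Condensed form of the new guard: an intervening instruction hides the
// hazard only if it is not a barrier, stays in the general execution
// domain, and (on muxed-unit cores) is not a load or store that would
// compete with NEON/FPU for the shared issue slot.
bool canHideHazardBehind(bool HasMuxedUnits, bool IsBarrier,
                         bool MayLoadOrStore, bool IsGeneralDomain) {
  return !IsBarrier && !(HasMuxedUnits && MayLoadOrStore) && IsGeneralDomain;
}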
lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -982,7 +982,7 @@ void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
     bool CanMergeToLSMulti = true;
     // On swift vldm/vstm starting with an odd register number as that needs
     // more uops than single vldrs.
-    if (STI->isSwift() && !isNotVFP && (PRegNum % 2) == 1)
+    if (STI->hasSlowOddRegister() && !isNotVFP && (PRegNum % 2) == 1)
       CanMergeToLSMulti = false;
 
     // LDRD/STRD do not allow SP/PC. LDM/STM do not support it or have it
lib/Target/ARM/ARMSubtarget.h
@@ -249,6 +249,16 @@ protected:
   /// If true, ISHST barriers will be used for Release semantics.
   bool PreferISHST = false;
 
+  /// If true, a VLDM/VSTM starting with an odd register number is considered to
+  /// take more microops than single VLDRS/VSTRS.
+  bool SlowOddRegister = false;
+
+  /// If true, loading into a D subregister will be penalized.
+  bool SlowLoadDSubregister = false;
+
+  /// If true, the AGU and NEON/FPU units are multiplexed.
+  bool HasMuxedUnits = false;
+
   /// If true, VMOVRS, VMOVSR and VMOVS will be converted from VFP to NEON.
   bool UseNEONForFPMovs = false;
 
@@ -382,6 +392,9 @@ public:
   bool hasV8MBaselineOps() const { return HasV8MBaselineOps; }
   bool hasV8MMainlineOps() const { return HasV8MMainlineOps; }
 
+  /// @{
+  /// These functions are obsolete, please consider adding subtarget features
+  /// or properties instead of calling them.
   bool isCortexA5() const { return ARMProcFamily == CortexA5; }
   bool isCortexA7() const { return ARMProcFamily == CortexA7; }
   bool isCortexA8() const { return ARMProcFamily == CortexA8; }
@@ -392,6 +405,7 @@ public:
   bool isLikeA9() const { return isCortexA9() || isCortexA15() || isKrait(); }
   bool isCortexR5() const { return ARMProcFamily == CortexR5; }
   bool isKrait() const { return ARMProcFamily == Krait; }
+  /// @}
 
   bool hasARMOps() const { return !NoARM; }
 
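The /// @{ … /// @} markers in the two hunks above are Doxygen member grouping: the comment introducing the group documents every declaration inside it, which is how all of the isCortexXY / isLikeA9 / isKrait predicates get the obsolescence note at once. The same idiom on a toy class:

// Doxygen member grouping: the comment ahead of @{ documents every
// declaration up to the matching @}.
class Widget {
public:
  /// @{
  /// These accessors are deprecated; prefer feature queries instead.
  bool isOldStyleA() const { return OldA; }
  bool isOldStyleB() const { return OldB; }
  /// @}

private:
  bool OldA = false, OldB = false;
};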
@@ -431,6 +445,9 @@ public:
   bool hasSlowVDUP32() const { return HasSlowVDUP32; }
   bool preferVMOVSR() const { return PreferVMOVSR; }
   bool preferISHSTBarriers() const { return PreferISHST; }
+  bool hasSlowOddRegister() const { return SlowOddRegister; }
+  bool hasSlowLoadDSubregister() const { return SlowLoadDSubregister; }
+  bool hasMuxedUnits() const { return HasMuxedUnits; }
   bool useNEONForFPMovs() const { return UseNEONForFPMovs; }
   bool checkVLDnAccessAlignment() const { return CheckVLDnAlign; }
   bool nonpipelinedVFP() const { return NonpipelinedVFP; }
lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -259,10 +259,8 @@ int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
                                    unsigned Index) {
   // Penalize inserting into an D-subregister. We end up with a three times
   // lower estimated throughput on swift.
-  if (ST->isSwift() &&
-      Opcode == Instruction::InsertElement &&
-      ValTy->isVectorTy() &&
-      ValTy->getScalarSizeInBits() <= 32)
+  if (ST->hasSlowLoadDSubregister() && Opcode == Instruction::InsertElement &&
+      ValTy->isVectorTy() && ValTy->getScalarSizeInBits() <= 32)
     return 3;
 
   if ((Opcode == Instruction::InsertElement ||
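The cost-model hunk follows the same pattern, and the rule it encodes is simple: on subtargets with the D-subregister penalty, inserting a scalar of at most 32 bits into a vector is reported at three times the baseline throughput cost. A schematic restatement with a hypothetical helper, not the real TTI interface:

// Schematic version of the rule in getVectorInstrCost: inserts of elements
// of 32 bits or fewer hit the D-subregister renaming penalty, so report
// them at ~3x the usual throughput cost on affected subtargets.
unsigned vectorInsertCost(bool SlowLoadDSubregister, bool IsVector,
                          unsigned ScalarSizeInBits) {
  if (SlowLoadDSubregister && IsVector && ScalarSizeInBits <= 32)
    return 3; // e.g. "vmov.32 d0[1], r0" stalls on swift
  return 1;   // baseline estimate for other targets / wider elements
}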