mirror of
https://github.com/RPCSX/llvm.git
synced 2024-11-24 12:19:53 +00:00
[AArch64] Add support for Qualcomm Kryo CPU.
Machine model description by Dave Estes <cestes@codeaurora.org>. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@260686 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
9234391598
commit
1f88b2d0b7
@ -91,6 +91,7 @@ include "AArch64SchedA53.td"
|
||||
include "AArch64SchedA57.td"
|
||||
include "AArch64SchedCyclone.td"
|
||||
include "AArch64SchedM1.td"
|
||||
include "AArch64SchedKryo.td"
|
||||
|
||||
def ProcA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35",
|
||||
"Cortex-A35 ARM processors",
|
||||
@ -133,6 +134,14 @@ def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1",
|
||||
FeatureCRC,
|
||||
FeaturePerfMon]>;
|
||||
|
||||
def ProcKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo",
|
||||
"Qualcomm Kryo processors",
|
||||
[FeatureFPARMv8,
|
||||
FeatureNEON,
|
||||
FeatureCrypto,
|
||||
FeatureCRC,
|
||||
FeaturePerfMon]>;
|
||||
|
||||
def : ProcessorModel<"generic", NoSchedModel, [FeatureFPARMv8,
|
||||
FeatureNEON,
|
||||
FeatureCRC,
|
||||
@ -146,6 +155,7 @@ def : ProcessorModel<"cortex-a57", CortexA57Model, [ProcA57]>;
|
||||
def : ProcessorModel<"cortex-a72", CortexA57Model, [ProcA57]>;
|
||||
def : ProcessorModel<"cyclone", CycloneModel, [ProcCyclone]>;
|
||||
def : ProcessorModel<"exynos-m1", ExynosM1Model, [ProcExynosM1]>;
|
||||
def : ProcessorModel<"kryo", KryoModel, [ProcKryo]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Assembly parser
|
||||
|
@ -637,7 +637,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
|
||||
}
|
||||
|
||||
// Prefer likely predicted branches to selects on out-of-order cores.
|
||||
if (Subtarget->isCortexA57())
|
||||
if (Subtarget->isCortexA57() || Subtarget->isKryo())
|
||||
PredictableSelectIsExpensive = true;
|
||||
}
|
||||
|
||||
|
@ -543,7 +543,8 @@ static bool canBeExpandedToORR(const MachineInstr *MI, unsigned BitSize) {
|
||||
// FIXME: this implementation should be micro-architecture dependent, so a
|
||||
// micro-architecture target hook should be introduced here in future.
|
||||
bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr *MI) const {
|
||||
if (!Subtarget.isCortexA57() && !Subtarget.isCortexA53())
|
||||
if (!Subtarget.isCortexA57() && !Subtarget.isCortexA53() &&
|
||||
!Subtarget.isKryo())
|
||||
return MI->isAsCheapAsAMove();
|
||||
|
||||
switch (MI->getOpcode()) {
|
||||
|
@ -1969,7 +1969,7 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
|
||||
}
|
||||
|
||||
bool AArch64LoadStoreOpt::enableNarrowLdMerge(MachineFunction &Fn) {
|
||||
bool ProfitableArch = Subtarget->isCortexA57();
|
||||
bool ProfitableArch = Subtarget->isCortexA57() || Subtarget->isKryo();
|
||||
// FIXME: The benefit from converting narrow loads into a wider load could be
|
||||
// microarchitectural as it assumes that a single load with two bitfield
|
||||
// extracts is cheaper than two narrow loads. Currently, this conversion is
|
||||
|
130
lib/Target/AArch64/AArch64SchedKryo.td
Normal file
130
lib/Target/AArch64/AArch64SchedKryo.td
Normal file
@ -0,0 +1,130 @@
|
||||
//==- AArch64SchedKryo.td - Qualcomm Kryo Scheduling Defs ---*- tablegen -*-==//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file defines the machine model for Qualcomm Kryo to support
|
||||
// instruction scheduling and other instruction cost heuristics.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// The issue width is set to five, matching the five issue queues for expanded
|
||||
// uops. Now, the latency spreadsheet has information based on fragmented uops,
|
||||
// but these do not actually take up an issue queue.
|
||||
|
||||
def KryoModel : SchedMachineModel {
|
||||
let IssueWidth = 5; // 5-wide issue for expanded uops
|
||||
let MicroOpBufferSize = 128; // Out-of-order with temporary unified issue buffer
|
||||
let LoadLatency = 4; // Optimistic load latency
|
||||
let MispredictPenalty = 14; // Fetch + Decode/Rename/Dispatch + Branch
|
||||
|
||||
// Enable partial & runtime unrolling. The magic number is chosen based on
|
||||
// experiments and benchmarking data.
|
||||
let LoopMicroOpBufferSize = 16;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Define each kind of processor resource and number available on Kryo.
|
||||
|
||||
let SchedModel = KryoModel in {
|
||||
def KryoUnitXA : ProcResource<1>; // Type X(A) micro-ops
|
||||
def KryoUnitXB : ProcResource<1>; // Type X(B) micro-ops
|
||||
def KryoUnitYA : ProcResource<1>; // Type Y(A) micro-ops
|
||||
def KryoUnitYB : ProcResource<1>; // Type Y(B) micro-ops
|
||||
def KryoUnitX : ProcResGroup<[KryoUnitXA, // Type X micro-ops
|
||||
KryoUnitXB]>;
|
||||
def KryoUnitY : ProcResGroup<[KryoUnitYA, // Type Y micro-ops
|
||||
KryoUnitYB]>;
|
||||
def KryoUnitXY : ProcResGroup<[KryoUnitXA, // Type XY micro-ops
|
||||
KryoUnitXB,
|
||||
KryoUnitYA,
|
||||
KryoUnitYB]>;
|
||||
def KryoUnitLSA : ProcResource<1>; // Type LS(A) micro-ops
|
||||
def KryoUnitLSB : ProcResource<1>; // Type LS(B) micro-ops
|
||||
def KryoUnitLS : ProcResGroup<[KryoUnitLSA, // Type LS micro-ops
|
||||
KryoUnitLSB]>;
|
||||
}
|
||||
|
||||
let SchedModel = KryoModel in {
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Map the target-defined scheduler read/write resources and latency for
|
||||
// Kryo.
|
||||
|
||||
def : WriteRes<WriteImm, [KryoUnitXY]> { let Latency = 1; }
|
||||
def : WriteRes<WriteI, [KryoUnitXY]> { let Latency = 1; }
|
||||
def : WriteRes<WriteISReg, [KryoUnitXY, KryoUnitXY]>
|
||||
{ let Latency = 2; let NumMicroOps = 2; }
|
||||
def : WriteRes<WriteIEReg, [KryoUnitXY, KryoUnitXY]>
|
||||
{ let Latency = 2; let NumMicroOps = 2; }
|
||||
def : WriteRes<WriteExtr, [KryoUnitXY, KryoUnitX]>
|
||||
{ let Latency = 2; let NumMicroOps = 2; }
|
||||
def : WriteRes<WriteIS, [KryoUnitXY]> { let Latency = 2; }
|
||||
def : WriteRes<WriteID32, [KryoUnitXA, KryoUnitY]>
|
||||
{ let Latency = 8; let NumMicroOps = 1; } // Fragment -1
|
||||
def : WriteRes<WriteID64, [KryoUnitXA, KryoUnitY]>
|
||||
{ let Latency = 8; let NumMicroOps = 1; } // Fragment -1
|
||||
def : WriteRes<WriteIM32, [KryoUnitX]> { let Latency = 5; }
|
||||
def : WriteRes<WriteIM64, [KryoUnitX]> { let Latency = 5; }
|
||||
def : WriteRes<WriteBr, [KryoUnitXY]> { let Latency = 1; }
|
||||
def : WriteRes<WriteBrReg, [KryoUnitXY]> { let Latency = 1; }
|
||||
def : WriteRes<WriteLD, [KryoUnitLS]> { let Latency = 4; }
|
||||
def : WriteRes<WriteST, [KryoUnitLS]> { let Latency = 4; }
|
||||
def : WriteRes<WriteSTP, [KryoUnitLS]> { let Latency = 4; }
|
||||
def : WriteRes<WriteAdr, [KryoUnitXY]> { let Latency = 6; }
|
||||
def : WriteRes<WriteLDIdx, [KryoUnitLS]> { let Latency = 4; }
|
||||
def : WriteRes<WriteSTIdx, [KryoUnitLS]> { let Latency = 4; }
|
||||
def : WriteRes<WriteF, [KryoUnitXY, KryoUnitXY]>
|
||||
{ let Latency = 3; let NumMicroOps = 2; }
|
||||
def : WriteRes<WriteFCmp, [KryoUnitXY]> { let Latency = 2; }
|
||||
def : WriteRes<WriteFCvt, [KryoUnitX]> { let Latency = 4; }
|
||||
def : WriteRes<WriteFCopy, [KryoUnitXY]> { let Latency = 6; }
|
||||
def : WriteRes<WriteFImm, [KryoUnitXY]> { let Latency = 6; }
|
||||
def : WriteRes<WriteFMul, [KryoUnitX, KryoUnitX]>
|
||||
{ let Latency = 6; let NumMicroOps = 2; }
|
||||
def : WriteRes<WriteFDiv, [KryoUnitXA, KryoUnitY]>
|
||||
{ let Latency = 12; let NumMicroOps = 2; } // Fragment -1 / NoRSV +1
|
||||
def : WriteRes<WriteV, [KryoUnitXY]> { let Latency = 6; }
|
||||
def : WriteRes<WriteVLD, [KryoUnitLS]> { let Latency = 4; }
|
||||
def : WriteRes<WriteVST, [KryoUnitLS]> { let Latency = 4; }
|
||||
|
||||
def : WriteRes<WriteSys, []> { let Latency = 1; }
|
||||
def : WriteRes<WriteBarrier, []> { let Latency = 1; }
|
||||
def : WriteRes<WriteHint, []> { let Latency = 1; }
|
||||
|
||||
def : WriteRes<WriteLDHi, []> { let Latency = 4; }
|
||||
|
||||
// No forwarding logic is modelled yet.
|
||||
def : ReadAdvance<ReadI, 0>;
|
||||
def : ReadAdvance<ReadISReg, 0>;
|
||||
def : ReadAdvance<ReadIEReg, 0>;
|
||||
def : ReadAdvance<ReadIM, 0>;
|
||||
def : ReadAdvance<ReadIMA, 0>;
|
||||
def : ReadAdvance<ReadID, 0>;
|
||||
def : ReadAdvance<ReadExtrHi, 0>;
|
||||
def : ReadAdvance<ReadAdrBase, 0>;
|
||||
def : ReadAdvance<ReadVLD, 0>;
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Specialize the coarse model by associating instruction groups with the
|
||||
// subtarget-defined types. As the model is refined, this will override most
|
||||
// of the above SchedWriteRes and SchedAlias mappings.
|
||||
|
||||
// Miscellaneous
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
def : InstRW<[WriteI], (instrs COPY)>;
|
||||
|
||||
|
||||
// Detailed Refinements
|
||||
// -----------------------------------------------------------------------------
|
||||
include "AArch64SchedKryoDetails.td"
|
||||
|
||||
|
||||
} // SchedModel = KryoModel
|
2358
lib/Target/AArch64/AArch64SchedKryoDetails.td
Normal file
2358
lib/Target/AArch64/AArch64SchedKryoDetails.td
Normal file
File diff suppressed because it is too large
Load Diff
@ -39,7 +39,8 @@ protected:
|
||||
CortexA53,
|
||||
CortexA57,
|
||||
Cyclone,
|
||||
ExynosM1
|
||||
ExynosM1,
|
||||
Kryo
|
||||
};
|
||||
|
||||
/// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others.
|
||||
@ -151,6 +152,7 @@ public:
|
||||
bool isCortexA57() const { return CPUString == "cortex-a57"; }
|
||||
bool isCortexA53() const { return CPUString == "cortex-a53"; }
|
||||
bool isExynosM1() const { return CPUString == "exynos-m1"; }
|
||||
bool isKryo() const { return CPUString == "kryo"; }
|
||||
|
||||
bool useAA() const override { return isCortexA53(); }
|
||||
|
||||
|
@ -472,7 +472,7 @@ int AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
|
||||
}
|
||||
|
||||
unsigned AArch64TTIImpl::getMaxInterleaveFactor(unsigned VF) {
|
||||
if (ST->isCortexA57())
|
||||
if (ST->isCortexA57() || ST->isKryo())
|
||||
return 4;
|
||||
return 2;
|
||||
}
|
||||
|
@ -1,5 +1,6 @@
|
||||
; RUN: llc < %s -mtriple aarch64--none-eabi -mcpu=cortex-a57 -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=LE
|
||||
; RUN: llc < %s -mtriple aarch64_be--none-eabi -mcpu=cortex-a57 -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=BE
|
||||
; RUN: llc < %s -mtriple aarch64--none-eabi -mcpu=kryo -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=LE
|
||||
|
||||
; CHECK-LABEL: Ldrh_merge
|
||||
; CHECK-NOT: ldrh
|
||||
|
@ -7,6 +7,7 @@
|
||||
; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=cortex-a57 2>&1 | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=cortex-a72 2>&1 | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=exynos-m1 2>&1 | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=kryo 2>&1 | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=invalidcpu 2>&1 | FileCheck %s --check-prefix=INVALID
|
||||
|
||||
; CHECK-NOT: {{.*}} is not a recognized processor for this target
|
||||
|
@ -3,6 +3,7 @@
|
||||
; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=cortex-a53 -o - %s | FileCheck %s
|
||||
; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=cortex-a72 -o - %s | FileCheck %s
|
||||
; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=exynos-m1 -o - %s | FileCheck %s
|
||||
; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=kryo -o - %s | FileCheck %s
|
||||
|
||||
%X = type { i64, i64, i64 }
|
||||
declare void @f(%X*)
|
||||
|
Loading…
Reference in New Issue
Block a user