mirror of
https://github.com/RPCS3/llvm.git
synced 2025-02-19 02:08:06 +00:00
[ARM] Add VLDx/VSTx sched defs for machine-schedulers. NFCI
This patch adds missing scheds for Neon VLDx/VSTx instructions. This will make it easier and faster to write schedulers for ARM sub-targets in the future. Existing models will not be affected by this patch. Reviewed by: Renato Golin, Diana Picus Differential Revision: https://reviews.llvm.org/D33120 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@303717 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
6ff9a78ce5
commit
e244393f0c
File diff suppressed because it is too large
Load Diff
@ -131,6 +131,17 @@ def WriteFPDIV64 : SchedWrite;
|
||||
def WriteFPSQRT32 : SchedWrite;
|
||||
def WriteFPSQRT64 : SchedWrite;
|
||||
|
||||
// Vector load and stores
|
||||
def WriteVLD1 : SchedWrite;
|
||||
def WriteVLD2 : SchedWrite;
|
||||
def WriteVLD3 : SchedWrite;
|
||||
def WriteVLD4 : SchedWrite;
|
||||
def WriteVST1 : SchedWrite;
|
||||
def WriteVST2 : SchedWrite;
|
||||
def WriteVST3 : SchedWrite;
|
||||
def WriteVST4 : SchedWrite;
|
||||
|
||||
|
||||
// Define TII for use in SchedVariant Predicates.
|
||||
def : PredicateProlog<[{
|
||||
const ARMBaseInstrInfo *TII =
|
||||
|
@ -1981,6 +1981,15 @@ def A9WriteV7 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 7; }
|
||||
def A9WriteV9 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 9; }
|
||||
def A9WriteV10 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 10; }
|
||||
|
||||
def : WriteRes<WriteVLD1, []>;
|
||||
def : WriteRes<WriteVLD2, []>;
|
||||
def : WriteRes<WriteVLD3, []>;
|
||||
def : WriteRes<WriteVLD4, []>;
|
||||
def : WriteRes<WriteVST1, []>;
|
||||
def : WriteRes<WriteVST2, []>;
|
||||
def : WriteRes<WriteVST3, []>;
|
||||
def : WriteRes<WriteVST4, []>;
|
||||
|
||||
// Reserve A9UnitFP for 2 consecutive cycles.
|
||||
def A9Write2V4 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
|
||||
let Latency = 4;
|
||||
|
@ -120,6 +120,12 @@ def : WriteRes<WriteFPDIV64, [R52UnitDiv]> {
|
||||
def : WriteRes<WriteFPSQRT32, [R52UnitDiv]> { let Latency = 7; }
|
||||
def : WriteRes<WriteFPSQRT64, [R52UnitDiv]> { let Latency = 17; }
|
||||
|
||||
// Overridden via InstRW for this processor.
|
||||
def : WriteRes<WriteVST1, []>;
|
||||
def : WriteRes<WriteVST2, []>;
|
||||
def : WriteRes<WriteVST3, []>;
|
||||
def : WriteRes<WriteVST4, []>;
|
||||
|
||||
def : ReadAdvance<ReadFPMUL, 1>; // mul operand read in F1
|
||||
def : ReadAdvance<ReadFPMAC, 1>; // fp-mac operand read in F1
|
||||
|
||||
@ -712,20 +718,20 @@ def R52WriteSTM : SchedWriteVariant<[
|
||||
|
||||
// Vector Load/Stores. Can issue only in slot-0. Can dual-issue with
|
||||
// another instruction in slot-1, but only in the last issue.
|
||||
def R52WriteVLD1Mem : SchedWriteRes<[R52UnitLd]> { let Latency = 5;}
|
||||
def R52WriteVLD2Mem : SchedWriteRes<[R52UnitLd]> {
|
||||
def : WriteRes<WriteVLD1, [R52UnitLd]> { let Latency = 5;}
|
||||
def : WriteRes<WriteVLD2, [R52UnitLd]> {
|
||||
let Latency = 6;
|
||||
let NumMicroOps = 3;
|
||||
let ResourceCycles = [2];
|
||||
let SingleIssue = 1;
|
||||
}
|
||||
def R52WriteVLD3Mem : SchedWriteRes<[R52UnitLd]> {
|
||||
def : WriteRes<WriteVLD3, [R52UnitLd]> {
|
||||
let Latency = 7;
|
||||
let NumMicroOps = 5;
|
||||
let ResourceCycles = [3];
|
||||
let SingleIssue = 1;
|
||||
}
|
||||
def R52WriteVLD4Mem : SchedWriteRes<[R52UnitLd]> {
|
||||
def : WriteRes<WriteVLD4, [R52UnitLd]> {
|
||||
let Latency = 8;
|
||||
let NumMicroOps = 7;
|
||||
let ResourceCycles = [4];
|
||||
@ -828,95 +834,6 @@ def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1],
|
||||
def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VRSHL", "VRSHR", "VRSHRN", "VTB")>;
|
||||
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VSWP", "VTRN", "VUZP", "VZIP")>;
|
||||
|
||||
//---
|
||||
// VLDx. Vector Loads
|
||||
//---
|
||||
// 1-element structure load
|
||||
def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD1d(8|16|32|64)$")>;
|
||||
def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD1q(8|16|32|64)$")>;
|
||||
def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD1d(8|16|32|64)T$")>;
|
||||
def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD1d(8|16|32|64)Q$")>;
|
||||
def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD1d64TPseudo$")>;
|
||||
def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD1d64QPseudo$")>;
|
||||
|
||||
def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD1(LN|DUP)d(8|16|32)$")>;
|
||||
def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD1LNdAsm_(8|16|32)")>;
|
||||
def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD1(LN|DUP)q(8|16|32)Pseudo$")>;
|
||||
|
||||
def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d(8|16|32|64)wb")>;
|
||||
def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1q(8|16|32|64)wb")>;
|
||||
def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d(8|16|32|64)Twb")>;
|
||||
def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d(8|16|32|64)Qwb")>;
|
||||
def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d64TPseudoWB")>;
|
||||
def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d64QPseudoWB")>;
|
||||
|
||||
def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1LNd(8|16|32)_UPD")>;
|
||||
def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1LNdWB_(fixed|register)_Asm_(8|16|32)")>;
|
||||
def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1DUP(d|q)(8|16|32)wb")>;
|
||||
def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1(LN|DUP)q(8|16|32)Pseudo_UPD")>;
|
||||
|
||||
// 2-element structure load
|
||||
def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD2(d|b)(8|16|32)$")>;
|
||||
def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD2q(8|16|32)$")>;
|
||||
def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2(d|b)(8|16|32)wb")>;
|
||||
def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2q(8|16|32)wb")>;
|
||||
def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD2q(8|16|32)Pseudo$")>;
|
||||
def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2q(8|16|32)PseudoWB")>;
|
||||
|
||||
def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNd(8|16|32)$")>;
|
||||
def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNdAsm_(8|16|32)$")>;
|
||||
def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNq(16|32)$")>;
|
||||
def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNqAsm_(16|32)$")>;
|
||||
def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)$")>;
|
||||
def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)x2$")>;
|
||||
def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNd(8|16|32)Pseudo")>;
|
||||
def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNq(16|32)Pseudo")>;
|
||||
|
||||
def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNd(8|16|32)_UPD")>;
|
||||
def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNdWB_(fixed|register)_Asm_(8|16|32)")>;
|
||||
|
||||
def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNq(16|32)_UPD")>;
|
||||
def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNqWB_(fixed|register)_Asm_(16|32)")>;
|
||||
|
||||
def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)wb")>;
|
||||
def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)x2wb")>;
|
||||
def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNd(8|16|32)Pseudo_UPD")>;
|
||||
def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNq(16|32)Pseudo_UPD")>;
|
||||
|
||||
// 3-element structure load
|
||||
def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)$")>;
|
||||
def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD3(d|q)Asm_(8|16|32)$")>;
|
||||
def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)_UPD")>;
|
||||
def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
|
||||
def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo")>;
|
||||
def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo_UPD")>;
|
||||
|
||||
def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)$")>;
|
||||
def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)Asm_(8|16|32)$")>;
|
||||
def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)Pseudo$")>;
|
||||
|
||||
def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)_UPD")>;
|
||||
def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
|
||||
def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
|
||||
def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)Pseudo_UPD")>;
|
||||
|
||||
// 4-element structure load
|
||||
def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)$")>;
|
||||
def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD4(d|q)Asm_(8|16|32)$")>;
|
||||
def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo")>;
|
||||
def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)_UPD")>;
|
||||
def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
|
||||
def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo_UPD")>;
|
||||
|
||||
|
||||
def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)(8|16|32)$")>;
|
||||
def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)Asm_(8|16|32)$")>;
|
||||
def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4LN(d|q)(8|16|32)Pseudo$")>;
|
||||
def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4DUPd(8|16|32)Pseudo$")>;
|
||||
def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)(8|16|32)_UPD")>;
|
||||
def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
|
||||
def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)(8|16|32)Pseudo_UPD")>;
|
||||
|
||||
//---
|
||||
// VSTx. Vector Stores
|
||||
//---
|
||||
|
@ -1070,6 +1070,16 @@ let SchedModel = SwiftModel in {
|
||||
def : ReadAdvance<ReadFPMUL, 0>;
|
||||
def : ReadAdvance<ReadFPMAC, 0>;
|
||||
|
||||
// Overridden via InstRW for this processor.
|
||||
def : WriteRes<WriteVLD1, []>;
|
||||
def : WriteRes<WriteVLD2, []>;
|
||||
def : WriteRes<WriteVLD3, []>;
|
||||
def : WriteRes<WriteVLD4, []>;
|
||||
def : WriteRes<WriteVST1, []>;
|
||||
def : WriteRes<WriteVST2, []>;
|
||||
def : WriteRes<WriteVST3, []>;
|
||||
def : WriteRes<WriteVST4, []>;
|
||||
|
||||
// Not specified.
|
||||
def : InstRW<[SwiftWriteP01OneCycle2x], (instregex "ABS")>;
|
||||
// Preload.
|
||||
|
Loading…
x
Reference in New Issue
Block a user