[PowerPC] Mark P9 scheduling model complete

This patch just adds the missing information to the P9 scheduling model to allow the model to be marked as complete. The model has been verified against P9 documentation. The model was verified with utils/schedcover.py. Differential Revision: https://reviews.llvm.org/D35695 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@314026 91177308-0d34-0410-b5e6-96231b3b80d8
2024-12-11 21:56:15 +00:00 · 2017-09-22 20:17:25 +00:00 · 2017-09-22 20:17:25 +00:00 · 93f389e6f1
commit 93f389e6f1
parent 0b345b230b
4 changed files with 516 additions and 279 deletions
--- a/lib/Target/PowerPC/P9InstrResources.td
+++ b/lib/Target/PowerPC/P9InstrResources.td
@ -12,11 +12,29 @@
 // is listed here. Instructions in this file belong to itinerary classes that
 // have instructions with different resource requirements.
 //
+// The makeup of the P9 CPU is modeled as follows:
+//   - Each CPU is made up of two superslices.
+//   - Each superslice is made up of two slices. Therefore, there are 4 slices
+//      for each CPU.
+//   - Up to 6 instructions can be dispatched to each CPU. Three per superslice.
+//   - Each CPU has:
+//     - One CY (Crypto) unit P9_CY_*
+//     - One DFU (Decimal Floating Point and Quad Precision) unit P9_DFU_*
+//     - Two PM (Permute) units. One on each superslice. P9_PM_*
+//     - Two DIV (Fixed Point Divide) units. One on each superslice. P9_DIV_*
+//     - Four ALU (Fixed Point Arithmetic) units. One on each slice. P9_ALU_*
+//     - Four DP (Floating Point) units. One on each slice. P9_DP_*
+//       This also includes fixed point multiply add.
+//     - Four AGEN (Address Generation) units. One for each slice. P9_AGEN_*
+//     - Four Load/Store Queues. P9_LS_*
+//   - Each set of instructions will require a number of these resources.
 //===----------------------------------------------------------------------===//

-
+// Two cycle ALU vector operation that uses an entire superslice.
+//  Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
+//  (EXECE, EXECO) and all three dispatches (DISP) to the given superslice.
 def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C,
-              DISP_1C, DISP_1C],
+              DISP_1C, DISP_1C, DISP_1C],
      (instrs
    VADDCUW,
    VADDUBM,
@ -26,47 +44,41 @@ def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C,
    VAND,
    VANDC,
    VCMPEQUB,
-    VCMPEQUBo,
    VCMPEQUD,
-    VCMPEQUDo,
    VCMPEQUH,
-    VCMPEQUHo,
    VCMPEQUW,
-    VCMPEQUWo,
-    VCMPGTSB,
-    VCMPGTSBo,
-    VCMPGTSD,
-    VCMPGTSDo,
-    VCMPGTSH,
-    VCMPGTSHo,
-    VCMPGTSW,
-    VCMPGTSWo,
-    VCMPGTUB,
-    VCMPGTUBo,
-    VCMPGTUD,
-    VCMPGTUDo,
-    VCMPGTUH,
-    VCMPGTUHo,
-    VCMPGTUW,
-    VCMPGTUWo,
    VCMPNEB,
-    VCMPNEBo,
    VCMPNEH,
-    VCMPNEHo,
    VCMPNEW,
-    VCMPNEWo,
    VCMPNEZB,
-    VCMPNEZBo,
    VCMPNEZH,
-    VCMPNEZHo,
    VCMPNEZW,
-    VCMPNEZWo,
    VEQV,
    VEXTSB2D,
    VEXTSB2W,
    VEXTSH2D,
    VEXTSH2W,
    VEXTSW2D,
+    VRLB,
+    VRLD,
+    VRLDMI,
+    VRLDNM,
+    VRLH,
+    VRLW,
+    VRLWMI,
+    VRLWNM,
+    VSRAB,
+    VSRAD,
+    VSRAH,
+    VSRAW,
+    VSRB,
+    VSRD,
+    VSRH,
+    VSRW,
+    VSLB,
+    VSLD,
+    VSLH,
+    VSLW,
    VMRGEW,
    VMRGOW,
    VNAND,
@ -77,9 +89,7 @@ def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C,
    VORC,
    VPOPCNTB,
    VPOPCNTH,
-    VPOPCNTW,
    VSEL,
-    VSUBCUW,
    VSUBUBM,
    VSUBUDM,
    VSUBUHM,
@ -98,6 +108,8 @@ def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C,
    XVNEGDP,
    XVNEGSP,
    XVXEXPDP,
+    XVIEXPSP,
+    XVXEXPSP,
    XXLAND,
    XXLANDC,
    XXLEQV,
@ -107,74 +119,29 @@ def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C,
    XXLORf,
    XXLORC,
    XXLXOR,
-    XXSEL
-)>;
-
-def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C],
-      (instrs
+    XXSEL,
    XSABSQP,
    XSCPSGNQP,
    XSIEXPQP,
    XSNABSQP,
    XSNEGQP,
-    XSXEXPQP,
-    XSABSDP,
-    XSCPSGNDP,
-    XSIEXPDP,
-    XSNABSDP,
-    XSNEGDP,
-    XSXEXPDP
+    XSXEXPQP
 )>;

-def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C],
+// Restricted Dispatch ALU operation for 3 cycles. The operation runs on a
+//  single slice. However, since it is Restricted it requires all 3 dispatches
+//  (DISP) for that superslice.
+def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
+    FCMPUS,
+    FCMPUD,
+    XSTSTDCDP,
+    XSTSTDCSP
+)>;

-    VMINSB,
-    VMINSD,
-    VMINSH,
-    VMINSW,
-    VMINUB,
-    VMINUD,
-    VMINUH,
-    VMINUW,
-    VPOPCNTD,
-    VPRTYBD,
-    VPRTYBW,
-    VRLB,
-    VRLD,
-    VRLDMI,
-    VRLDNM,
-    VRLH,
-    VRLW,
-    VRLWMI,
-    VRLWNM,
-    VSHASIGMAD,
-    VSHASIGMAW,
-    VSLB,
-    VSLD,
-    VSLH,
-    VSLW,
-    VSRAB,
-    VSRAD,
-    VSRAH,
-    VSRAW,
-    VSRB,
-    VSRD,
-    VSRH,
-    VSRW,
-    VSUBSBS,
-    VSUBSHS,
-    VSUBSWS,
-    VSUBUBS,
-    VSUBUHS,
-    VSUBUWS,
-    XSCMPEQDP,
-    XSCMPEXPDP,
-    XSCMPGEDP,
-    XSCMPGTDP,
-    XSCMPODP,
-    XSCMPUDP,
-    XSCVSPDPN,
+// Standard Dispatch ALU operation for 3 cycles. Only one slice used.
+def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C],
+      (instrs
    XSMAXCDP,
    XSMAXDP,
    XSMAXJDP,
@ -183,9 +150,153 @@ def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C
    XSMINJDP,
    XSTDIVDP,
    XSTSQRTDP,
-    XSTSTDCDP,
-    XSTSTDCSP,
+    XSCMPEQDP,
+    XSCMPEXPDP,
+    XSCMPGEDP,
+    XSCMPGTDP,
+    XSCMPODP,
+    XSCMPUDP,
    XSXSIGDP,
+    XSCVSPDPN
+)>;
+
+// Standard Dispatch ALU operation for 2 cycles. Only one slice used.
+def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C],
+      (instrs
+    ADDIStocHA,
+    ADDItocL,
+    MCRF,
+    MCRXRX,
+    SLD,
+    SRD,
+    SRAD,
+    SRADI,
+    RLDIC,
+    XSNABSDP,
+    XSXEXPDP,
+    XSABSDP,
+    XSNEGDP,
+    XSCPSGNDP
+)>;
+
+// Restricted Dispatch ALU operation for 2 cycles. The operation runs on a
+//  single slice. However, since it is Restricted it requires all 3 dispatches
+//  (DISP) for that superslice.
+def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+      (instrs
+    RLDCL,
+    RLDCR,
+    RLDIMI,
+    RLDICL,
+    RLDICR,
+    RLDICL_32_64,
+    XSIEXPDP,
+    FMR,
+    FABSD,
+    FABSS,
+    FNABSD,
+    FNABSS,
+    FNEGD,
+    FNEGS,
+    FCPSGND,
+    FCPSGNS
+)>;
+
+// Three cycle ALU vector operation that uses an entire superslice.
+//  Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
+//  (EXECE, EXECO) and all three dispatches (DISP) to the given superslice.
+def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C,
+              DISP_1C, DISP_1C, DISP_1C],
+      (instrs
+    VBPERMD,
+    VABSDUB,
+    VABSDUH,
+    VABSDUW,
+    VADDUBS,
+    VADDUHS,
+    VADDUWS,
+    VAVGSB,
+    VAVGSH,
+    VAVGSW,
+    VAVGUB,
+    VAVGUH,
+    VAVGUW,
+    VCMPEQFP,
+    VCMPEQFPo,
+    VCMPGEFP,
+    VCMPGEFPo,
+    VCMPBFP,
+    VCMPBFPo,
+    VCMPGTFP,
+    VCMPGTFPo,
+    VCLZB,
+    VCLZD,
+    VCLZH,
+    VCLZW,
+    VCTZB,
+    VCTZD,
+    VCTZH,
+    VCTZW,
+    VADDSBS,
+    VADDSHS,
+    VADDSWS,
+    VMINFP,
+    VMINSB,
+    VMINSD,
+    VMINSH,
+    VMINSW,
+    VMINUB,
+    VMINUD,
+    VMINUH,
+    VMINUW,
+    VMAXFP,
+    VMAXSB,
+    VMAXSD,
+    VMAXSH,
+    VMAXSW,
+    VMAXUB,
+    VMAXUD,
+    VMAXUH,
+    VMAXUW,
+    VPOPCNTW,
+    VPOPCNTD,
+    VPRTYBD,
+    VPRTYBW,
+    VSHASIGMAD,
+    VSHASIGMAW,
+    VSUBSBS,
+    VSUBSHS,
+    VSUBSWS,
+    VSUBUBS,
+    VSUBUHS,
+    VSUBUWS,
+    VSUBCUW,
+    VCMPGTSB,
+    VCMPGTSBo,
+    VCMPGTSD,
+    VCMPGTSDo,
+    VCMPGTSH,
+    VCMPGTSHo,
+    VCMPGTSW,
+    VCMPGTSWo,
+    VCMPGTUB,
+    VCMPGTUBo,
+    VCMPGTUD,
+    VCMPGTUDo,
+    VCMPGTUH,
+    VCMPGTUHo,
+    VCMPGTUW,
+    VCMPGTUWo,
+    VCMPNEBo,
+    VCMPNEHo,
+    VCMPNEWo,
+    VCMPNEZBo,
+    VCMPNEZHo,
+    VCMPNEZWo,
+    VCMPEQUBo,
+    VCMPEQUDo,
+    VCMPEQUHo,
+    VCMPEQUWo,
    XVCMPEQDP,
    XVCMPEQDPo,
    XVCMPEQSP,
@ -198,7 +309,6 @@ def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C
    XVCMPGTDPo,
    XVCMPGTSP,
    XVCMPGTSPo,
-    XVIEXPSP,
    XVMAXDP,
    XVMAXSP,
    XVMINDP,
@ -209,58 +319,15 @@ def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C
    XVTSQRTSP,
    XVTSTDCDP,
    XVTSTDCSP,
-    XVXEXPSP,
    XVXSIGDP,
    XVXSIGSP
 )>;

-def : InstRW<[P9_ALUE_4C, P9_ALUO_4C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C],
-      (instrs
-    VABSDUB,
-    VABSDUH,
-    VABSDUW,
-    VADDSBS,
-    VADDSHS,
-    VADDSWS,
-    VADDUBS,
-    VADDUHS,
-    VADDUWS,
-    VAVGSB,
-    VAVGSH,
-    VAVGSW,
-    VAVGUB,
-    VAVGUH,
-    VAVGUW,
-    VBPERMD,
-    VCLZB,
-    VCLZD,
-    VCLZH,
-    VCLZW,
-    VCMPBFP,
-    VCMPBFPo,
-    VCMPGTFP,
-    VCMPGTFPo,
-    VCTZB,
-    VCTZD,
-    VCTZH,
-    VCTZW,
-    VMAXFP,
-    VMAXSB,
-    VMAXSD,
-    VMAXSH,
-    VMAXSW,
-    VMAXUB,
-    VMAXUD,
-    VMAXUH,
-    VMAXUW,
-    VMINFP,
-    VCMPEQFP,
-    VCMPEQFPo,
-    VCMPGEFP,
-    VCMPGEFPo
-)>;
-
-def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C],
+// 7 cycle DP vector operation that uses an entire superslice.
+//  Uses both DP units (the even DPE and odd DPO units), two pipelines
+//  (EXECE, EXECO) and all three dispatches (DISP) to the given superslice.
+def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C,
+              DISP_1C, DISP_1C, DISP_1C],
      (instrs
    VADDFP,
    VCTSXS,
@ -367,8 +434,47 @@ def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C],
    VSUMSWS
 )>;

+// 7 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three
+//  dispatch units for the superslice.
 def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
+    FRSP,
+    FRIND,
+    FRINS,
+    FRIPD,
+    FRIPS,
+    FRIZD,
+    FRIZS,
+    FRIMD,
+    FRIMS,
+    FRE,
+    FRES,
+    FRSQRTE,
+    FRSQRTES,
+    FMADDS,
+    FMADD,
+    FMSUBS,
+    FMSUB,
+    FNMADDS,
+    FNMADD,
+    FNMSUBS,
+    FNMSUB,
+    FSELD,
+    FSELS,
+    FADDS,
+    FMULS,
+    FMUL,
+    FSUBS,
+    FCFID,
+    FCTID,
+    FCTIDZ,
+    FCFIDU,
+    FCFIDS,
+    FCFIDUS,
+    FCTIDUZ,
+    FCTIWUZ,
+    FCTIW,
+    FCTIWZ,
    XSMADDADP,
    XSMADDASP,
    XSMADDMDP,
@ -389,7 +495,7 @@ def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
    XSNMSUBMSP
 )>;

-
+// 7 cycle DP operation. One DP unit, one EXEC pipeline and two dispatch units.
 def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C],
      (instrs
    XSADDDP,
@ -397,8 +503,10 @@ def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C],
    XSCVDPHP,
    XSCVDPSP,
    XSCVDPSXDS,
+    XSCVDPSXDSs,
    XSCVDPSXWS,
    XSCVDPUXDS,
+    XSCVDPUXDSs,
    XSCVDPUXWS,
    XSCVHPDP,
    XSCVSPDP,
@ -421,7 +529,10 @@ def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C],
    XSCVDPSPN
 )>;

-def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C],
+// Three Cycle PM operation. Only one PM unit per superslice so we use the whole
+//  superslice. That includes both exec pipelines (EXECO, EXECE) and all three
+//  dispatches.
+def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    VBPERMQ,
    VCLZLSBB,
@ -469,7 +580,9 @@ def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C],
    VSLO,
    VSLV,
    VSPLTB,
+    VSPLTBs,
    VSPLTH,
+    VSPLTHs,
    VSPLTISB,
    VSPLTISH,
    VSPLTISW,
@ -498,6 +611,9 @@ def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C],
    XXSLDWI,
    XXSPLTIB,
    XXSPLTW,
+    XXSPLTWs,
+    XXPERMDI,
+    XXPERMDIs,
    VADDCUQ,
    VADDECUQ,
    VADDEUQM,
@ -517,7 +633,10 @@ def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C],
    XSXSIGQP
 )>;

-def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C],
+// 12 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
+//  superslice. That includes both exec pipelines (EXECO, EXECE) and all three
+//  dispatches.
+def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    XSADDQP,
    XSADDQPO,
@ -536,7 +655,10 @@ def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C],
    XSSUBQPO
 )>;

-def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C],
+// 24 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
+//  superslice. That includes both exec pipelines (EXECO, EXECE) and all three
+//  dispatches.
+def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    XSMADDQP,
    XSMADDQPO,
@ -550,45 +672,56 @@ def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C],
    XSNMSUBQPO
 )>;

-def : InstRW<[P9_DFU_58C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C],
+// 58 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
+//  superslice. That includes both exec pipelines (EXECO, EXECE) and all three
+//  dispatches.
+def : InstRW<[P9_DFU_58C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    XSDIVQP,
    XSDIVQPO
 )>;

-def : InstRW<[P9_DFU_76C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C],
+// 76 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
+//  superslice. That includes both exec pipelines (EXECO, EXECE) and all three
+//  dispatches.
+def : InstRW<[P9_DFU_76C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    XSSQRTQP,
    XSSQRTQPO
 )>;

-// Load Operation in IIC_LdStLFD
-
+// 5 Cycle load uses a single slice.
 def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C, DISP_1C],
      (instrs
    LXSDX,
    LXVD2X,
    LXSIWZX,
    LXV,
-    LXSD
+    LXVX,
+    LXSD,
+    DFLOADf64
 )>;

-def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C],
+// 4 Cycle load uses a single slice.
+def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C, DISP_1C],
+      (instrs
+    COPY
+)>;
+
+// 4 Cycle Restricted load uses a single slice but the dispatch for the whole
+//  superslice.
+def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    LFIWZX,
    LFDX,
    LFD
 )>;

-def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
-      (instrs
-    LXSSPX,
-    LXSIWAX,
-    LXSSP
-)>;
-
-def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C,
+// Cracked Restricted Load instruction.
+// Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
+//  operations cannot be done at the same time and so their latencies are added.
+// Full 6 dispatches are required as this is both cracked and restricted.
+def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    LFIWAX,
@ -596,14 +729,35 @@ def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C,
    LFS
 )>;

-def : InstRW<[P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
+// Cracked Load instruction.
+// Requires consecutive Load and ALU pieces totaling 7 cycles. The Load and ALU
+//  operations cannot be done at the same time and so their latencies are added.
+// Full 4 dispatches are required as this is a cracked instruction.
+def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C,
+              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+      (instrs
+    LXSSPX,
+    LXSIWAX,
+    LXSSP,
+    DFLOADf32
+)>;
+
+// Cracked Load that requires the PM resource.
+// Since the Load and the PM cannot be done at the same time the latencies are
+//  added. Requires 8 cycles.
+// Since the PM requires the full superslice we need both EXECE, EXECO pipelines
+//  as well as 3 dispatches for the PM. The Load requires the remaining 2
+//  dispatches.
+def : InstRW<[P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C,
+              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    LXVDSX,
+    LXVWSX,
    LXVW4X
 )>;

-// Store Operations in IIC_LdStSTFD.
-
+// Single slice Restricted store operation. The restricted operation requires
+//  all three dispatches for the superslice.
 def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    STFS,
@ -613,74 +767,83 @@ def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C],
    STFDX,
    STXSDX,
    STXSSPX,
-    STXSIWX
+    STXSIWX,
+    DFSTOREf32,
+    DFSTOREf64
 )>;

-def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_1C, DISP_1C],
+// Store operation that requires the whole superslice.
+def : InstRW<[P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C,
+              DISP_1C, DISP_1C, DISP_1C],
      (instrs
    STXVD2X,
    STXVW4X
 )>;


-// Divide Operations in IIC_IntDivW, IIC_IntDivD.
-
-def : InstRW<[P9_DIV_16C_8, IP_EXECE_1C, DISP_1C, DISP_1C],
+// 16 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
+//  superslice. That includes both exec pipelines (EXECO, EXECE) and all three
+//  dispatches.
+def : InstRW<[P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C,
+              DISP_1C, DISP_1C, DISP_1C],
      (instrs
    DIVW,
-    DIVWU
+    DIVWU,
+    MODSW
 )>;

-def : InstRW<[P9_DIV_24C_8, IP_EXECE_1C, DISP_1C, DISP_1C],
+// 24 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
+//  superslice. That includes both exec pipelines (EXECO, EXECE) and all three
+//  dispatches.
+def : InstRW<[P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C,
+              DISP_1C, DISP_1C, DISP_1C],
      (instrs
    DIVWE,
    DIVD,
    DIVWEU,
-    DIVDU
+    DIVDU,
+    MODSD,
+    MODUD,
+    MODUW
 )>;

-def : InstRW<[P9_DIV_40C_8, IP_EXECE_1C, DISP_1C, DISP_1C],
+// 40 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
+//  superslice. That includes both exec pipelines (EXECO, EXECE) and all three
+//  dispatches.
+def : InstRW<[P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C,
+              DISP_1C, DISP_1C, DISP_1C],
      (instrs
    DIVDE,
    DIVDEU
 )>;

-def : InstRW<[P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
+//  and one full superslice for the DIV operation since there is only one DIV
+//  per superslice. Latency of DIV plus ALU is 26.
+def : InstRW<[P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
+              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    DIVWEo,
    DIVWEUo
 )>;

-def : InstRW<[P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
+//  and one full superslice for the DIV operation since there is only one DIV
+//  per superslice. Latency of DIV plus ALU is 42.
+def : InstRW<[P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
+              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    DIVDEo,
    DIVDEUo
 )>;

-// Rotate Operations in IIC_IntRotateD, IIC_IntRotateDI
-def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C],
-      (instrs
-    SLD,
-    SRD,
-    SRAD,
-    SRADI,
-    RLDIC
-)>;
-
-def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
-      (instrs
-    RLDCL,
-    RLDCR,
-    RLDIMI,
-    RLDICL,
-    RLDICR,
-    RLDICL_32_64
-)>;
-
 // CR access instructions in _BrMCR, IIC_BrMCRX.

+// Cracked, restricted, ALU operations.
+// Here the two ALU ops can actually be done in parallel and therefore the
+//  latencies are not added together. Otherwise this is like having two
+//  instructions running together on two pipelines and 6 dispatches.
+// ALU ops are 2 cycles each.
 def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
@ -690,13 +853,12 @@ def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
    MTCRF8
 )>;

-def : InstRW<[P9_ALU_5C, IP_EXEC_1C, DISP_1C, DISP_1C],
-      (instrs
-    MCRF,
-    MCRXRX
-)>;
-
-def : InstRW<[P9_ALU_5C, P9_ALU_5C, IP_EXEC_1C, IP_EXEC_1C,
+// Cracked, restricted, ALU operations.
+// Here the two ALU ops can actually be done in parallel and therefore the
+//  latencies are not added together. Otherwise this is like having two
+//  instructions running together on two pipelines and 6 dispatches.
+// ALU ops are 3 cycles each.
+def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    MCRFS
@ -704,93 +866,57 @@ def : InstRW<[P9_ALU_5C, P9_ALU_5C, IP_EXEC_1C, IP_EXEC_1C,

 // FP Div instructions in IIC_FPDivD and IIC_FPDivS.

+// 33 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
 def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
-    FDIV,
+    FDIV
+)>;
+
+// 33 Cycle DP Instruction. Takes one slice and 2 dispatches.
+def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C, DISP_1C],
+      (instrs
    XSDIVDP
 )>;

+// 22 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
 def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
-    FDIVS,
+    FDIVS
+)>;
+
+// 22 Cycle DP Instruction. Takes one slice and 2 dispatches.
+def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C, DISP_1C],
+      (instrs
    XSDIVSP
 )>;

-def : InstRW<[P9_DP_24C_8, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C],
+// 24 Cycle DP Vector Instruction. Takes one full superslice.
+// Includes both EXECE, EXECO pipelines and all 3 dispatches for the given
+//  superslice.
+def : InstRW<[P9_DPE_24C_8, P9_DPO_24C_8, IP_EXECE_1C, IP_EXECO_1C,
+              DISP_1C, DISP_1C, DISP_1C],
      (instrs
    XVDIVSP
 )>;

-def : InstRW<[P9_DP_33C_8, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C],
+// 33 Cycle DP Vector Instruction. Takes one full superslice.
+// Includes both EXECE, EXECO pipelines and all 3 dispatches for the given
+//  superslice.
+def : InstRW<[P9_DPE_33C_8, P9_DPO_33C_8, IP_EXECE_1C, IP_EXECO_1C,
+              DISP_1C, DISP_1C, DISP_1C],
      (instrs
    XVDIVDP
 )>;

-// FP Instructions in IIC_FPGeneral, IIC_FPFused
-
-def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
-      (instrs
-    FRSP,
-    FRIND,
-    FRINS,
-    FRIPD,
-    FRIPS,
-    FRIZD,
-    FRIZS,
-    FRIMD,
-    FRIMS,
-    FRE,
-    FRES,
-    FRSQRTE,
-    FRSQRTES,
-    FMADDS,
-    FMADD,
-    FMSUBS,
-    FMSUB,
-    FNMADDS,
-    FNMADD,
-    FNMSUBS,
-    FNMSUB,
-    FSELD,
-    FSELS,
-    FADDS,
-    FMULS,
-    FMUL,
-    FSUBS,
-    FCFID,
-    FCTID,
-    FCTIDZ,
-    FCFIDU,
-    FCFIDS,
-    FCFIDUS,
-    FCTIDUZ,
-    FCTIWUZ,
-    FCTIW,
-    FCTIWZ
-)>;
-
-def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
-      (instrs
-    FMR,
-    FABSD,
-    FABSS,
-    FNABSD,
-    FNABSS,
-    FNEGD,
-    FNEGS,
-    FCPSGND,
-    FCPSGNS
-)>;
-
-def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
-      (instrs
-    FCMPUS,
-    FCMPUD
-)>;
-
 // Load instructions in IIC_LdStLFDU and IIC_LdStLFDUX.

-def : InstRW<[P9_LoadAndALUOp_7C, P9_ALU_2C,
+// Instruction cracked into three pieces. One Load and two ALU operations.
+// The Load and one of the ALU ops cannot be run at the same time and so the
+//  latencies are added together for 6 cycles. The remaining ALU is 2 cycles.
+// Both the load and the ALU that depends on it are restricted and so they take
+//  a total of 6 dispatches. The final 2 dispatches come from the second ALU op.
+// The two EXEC pipelines are for the 2 ALUs while the AGEN is for the load.
+def : InstRW<[P9_LoadAndALUOp_6C, P9_ALU_2C,
              IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
@ -799,10 +925,32 @@ def : InstRW<[P9_LoadAndALUOp_7C, P9_ALU_2C,
    LFSUX
 )>;

-def : InstRW<[P9_LS_5C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
+// Cracked instruction made up of a Load and an ALU. The ALU does not depend on
+//  the load and so it can be run at the same time as the load. The load is also
+//  restricted. 3 dispatches are from the restricted load while the other two
+//  are from the ALU. The AGEN pipeline is from the load and the EXEC pipeline
+//  is required for the ALU.
+def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    LFDU,
    LFDUX
 )>;

+// Crypto Instructions
+
+// 6 Cycle CY operation. Only one CY unit per CPU so we use a whole
+//  superslice. That includes both exec pipelines (EXECO, EXECE) and all three
+//  dispatches.
+def : InstRW<[P9_CY_6C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C],
+      (instrs
+  VPMSUMB,
+  VPMSUMD,
+  VPMSUMH,
+  VPMSUMW,
+  VCIPHER,
+  VCIPHERLAST,
+  VNCIPHER,
+  VNCIPHERLAST,
+  VSBOX
+)>;
--- a/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/lib/Target/PowerPC/PPCInstrFormats.td
@ -2101,4 +2101,5 @@ class Pseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern>
  let PPC64 = 0;
  let Pattern = pattern;
  let Inst{31-0} = 0;
+  let hasNoSchedulingInfo = 1;
 }
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@ -3951,6 +3951,7 @@ class PPCAsmPseudo<string asm, dag iops>
  let AsmString = asm;
  let isAsmParserOnly = 1;
  let isPseudo = 1;
+  let hasNoSchedulingInfo = 1;
 }

 def : InstAlias<"sc", (SC 0)>;
--- a/lib/Target/PowerPC/PPCScheduleP9.td
+++ b/lib/Target/PowerPC/PPCScheduleP9.td
@ -22,7 +22,9 @@ def P9Model : SchedMachineModel {
  // Try to make sure we have at least 10 dispatch groups in a loop.
  let LoopMicroOpBufferSize = 60;

-  let CompleteModel = 0;
+  let CompleteModel = 1;
+
+  let UnsupportedFeatures = [HasQPX];

 }

@ -68,6 +70,10 @@ let SchedModel = P9Model in {
  def LS : ProcResource<4>;
  def PM : ProcResource<2>;
  def DFU : ProcResource<1>;
+  def BR : ProcResource<1> {
+    let BufferSize = 16;
+  }
+  def CY : ProcResource<1>;

  def TestGroup : ProcResGroup<[ALU, DP]>;

@ -145,6 +151,10 @@ let SchedModel = P9Model in {
    let Latency = 6;
  }

+  def P9_DIV_12C : SchedWriteRes<[DIV]> {
+    let Latency = 12;
+  }
+
  def P9_DIV_16C_8 : SchedWriteRes<[DIV]> {
    let ResourceCycles = [8];
    let Latency = 16;
@ -190,6 +200,16 @@ let SchedModel = P9Model in {
    let Latency = 24;
  }

+  def P9_DPO_24C_8 : SchedWriteRes<[DPO]> {
+    let ResourceCycles = [8];
+    let Latency = 24;
+  }
+
+  def P9_DPE_24C_8 : SchedWriteRes<[DPE]> {
+    let ResourceCycles = [8];
+    let Latency = 24;
+  }
+
  def P9_DP_26C_5 : SchedWriteRes<[DP]> {
    let ResourceCycles = [5];
    let Latency = 22;
@ -205,6 +225,16 @@ let SchedModel = P9Model in {
    let Latency = 33;
  }

+  def P9_DPE_33C_8 : SchedWriteRes<[DPE]> {
+    let ResourceCycles = [8];
+    let Latency = 33;
+  }
+
+  def P9_DPO_33C_8 : SchedWriteRes<[DPO]> {
+    let ResourceCycles = [8];
+    let Latency = 33;
+  }
+
  def P9_DP_36C_10 : SchedWriteRes<[DP]> {
    let ResourceCycles = [10];
    let Latency = 36;
@ -248,11 +278,25 @@ let SchedModel = P9Model in {
    let Latency = 76;
    let ResourceCycles = [62];
  }
+
+  def P9_BR_2C : SchedWriteRes<[BR]> {
+    let Latency = 2;
+  }
+
+  def P9_BR_5C : SchedWriteRes<[BR]> {
+    let Latency = 5;
+  }
+
+  def P9_CY_6C : SchedWriteRes<[CY]> {
+    let Latency = 6;
+  }
+
  // ***************** WriteSeq Definitions *****************

  def P9_LoadAndALUOp_6C : WriteSequence<[P9_LS_4C, P9_ALU_2C]>;
  def P9_LoadAndALUOp_7C : WriteSequence<[P9_LS_5C, P9_ALU_2C]>;
  def P9_LoadAndPMOp_8C : WriteSequence<[P9_LS_5C, P9_PM_3C]>;
+  def P9_LoadAndLoadOp_8C : WriteSequence<[P9_LS_4C, P9_LS_4C]>;
  def P9_IntDivAndALUOp_26C_8 : WriteSequence<[P9_DIV_24C_8, P9_ALU_2C]>;
  def P9_IntDivAndALUOp_42C_8 : WriteSequence<[P9_DIV_40C_8, P9_ALU_2C]>;
  def P9_StoreAndALUOp_4C : WriteSequence<[P9_LS_1C, P9_ALU_3C]>;
@ -260,19 +304,32 @@ let SchedModel = P9Model in {

  // ***************** Defining Itinerary Class Resources *****************

+  // The following itineraries are fully covered by the InstRW definitions in
+  // P9InstrResources.td so aren't listed here.
+  // IIC_FPDivD, IIC_FPDivS, IIC_FPFused, IIC_IntDivD, IIC_LdStLFDU,
+  // IIC_LdStLFDUX
+
  def : ItinRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C],
-               [IIC_IntSimple, IIC_IntGeneral]>;
+               [IIC_IntSimple, IIC_IntGeneral, IIC_IntRFID,
+                IIC_IntRotateD, IIC_IntRotateDI, IIC_IntTrapD,
+                IIC_SprRFI]>;
+
+  def : ItinRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C],
+               [IIC_IntTrapW]>;

  def : ItinRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
               [IIC_IntISEL, IIC_IntRotate, IIC_IntShift]>;

  def : ItinRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C], [IIC_IntCompare]>;

+  def : ItinRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C,
+                DISP_1C, DISP_1C], [IIC_VecGeneral, IIC_FPCompare]>;
+
  def : ItinRW<[P9_DP_5C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
-               [IIC_IntMulHW, IIC_IntMulHWU, IIC_IntMulLI]>;
+               [IIC_IntMulHW, IIC_IntMulHWU, IIC_IntMulLI, IIC_IntMulHD]>;

  def : ItinRW<[P9_LS_5C, IP_EXEC_1C, DISP_1C, DISP_1C],
-               [IIC_LdStLoad, IIC_LdStLD]>;
+               [IIC_LdStLoad, IIC_LdStLD, IIC_LdStLFD]>;

  def : ItinRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
                DISP_1C, DISP_1C, DISP_1C, DISP_1C],
@ -300,12 +357,18 @@ let SchedModel = P9Model in {
  def : ItinRW<[P9_LS_4C, IP_EXEC_1C, DISP_1C, DISP_1C],
               [IIC_LdStLWARX, IIC_LdStLDARX, IIC_LdStLMW]>;

+  def : ItinRW<[P9_LS_4C, IP_EXEC_1C, DISP_1C, DISP_1C],
+               [IIC_LdStCOPY, IIC_SprABORT, IIC_LdStPASTE, IIC_LdStDCBF,
+                IIC_LdStICBI, IIC_LdStSync, IIC_SprISYNC, IIC_SprMSGSYNC,
+                IIC_SprSLBIA, IIC_SprSLBSYNC, IIC_SprTLBSYNC]>;
+
  def : ItinRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C],
               [IIC_LdStSTFD, IIC_LdStSTD, IIC_LdStStore]>;

  def : ItinRW<[P9_LS_1C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C,
                DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
-               [IIC_LdStSTDU, IIC_LdStSTDUX]>;
+               [IIC_LdStSTDU, IIC_LdStSTDUX, IIC_LdStStoreUpd, IIC_SprSLBIEG,
+                IIC_SprTLBIA, IIC_SprTLBIE]>;

  def : ItinRW<[P9_StoreAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C,
                DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
@ -315,20 +378,44 @@ let SchedModel = P9Model in {
               [IIC_BrCR, IIC_IntMTFSB0]>;

  def : ItinRW<[P9_ALUOpAndALUOp_4C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
-                IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C,
-                DISP_1C, DISP_1C, DISP_1C], [IIC_SprMFCR, IIC_SprMFCRF]>;
+                IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C,
+                DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+               [IIC_SprMFCR, IIC_SprMFCRF, IIC_BrMCR, IIC_BrMCRX, IIC_IntMFFS]>;
+
+  def : ItinRW<[P9_BR_2C, DISP_1C], [IIC_BrB]>;
+  def : ItinRW<[P9_BR_5C, DISP_1C], [IIC_SprMFSPR]>;

  // This class should be broken down to instruction level, once some missing
  // info is obtained.
  def : ItinRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
                DISP_1C, DISP_1C, DISP_1C], [IIC_SprMTSPR]>;

-  def : ItinRW<[P9_DP_7C, IP_EXEC_1C,
-                DISP_1C, DISP_1C, DISP_1C], [IIC_FPGeneral, IIC_FPAddSub]>;
+  def : ItinRW<[P9_LoadAndLoadOp_8C, IP_EXEC_1C, DISP_1C, DISP_1C],
+               [IIC_SprSLBIE, IIC_SprSLBMFEE, IIC_SprSLBMFEV, IIC_SprSLBMTE,
+                IIC_SprTLBIEL]>;
+
+  // IIC_VecFP is added here although many instructions with that itinerary
+  // use very different resources. It would appear that instructions were
+  // given that itinerary rather carelessly over time. Specific instructions
+  // that use different resources are listed in various InstRW classes.
+  def : ItinRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+               [IIC_FPGeneral, IIC_FPAddSub, IIC_VecFP]>;
+
+  def : ItinRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C,
+                DISP_1C, DISP_1C], [IIC_VecFPCompare]>;
+
+  def : ItinRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C],
+               [IIC_VecPerm]>;

  def : ItinRW<[P9_DP_36C_10, IP_EXEC_1C], [IIC_FPSqrtD]>;
  def : ItinRW<[P9_DP_26C_5, P9_DP_26C_5, IP_EXEC_1C, IP_EXEC_1C], [IIC_FPSqrtS]>;

+  def : ItinRW<[P9_DIV_12C, IP_EXECE_1C, DISP_1C, DISP_1C],
+               [IIC_SprMFMSR, IIC_SprMFPMR, IIC_SprMFSR, IIC_SprMFTB,
+                IIC_SprMTMSR, IIC_SprMTMSRD, IIC_SprMTPMR, IIC_SprMTSR]>;
+
+  def : ItinRW<[], [IIC_SprSTOP]>;
+
  include "P9InstrResources.td"

 }