[lanai] Add subword scheduling itineraries.

Differentiate between word and subword memory operations, as they take different numbers of cycles to complete. This just adds a basic model of the subword latency to the scheduler. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@266898 91177308-0d34-0410-b5e6-96231b3b80d8
2024-12-13 23:18:58 +00:00 · 2016-04-20 18:28:55 +00:00 · 2016-04-20 18:28:55 +00:00 · f38ea34724
commit f38ea34724
parent 8f2afa60b7
3 changed files with 63 additions and 17 deletions
--- a/lib/Target/Lanai/LanaiInstrInfo.td
+++ b/lib/Target/Lanai/LanaiInstrInfo.td
@ -496,6 +496,7 @@ class LoadRR<string OpcString, PatFrag OpNode, ValueType Ty>
  let Q = src{8};
  let BBB = src{7-5};
  let JJJJJ = src{4-0};
+  let mayLoad = 1;
 }

 class LoadRI<string OpcString, PatFrag OpNode, ValueType Ty>
@ -511,6 +512,7 @@ class LoadRI<string OpcString, PatFrag OpNode, ValueType Ty>
  let Q = src{16};
  let imm16 = src{15-0};
  let isReMaterializable = 1;
+  let mayLoad = 1;
 }

 let E = 0 in {
@ -554,19 +556,22 @@ def LDADDR : InstSLS<0x0, (outs GPR:$Rd), (ins MEMi:$src),
  let msb = src{20-16};
  let lsb = src{15-0};
  let isReMaterializable = 1;
+  let mayLoad = 1;
 }

 class LoadSPLS<string asmstring, PatFrag opNode>
  : InstSPLS<(outs GPR:$Rd), (ins MEMspls:$src),
             !strconcat(asmstring, "\t$src, $Rd"),
             [(set (i32 GPR:$Rd), (opNode ADDRspls:$src))]>,
-    Sched<[WriteLD]> {
+    Sched<[WriteLDSW]> {
  bits<17> src;
-  let Itinerary = IIC_LD;
+  let Itinerary = IIC_LDSW;
  let Rs1 = src{16-12};
  let P = src{11};
  let Q = src{10};
  let imm10 = src{9-0};
+  let mayLoad = 1;
+  let isReMaterializable = 1;
 }

 let Y = 0, S = 0, E = 1 in
@ -586,10 +591,10 @@ def SLI : InstSLI<(outs GPR:$Rd), (ins i32lo21:$imm),
                  [(set GPR:$Rd, i32lo21:$imm)]> {
  bits<21> imm;

-  let Itinerary = IIC_LD;
  let msb = imm{20-16};
  let lsb = imm{15-0};
  let isReMaterializable = 1;
+  let isAsCheapAsAMove = 1;
 }

 // -------------------------------------------------- //
@ -610,6 +615,7 @@ class StoreRR<string OpcString, PatFrag OpNode, ValueType Ty>
  let Q = dst{8};
  let BBB = dst{7-5};
  let JJJJJ = dst{4-0};
+  let mayStore = 1;
 }

 class StoreRI<string OpcString, PatFrag OpNode, ValueType Ty>
@ -624,6 +630,7 @@ class StoreRI<string OpcString, PatFrag OpNode, ValueType Ty>
  let P = dst{17};
  let Q = dst{16};
  let imm16 = dst{15-0};
+  let mayStore = 1;
 }

 let YL = 0b01, E = 0 in {
@ -647,20 +654,22 @@ def STADDR : InstSLS<0x1, (outs), (ins GPR:$Rd, MEMi:$dst),
  let Itinerary = IIC_ST;
  let msb = dst{20-16};
  let lsb = dst{15-0};
+  let mayStore = 1;
 }

 class StoreSPLS<string asmstring, PatFrag opNode>
  : InstSPLS<(outs), (ins GPR:$Rd, MEMspls:$dst),
             !strconcat(asmstring, "\t$Rd, $dst"),
             [(opNode (i32 GPR:$Rd), ADDRspls:$dst)]>,
-    Sched<[WriteST]> {
+    Sched<[WriteSTSW]> {
  bits<17> dst;

-  let Itinerary = IIC_ST;
+  let Itinerary = IIC_STSW;
  let Rs1 = dst{16-12};
  let P = dst{11};
  let Q = dst{10};
  let imm10 = dst{9-0};
+  let mayStore = 1;
 }

 let Y = 0, S = 1, E = 0 in
--- a/lib/Target/Lanai/LanaiSchedule.td
+++ b/lib/Target/Lanai/LanaiSchedule.td
@ -10,14 +10,18 @@
 def ALU_FU  : FuncUnit;
 def LDST_FU : FuncUnit;

-def IIC_ALU : InstrItinClass;
-def IIC_LD  : InstrItinClass;
-def IIC_ST  : InstrItinClass;
+def IIC_ALU  : InstrItinClass;
+def IIC_LD   : InstrItinClass;
+def IIC_ST   : InstrItinClass;
+def IIC_LDSW : InstrItinClass;
+def IIC_STSW : InstrItinClass;

 def LanaiItinerary : ProcessorItineraries<[ALU_FU, LDST_FU],[],[
-  InstrItinData<IIC_LD, [InstrStage<1, [LDST_FU]>]>,
-  InstrItinData<IIC_ST, [InstrStage<1, [LDST_FU]>]>,
-  InstrItinData<IIC_ALU, [InstrStage<1, [ALU_FU]>]>
+  InstrItinData<IIC_LD,   [InstrStage<1, [LDST_FU]>]>,
+  InstrItinData<IIC_ST,   [InstrStage<1, [LDST_FU]>]>,
+  InstrItinData<IIC_LDSW, [InstrStage<2, [LDST_FU]>]>,
+  InstrItinData<IIC_STSW, [InstrStage<2, [LDST_FU]>]>,
+  InstrItinData<IIC_ALU,  [InstrStage<1, [ALU_FU]>]>
 ]>;

 def LanaiSchedModel : SchedMachineModel {
@ -55,12 +59,16 @@ def LanaiSchedModel : SchedMachineModel {
 def ALU : ProcResource<1> { let BufferSize = 0; }
 def LdSt : ProcResource<1> { let BufferSize = 0; }

-def WriteLD        : SchedWrite;
-def WriteST        : SchedWrite;
-def WriteALU       : SchedWrite;
+def WriteLD   : SchedWrite;
+def WriteST   : SchedWrite;
+def WriteLDSW : SchedWrite;
+def WriteSTSW : SchedWrite;
+def WriteALU  : SchedWrite;

 let SchedModel = LanaiSchedModel in {
-  def : WriteRes<WriteLD, [LdSt]> { let Latency = 2; }
-  def : WriteRes<WriteST, [LdSt]> { let Latency = 2; }
-  def : WriteRes<WriteALU, [ALU]> { let Latency = 1; }
+  def : WriteRes<WriteLD, [LdSt]>   { let Latency = 2; }
+  def : WriteRes<WriteST, [LdSt]>   { let Latency = 2; }
+  def : WriteRes<WriteLDSW, [LdSt]> { let Latency = 2; }
+  def : WriteRes<WriteSTSW, [LdSt]> { let Latency = 4; }
+  def : WriteRes<WriteALU, [ALU]>   { let Latency = 1; }
 }
--- a/test/CodeGen/Lanai/subword.ll
+++ b/test/CodeGen/Lanai/subword.ll
@ -0,0 +1,29 @@
+; RUN: llc < %s -mtriple=lanai-unknown-unknown | FileCheck %s
+
+; Test scheduling of subwords.
+
+%struct.X = type { i16, i16 }
+
+define void @f(%struct.X* inreg nocapture %c) #0 {
+entry:
+  %a = getelementptr inbounds %struct.X, %struct.X* %c, i32 0, i32 0
+  %0 = load i16, i16* %a, align 2
+  %inc = add i16 %0, 1
+  store i16 %inc, i16* %a, align 2
+  %b = getelementptr inbounds %struct.X, %struct.X* %c, i32 0, i32 1
+  %1 = load i16, i16* %b, align 2
+  %dec = add i16 %1, -1
+  store i16 %dec, i16* %b, align 2
+  ret void
+}
+
+; Verify that the two loads occur before the stores. Without memory
+; disambiguation and subword schedule, the resultant code was a per subword
+; load-modify-store sequence instead of the more optimal schedule where all
+; loads occurred before modification and storage.
+; CHECK:      uld.h
+; CHECK-NEXT: uld.h
+; CHECK-NEXT: add
+; CHECK-NEXT: st.h
+; CHECK-NEXT: sub
+; CHECK-NEXT: st.h