[lanai] Add subword scheduling itineraries.

Differentiate between word and subword memory operations, because they take a
different number of cycles to complete. This just adds a basic model of the
subword latency to the scheduler.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@266898 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Jacques Pienaar 2016-04-20 18:28:55 +00:00
parent 8f2afa60b7
commit f38ea34724
3 changed files with 63 additions and 17 deletions

View File

@ -496,6 +496,7 @@ class LoadRR<string OpcString, PatFrag OpNode, ValueType Ty>
let Q = src{8};
let BBB = src{7-5};
let JJJJJ = src{4-0};
let mayLoad = 1;
}
class LoadRI<string OpcString, PatFrag OpNode, ValueType Ty>
@ -511,6 +512,7 @@ class LoadRI<string OpcString, PatFrag OpNode, ValueType Ty>
let Q = src{16};
let imm16 = src{15-0};
let isReMaterializable = 1;
let mayLoad = 1;
}
let E = 0 in {
@ -554,19 +556,22 @@ def LDADDR : InstSLS<0x0, (outs GPR:$Rd), (ins MEMi:$src),
let msb = src{20-16};
let lsb = src{15-0};
let isReMaterializable = 1;
let mayLoad = 1;
}
// Load class for the SPLS format: reads the MEMspls operand $src into GPR $Rd,
// matched by the pattern (opNode ADDRspls:$src).
// NOTE(review): this text looks like a diff with its +/- markers stripped --
// the WriteLD / IIC_LD lines and the WriteLDSW / IIC_LDSW lines both appear.
// Presumably the second line of each pair is the replacement; confirm before
// treating this as compilable TableGen.
class LoadSPLS<string asmstring, PatFrag opNode>
: InstSPLS<(outs GPR:$Rd), (ins MEMspls:$src),
!strconcat(asmstring, "\t$src, $Rd"),
[(set (i32 GPR:$Rd), (opNode ADDRspls:$src))]>,
Sched<[WriteLD]> {
Sched<[WriteLDSW]> {
// 17-bit operand, unpacked below into Rs1, P, Q and imm10.
bits<17> src;
let Itinerary = IIC_LD;
let Itinerary = IIC_LDSW;
let Rs1 = src{16-12};
let P = src{11};
let Q = src{10};
let imm10 = src{9-0};
// Sets the mayLoad and isReMaterializable flags on every instruction of this
// class.
let mayLoad = 1;
let isReMaterializable = 1;
}
let Y = 0, S = 0, E = 1 in
@ -586,10 +591,10 @@ def SLI : InstSLI<(outs GPR:$Rd), (ins i32lo21:$imm),
[(set GPR:$Rd, i32lo21:$imm)]> {
bits<21> imm;
let Itinerary = IIC_LD;
let msb = imm{20-16};
let lsb = imm{15-0};
let isReMaterializable = 1;
let isAsCheapAsAMove = 1;
}
// -------------------------------------------------- //
@ -610,6 +615,7 @@ class StoreRR<string OpcString, PatFrag OpNode, ValueType Ty>
let Q = dst{8};
let BBB = dst{7-5};
let JJJJJ = dst{4-0};
let mayStore = 1;
}
class StoreRI<string OpcString, PatFrag OpNode, ValueType Ty>
@ -624,6 +630,7 @@ class StoreRI<string OpcString, PatFrag OpNode, ValueType Ty>
let P = dst{17};
let Q = dst{16};
let imm16 = dst{15-0};
let mayStore = 1;
}
let YL = 0b01, E = 0 in {
@ -647,20 +654,22 @@ def STADDR : InstSLS<0x1, (outs), (ins GPR:$Rd, MEMi:$dst),
let Itinerary = IIC_ST;
let msb = dst{20-16};
let lsb = dst{15-0};
let mayStore = 1;
}
// Store class for the SPLS format: writes GPR $Rd to the MEMspls operand $dst,
// matched by the pattern (opNode (i32 GPR:$Rd), ADDRspls:$dst). No outputs.
// NOTE(review): this text looks like a diff with its +/- markers stripped --
// the WriteST / IIC_ST lines and the WriteSTSW / IIC_STSW lines both appear.
// Presumably the second line of each pair is the replacement; confirm before
// treating this as compilable TableGen.
class StoreSPLS<string asmstring, PatFrag opNode>
: InstSPLS<(outs), (ins GPR:$Rd, MEMspls:$dst),
!strconcat(asmstring, "\t$Rd, $dst"),
[(opNode (i32 GPR:$Rd), ADDRspls:$dst)]>,
Sched<[WriteST]> {
Sched<[WriteSTSW]> {
// 17-bit operand, unpacked below into Rs1, P, Q and imm10.
bits<17> dst;
let Itinerary = IIC_ST;
let Itinerary = IIC_STSW;
let Rs1 = dst{16-12};
let P = dst{11};
let Q = dst{10};
let imm10 = dst{9-0};
// Sets the mayStore flag on every instruction of this class.
let mayStore = 1;
}
let Y = 0, S = 1, E = 0 in

View File

@ -10,14 +10,18 @@
// Functional units referenced by the itinerary stages below.
def ALU_FU : FuncUnit;
def LDST_FU : FuncUnit;
// Itinerary classes. IIC_LDSW / IIC_STSW are the subword load/store classes
// named in the commit message.
// NOTE(review): IIC_ALU, IIC_LD and IIC_ST are listed twice here, and the
// itinerary rows for them are repeated below. This looks like old and new diff
// lines shown together with the +/- markers stripped -- confirm against the
// real file.
def IIC_ALU : InstrItinClass;
def IIC_LD : InstrItinClass;
def IIC_ST : InstrItinClass;
def IIC_ALU : InstrItinClass;
def IIC_LD : InstrItinClass;
def IIC_ST : InstrItinClass;
def IIC_LDSW : InstrItinClass;
def IIC_STSW : InstrItinClass;
// One InstrStage per class. IIC_LD and IIC_ST use InstrStage<1, [LDST_FU]>,
// the subword classes use InstrStage<2, [LDST_FU]>, and IIC_ALU uses
// InstrStage<1, [ALU_FU]>. The first argument is presumably the cycle count
// of the stage -- confirm against LLVM's InstrStage definition.
def LanaiItinerary : ProcessorItineraries<[ALU_FU, LDST_FU],[],[
InstrItinData<IIC_LD, [InstrStage<1, [LDST_FU]>]>,
InstrItinData<IIC_ST, [InstrStage<1, [LDST_FU]>]>,
InstrItinData<IIC_ALU, [InstrStage<1, [ALU_FU]>]>
InstrItinData<IIC_LD, [InstrStage<1, [LDST_FU]>]>,
InstrItinData<IIC_ST, [InstrStage<1, [LDST_FU]>]>,
InstrItinData<IIC_LDSW, [InstrStage<2, [LDST_FU]>]>,
InstrItinData<IIC_STSW, [InstrStage<2, [LDST_FU]>]>,
InstrItinData<IIC_ALU, [InstrStage<1, [ALU_FU]>]>
]>;
def LanaiSchedModel : SchedMachineModel {
@ -55,12 +59,16 @@ def LanaiSchedModel : SchedMachineModel {
// Processor resources, each declared as ProcResource<1> with BufferSize = 0.
def ALU : ProcResource<1> { let BufferSize = 0; }
def LdSt : ProcResource<1> { let BufferSize = 0; }
// SchedWrite types attached to instructions through Sched<[...]>.
// WriteLDSW / WriteSTSW are the subword load/store variants.
// NOTE(review): WriteLD, WriteST and WriteALU are defined twice here, and
// their WriteRes rows are repeated below. This looks like old and new diff
// lines shown together with the +/- markers stripped -- confirm against the
// real file.
def WriteLD : SchedWrite;
def WriteST : SchedWrite;
def WriteALU : SchedWrite;
def WriteLD : SchedWrite;
def WriteST : SchedWrite;
def WriteLDSW : SchedWrite;
def WriteSTSW : SchedWrite;
def WriteALU : SchedWrite;
// Binds each SchedWrite to a resource and a Latency under LanaiSchedModel:
// loads, stores and subword loads use LdSt with Latency 2, subword stores use
// LdSt with Latency 4, and ALU operations use ALU with Latency 1.
// NOTE(review): WriteLDSW has the same Latency as WriteLD while WriteSTSW
// differs from WriteST -- confirm that this asymmetry is intended.
let SchedModel = LanaiSchedModel in {
def : WriteRes<WriteLD, [LdSt]> { let Latency = 2; }
def : WriteRes<WriteST, [LdSt]> { let Latency = 2; }
def : WriteRes<WriteALU, [ALU]> { let Latency = 1; }
def : WriteRes<WriteLD, [LdSt]> { let Latency = 2; }
def : WriteRes<WriteST, [LdSt]> { let Latency = 2; }
def : WriteRes<WriteLDSW, [LdSt]> { let Latency = 2; }
def : WriteRes<WriteSTSW, [LdSt]> { let Latency = 4; }
def : WriteRes<WriteALU, [ALU]> { let Latency = 1; }
}

View File

@ -0,0 +1,29 @@
; RUN: llc < %s -mtriple=lanai-unknown-unknown | FileCheck %s
; Test scheduling of subwords.
; A struct of two i16 fields; @f updates each field in place.
%struct.X = type { i16, i16 }
; Test input: increments field 0 and decrements field 1 of the struct pointed
; to by %c, using 2-byte-aligned i16 loads and stores.
define void @f(%struct.X* inreg nocapture %c) #0 {
entry:
; Field 0: load, add 1, store back.
%a = getelementptr inbounds %struct.X, %struct.X* %c, i32 0, i32 0
%0 = load i16, i16* %a, align 2
%inc = add i16 %0, 1
store i16 %inc, i16* %a, align 2
; Field 1: load, add -1, store back.
%b = getelementptr inbounds %struct.X, %struct.X* %c, i32 0, i32 1
%1 = load i16, i16* %b, align 2
%dec = add i16 %1, -1
store i16 %dec, i16* %b, align 2
ret void
}
; Verify that the two loads occur before the stores. Without memory
; disambiguation and subword scheduling, the resulting code was a per-subword
; load-modify-store sequence instead of the better schedule in which both
; loads occur before the modifications and stores.
; CHECK: uld.h
; CHECK-NEXT: uld.h
; CHECK-NEXT: add
; CHECK-NEXT: st.h
; CHECK-NEXT: sub
; CHECK-NEXT: st.h