[PowerPC] Clear the sideeffect bit for those instructions that didn't have the match pattern

If the instruction have match pattern, llvm-tblgen will infer the sideeffect bit from the match pattern and it works well.
If not, the tblgen will set it as true that hurt the scheduling.

PowerPC has some instructions that didn't specify the match pattern(i.e. LXSD etc), which is manually selected post-ra according
to the register pressure. We need to clear the sideeffect flag for these instructions.

Differential Revision: https://reviews.llvm.org/D69232
This commit is contained in:
QingShan Zhang 2019-10-30 07:56:35 +00:00
parent 0ad4d87942
commit 6f49436f61
5 changed files with 57 additions and 16 deletions

View File

@ -943,7 +943,7 @@ def LWAX : XForm_1_memOp<31, 341, (outs g8rc:$rD), (ins memrr:$src),
[(set i64:$rD, (sextloadi32 xaddrX4:$src))]>, isPPC64,
PPC970_DGroup_Cracked;
// For fast-isel:
let isCodeGenOnly = 1, mayLoad = 1 in {
let isCodeGenOnly = 1, mayLoad = 1, hasSideEffects = 0 in {
def LWA_32 : DSForm_1<58, 2, (outs gprc:$rD), (ins memrix:$src),
"lwa $rD, $src", IIC_LdStLWA, []>, isPPC64,
PPC970_DGroup_Cracked;

View File

@ -2039,6 +2039,7 @@ def LFIWZX : XForm_25_memOp<31, 887, (outs f8rc:$frD), (ins memrr:$src),
}
// Load Multiple
let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in
def LMW : DForm_1<46, (outs gprc:$rD), (ins memri:$src),
"lmw $rD, $src", IIC_LdStLMW, []>;
@ -2193,6 +2194,7 @@ def : Pat<(pre_store f64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
}
// Store Multiple
let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in
def STMW : DForm_1<47, (outs), (ins gprc:$rS, memri:$dst),
"stmw $rS, $dst", IIC_LdStLMW, []>;

View File

@ -1379,7 +1379,7 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
[(set v4i32:$XT, (or v4i32:$XA, (vnot_ppc v4i32:$XB)))]>;
// VSX scalar loads introduced in ISA 2.07
let mayLoad = 1, mayStore = 0 in {
let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in {
let CodeSize = 3 in
def LXSSPX : XX1Form_memOp<31, 524, (outs vssrc:$XT), (ins memrr:$src),
"lxsspx $XT, $src", IIC_LdStLFD, []>;
@ -1404,7 +1404,7 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
} // mayLoad
// VSX scalar stores introduced in ISA 2.07
let mayStore = 1, mayLoad = 0 in {
let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in {
let CodeSize = 3 in
def STXSSPX : XX1Form_memOp<31, 652, (outs), (ins vssrc:$XT, memrr:$dst),
"stxsspx $XT, $dst", IIC_LdStSTFD, []>;
@ -2977,7 +2977,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
// When adding new D-Form loads/stores, be sure to update the ImmToIdxMap in
// PPCRegisterInfo::PPCRegisterInfo and maybe save yourself some debugging.
let mayLoad = 1, mayStore = 0 in {
let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in {
// Load Vector
def LXV : DQ_RD6_RS5_DQ12<61, 1, (outs vsrc:$XT), (ins memrix16:$src),
"lxv $XT, $src", IIC_LdStLFD, []>;
@ -3022,7 +3022,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
// When adding new D-Form loads/stores, be sure to update the ImmToIdxMap in
// PPCRegisterInfo::PPCRegisterInfo and maybe save yourself some debugging.
let mayStore = 1, mayLoad = 0 in {
let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in {
// Store Vector
def STXV : DQ_RD6_RS5_DQ12<61, 5, (outs), (ins vsrc:$XT, memrix16:$dst),
"stxv $XT, $dst", IIC_LdStSTFD, []>;
@ -3769,7 +3769,7 @@ let AddedComplexity = 400 in {
}
}
let Predicates = [HasP9Vector] in {
let Predicates = [HasP9Vector], hasSideEffects = 0 in {
let mayStore = 1 in {
def SPILLTOVSR_STX : PseudoXFormMemOp<(outs),
(ins spilltovsrrc:$XT, memrr:$dst),

View File

@ -794,8 +794,6 @@ define void @test_13_consecutive_stores_of_bytes(<16 x i8> %a, i8* nocapture %b)
;
; CHECK-P9-LABEL: test_13_consecutive_stores_of_bytes:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: vsldoi v3, v2, v2, 4
; CHECK-P9-NEXT: stxsibx vs35, 0, r5
; CHECK-P9-NEXT: vsldoi v3, v2, v2, 12
; CHECK-P9-NEXT: li r3, 1
; CHECK-P9-NEXT: stxsibx vs35, r5, r3
@ -808,6 +806,8 @@ define void @test_13_consecutive_stores_of_bytes(<16 x i8> %a, i8* nocapture %b)
; CHECK-P9-NEXT: vsldoi v3, v2, v2, 2
; CHECK-P9-NEXT: li r3, 4
; CHECK-P9-NEXT: stxsibx vs35, r5, r3
; CHECK-P9-NEXT: vsldoi v3, v2, v2, 4
; CHECK-P9-NEXT: stxsibx vs35, 0, r5
; CHECK-P9-NEXT: vsldoi v3, v2, v2, 8
; CHECK-P9-NEXT: li r3, 5
; CHECK-P9-NEXT: stxsibx vs35, r5, r3
@ -836,19 +836,19 @@ define void @test_13_consecutive_stores_of_bytes(<16 x i8> %a, i8* nocapture %b)
;
; CHECK-P9-BE-LABEL: test_13_consecutive_stores_of_bytes:
; CHECK-P9-BE: # %bb.0: # %entry
; CHECK-P9-BE-NEXT: vsldoi v3, v2, v2, 13
; CHECK-P9-BE-NEXT: stxsibx vs35, 0, r5
; CHECK-P9-BE-NEXT: li r3, 3
; CHECK-P9-BE-NEXT: vsldoi v3, v2, v2, 5
; CHECK-P9-BE-NEXT: stxsibx vs34, r5, r3
; CHECK-P9-BE-NEXT: li r3, 1
; CHECK-P9-BE-NEXT: stxsibx vs35, r5, r3
; CHECK-P9-BE-NEXT: vsldoi v3, v2, v2, 2
; CHECK-P9-BE-NEXT: li r3, 2
; CHECK-P9-BE-NEXT: stxsibx vs35, r5, r3
; CHECK-P9-BE-NEXT: li r3, 3
; CHECK-P9-BE-NEXT: vsldoi v3, v2, v2, 15
; CHECK-P9-BE-NEXT: stxsibx vs34, r5, r3
; CHECK-P9-BE-NEXT: li r3, 4
; CHECK-P9-BE-NEXT: stxsibx vs35, r5, r3
; CHECK-P9-BE-NEXT: vsldoi v3, v2, v2, 13
; CHECK-P9-BE-NEXT: stxsibx vs35, 0, r5
; CHECK-P9-BE-NEXT: vsldoi v3, v2, v2, 9
; CHECK-P9-BE-NEXT: li r3, 5
; CHECK-P9-BE-NEXT: stxsibx vs35, r5, r3

View File

@ -1,19 +1,58 @@
; REQUIRES: asserts
; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s --check-prefix=CHECK-P9
define i64 @store_disjoint_memory(i64* nocapture %P, i64 %v) {
entry:
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: store_disjoint_memory:%bb.0
; CHECK:SU(2): STD renamable $x4, 24, renamable $x5 :: (store 8 into %ir.arrayidx)
; CHECK:SU([[REG2:[0-9]+]]): STD renamable $x{{[0-9]+}}, 24, renamable $x[[REG5:[0-9]+]]
; CHECK-NOT: Successors:
; CHECK-NOT: SU(3): Ord Latency=0 Memory
; CHECK:SU(3): STD renamable $x4, 16, renamable $x5 :: (store 8 into %ir.arrayidx1)
; CHECK-NOT: SU([[REG3]]): Ord Latency=0 Memory
; CHECK:SU([[REG3:[0-9]+]]): STD renamable $x{{[0-9]+}}, 16, renamable $x[[REG5]]
; CHECK: Predecessors:
; CHECK-NOT: SU(2): Ord Latency=0 Memory
; CHECK-NOT: SU([[REG2]]): Ord Latency=0 Memory
%arrayidx = getelementptr inbounds i64, i64* %P, i64 3
store i64 %v, i64* %arrayidx
%arrayidx1 = getelementptr inbounds i64, i64* %P, i64 2
store i64 %v, i64* %arrayidx1
ret i64 %v
}
; LXSD is an instruction that can be modeled.
@gd = external local_unnamed_addr global [500 x double], align 8
@gf = external local_unnamed_addr global [500 x float], align 4
define double @test_lxsd_no_barrier(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, double %j, double %k, double %l, double %m) {
entry:
%0 = load double, double* getelementptr inbounds ([500 x double], [500 x double]* @gd, i64 0, i64 10), align 8
%1 = load double, double* getelementptr inbounds ([500 x double], [500 x double]* @gd, i64 0, i64 17), align 8
%2 = load double, double* getelementptr inbounds ([500 x double], [500 x double]* @gd, i64 0, i64 87), align 8
%3 = load double, double* getelementptr inbounds ([500 x double], [500 x double]* @gd, i64 0, i64 97), align 8
%4 = load double, double* getelementptr inbounds ([500 x double], [500 x double]* @gd, i64 0, i64 77), align 8
%add = fadd double %a, %b
%add1 = fadd double %add, %c
%add2 = fadd double %add1, %d
%add3 = fadd double %add2, %e
%add4 = fadd double %add3, %f
%add5 = fadd double %add4, %g
%add6 = fadd double %add5, %h
%add7 = fadd double %add6, %i
%add8 = fadd double %add7, %j
%add9 = fadd double %add8, %k
%add10 = fadd double %add9, %l
%add11 = fadd double %add10, %m
%add12 = fadd double %add11, %0
%add13 = fadd double %add12, %1
%add14 = fadd double %add13, %2
%add15 = fadd double %add14, %3
%add16 = fadd double %add15, %4
ret double %add16
; CHECK-P9: ********** MI Scheduling **********
; CHECK-P9-LABEL: test_lxsd_no_barrier:%bb.0 entry
; CHECK-P9-NOT:Global memory object and new barrier chain: SU({{[0-9]+}}).
; CHECK-P9:SU({{[0-9]+}}): renamable $vf{{[0-9]+}} = LXSD 136
; CHECK-P9:SU({{[0-9]+}}): renamable $vf{{[0-9]+}} = LXSD 696
; CHECK-P9:SU({{[0-9]+}}): renamable $vf{{[0-9]+}} = LXSD 776
; CHECK-P9:SU({{[0-9]+}}): renamable $vf{{[0-9]+}} = LXSD 616
}