mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-02 10:21:54 +00:00
[PowerPC] Clear the side-effect bit for instructions that do not have a match pattern
If an instruction has a match pattern, llvm-tblgen infers the side-effect bit from the pattern, and that works well. If it does not, tblgen sets the bit to true, which hurts scheduling. PowerPC has some instructions that do not specify a match pattern (e.g. LXSD), which are selected manually post-RA according to register pressure. We need to clear the side-effect flag for these instructions. Differential Revision: https://reviews.llvm.org/D69232
This commit is contained in:
parent
0ad4d87942
commit
6f49436f61
@ -943,7 +943,7 @@ def LWAX : XForm_1_memOp<31, 341, (outs g8rc:$rD), (ins memrr:$src),
|
||||
[(set i64:$rD, (sextloadi32 xaddrX4:$src))]>, isPPC64,
|
||||
PPC970_DGroup_Cracked;
|
||||
// For fast-isel:
|
||||
let isCodeGenOnly = 1, mayLoad = 1 in {
|
||||
let isCodeGenOnly = 1, mayLoad = 1, hasSideEffects = 0 in {
|
||||
def LWA_32 : DSForm_1<58, 2, (outs gprc:$rD), (ins memrix:$src),
|
||||
"lwa $rD, $src", IIC_LdStLWA, []>, isPPC64,
|
||||
PPC970_DGroup_Cracked;
|
||||
|
@ -2039,6 +2039,7 @@ def LFIWZX : XForm_25_memOp<31, 887, (outs f8rc:$frD), (ins memrr:$src),
|
||||
}
|
||||
|
||||
// Load Multiple
|
||||
let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in
|
||||
def LMW : DForm_1<46, (outs gprc:$rD), (ins memri:$src),
|
||||
"lmw $rD, $src", IIC_LdStLMW, []>;
|
||||
|
||||
@ -2193,6 +2194,7 @@ def : Pat<(pre_store f64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
|
||||
}
|
||||
|
||||
// Store Multiple
|
||||
let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in
|
||||
def STMW : DForm_1<47, (outs), (ins gprc:$rS, memri:$dst),
|
||||
"stmw $rS, $dst", IIC_LdStLMW, []>;
|
||||
|
||||
|
@ -1379,7 +1379,7 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
|
||||
[(set v4i32:$XT, (or v4i32:$XA, (vnot_ppc v4i32:$XB)))]>;
|
||||
|
||||
// VSX scalar loads introduced in ISA 2.07
|
||||
let mayLoad = 1, mayStore = 0 in {
|
||||
let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in {
|
||||
let CodeSize = 3 in
|
||||
def LXSSPX : XX1Form_memOp<31, 524, (outs vssrc:$XT), (ins memrr:$src),
|
||||
"lxsspx $XT, $src", IIC_LdStLFD, []>;
|
||||
@ -1404,7 +1404,7 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
|
||||
} // mayLoad
|
||||
|
||||
// VSX scalar stores introduced in ISA 2.07
|
||||
let mayStore = 1, mayLoad = 0 in {
|
||||
let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in {
|
||||
let CodeSize = 3 in
|
||||
def STXSSPX : XX1Form_memOp<31, 652, (outs), (ins vssrc:$XT, memrr:$dst),
|
||||
"stxsspx $XT, $dst", IIC_LdStSTFD, []>;
|
||||
@ -2977,7 +2977,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
|
||||
|
||||
// When adding new D-Form loads/stores, be sure to update the ImmToIdxMap in
|
||||
// PPCRegisterInfo::PPCRegisterInfo and maybe save yourself some debugging.
|
||||
let mayLoad = 1, mayStore = 0 in {
|
||||
let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in {
|
||||
// Load Vector
|
||||
def LXV : DQ_RD6_RS5_DQ12<61, 1, (outs vsrc:$XT), (ins memrix16:$src),
|
||||
"lxv $XT, $src", IIC_LdStLFD, []>;
|
||||
@ -3022,7 +3022,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
|
||||
|
||||
// When adding new D-Form loads/stores, be sure to update the ImmToIdxMap in
|
||||
// PPCRegisterInfo::PPCRegisterInfo and maybe save yourself some debugging.
|
||||
let mayStore = 1, mayLoad = 0 in {
|
||||
let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in {
|
||||
// Store Vector
|
||||
def STXV : DQ_RD6_RS5_DQ12<61, 5, (outs), (ins vsrc:$XT, memrix16:$dst),
|
||||
"stxv $XT, $dst", IIC_LdStSTFD, []>;
|
||||
@ -3769,7 +3769,7 @@ let AddedComplexity = 400 in {
|
||||
}
|
||||
}
|
||||
|
||||
let Predicates = [HasP9Vector] in {
|
||||
let Predicates = [HasP9Vector], hasSideEffects = 0 in {
|
||||
let mayStore = 1 in {
|
||||
def SPILLTOVSR_STX : PseudoXFormMemOp<(outs),
|
||||
(ins spilltovsrrc:$XT, memrr:$dst),
|
||||
|
@ -794,8 +794,6 @@ define void @test_13_consecutive_stores_of_bytes(<16 x i8> %a, i8* nocapture %b)
|
||||
;
|
||||
; CHECK-P9-LABEL: test_13_consecutive_stores_of_bytes:
|
||||
; CHECK-P9: # %bb.0: # %entry
|
||||
; CHECK-P9-NEXT: vsldoi v3, v2, v2, 4
|
||||
; CHECK-P9-NEXT: stxsibx vs35, 0, r5
|
||||
; CHECK-P9-NEXT: vsldoi v3, v2, v2, 12
|
||||
; CHECK-P9-NEXT: li r3, 1
|
||||
; CHECK-P9-NEXT: stxsibx vs35, r5, r3
|
||||
@ -808,6 +806,8 @@ define void @test_13_consecutive_stores_of_bytes(<16 x i8> %a, i8* nocapture %b)
|
||||
; CHECK-P9-NEXT: vsldoi v3, v2, v2, 2
|
||||
; CHECK-P9-NEXT: li r3, 4
|
||||
; CHECK-P9-NEXT: stxsibx vs35, r5, r3
|
||||
; CHECK-P9-NEXT: vsldoi v3, v2, v2, 4
|
||||
; CHECK-P9-NEXT: stxsibx vs35, 0, r5
|
||||
; CHECK-P9-NEXT: vsldoi v3, v2, v2, 8
|
||||
; CHECK-P9-NEXT: li r3, 5
|
||||
; CHECK-P9-NEXT: stxsibx vs35, r5, r3
|
||||
@ -836,19 +836,19 @@ define void @test_13_consecutive_stores_of_bytes(<16 x i8> %a, i8* nocapture %b)
|
||||
;
|
||||
; CHECK-P9-BE-LABEL: test_13_consecutive_stores_of_bytes:
|
||||
; CHECK-P9-BE: # %bb.0: # %entry
|
||||
; CHECK-P9-BE-NEXT: vsldoi v3, v2, v2, 13
|
||||
; CHECK-P9-BE-NEXT: stxsibx vs35, 0, r5
|
||||
; CHECK-P9-BE-NEXT: li r3, 3
|
||||
; CHECK-P9-BE-NEXT: vsldoi v3, v2, v2, 5
|
||||
; CHECK-P9-BE-NEXT: stxsibx vs34, r5, r3
|
||||
; CHECK-P9-BE-NEXT: li r3, 1
|
||||
; CHECK-P9-BE-NEXT: stxsibx vs35, r5, r3
|
||||
; CHECK-P9-BE-NEXT: vsldoi v3, v2, v2, 2
|
||||
; CHECK-P9-BE-NEXT: li r3, 2
|
||||
; CHECK-P9-BE-NEXT: stxsibx vs35, r5, r3
|
||||
; CHECK-P9-BE-NEXT: li r3, 3
|
||||
; CHECK-P9-BE-NEXT: vsldoi v3, v2, v2, 15
|
||||
; CHECK-P9-BE-NEXT: stxsibx vs34, r5, r3
|
||||
; CHECK-P9-BE-NEXT: li r3, 4
|
||||
; CHECK-P9-BE-NEXT: stxsibx vs35, r5, r3
|
||||
; CHECK-P9-BE-NEXT: vsldoi v3, v2, v2, 13
|
||||
; CHECK-P9-BE-NEXT: stxsibx vs35, 0, r5
|
||||
; CHECK-P9-BE-NEXT: vsldoi v3, v2, v2, 9
|
||||
; CHECK-P9-BE-NEXT: li r3, 5
|
||||
; CHECK-P9-BE-NEXT: stxsibx vs35, r5, r3
|
||||
|
@ -1,19 +1,58 @@
|
||||
; REQUIRES: asserts
|
||||
; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s --check-prefix=CHECK-P9
|
||||
|
||||
define i64 @store_disjoint_memory(i64* nocapture %P, i64 %v) {
|
||||
entry:
|
||||
; CHECK: ********** MI Scheduling **********
|
||||
; CHECK-LABEL: store_disjoint_memory:%bb.0
|
||||
; CHECK:SU(2): STD renamable $x4, 24, renamable $x5 :: (store 8 into %ir.arrayidx)
|
||||
; CHECK:SU([[REG2:[0-9]+]]): STD renamable $x{{[0-9]+}}, 24, renamable $x[[REG5:[0-9]+]]
|
||||
; CHECK-NOT: Successors:
|
||||
; CHECK-NOT: SU(3): Ord Latency=0 Memory
|
||||
; CHECK:SU(3): STD renamable $x4, 16, renamable $x5 :: (store 8 into %ir.arrayidx1)
|
||||
; CHECK-NOT: SU([[REG3]]): Ord Latency=0 Memory
|
||||
; CHECK:SU([[REG3:[0-9]+]]): STD renamable $x{{[0-9]+}}, 16, renamable $x[[REG5]]
|
||||
; CHECK: Predecessors:
|
||||
; CHECK-NOT: SU(2): Ord Latency=0 Memory
|
||||
; CHECK-NOT: SU([[REG2]]): Ord Latency=0 Memory
|
||||
%arrayidx = getelementptr inbounds i64, i64* %P, i64 3
|
||||
store i64 %v, i64* %arrayidx
|
||||
%arrayidx1 = getelementptr inbounds i64, i64* %P, i64 2
|
||||
store i64 %v, i64* %arrayidx1
|
||||
ret i64 %v
|
||||
}
|
||||
|
||||
; LXSD is an instruction that can be modeled.
|
||||
@gd = external local_unnamed_addr global [500 x double], align 8
|
||||
@gf = external local_unnamed_addr global [500 x float], align 4
|
||||
|
||||
define double @test_lxsd_no_barrier(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, double %j, double %k, double %l, double %m) {
|
||||
entry:
|
||||
%0 = load double, double* getelementptr inbounds ([500 x double], [500 x double]* @gd, i64 0, i64 10), align 8
|
||||
%1 = load double, double* getelementptr inbounds ([500 x double], [500 x double]* @gd, i64 0, i64 17), align 8
|
||||
%2 = load double, double* getelementptr inbounds ([500 x double], [500 x double]* @gd, i64 0, i64 87), align 8
|
||||
%3 = load double, double* getelementptr inbounds ([500 x double], [500 x double]* @gd, i64 0, i64 97), align 8
|
||||
%4 = load double, double* getelementptr inbounds ([500 x double], [500 x double]* @gd, i64 0, i64 77), align 8
|
||||
%add = fadd double %a, %b
|
||||
%add1 = fadd double %add, %c
|
||||
%add2 = fadd double %add1, %d
|
||||
%add3 = fadd double %add2, %e
|
||||
%add4 = fadd double %add3, %f
|
||||
%add5 = fadd double %add4, %g
|
||||
%add6 = fadd double %add5, %h
|
||||
%add7 = fadd double %add6, %i
|
||||
%add8 = fadd double %add7, %j
|
||||
%add9 = fadd double %add8, %k
|
||||
%add10 = fadd double %add9, %l
|
||||
%add11 = fadd double %add10, %m
|
||||
%add12 = fadd double %add11, %0
|
||||
%add13 = fadd double %add12, %1
|
||||
%add14 = fadd double %add13, %2
|
||||
%add15 = fadd double %add14, %3
|
||||
%add16 = fadd double %add15, %4
|
||||
ret double %add16
|
||||
; CHECK-P9: ********** MI Scheduling **********
|
||||
; CHECK-P9-LABEL: test_lxsd_no_barrier:%bb.0 entry
|
||||
; CHECK-P9-NOT:Global memory object and new barrier chain: SU({{[0-9]+}}).
|
||||
; CHECK-P9:SU({{[0-9]+}}): renamable $vf{{[0-9]+}} = LXSD 136
|
||||
; CHECK-P9:SU({{[0-9]+}}): renamable $vf{{[0-9]+}} = LXSD 696
|
||||
; CHECK-P9:SU({{[0-9]+}}): renamable $vf{{[0-9]+}} = LXSD 776
|
||||
; CHECK-P9:SU({{[0-9]+}}): renamable $vf{{[0-9]+}} = LXSD 616
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user