mirror of
https://github.com/RPCS3/llvm.git
synced 2024-11-28 06:00:30 +00:00
[DAG] Do MergeConsecutiveStores again before Instruction Selection
Summary: Now that store-merge only generates type-safe stores, do a second pass just before instruction selection to allow lowered intrinsics to be merged as well. Reviewers: jyknight, hfinkel, RKSimon, efriedma, rnk, jmolloy Subscribers: javed.absar, llvm-commits Differential Revision: https://reviews.llvm.org/D33675 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@319036 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
0782e893a9
commit
dbbb6c5fc3
@ -413,7 +413,7 @@ public:
|
||||
/// Allow store merging after legalization in addition to before legalization.
|
||||
/// This may catch stores that do not exist earlier (eg, stores created from
|
||||
/// intrinsics).
|
||||
virtual bool mergeStoresAfterLegalization() const { return false; }
|
||||
virtual bool mergeStoresAfterLegalization() const { return true; }
|
||||
|
||||
/// Returns if it's reasonable to merge stores to MemVT size.
|
||||
virtual bool canMergeStoresTo(unsigned AS, EVT MemVT,
|
||||
|
@ -9562,8 +9562,6 @@ static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode &St) {
|
||||
static SDValue splitStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
|
||||
SelectionDAG &DAG,
|
||||
const AArch64Subtarget *Subtarget) {
|
||||
if (!DCI.isBeforeLegalize())
|
||||
return SDValue();
|
||||
|
||||
StoreSDNode *S = cast<StoreSDNode>(N);
|
||||
if (S->isVolatile() || S->isIndexed())
|
||||
|
@ -2,6 +2,7 @@
|
||||
|
||||
define { i192, i192, i21, i192 } @foo(i192) {
|
||||
; CHECK-LABEL: foo:
|
||||
; CHECK: stp xzr, xzr, [x8]
|
||||
; CHECK-DAG: str xzr, [x8, #16]
|
||||
; CHECK-DAG: str q0, [x8]
|
||||
ret { i192, i192, i21, i192 } {i192 0, i192 1, i21 2, i192 3}
|
||||
}
|
||||
|
@ -19,7 +19,7 @@ entry:
|
||||
}
|
||||
|
||||
; CHECK-LABEL: Strh_zero_4
|
||||
; CHECK: stp wzr, wzr
|
||||
; CHECK: str xzr
|
||||
; CHECK-STRICT-LABEL: Strh_zero_4
|
||||
; CHECK-STRICT: strh wzr
|
||||
; CHECK-STRICT: strh wzr
|
||||
@ -137,7 +137,7 @@ entry:
|
||||
}
|
||||
|
||||
; CHECK-LABEL: Sturh_zero_4
|
||||
; CHECK: stp wzr, wzr
|
||||
; CHECK: stur xzr
|
||||
; CHECK-STRICT-LABEL: Sturh_zero_4
|
||||
; CHECK-STRICT: sturh wzr
|
||||
; CHECK-STRICT: sturh wzr
|
||||
|
@ -32,11 +32,9 @@ define void @test_simple(i32 %n, ...) {
|
||||
; CHECK: add [[VR_TOP:x[0-9]+]], [[VR_TOPTMP]], #128
|
||||
; CHECK: str [[VR_TOP]], [x[[VA_LIST]], #16]
|
||||
|
||||
; CHECK: mov [[GR_OFFS:w[0-9]+]], #-56
|
||||
; CHECK: str [[GR_OFFS]], [x[[VA_LIST]], #24]
|
||||
|
||||
; CHECK: orr [[VR_OFFS:w[0-9]+]], wzr, #0xffffff80
|
||||
; CHECK: str [[VR_OFFS]], [x[[VA_LIST]], #28]
|
||||
; CHECK: mov [[GRVR:x[0-9]+]], #-545460846720
|
||||
; CHECK: movk [[GRVR]], #65480
|
||||
; CHECK: str [[GRVR]], [x[[VA_LIST]], #24]
|
||||
|
||||
%addr = bitcast %va_list* @var to i8*
|
||||
call void @llvm.va_start(i8* %addr)
|
||||
@ -70,11 +68,9 @@ define void @test_fewargs(i32 %n, i32 %n1, i32 %n2, float %m, ...) {
|
||||
; CHECK: add [[VR_TOP:x[0-9]+]], [[VR_TOPTMP]], #112
|
||||
; CHECK: str [[VR_TOP]], [x[[VA_LIST]], #16]
|
||||
|
||||
; CHECK: mov [[GR_OFFS:w[0-9]+]], #-40
|
||||
; CHECK: str [[GR_OFFS]], [x[[VA_LIST]], #24]
|
||||
|
||||
; CHECK: mov [[VR_OFFS:w[0-9]+]], #-11
|
||||
; CHECK: str [[VR_OFFS]], [x[[VA_LIST]], #28]
|
||||
; CHECK: mov [[GRVR_OFFS:x[0-9]+]], #-40
|
||||
; CHECK: movk [[GRVR_OFFS]], #65424, lsl #32
|
||||
; CHECK: str [[GRVR_OFFS]], [x[[VA_LIST]], #24]
|
||||
|
||||
%addr = bitcast %va_list* @var to i8*
|
||||
call void @llvm.va_start(i8* %addr)
|
||||
|
@ -35,7 +35,7 @@ define void @test_tailcall_explicit_sret_alloca_unused() #0 {
|
||||
}
|
||||
|
||||
; CHECK-LABEL: _test_tailcall_explicit_sret_alloca_dummyusers:
|
||||
; CHECK: ldr [[PTRLOAD1:x[0-9]+]], [x0]
|
||||
; CHECK: ldr [[PTRLOAD1:q[0-9]+]], [x0]
|
||||
; CHECK: str [[PTRLOAD1]], [sp]
|
||||
; CHECK: mov x8, sp
|
||||
; CHECK-NEXT: bl _test_explicit_sret
|
||||
@ -64,8 +64,8 @@ define void @test_tailcall_explicit_sret_gep(i1024* %ptr) #0 {
|
||||
; CHECK: mov x[[CALLERX8NUM:[0-9]+]], x8
|
||||
; CHECK: mov x8, sp
|
||||
; CHECK-NEXT: bl _test_explicit_sret
|
||||
; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp]
|
||||
; CHECK: str [[CALLERSRET1:x[0-9]+]], [x[[CALLERX8NUM]]]
|
||||
; CHECK-NEXT: ldr [[CALLERSRET1:q[0-9]+]], [sp]
|
||||
; CHECK: str [[CALLERSRET1:q[0-9]+]], [x[[CALLERX8NUM]]]
|
||||
; CHECK: ret
|
||||
define i1024 @test_tailcall_explicit_sret_alloca_returned() #0 {
|
||||
%l = alloca i1024, align 8
|
||||
@ -79,8 +79,8 @@ define i1024 @test_tailcall_explicit_sret_alloca_returned() #0 {
|
||||
; CHECK-DAG: mov [[FPTR:x[0-9]+]], x0
|
||||
; CHECK: mov x0, sp
|
||||
; CHECK-NEXT: blr [[FPTR]]
|
||||
; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp]
|
||||
; CHECK: str [[CALLERSRET1:x[0-9]+]], [x[[CALLERX8NUM]]]
|
||||
; CHECK: ldr [[CALLERSRET1:q[0-9]+]], [sp]
|
||||
; CHECK: str [[CALLERSRET1:q[0-9]+]], [x[[CALLERX8NUM]]]
|
||||
; CHECK: ret
|
||||
define void @test_indirect_tailcall_explicit_sret_nosret_arg(i1024* sret %arg, void (i1024*)* %f) #0 {
|
||||
%l = alloca i1024, align 8
|
||||
@ -94,8 +94,8 @@ define void @test_indirect_tailcall_explicit_sret_nosret_arg(i1024* sret %arg, v
|
||||
; CHECK: mov x[[CALLERX8NUM:[0-9]+]], x8
|
||||
; CHECK: mov x8, sp
|
||||
; CHECK-NEXT: blr x0
|
||||
; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp]
|
||||
; CHECK: str [[CALLERSRET1:x[0-9]+]], [x[[CALLERX8NUM]]]
|
||||
; CHECK: ldr [[CALLERSRET1:q[0-9]+]], [sp]
|
||||
; CHECK: str [[CALLERSRET1:q[0-9]+]], [x[[CALLERX8NUM]]]
|
||||
; CHECK: ret
|
||||
define void @test_indirect_tailcall_explicit_sret_(i1024* sret %arg, i1024 ()* %f) #0 {
|
||||
%ret = tail call i1024 %f()
|
||||
|
@ -11,8 +11,8 @@ declare i1024 @test_sret() #0
|
||||
; CHECK: mov x[[CALLERX8NUM:[0-9]+]], x8
|
||||
; CHECK: mov x8, sp
|
||||
; CHECK-NEXT: bl _test_sret
|
||||
; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp]
|
||||
; CHECK: str [[CALLERSRET1:x[0-9]+]], [x[[CALLERX8NUM]]]
|
||||
; CHECK: ldr [[CALLERSRET1:q[0-9]+]], [sp]
|
||||
; CHECK: str [[CALLERSRET1:q[0-9]+]], [x[[CALLERX8NUM]]]
|
||||
; CHECK: ret
|
||||
define i1024 @test_call_sret() #0 {
|
||||
%a = call i1024 @test_sret()
|
||||
@ -23,8 +23,8 @@ define i1024 @test_call_sret() #0 {
|
||||
; CHECK: mov x[[CALLERX8NUM:[0-9]+]], x8
|
||||
; CHECK: mov x8, sp
|
||||
; CHECK-NEXT: bl _test_sret
|
||||
; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp]
|
||||
; CHECK: str [[CALLERSRET1:x[0-9]+]], [x[[CALLERX8NUM]]]
|
||||
; CHECK: ldr [[CALLERSRET1:q[0-9]+]], [sp]
|
||||
; CHECK: str [[CALLERSRET1:q[0-9]+]], [x[[CALLERX8NUM]]]
|
||||
; CHECK: ret
|
||||
define i1024 @test_tailcall_sret() #0 {
|
||||
%a = tail call i1024 @test_sret()
|
||||
@ -35,8 +35,8 @@ define i1024 @test_tailcall_sret() #0 {
|
||||
; CHECK: mov x[[CALLERX8NUM:[0-9]+]], x8
|
||||
; CHECK: mov x8, sp
|
||||
; CHECK-NEXT: blr x0
|
||||
; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp]
|
||||
; CHECK: str [[CALLERSRET1:x[0-9]+]], [x[[CALLERX8NUM]]]
|
||||
; CHECK: ldr [[CALLERSRET1:q[0-9]+]], [sp]
|
||||
; CHECK: str [[CALLERSRET1:q[0-9]+]], [x[[CALLERX8NUM]]]
|
||||
; CHECK: ret
|
||||
define i1024 @test_indirect_tailcall_sret(i1024 ()* %f) #0 {
|
||||
%a = tail call i1024 %f()
|
||||
|
@ -251,8 +251,7 @@ entry:
|
||||
|
||||
; R600: MOVA_INT
|
||||
|
||||
; SI-PROMOTE-DAG: buffer_store_byte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4 ; encoding:
|
||||
; SI-PROMOTE-DAG: buffer_store_byte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:5 ; encoding:
|
||||
; SI-PROMOTE-DAG: buffer_store_short v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4 ; encoding:
|
||||
|
||||
; SI-ALLOCA-DAG: buffer_store_byte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4 ; encoding: [0x04,0x00,0x60,0xe0
|
||||
; SI-ALLOCA-DAG: buffer_store_byte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:5 ; encoding: [0x05,0x00,0x60,0xe0
|
||||
|
@ -817,25 +817,37 @@ define void @test_fmuladd(half* %p, half* %q, half* %r) #0 {
|
||||
|
||||
; CHECK-ALL-LABEL: test_insertelement:
|
||||
; CHECK-ALL: sub sp, sp, #8
|
||||
; CHECK-ALL: ldrh
|
||||
; CHECK-ALL: ldrh
|
||||
; CHECK-ALL: ldrh
|
||||
; CHECK-ALL: ldrh
|
||||
; CHECK-ALL-DAG: strh
|
||||
; CHECK-ALL-DAG: strh
|
||||
; CHECK-ALL-DAG: mov
|
||||
; CHECK-ALL-DAG: ldrh
|
||||
; CHECK-ALL-DAG: orr
|
||||
; CHECK-ALL-DAG: strh
|
||||
; CHECK-ALL-DAG: strh
|
||||
; CHECK-ALL-DAG: strh
|
||||
; CHECK-ALL-DAG: ldrh
|
||||
; CHECK-ALL-DAG: ldrh
|
||||
; CHECK-ALL-DAG: ldrh
|
||||
; CHECK-ALL-DAG: strh
|
||||
; CHECK-ALL-DAG: strh
|
||||
; CHECK-ALL-DAG: strh
|
||||
; CHECK-ALL-DAG: strh
|
||||
|
||||
; CHECK-VFP: and
|
||||
; CHECK-VFP: mov
|
||||
; CHECK-VFP: ldrd
|
||||
; CHECK-VFP: orr
|
||||
; CHECK-VFP: ldrh
|
||||
; CHECK-VFP: stm
|
||||
; CHECK-VFP: strh
|
||||
; CHECK-VFP: ldm
|
||||
; CHECK-VFP: stm
|
||||
|
||||
; CHECK-NOVFP: ldrh
|
||||
; CHECK-NOVFP: ldrh
|
||||
; CHECK-NOVFP: ldrh
|
||||
; CHECK-NOVFP: ldrh
|
||||
; CHECK-NOVFP-DAG: strh
|
||||
; CHECK-NOVFP-DAG: strh
|
||||
; CHECK-NOVFP-DAG: mov
|
||||
; CHECK-NOVFP-DAG: ldrh
|
||||
; CHECK-NOVFP-DAG: orr
|
||||
; CHECK-NOVFP-DAG: strh
|
||||
; CHECK-NOVFP-DAG: strh
|
||||
; CHECK-NOVFP-DAG: strh
|
||||
; CHECK-NOVFP-DAG: ldrh
|
||||
; CHECK-NOVFP-DAG: ldrh
|
||||
; CHECK-NOVFP-DAG: ldrh
|
||||
; CHECK-NOVFP-DAG: strh
|
||||
; CHECK-NOVFP-DAG: strh
|
||||
; CHECK-NOVFP-DAG: strh
|
||||
; CHECK-NOVFP-DAG: strh
|
||||
|
||||
; CHECK-ALL: add sp, sp, #8
|
||||
define void @test_insertelement(half* %p, <4 x half>* %q, i32 %i) #0 {
|
||||
%a = load half, half* %p, align 2
|
||||
|
@ -14,12 +14,10 @@
|
||||
|
||||
; Function Attrs: nounwind uwtable
|
||||
define i32 @ebpf_filter(%struct.__sk_buff* nocapture readnone %ebpf_packet) #0 section "socket1" {
|
||||
; EL: r1 = 134678021
|
||||
; EB: r1 = 84281096
|
||||
; CHECK: *(u32 *)(r10 - 8) = r1
|
||||
; EL: r1 = 2569
|
||||
; EB: r1 = 2314
|
||||
; CHECK: *(u16 *)(r10 - 4) = r1
|
||||
|
||||
; EL: r1 = 11033905661445 ll
|
||||
; EB: r1 = 361984551142686720 ll
|
||||
; CHECK: *(u64 *)(r10 - 8) = r1
|
||||
|
||||
; CHECK: r1 = 0
|
||||
; CHECK: *(u16 *)(r10 + 24) = r1
|
||||
@ -35,7 +33,6 @@ define i32 @ebpf_filter(%struct.__sk_buff* nocapture readnone %ebpf_packet) #0 s
|
||||
; CHECK: *(u16 *)(r10 + 4) = r1
|
||||
; CHECK: *(u16 *)(r10 + 2) = r1
|
||||
; CHECK: *(u16 *)(r10 + 0) = r1
|
||||
; CHECK: *(u16 *)(r10 - 2) = r1
|
||||
; CHECK: *(u16 *)(r10 + 26) = r1
|
||||
|
||||
; CHECK: r2 = r10
|
||||
|
@ -821,8 +821,10 @@ entry:
|
||||
; MIPS32R5: jal
|
||||
; MIPS32R5: sw $2, {{[0-9]+}}($sp)
|
||||
|
||||
; MIPS32R5-DAG: sb ${{[0-9]+}}, 1(${{[0-9]+}})
|
||||
; MIPS32R5-DAG; sb ${{[0-9]+}}, %lo(gv2i8)(${{[0-9]+}})
|
||||
; MIPS32R5-DAG; sh ${{[0-9]+}}, %lo(gv2i8)(${{[0-9]+}})
|
||||
|
||||
; MIPS32R5-NOT: sb ${{[0-9]+}}, 1(${{[0-9]+}})
|
||||
; MIPS32R5-NOT; sb ${{[0-9]+}}, %lo(gv2i8)(${{[0-9]+}})
|
||||
|
||||
; MIPS64EB: daddiu $4, $zero, 1543
|
||||
; MIPS64EB: daddiu $5, $zero, 3080
|
||||
@ -870,14 +872,14 @@ entry:
|
||||
; MIPS32-NOT: ori $6
|
||||
; MIPS32-NOT: ori $7
|
||||
|
||||
; MIPS32R5-DAG: lw $4, {{[0-9]+}}($sp)
|
||||
; MIPS32R5-DAG: lw $5, {{[0-9]+}}($sp)
|
||||
; MIPS32R5-NOT: lw $4, {{[0-9]+}}($sp)
|
||||
; MIPS32R5-NOT: lw $5, {{[0-9]+}}($sp)
|
||||
|
||||
; MIPS64: ori $4
|
||||
; MIPS64: ori $5
|
||||
|
||||
; MIPS64R5: lw $4
|
||||
; MIPS64R5: lw $5
|
||||
; MIPS64R5-NOT: lw $4
|
||||
; MIPS64R5-NOT: lw $5
|
||||
|
||||
; MIPS32: jal i8_4
|
||||
; MIPS64: jalr $25
|
||||
@ -996,14 +998,14 @@ entry:
|
||||
; MIPS32-DAG: ori $4
|
||||
; MIPS32-DAG: ori $5
|
||||
|
||||
; MIPS32R5-DAG: lw $4
|
||||
; MIPS32R5-DAG: lw $5
|
||||
; MIPS32R5-NOT: lw $4
|
||||
; MIPS32R5-NOT: lw $5
|
||||
|
||||
; MIPS64: ori $4
|
||||
; MIPS64: ori $5
|
||||
|
||||
; MIPS64R5-DAG: lw $4
|
||||
; MIPS64R5-DAG: lw $5
|
||||
; MIPS64R5-NOT: lw $4
|
||||
; MIPS64R5-NOT: lw $5
|
||||
|
||||
; MIPS32: jal i16_2
|
||||
; MIPS64: jalr $25
|
||||
@ -1037,8 +1039,8 @@ entry:
|
||||
; MIPS64-DAG: daddiu $4
|
||||
; MIPS64-DAG: daddiu $5
|
||||
|
||||
; MIPS64R5-DAG: ld $4
|
||||
; MIPS64R5-DAG: ld $5
|
||||
; MIPS64R5-NOT: ld $4
|
||||
; MIPS64R5-NOT: ld $5
|
||||
|
||||
; MIPS32: jal i16_4
|
||||
; MIPS64: jalr $25
|
||||
@ -1133,8 +1135,8 @@ entry:
|
||||
; MIPS64: daddiu $4
|
||||
; MIPS64: daddiu $5
|
||||
|
||||
; MIPS64R5-DAG: ld $4
|
||||
; MIPS64R5-DAG: ld $5
|
||||
; MIPS64R5-NOT ld $4
|
||||
; MIPS64R5-NOT: ld $5
|
||||
|
||||
; MIPS32: jal i32_2
|
||||
; MIPS64: jalr $25
|
||||
|
@ -12,8 +12,7 @@ define i1 @via_stack_bug(i8 signext %idx) {
|
||||
|
||||
; ALL-LABEL: via_stack_bug:
|
||||
; ALL-DAG: addiu [[ONE:\$[0-9]+]], $zero, 1
|
||||
; ALL-DAG: sb [[ONE]], 7($sp)
|
||||
; ALL-DAG: sb $zero, 6($sp)
|
||||
; ALL-DAG: sh [[ONE]], 6($sp)
|
||||
; ALL-DAG: andi [[MASKED_IDX:\$[0-9]+]], $4, 1
|
||||
; ALL-DAG: addiu [[VPTR:\$[0-9]+]], $sp, 6
|
||||
; ALL-DAG: or [[EPTR:\$[0-9]+]], [[MASKED_IDX]], [[VPTR]]
|
||||
|
@ -22,10 +22,8 @@ define void @f1(fp128 *%x) {
|
||||
; so this goes through memory.
|
||||
define void @f2(fp128 *%a, i128 *%b) {
|
||||
; CHECK-LABEL: f2:
|
||||
; CHECK: lg
|
||||
; CHECK: lg
|
||||
; CHECK: stg
|
||||
; CHECK: stg
|
||||
; CHECK: vl
|
||||
; CHECK: vst
|
||||
; CHECK: br %r14
|
||||
%val = load i128 , i128 *%b
|
||||
%res = bitcast i128 %val to fp128
|
||||
|
Loading…
Reference in New Issue
Block a user