[Statepoints] Update DAG root after emitting statepoint.

Since we always generate CopyToRegs for statepoint results,
we must update DAG root after emitting statepoint, so that
these copies are scheduled before any possible local uses.
Note: getControlRoot() flushes all PendingExports, not only
those we generates for relocates. If that'll become a problem,
we can change it to flushing relocate exports only.

Reviewed By: reames

Differential Revision: https://reviews.llvm.org/D87251
This commit is contained in:
Denis Antrushin 2020-09-07 22:04:07 +07:00
parent a1a1eb4962
commit a5e6ed283a
2 changed files with 92 additions and 3 deletions

View File

@ -841,7 +841,7 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
Register Reg = FuncInfo.CreateRegs(RetTy);
RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
DAG.getDataLayout(), Reg, RetTy, None);
SDValue Chain = DAG.getEntryNode();
SDValue Chain = DAG.getRoot();
RFV.getCopyToRegs(Relocated, DAG, getCurSDLoc(), Chain, nullptr);
PendingExports.push_back(Chain);
@ -919,8 +919,9 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
// Remove original call node
DAG.DeleteNode(CallNode);
// DON'T set the root - under the assumption that it's already set past the
// inserted node we created.
// Since we always emit CopyToRegs (even for local relocates), we must
// update root, so that they are emitted before any local uses.
(void)getControlRoot();
// TODO: A better future implementation would be to emit a single variable
// argument, variable return value STATEPOINT node here and then hookup the

View File

@ -8,8 +8,12 @@ declare i1 @return_i1()
declare void @func()
declare void @"some_call"(i64 addrspace(1)*)
declare void @consume(i32 addrspace(1)*)
declare i32 @consume1(i32) gc "statepoint-example"
declare void @consume2(i32 addrspace(1)*, i32 addrspace(1)*)
declare void @consume3(float) gc "statepoint-example"
declare float @consume4(i64) gc "statepoint-example"
declare void @consume5(i32 addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*)
declare void @use1(i32 addrspace(1)*, i8 addrspace(1)*)
declare i32 @"personality_function"()
@ -590,6 +594,90 @@ entry:
ret void
}
; test multiple statepoints/relocates within single block.
; relocates must be properly scheduled w.r.t. statepoints
define void @test_sched(float %0, i32 %1, i8 addrspace(1)* %2) gc "statepoint-example" {
; CHECK-LABEL: test_sched:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 64
; CHECK-NEXT: .cfi_offset %rbx, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movq %rsi, %rbx
; CHECK-NEXT: movl %edi, %ebp
; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: movq %rsi, {{[0-9]+}}(%rsp)
; CHECK-NEXT: callq consume3
; CHECK-NEXT: .Ltmp25:
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: cvtsi2sd %ebp, %xmm0
; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: movq %rbx, {{[0-9]+}}(%rsp)
; CHECK-NEXT: nopl 8(%rax,%rax)
; CHECK-NEXT: .Ltmp26:
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: movss %xmm0, (%rsp)
; CHECK-NEXT: movq %rbx, {{[0-9]+}}(%rsp)
; CHECK-NEXT: nopl 8(%rax,%rax)
; CHECK-NEXT: .Ltmp27:
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: movss %xmm0, (%rsp)
; CHECK-NEXT: movq %rbx, {{[0-9]+}}(%rsp)
; CHECK-NEXT: nopl 8(%rax,%rax)
; CHECK-NEXT: .Ltmp28:
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: xorpd %xmm0, %xmm0
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT: # xmm1 = mem[0],zero
; CHECK-NEXT: ucomisd %xmm0, %xmm1
; CHECK-NEXT: movabsq $9223372036854775807, %rdi # imm = 0x7FFFFFFFFFFFFFFF
; CHECK-NEXT: cmovbeq %rax, %rdi
; CHECK-NEXT: movsd %xmm1, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: movss %xmm0, (%rsp)
; CHECK-NEXT: nopl 8(%rax,%rax)
; CHECK-NEXT: .Ltmp29:
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
entry:
%token0 = call token (i64, i32, void (float)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf32f(i64 2, i32 0, void (float)* nonnull @consume3, i32 1, i32 0, float %0, i32 0, i32 0) [ "gc-live"(i8 addrspace(1)* %2) ]
%reloc1 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %token0, i32 0, i32 0) ; (%2, %2)
%tmp1 = sitofp i32 %1 to double
%to_max.i29 = fcmp ogt double %tmp1, 0.000000e+00
%token1 = call token (i64, i32, i32 (i32)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i32i32f(i64 2, i32 5, i32 (i32)* nonnull @consume1, i32 1, i32 0, i32 undef, i32 0, i32 0) [ "gc-live"(i8 addrspace(1)* %reloc1) ]
%reloc2 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %token1, i32 0, i32 0) ; (%reloc1, %reloc1)
%reloc3 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %token1, i32 0, i32 0) ; (%reloc1, %reloc1)
%token2 = call token (i64, i32, i32 (i32)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i32i32f(i64 2, i32 5, i32 (i32)* nonnull @consume1, i32 1, i32 0, i32 undef, i32 0, i32 0) [ "deopt"(float %0, double %tmp1), "gc-live"(i8 addrspace(1)* %reloc2, i8 addrspace(1)* %reloc3) ]
%reloc4 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %token2, i32 0, i32 0) ; (%reloc3, %reloc2)
%reloc5 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %token2, i32 1, i32 1) ; (%reloc3, %reloc3)
%token3 = call token (i64, i32, void (float)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf32f(i64 2, i32 5, void (float)* nonnull @consume3, i32 1, i32 0, float %0, i32 0, i32 0) [ "deopt"(float %0, double %tmp1), "gc-live"(i8 addrspace(1)* %reloc4, i8 addrspace(1)* %reloc5) ]
%reloc6 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %token3, i32 1, i32 0) ; (%reloc5, %reloc4)
%tmp5 = select i1 %to_max.i29, i64 9223372036854775807, i64 0
%token4 = call token (i64, i32, float (i64)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_f32i64f(i64 2, i32 5, float (i64)* nonnull @consume4, i32 1, i32 0, i64 %tmp5, i32 0, i32 0) [ "deopt"(float %0, double %tmp1), "gc-live"() ]
ret void
}
declare token @llvm.experimental.gc.statepoint.p0f_f32i64f(i64 immarg, i32 immarg, float (i64)*, i32 immarg, i32 immarg, ...)
declare token @llvm.experimental.gc.statepoint.p0f_i32i32f(i64 immarg, i32 immarg, i32 (i32)*, i32 immarg, i32 immarg, ...)
declare token @llvm.experimental.gc.statepoint.p0f_isVoidf32f(i64 immarg, i32 immarg, void (float)*, i32 immarg, i32 immarg, ...)
declare token @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i32, ...)
declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
declare token @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...)