mirror of
https://github.com/RPCS3/llvm.git
synced 2025-02-25 21:33:25 +00:00
[MBP] Don't move bottom block before header if it can't reduce taken branches
If the bottom block BB has only one successor, OldTop, it is in most cases profitable to move BB before OldTop, except in the following case:

    -->OldTop<-
    |    .    |
    |    .    |
    |    .    |
    ---Pred   |
         |    |
        BB-----

Moving BB before OldTop cannot reduce the number of taken branches in this case, so this patch detects the case and prevents the move.

Differential Revision: https://reviews.llvm.org/D57067

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@352236 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
2c51d60895
commit
aeff6e76be
@ -451,6 +451,8 @@ class MachineBlockPlacement : public MachineFunctionPass {
|
||||
|
||||
void buildChain(const MachineBasicBlock *BB, BlockChain &Chain,
|
||||
BlockFilterSet *BlockFilter = nullptr);
|
||||
bool canMoveBottomBlockToTop(const MachineBasicBlock *BottomBlock,
|
||||
const MachineBasicBlock *OldTop);
|
||||
MachineBasicBlock *findBestLoopTop(
|
||||
const MachineLoop &L, const BlockFilterSet &LoopBlockSet);
|
||||
MachineBasicBlock *findBestLoopExit(
|
||||
@ -1756,6 +1758,39 @@ void MachineBlockPlacement::buildChain(
|
||||
<< getBlockName(*Chain.begin()) << "\n");
|
||||
}
|
||||
|
||||
// If bottom of block BB has only one successor OldTop, in most cases it is
|
||||
// profitable to move it before OldTop, except the following case:
|
||||
//
|
||||
// -->OldTop<-
|
||||
// | . |
|
||||
// | . |
|
||||
// | . |
|
||||
// ---Pred |
|
||||
// | |
|
||||
// BB-----
|
||||
//
|
||||
// If BB is moved before OldTop, Pred needs a taken branch to BB, and it can't
|
||||
// lay out the other successor below it, so it can't reduce taken branches.
|
||||
// In this case we keep its original layout.
|
||||
bool
|
||||
MachineBlockPlacement::canMoveBottomBlockToTop(
|
||||
const MachineBasicBlock *BottomBlock,
|
||||
const MachineBasicBlock *OldTop) {
|
||||
if (BottomBlock->pred_size() != 1)
|
||||
return true;
|
||||
MachineBasicBlock *Pred = *BottomBlock->pred_begin();
|
||||
if (Pred->succ_size() != 2)
|
||||
return true;
|
||||
|
||||
MachineBasicBlock *OtherBB = *Pred->succ_begin();
|
||||
if (OtherBB == BottomBlock)
|
||||
OtherBB = *Pred->succ_rbegin();
|
||||
if (OtherBB == OldTop)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Find the best loop top block for layout.
|
||||
///
|
||||
/// Look for a block which is strictly better than the loop header for laying
|
||||
@ -1800,6 +1835,9 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,
|
||||
if (Pred->succ_size() > 1)
|
||||
continue;
|
||||
|
||||
if (!canMoveBottomBlockToTop(Pred, L.getHeader()))
|
||||
continue;
|
||||
|
||||
BlockFrequency PredFreq = MBFI->getBlockFreq(Pred);
|
||||
if (!BestPred || PredFreq > BestPredFreq ||
|
||||
(!(PredFreq < BestPredFreq) &&
|
||||
|
@ -96,20 +96,20 @@ declare float @llvm.fabs.f32(float) nounwind readnone
|
||||
; FUNC-LABEL: {{^}}loop_land_info_assert:
|
||||
; SI: v_cmp_lt_i32_e64 [[CMP4:s\[[0-9:]+\]]], s{{[0-9]+}}, 4{{$}}
|
||||
; SI: s_and_b64 [[CMP4M:s\[[0-9]+:[0-9]+\]]], exec, [[CMP4]]
|
||||
; SI: s_mov_b64 vcc, [[CMP4M]]
|
||||
; SI-NEXT: s_cbranch_vccnz [[CONVEX_EXIT:BB[0-9_]+]]
|
||||
; SI-NEXT: s_branch [[FOR_COND_PREHDR:BB[0-9_]+]]
|
||||
; SI: s_branch [[INFLOOP:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; SI: [[CONVEX_EXIT:BB[0-9_]+]]
|
||||
; SI: s_mov_b64 vcc,
|
||||
; SI-NEXT: s_cbranch_vccnz [[ENDPGM:BB[0-9]+_[0-9]+]]
|
||||
; SI: s_cbranch_vccnz [[INFLOOP]]
|
||||
|
||||
; SI: ; %if.else
|
||||
; SI: buffer_store_dword
|
||||
|
||||
; SI: [[INFLOOP:BB[0-9]+_[0-9]+]]:
|
||||
; SI: [[INFLOOP]]:
|
||||
; SI: s_cbranch_vccnz [[CONVEX_EXIT]]
|
||||
|
||||
; SI: [[CONVEX_EXIT]]:
|
||||
; SI: s_mov_b64 vcc,
|
||||
; SI-NEXT: s_cbranch_vccnz [[ENDPGM:BB[0-9]+_[0-9]+]]
|
||||
; SI: s_branch [[INFLOOP]]
|
||||
; SI-NEXT: [[FOR_COND_PREHDR]]:
|
||||
; SI: ; %for.cond.preheader
|
||||
; SI: s_cbranch_vccz [[ENDPGM]]
|
||||
|
||||
; SI: [[ENDPGM]]:
|
||||
|
@ -24,8 +24,8 @@ define linkonce_odr void @ZN6snappyDecompressor_(%"class.snappy::SnappyDecompres
|
||||
; CHECK-DAG: addi 25, 3, _ZN6snappy8internalL8wordmaskE@toc@l
|
||||
; CHECK-DAG: addis 5, 2, _ZN6snappy8internalL10char_tableE@toc@ha
|
||||
; CHECK-DAG: addi 24, 5, _ZN6snappy8internalL10char_tableE@toc@l
|
||||
; CHECK: b .LBB0_2
|
||||
; CHECK: .LBB0_2: # %for.cond
|
||||
; CHECK: b .[[LABEL1:[A-Z0-9_]+]]
|
||||
; CHECK: .[[LABEL1]]: # %for.cond
|
||||
; CHECK-NOT: addis {{[0-9]+}}, 2, _ZN6snappy8internalL8wordmaskE@toc@ha
|
||||
; CHECK-NOT: addis {{[0-9]+}}, 2, _ZN6snappy8internalL10char_tableE@toc@ha
|
||||
; CHECK: bctrl
|
||||
|
@ -20,22 +20,7 @@ define %struct.__vv* @t(%struct.Key* %desc, i64 %p) nounwind ssp {
|
||||
; CHECK-NEXT: movq %rdi, %rbx
|
||||
; CHECK-NEXT: orq $2097152, %r14 ## imm = 0x200000
|
||||
; CHECK-NEXT: andl $15728640, %r14d ## imm = 0xF00000
|
||||
; CHECK-NEXT: jmp LBB0_1
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: LBB0_3: ## %bb.i
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_1 Depth=1
|
||||
; CHECK-NEXT: movl 0, %eax
|
||||
; CHECK-NEXT: xorps %xmm0, %xmm0
|
||||
; CHECK-NEXT: cvtsi2ssq %rax, %xmm0
|
||||
; CHECK-NEXT: movl 4, %eax
|
||||
; CHECK-NEXT: xorps %xmm1, %xmm1
|
||||
; CHECK-NEXT: cvtsi2ssq %rax, %xmm1
|
||||
; CHECK-NEXT: movl 8, %eax
|
||||
; CHECK-NEXT: xorps %xmm2, %xmm2
|
||||
; CHECK-NEXT: cvtsi2ssq %rax, %xmm2
|
||||
; CHECK-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
|
||||
; CHECK-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
|
||||
; CHECK-NEXT: movaps %xmm0, 0
|
||||
; CHECK-NEXT: LBB0_1: ## %bb4
|
||||
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: xorl %eax, %eax
|
||||
@ -50,7 +35,21 @@ define %struct.__vv* @t(%struct.Key* %desc, i64 %p) nounwind ssp {
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_1 Depth=1
|
||||
; CHECK-NEXT: cmpq $1048576, %r14 ## imm = 0x100000
|
||||
; CHECK-NEXT: jne LBB0_1
|
||||
; CHECK-NEXT: jmp LBB0_3
|
||||
; CHECK-NEXT: ## %bb.3: ## %bb.i
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_1 Depth=1
|
||||
; CHECK-NEXT: movl 0, %eax
|
||||
; CHECK-NEXT: xorps %xmm0, %xmm0
|
||||
; CHECK-NEXT: cvtsi2ssq %rax, %xmm0
|
||||
; CHECK-NEXT: movl 4, %eax
|
||||
; CHECK-NEXT: xorps %xmm1, %xmm1
|
||||
; CHECK-NEXT: cvtsi2ssq %rax, %xmm1
|
||||
; CHECK-NEXT: movl 8, %eax
|
||||
; CHECK-NEXT: xorps %xmm2, %xmm2
|
||||
; CHECK-NEXT: cvtsi2ssq %rax, %xmm2
|
||||
; CHECK-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
|
||||
; CHECK-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
|
||||
; CHECK-NEXT: movaps %xmm0, 0
|
||||
; CHECK-NEXT: jmp LBB0_1
|
||||
entry:
|
||||
br label %bb4
|
||||
|
||||
|
@ -35,11 +35,7 @@ define void @render() nounwind {
|
||||
; CHECK-NEXT: # %bb.1: # %for.cond5.preheader
|
||||
; CHECK-NEXT: xorl %ebx, %ebx
|
||||
; CHECK-NEXT: movb $1, %bpl
|
||||
; CHECK-NEXT: jmp .LBB2_2
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB2_5: # %if.then
|
||||
; CHECK-NEXT: # in Loop: Header=BB2_2 Depth=1
|
||||
; CHECK-NEXT: callq scale
|
||||
; CHECK-NEXT: .LBB2_2: # %for.cond5
|
||||
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: testb %bl, %bl
|
||||
@ -52,7 +48,10 @@ define void @render() nounwind {
|
||||
; CHECK-NEXT: # in Loop: Header=BB2_2 Depth=1
|
||||
; CHECK-NEXT: vucomisd {{\.LCPI.*}}, %xmm0
|
||||
; CHECK-NEXT: jne .LBB2_5
|
||||
; CHECK-NEXT: jp .LBB2_5
|
||||
; CHECK-NEXT: jnp .LBB2_2
|
||||
; CHECK-NEXT: .LBB2_5: # %if.then
|
||||
; CHECK-NEXT: # in Loop: Header=BB2_2 Depth=1
|
||||
; CHECK-NEXT: callq scale
|
||||
; CHECK-NEXT: jmp .LBB2_2
|
||||
; CHECK-NEXT: .LBB2_6: # %for.end52
|
||||
; CHECK-NEXT: addq $8, %rsp
|
||||
|
@ -15,16 +15,15 @@ define void @func() {
|
||||
; CHECK-NEXT: retq
|
||||
; CHECK-NEXT: .LBB0_1: # %bb56
|
||||
; CHECK-NEXT: xorl %eax, %eax
|
||||
; CHECK-NEXT: jmp .LBB0_2
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB0_3: # %bb35
|
||||
; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
|
||||
; CHECK-NEXT: testb %al, %al
|
||||
; CHECK-NEXT: .LBB0_2: # %bb33
|
||||
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: testb %al, %al
|
||||
; CHECK-NEXT: jne .LBB0_2
|
||||
; CHECK-NEXT: jmp .LBB0_3
|
||||
; CHECK-NEXT: # %bb.3: # %bb35
|
||||
; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
|
||||
; CHECK-NEXT: testb %al, %al
|
||||
; CHECK-NEXT: jmp .LBB0_2
|
||||
bb1:
|
||||
br i1 undef, label %L_10, label %L_10
|
||||
|
||||
|
36
test/CodeGen/X86/code_placement_no_header_change.ll
Normal file
36
test/CodeGen/X86/code_placement_no_header_change.ll
Normal file
@ -0,0 +1,36 @@
|
||||
; RUN: llc -mtriple=i686-linux < %s | FileCheck %s
|
||||
|
||||
|
||||
define i32 @bar(i32 %count) {
|
||||
; Test checks that basic block backedge2 is not moved before header,
|
||||
; because it can't reduce taken branches.
|
||||
; Later backedge1 and backedge2 are rotated before the loop header.
|
||||
; CHECK-LABEL: bar
|
||||
; CHECK: %.entry
|
||||
; CHECK: %.backedge1
|
||||
; CHECK: %.backedge2
|
||||
; CHECK: %.header
|
||||
; CHECK: %.exit
|
||||
.entry:
|
||||
%c = shl nsw i32 %count, 2
|
||||
br label %.header
|
||||
|
||||
.header:
|
||||
%val1 = call i32 @foo()
|
||||
%cond1 = icmp sgt i32 %val1, 1
|
||||
br i1 %cond1, label %.exit, label %.backedge1
|
||||
|
||||
.backedge1:
|
||||
%val2 = call i32 @foo()
|
||||
%cond2 = icmp sgt i32 %val2, 1
|
||||
br i1 %cond2, label %.header, label %.backedge2
|
||||
|
||||
.backedge2:
|
||||
%val3 = call i32 @foo()
|
||||
br label %.header
|
||||
|
||||
.exit:
|
||||
ret i32 %c
|
||||
}
|
||||
|
||||
declare i32 @foo()
|
@ -22,19 +22,18 @@
|
||||
; CHECK: #DEBUG_VALUE: main:aa <- 0
|
||||
; CHECK: #DEBUG_VALUE: main:aa <- $[[REG:[0-9a-z]+]]
|
||||
; CHECK: jmp .LBB0_1
|
||||
; CHECK: .LBB0_3:
|
||||
; CHECK: #DEBUG_VALUE: main:aa <- $[[REG]]
|
||||
; CHECK: incl %[[REG]]
|
||||
; CHECK: #DEBUG_VALUE: main:aa <- $[[REG]]
|
||||
; CHECK: .LBB0_2:
|
||||
; CHECK: #DEBUG_VALUE: main:aa <- $[[REG]]
|
||||
; CHECK: jne .LBB0_1
|
||||
; CHECK: # %bb.{{.*}}:
|
||||
; CHECK: #DEBUG_VALUE: main:aa <- $[[REG]]
|
||||
; CHECK: incl %[[REG]]
|
||||
; CHECK: #DEBUG_VALUE: main:aa <- $[[REG]]
|
||||
; CHECK: .LBB0_1:
|
||||
; CHECK: #DEBUG_VALUE: main:aa <- $[[REG]]
|
||||
; CHECK: je .LBB0_4
|
||||
; CHECK: jne .LBB0_2
|
||||
; CHECK: # %bb.{{.*}}:
|
||||
; CHECK: #DEBUG_VALUE: main:aa <- $[[REG]]
|
||||
; CHECK: jne .LBB0_1
|
||||
; CHECK: jmp .LBB0_3
|
||||
; CHECK: .LBB0_4:
|
||||
; CHECK: #DEBUG_VALUE: main:aa <- $[[REG]]
|
||||
; CHECK: retq
|
||||
|
||||
source_filename = "PR37234.cpp"
|
||||
|
Loading…
x
Reference in New Issue
Block a user