From 45c75443394b92a8ba8e2474a393039feb5b7d78 Mon Sep 17 00:00:00 2001 From: Shuxin Yang Date: Tue, 4 Jun 2013 01:00:57 +0000 Subject: [PATCH] Fix a defect in code-layout pass, improving Benchmarks/Olden/em3d/em3d by about 30% (4.58s vs 3.2s on an oldish Mac Tower). The corresponding src is excerpted bellow. The lopp accounts for about 90% of execution time. -------------------- cat -n test-suite/MultiSource/Benchmarks/Olden/em3d/make_graph.c 90 91 for (k=0; kto_nodes[k]) break; The defective layout is sketched bellow, where the two branches need to swap. ------------------------------------------------------------------------ L: ... if (cond) goto out-of-loop goto L While this code sequence is defective, I don't understand why it incurs 1/3 of execution time. CPU-event-profiling indicates the poor laoyout dose not increase in br-misprediction; it dosen't increase stall cycle at all, and it dosen't prevent the CPU detect the loop (i.e. Loop-Stream-Detector seems to be working fine as well)... The root cause of the problem is that the layout pass calls AnalyzeBranch() with basic-block which is not updated to reflect its current layout. rdar://13966341 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@183174 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineBlockPlacement.cpp | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index bfba503b351..4b0f7f38fcc 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -991,6 +991,28 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { Cond.clear(); MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch. if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) { + // The "PrevBB" is not yet updated to reflect current code layout, so, + // o. it may fall-through to a block without explict "goto" instruction + // before layout, and no longer fall-through it after layout; or + // o. just opposite. + // + // AnalyzeBranch() may return erroneous value for FBB when these two + // situations take place. For the first scenario FBB is mistakenly set + // NULL; for the 2nd scenario, the FBB, which is expected to be NULL, + // is mistakenly pointing to "*BI". + // + bool needUpdateBr = true; + if (!Cond.empty() && (!FBB || FBB == *BI)) { + PrevBB->updateTerminator(); + needUpdateBr = false; + Cond.clear(); + TBB = FBB = 0; + if (TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) { + // FIXME: This should never take place. + TBB = FBB = 0; + } + } + // If PrevBB has a two-way branch, try to re-order the branches // such that we branch to the successor with higher weight first. if (TBB && !Cond.empty() && FBB && @@ -1003,8 +1025,10 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { DebugLoc dl; // FIXME: this is nowhere TII->RemoveBranch(*PrevBB); TII->InsertBranch(*PrevBB, FBB, TBB, Cond, dl); + needUpdateBr = true; } - PrevBB->updateTerminator(); + if (needUpdateBr) + PrevBB->updateTerminator(); } }