[SelectionDAG] Fix CombineToPreIndexedLoadStore O(n^2) behavior

This patch consists of two parts: a performance fix in DAGCombiner.cpp
and a correctness fix in SelectionDAG.cpp.

The test case tests the bug that's uncovered by the performance fix, and
fixed by the correctness fix.

The performance fix keeps the containers required by the
hasPredecessorHelper (which is a lazy DFS) and reuse them. Since
hasPredecessorHelper is called in a loop, the overall efficiency reduced
from O(n^2) to O(n), where n is the number of SDNodes.

The correctness fix keeps iterating the neighbor list even if it's time
to early return. It will return after finishing adding all neighbors to
Worklist, so that no neighbors are discarded due to the original early
return.

llvm-svn: 259691
This commit is contained in:
Tim Shen 2016-02-03 20:58:55 +00:00
parent 0262ca1fce
commit c6dc619045
3 changed files with 34 additions and 6 deletions

View File

@ -9593,6 +9593,10 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
return false;
}
// Caches for hasPredecessorHelper.
SmallPtrSet<const SDNode *, 32> Visited;
SmallVector<const SDNode *, 16> Worklist;
// If the offset is a constant, there may be other adds of constants that
// can be folded with this one. We should do this to avoid having to keep
// a copy of the original base pointer.
@ -9607,7 +9611,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
continue;
if (Use.getUser()->isPredecessorOf(N))
if (N->hasPredecessorHelper(Use.getUser(), Visited, Worklist))
continue;
if (Use.getUser()->getOpcode() != ISD::ADD &&
@ -9637,10 +9641,6 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
// Now check for #3 and #4.
bool RealUse = false;
// Caches for hasPredecessorHelper
SmallPtrSet<const SDNode *, 32> Visited;
SmallVector<const SDNode *, 16> Worklist;
for (SDNode *Use : Ptr.getNode()->uses()) {
if (Use == N)
continue;

View File

@ -6949,13 +6949,16 @@ SDNode::hasPredecessorHelper(const SDNode *N,
// Haven't visited N yet. Continue the search.
while (!Worklist.empty()) {
const SDNode *M = Worklist.pop_back_val();
bool Found = false;
for (const SDValue &OpV : M->op_values()) {
SDNode *Op = OpV.getNode();
if (Visited.insert(Op).second)
Worklist.push_back(Op);
if (Op == N)
return true;
Found = true;
}
if (Found)
return true;
}
return false;

View File

@ -0,0 +1,25 @@
; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
; CHECK-LABEL: TestFoo:
; CHECK: std
; CHECK: bl TestBar
; CHECK: stbu
; CHECK: std
; CHECK: blr
%StructA = type <{ i64, { i64, i64 }, { i64, i64 } }>
define void @TestFoo(%StructA* %this) {
%tmp = getelementptr inbounds %StructA, %StructA* %this, i64 0, i32 1
%tmp11 = getelementptr inbounds %StructA, %StructA* %this, i64 0, i32 1, i32 1
%tmp12 = bitcast { i64, i64 }* %tmp to i64**
store i64* %tmp11, i64** %tmp12
call void @TestBar()
%tmp13 = getelementptr inbounds %StructA, %StructA* %this, i64 0, i32 2, i32 1
store i64* %tmp13, i64** undef
%.cast.i.i.i = bitcast i64* %tmp13 to i8*
store i8 0, i8* %.cast.i.i.i
ret void
}
declare void @TestBar()