When MemoryDependenceAnalysis hits a CFG with many transparent blocks,

the algorithm easily degrades into quadratic memory and time complexity.
The easiest example is a long chain of BBs that don't otherwise use a
location. The caching will add an entry for every intermediate block and
limiting the number of results doesn't help as no results are produced
until a definition is found.

Introduce a limit similar to the existing instructions-per-block limit.
This limit counts the total number of blocks checked. If the limit is
reached, entries are considered unknown. The initial value is 1000,
which avoids regressions for normal sized functions while still
limiting edge cases to reasnable memory consumption and execution time.

Differential Revision: http://reviews.llvm.org/D16123


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@261430 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Joerg Sonnenberger 2016-02-20 11:24:44 +00:00
parent 0f386b5a21
commit e1ac460951

View File

@ -57,6 +57,11 @@ static cl::opt<unsigned> BlockScanLimit(
cl::desc("The number of instructions to scan in a block in memory "
"dependency analysis (default = 100)"));
static cl::opt<unsigned> BlockNumberLimit(
"memdep-block-number-limit", cl::Hidden, cl::init(1000),
cl::desc("The number of blocks to scan during memory "
"dependency analysis (default = 1000)"));
// Limit on the number of memdep results to process.
static const unsigned int NumResultsLimit = 100;
@ -1246,6 +1251,8 @@ bool MemoryDependenceAnalysis::getNonLocalPointerDepFromBB(
// won't get any reuse from currently inserted values, because we don't
// revisit blocks after we insert info for them.
unsigned NumSortedEntries = Cache->size();
unsigned WorklistEntries = BlockNumberLimit;
bool GotWorklistLimit = false;
DEBUG(AssertSorted(*Cache));
while (!Worklist.empty()) {
@ -1324,6 +1331,15 @@ bool MemoryDependenceAnalysis::getNonLocalPointerDepFromBB(
goto PredTranslationFailure;
}
}
if (NewBlocks.size() > WorklistEntries) {
// Make sure to clean up the Visited map before continuing on to
// PredTranslationFailure.
for (unsigned i = 0; i < NewBlocks.size(); i++)
Visited.erase(NewBlocks[i]);
GotWorklistLimit = true;
goto PredTranslationFailure;
}
WorklistEntries -= NewBlocks.size();
Worklist.append(NewBlocks.begin(), NewBlocks.end());
continue;
}
@ -1469,18 +1485,22 @@ bool MemoryDependenceAnalysis::getNonLocalPointerDepFromBB(
if (SkipFirstBlock)
return true;
for (NonLocalDepInfo::reverse_iterator I = Cache->rbegin(); ; ++I) {
assert(I != Cache->rend() && "Didn't find current block??");
if (I->getBB() != BB)
bool foundBlock = false;
for (NonLocalDepEntry &I: llvm::reverse(*Cache)) {
if (I.getBB() != BB)
continue;
assert((I->getResult().isNonLocal() || !DT->isReachableFromEntry(BB)) &&
assert((GotWorklistLimit || I.getResult().isNonLocal() || \
!DT->isReachableFromEntry(BB)) &&
"Should only be here with transparent block");
I->setResult(MemDepResult::getUnknown());
Result.push_back(NonLocalDepResult(I->getBB(), I->getResult(),
foundBlock = true;
I.setResult(MemDepResult::getUnknown());
Result.push_back(NonLocalDepResult(I.getBB(), I.getResult(),
Pointer.getAddr()));
break;
}
(void)foundBlock;
assert((foundBlock || GotWorklistLimit) && "Current block not in cache?");
}
// Okay, we're done now. If we added new values to the cache, re-sort it.