[InstCombine] Fix worklist management in DSE (PR44552)

Fixes https://bugs.llvm.org/show_bug.cgi?id=44552. We need to make
sure that the store is reprocessed, because performing DSE may
expose more DSE opportunities.
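To illustrate (a hypothetical reduction of the test added below; the
names %sink, %src and @example are made up for this sketch):

  define void @example(i64* %sink, i16* %src) {
    store i64 1, i64* %sink     ; becomes dead once the load below is erased
    %v = load i16, i16* %src    ; blocks the backward DSE scan from the last store
    %ext = sext i16 %v to i64
    store i64 %ext, i64* %sink  ; dead: overwritten by the store below
    store i64 0, i64* %sink
    ret void
  }

Visiting the final store deletes the store of %ext; that leaves %ext and
%v dead, and once they are erased the backward scan from the reprocessed
store can also delete the first store, all within one InstCombine
iteration instead of one store per iteration.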

There is a slight caveat here though: We need to make sure that we
add the store back to the worklist first, because that means it will
be processed after the operands of the removed store have been
processed. This is a general bug in InstCombine worklist management
that I hope to address at some point, but for now it means we need
to do this manually rather than just returning the instruction as
changed.
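A minimal sketch of the ordering this relies on (assuming InstCombine's
LIFO worklist pop order; comments are mine, not part of the patch):

  // Inside visitStoreInst(SI), once PrevSI is known to be a dead store:
  Worklist.Add(&SI);               // push SI first ...
  eraseInstFromFunction(*PrevSI);  // ... erasing PrevSI pushes its operands on
                                   // top, so they are popped and cleaned up
                                   // before SI is revisited.
  return nullptr;                  // returning &SI instead would make the
                                   // driver re-add SI on top of those operands,
                                   // reversing the intended order.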

Differential Revision: https://reviews.llvm.org/D72807
Nikita Popov 2020-01-15 22:24:15 +01:00
parent 50ac6afd84
commit 68481bfa6d
2 changed files with 64 additions and 2 deletions


@@ -1439,9 +1439,12 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
       if (PrevSI->isUnordered() && equivalentAddressValues(PrevSI->getOperand(1),
                                                            SI.getOperand(1))) {
         ++NumDeadStore;
-        ++BBI;
+        // Manually add back the original store to the worklist now, so it will
+        // be processed after the operands of the removed store, as this may
+        // expose additional DSE opportunities.
+        Worklist.Add(&SI);
         eraseInstFromFunction(*PrevSI);
-        continue;
+        return nullptr;
       }
       break;
     }


@@ -0,0 +1,59 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -instcombine -instcombine-infinite-loop-threshold=2 < %s | FileCheck %s
; This used to require 10 instcombine iterations to fully optimize.
; The number of iterations grew linearly with the number of DSEd stores,
; resulting in overall quadratic runtime.
%struct.S3 = type { i64 }
@csmith_sink_ = dso_local global i64 0, align 1
@g_302_7 = internal constant i32 0, align 1
@g_313_0 = internal global i16 0, align 1
@g_313_1 = internal global i32 0, align 1
@g_313_2 = internal global i32 0, align 1
@g_313_3 = internal global i32 0, align 1
@g_313_4 = internal global i16 0, align 1
@g_313_5 = internal global i16 0, align 1
@g_313_6 = internal global i16 0, align 1
@g_316 = internal global %struct.S3 zeroinitializer, align 1
@g_316_1_0 = internal global i16 0, align 1
define i16 @main() {
; CHECK-LABEL: @main(
; CHECK-NEXT: entry:
; CHECK-NEXT: store i64 0, i64* @csmith_sink_, align 8
; CHECK-NEXT: ret i16 0
;
entry:
store i64 0, i64* @csmith_sink_, align 1
%0 = load i16, i16* @g_313_0, align 1
%conv2 = sext i16 %0 to i64
store i64 %conv2, i64* @csmith_sink_, align 1
%1 = load i32, i32* @g_313_1, align 1
%conv3 = zext i32 %1 to i64
store i64 %conv3, i64* @csmith_sink_, align 1
%2 = load i32, i32* @g_313_2, align 1
%conv4 = sext i32 %2 to i64
store i64 %conv4, i64* @csmith_sink_, align 1
%3 = load i32, i32* @g_313_3, align 1
%conv5 = zext i32 %3 to i64
store i64 %conv5, i64* @csmith_sink_, align 1
%4 = load i16, i16* @g_313_4, align 1
%conv6 = sext i16 %4 to i64
store i64 %conv6, i64* @csmith_sink_, align 1
%5 = load i16, i16* @g_313_5, align 1
%conv7 = sext i16 %5 to i64
store i64 %conv7, i64* @csmith_sink_, align 1
%6 = load i16, i16* @g_313_6, align 1
%conv8 = sext i16 %6 to i64
store i64 %conv8, i64* @csmith_sink_, align 1
%7 = load i64, i64* getelementptr inbounds (%struct.S3, %struct.S3* @g_316, i32 0, i32 0), align 1
store i64 %7, i64* @csmith_sink_, align 1
%8 = load i16, i16* @g_316_1_0, align 1
%conv9 = sext i16 %8 to i64
store i64 %conv9, i64* @csmith_sink_, align 1
store i64 0, i64* @csmith_sink_, align 1
ret i16 0
}