[InstCombine] Fix worklist management in DSE (PR44552)

Fixes https://bugs.llvm.org/show_bug.cgi?id=44552. We need to make
sure that the store is reprocessed, because performing DSE may
expose more DSE opportunities.
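To illustrate (a hypothetical reduction of the test added below; the
names %sink, %src and @example are made up for this sketch):

  define void @example(i64* %sink, i16* %src) {
    store i64 1, i64* %sink     ; becomes dead once the load below is erased
    %v = load i16, i16* %src    ; blocks the backward DSE scan from the last store
    %ext = sext i16 %v to i64
    store i64 %ext, i64* %sink  ; dead: overwritten by the store below
    store i64 0, i64* %sink
    ret void
  }

Visiting the final store deletes the store of %ext; that leaves %ext and
%v dead, and once they are erased the backward scan from the reprocessed
store can also delete the first store, all within one InstCombine
iteration instead of one store per iteration.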

There is a slight caveat here though: We need to make sure that we
add the store back to the worklist first, because that means it will
be processed after the operands of the removed store have been
processed. This is a general bug in InstCombine worklist management
that I hope to address at some point, but for now it means we need
to do this manually rather than just returning the instruction as
changed.
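A minimal sketch of the ordering this relies on (assuming InstCombine's
LIFO worklist pop order; comments are mine, not part of the patch):

  // Inside visitStoreInst(SI), once PrevSI is known to be a dead store:
  Worklist.Add(&SI);               // push SI first ...
  eraseInstFromFunction(*PrevSI);  // ... erasing PrevSI pushes its operands on
                                   // top, so they are popped and cleaned up
                                   // before SI is revisited.
  return nullptr;                  // returning &SI instead would make the
                                   // driver re-add SI on top of those operands,
                                   // reversing the intended order.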

Differential Revision: https://reviews.llvm.org/D72807
Nikita Popov 2020-01-15 22:24:15 +01:00
parent 50ac6afd84
commit 68481bfa6d
2 changed files with 64 additions and 2 deletions


@@ -1439,9 +1439,12 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
       if (PrevSI->isUnordered() && equivalentAddressValues(PrevSI->getOperand(1),
                                                            SI.getOperand(1))) {
         ++NumDeadStore;
-        ++BBI;
+        // Manually add back the original store to the worklist now, so it will
+        // be processed after the operands of the removed store, as this may
+        // expose additional DSE opportunities.
+        Worklist.Add(&SI);
         eraseInstFromFunction(*PrevSI);
-        continue;
+        return nullptr;
       }
       break;
     }


@@ -0,0 +1,59 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -instcombine -instcombine-infinite-loop-threshold=2 < %s | FileCheck %s
; This used to require 10 instcombine iterations to fully optimize.
; The number of iterations grew linearly with the number of DSEd stores,
; resulting in overall quadratic runtime.
%struct.S3 = type { i64 }
@csmith_sink_ = dso_local global i64 0, align 1
@g_302_7 = internal constant i32 0, align 1
@g_313_0 = internal global i16 0, align 1
@g_313_1 = internal global i32 0, align 1
@g_313_2 = internal global i32 0, align 1
@g_313_3 = internal global i32 0, align 1
@g_313_4 = internal global i16 0, align 1
@g_313_5 = internal global i16 0, align 1
@g_313_6 = internal global i16 0, align 1
@g_316 = internal global %struct.S3 zeroinitializer, align 1
@g_316_1_0 = internal global i16 0, align 1
define i16 @main() {
; CHECK-LABEL: @main(
; CHECK-NEXT: entry:
; CHECK-NEXT: store i64 0, i64* @csmith_sink_, align 8
; CHECK-NEXT: ret i16 0
;
entry:
store i64 0, i64* @csmith_sink_, align 1
%0 = load i16, i16* @g_313_0, align 1
%conv2 = sext i16 %0 to i64
store i64 %conv2, i64* @csmith_sink_, align 1
%1 = load i32, i32* @g_313_1, align 1
%conv3 = zext i32 %1 to i64
store i64 %conv3, i64* @csmith_sink_, align 1
%2 = load i32, i32* @g_313_2, align 1
%conv4 = sext i32 %2 to i64
store i64 %conv4, i64* @csmith_sink_, align 1
%3 = load i32, i32* @g_313_3, align 1
%conv5 = zext i32 %3 to i64
store i64 %conv5, i64* @csmith_sink_, align 1
%4 = load i16, i16* @g_313_4, align 1
%conv6 = sext i16 %4 to i64
store i64 %conv6, i64* @csmith_sink_, align 1
%5 = load i16, i16* @g_313_5, align 1
%conv7 = sext i16 %5 to i64
store i64 %conv7, i64* @csmith_sink_, align 1
%6 = load i16, i16* @g_313_6, align 1
%conv8 = sext i16 %6 to i64
store i64 %conv8, i64* @csmith_sink_, align 1
%7 = load i64, i64* getelementptr inbounds (%struct.S3, %struct.S3* @g_316, i32 0, i32 0), align 1
store i64 %7, i64* @csmith_sink_, align 1
%8 = load i16, i16* @g_316_1_0, align 1
%conv9 = sext i16 %8 to i64
store i64 %conv9, i64* @csmith_sink_, align 1
store i64 0, i64* @csmith_sink_, align 1
ret i16 0
}