[JumpThreading] Conditionally freeze its condition when unfolding select

This patch fixes pr45956 (https://bugs.llvm.org/show_bug.cgi?id=45956 ).
To minimize its impact to the quality of generated code, I suggest enabling
this only for LTO as a start (it has two JumpThreading passes registered).
This patch contains a flag that makes JumpThreading enable it.

Reviewed By: efriedma

Differential Revision: https://reviews.llvm.org/D84940
This commit is contained in:
Juneyoung Lee 2020-09-10 15:49:04 +09:00
parent 5d89de28a1
commit e4e3a3311e
4 changed files with 272 additions and 16 deletions

View File

@ -240,10 +240,12 @@ FunctionPass *createReassociatePass();
//===----------------------------------------------------------------------===//
//
// JumpThreading - Thread control through mult-pred/multi-succ blocks where some
// preds always go to some succ. Thresholds other than minus one override the
// internal BB duplication default threshold.
// preds always go to some succ. If FreezeSelectCond is true, unfold the
// condition of a select that unfolds to branch. Thresholds other than minus one
// override the internal BB duplication default threshold.
//
FunctionPass *createJumpThreadingPass(int Threshold = -1);
FunctionPass *createJumpThreadingPass(bool FreezeSelectCond = false,
int Threshold = -1);
//===----------------------------------------------------------------------===//
//

View File

@ -91,9 +91,10 @@ class JumpThreadingPass : public PassInfoMixin<JumpThreadingPass> {
unsigned BBDupThreshold;
unsigned DefaultBBDupThreshold;
bool InsertFreezeWhenUnfoldingSelect;
public:
JumpThreadingPass(int T = -1);
JumpThreadingPass(bool InsertFreezeWhenUnfoldingSelect = false, int T = -1);
// Glue for old PM.
bool runImpl(Function &F, TargetLibraryInfo *TLI_, LazyValueInfo *LVI_,

View File

@ -104,6 +104,11 @@ static cl::opt<bool> PrintLVIAfterJumpThreading(
cl::desc("Print the LazyValueInfo cache after JumpThreading"), cl::init(false),
cl::Hidden);
static cl::opt<bool> JumpThreadingFreezeSelectCond(
"jump-threading-freeze-select-cond",
cl::desc("Freeze the condition when unfolding select"), cl::init(false),
cl::Hidden);
static cl::opt<bool> ThreadAcrossLoopHeaders(
"jump-threading-across-loop-headers",
cl::desc("Allow JumpThreading to thread across loop headers, for testing"),
@ -133,7 +138,8 @@ namespace {
public:
static char ID; // Pass identification
JumpThreading(int T = -1) : FunctionPass(ID), Impl(T) {
JumpThreading(bool InsertFreezeWhenUnfoldingSelect = false, int T = -1)
: FunctionPass(ID), Impl(InsertFreezeWhenUnfoldingSelect, T) {
initializeJumpThreadingPass(*PassRegistry::getPassRegistry());
}
@ -166,11 +172,12 @@ INITIALIZE_PASS_END(JumpThreading, "jump-threading",
"Jump Threading", false, false)
// Public interface to the Jump Threading pass
FunctionPass *llvm::createJumpThreadingPass(int Threshold) {
return new JumpThreading(Threshold);
FunctionPass *llvm::createJumpThreadingPass(bool InsertFr, int Threshold) {
return new JumpThreading(InsertFr, Threshold);
}
JumpThreadingPass::JumpThreadingPass(int T) {
JumpThreadingPass::JumpThreadingPass(bool InsertFr, int T) {
InsertFreezeWhenUnfoldingSelect = JumpThreadingFreezeSelectCond | InsertFr;
DefaultBBDupThreshold = (T == -1) ? BBDuplicateThreshold : unsigned(T);
}
@ -2798,13 +2805,8 @@ bool JumpThreadingPass::TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) {
/// select is not jump-threaded, it will be folded again in the later
/// optimizations.
bool JumpThreadingPass::TryToUnfoldSelectInCurrBB(BasicBlock *BB) {
// This transform can introduce a UB (a conditional branch that depends on a
// poison value) that was not present in the original program. See
// @TryToUnfoldSelectInCurrBB test in test/Transforms/JumpThreading/select.ll.
// This transform would reduce the quality of msan diagnostics.
// Disable this transform under MemorySanitizer.
// FIXME: either delete it or replace with a valid transform. This issue is
// not limited to MemorySanitizer (but has only been observed as an MSan false
// positive in practice so far).
if (BB->getParent()->hasFnAttribute(Attribute::SanitizeMemory))
return false;
@ -2852,8 +2854,11 @@ bool JumpThreadingPass::TryToUnfoldSelectInCurrBB(BasicBlock *BB) {
if (!SI)
continue;
// Expand the select.
Instruction *Term =
SplitBlockAndInsertIfThen(SI->getCondition(), SI, false);
Value *Cond = SI->getCondition();
if (InsertFreezeWhenUnfoldingSelect &&
!isGuaranteedNotToBeUndefOrPoison(Cond, SI, &DTU->getDomTree()))
Cond = new FreezeInst(Cond, "cond.fr", SI);
Instruction *Term = SplitBlockAndInsertIfThen(Cond, SI, false);
BasicBlock *SplitBB = SI->getParent();
BasicBlock *NewBB = Term->getParent();
PHINode *NewPN = PHINode::Create(SI->getType(), 2, "", SI);

View File

@ -0,0 +1,248 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -jump-threading-freeze-select-cond -jump-threading < %s | FileCheck %s
declare void @foo()
declare void @bar()
declare void @baz()
declare void @quux()
define void @test_switch_cmp(i1 %cond, i32 %val, i8 %value) nounwind {
; CHECK-LABEL: @test_switch_cmp(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 [[COND:%.*]], label [[L0:%.*]], label [[L0_THREAD:%.*]]
; CHECK: L0:
; CHECK-NEXT: [[VAL_PHI:%.*]] = phi i32 [ [[VAL:%.*]], [[ENTRY:%.*]] ]
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[VAL_PHI]], 0
; CHECK-NEXT: [[COND_FR:%.*]] = freeze i1 [[CMP]]
; CHECK-NEXT: br i1 [[COND_FR]], label [[L1:%.*]], label [[TMP0:%.*]]
; CHECK: 0:
; CHECK-NEXT: [[TMP1:%.*]] = phi i8 [ [[VALUE:%.*]], [[L0]] ]
; CHECK-NEXT: switch i8 [[TMP1]], label [[L3:%.*]] [
; CHECK-NEXT: i8 1, label [[L1]]
; CHECK-NEXT: i8 2, label [[L2:%.*]]
; CHECK-NEXT: ]
; CHECK: L1:
; CHECK-NEXT: call void @foo()
; CHECK-NEXT: ret void
; CHECK: L2:
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: ret void
; CHECK: L3:
; CHECK-NEXT: call void @baz()
; CHECK-NEXT: ret void
; CHECK: L0.thread:
; CHECK-NEXT: call void @quux()
; CHECK-NEXT: br label [[L1]]
;
entry:
br i1 %cond, label %L0, label %L4
L0:
%val.phi = phi i32 [%val, %entry], [-1, %L4]
%cmp = icmp slt i32 %val.phi, 0
%expr = select i1 %cmp, i8 1, i8 %value
switch i8 %expr, label %L3 [i8 1, label %L1 i8 2, label %L2]
L1:
call void @foo()
ret void
L2:
call void @bar()
ret void
L3:
call void @baz()
ret void
L4:
call void @quux()
br label %L0
}
define i32 @unfold3(i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z, i32 %j) nounwind {
; CHECK-LABEL: @unfold3(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[J:%.*]], 2
; CHECK-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[U:%.*]], [[V:%.*]]
; CHECK-NEXT: br i1 [[CMP_I]], label [[DOTEXIT_THREAD4:%.*]], label [[COND_FALSE_I:%.*]]
; CHECK: cond.false.i:
; CHECK-NEXT: [[CMP4_I:%.*]] = icmp sgt i32 [[U]], [[V]]
; CHECK-NEXT: br i1 [[CMP4_I]], label [[DOTEXIT_THREAD:%.*]], label [[COND_FALSE_6_I:%.*]]
; CHECK: cond.false.6.i:
; CHECK-NEXT: [[CMP8_I:%.*]] = icmp slt i32 [[W:%.*]], [[X:%.*]]
; CHECK-NEXT: br i1 [[CMP8_I]], label [[DOTEXIT_THREAD4]], label [[COND_FALSE_10_I:%.*]]
; CHECK: cond.false.10.i:
; CHECK-NEXT: [[CMP13_I:%.*]] = icmp sgt i32 [[W]], [[X]]
; CHECK-NEXT: br i1 [[CMP13_I]], label [[DOTEXIT_THREAD]], label [[DOTEXIT:%.*]]
; CHECK: .exit:
; CHECK-NEXT: [[PHITMP:%.*]] = icmp sge i32 [[Y:%.*]], [[Z:%.*]]
; CHECK-NEXT: [[COND_FR:%.*]] = freeze i1 [[PHITMP]]
; CHECK-NEXT: br i1 [[COND_FR]], label [[DOTEXIT_THREAD]], label [[DOTEXIT_THREAD4]]
; CHECK: .exit.thread:
; CHECK-NEXT: br label [[DOTEXIT_THREAD4]]
; CHECK: .exit.thread4:
; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[J]], [[DOTEXIT_THREAD]] ], [ [[ADD3]], [[DOTEXIT]] ], [ [[ADD3]], [[ENTRY:%.*]] ], [ [[ADD3]], [[COND_FALSE_6_I]] ]
; CHECK-NEXT: ret i32 [[TMP0]]
;
entry:
%add3 = add nsw i32 %j, 2
%cmp.i = icmp slt i32 %u, %v
br i1 %cmp.i, label %.exit, label %cond.false.i
cond.false.i: ; preds = %entry
%cmp4.i = icmp sgt i32 %u, %v
br i1 %cmp4.i, label %.exit, label %cond.false.6.i
cond.false.6.i: ; preds = %cond.false.i
%cmp8.i = icmp slt i32 %w, %x
br i1 %cmp8.i, label %.exit, label %cond.false.10.i
cond.false.10.i: ; preds = %cond.false.6.i
%cmp13.i = icmp sgt i32 %w, %x
br i1 %cmp13.i, label %.exit, label %cond.false.15.i
cond.false.15.i: ; preds = %cond.false.10.i
%phitmp = icmp sge i32 %y, %z
br label %.exit
.exit: ; preds = %entry, %cond.false.i, %cond.false.6.i, %cond.false.10.i, %cond.false.15.i
%cond23.i = phi i1 [ false, %entry ], [ true, %cond.false.i ], [ false, %cond.false.6.i ], [ %phitmp, %cond.false.15.i ], [ true, %cond.false.10.i ]
%j.add3 = select i1 %cond23.i, i32 %j, i32 %add3
ret i32 %j.add3
}
define i32 @unfold4(i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z, i32 %j) nounwind {
; CHECK-LABEL: @unfold4(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[J:%.*]], 2
; CHECK-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[U:%.*]], [[V:%.*]]
; CHECK-NEXT: br i1 [[CMP_I]], label [[DOTEXIT_THREAD:%.*]], label [[COND_FALSE_I:%.*]]
; CHECK: cond.false.i:
; CHECK-NEXT: [[CMP4_I:%.*]] = icmp sgt i32 [[U]], [[V]]
; CHECK-NEXT: br i1 [[CMP4_I]], label [[DOTEXIT_THREAD5:%.*]], label [[COND_FALSE_6_I:%.*]]
; CHECK: cond.false.6.i:
; CHECK-NEXT: [[CMP8_I:%.*]] = icmp slt i32 [[W:%.*]], [[X:%.*]]
; CHECK-NEXT: br i1 [[CMP8_I]], label [[DOTEXIT_THREAD]], label [[COND_FALSE_10_I:%.*]]
; CHECK: cond.false.10.i:
; CHECK-NEXT: [[CMP13_I:%.*]] = icmp sgt i32 [[W]], [[X]]
; CHECK-NEXT: br i1 [[CMP13_I]], label [[DOTEXIT_THREAD5]], label [[DOTEXIT:%.*]]
; CHECK: .exit:
; CHECK-NEXT: [[CMP19_I:%.*]] = icmp sge i32 [[Y:%.*]], [[Z:%.*]]
; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP19_I]] to i32
; CHECK-NEXT: [[LNOT_I18:%.*]] = icmp eq i32 [[CONV]], 1
; CHECK-NEXT: [[COND_FR:%.*]] = freeze i1 [[LNOT_I18]]
; CHECK-NEXT: br i1 [[COND_FR]], label [[DOTEXIT_THREAD]], label [[DOTEXIT_THREAD5]]
; CHECK: .exit.thread:
; CHECK-NEXT: br label [[DOTEXIT_THREAD5]]
; CHECK: .exit.thread5:
; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[J]], [[DOTEXIT_THREAD]] ], [ [[ADD3]], [[DOTEXIT]] ], [ [[ADD3]], [[COND_FALSE_I]] ], [ [[ADD3]], [[COND_FALSE_10_I]] ]
; CHECK-NEXT: ret i32 [[TMP0]]
;
entry:
%add3 = add nsw i32 %j, 2
%cmp.i = icmp slt i32 %u, %v
br i1 %cmp.i, label %.exit, label %cond.false.i
cond.false.i: ; preds = %entry
%cmp4.i = icmp sgt i32 %u, %v
br i1 %cmp4.i, label %.exit, label %cond.false.6.i
cond.false.6.i: ; preds = %cond.false.i
%cmp8.i = icmp slt i32 %w, %x
br i1 %cmp8.i, label %.exit, label %cond.false.10.i
cond.false.10.i: ; preds = %cond.false.6.i
%cmp13.i = icmp sgt i32 %w, %x
br i1 %cmp13.i, label %.exit, label %cond.false.15.i
cond.false.15.i: ; preds = %cond.false.10.i
%cmp19.i = icmp sge i32 %y, %z
%conv = zext i1 %cmp19.i to i32
br label %.exit
.exit: ; preds = %entry, %cond.false.i, %cond.false.6.i, %cond.false.10.i, %cond.false.15.i
%cond23.i = phi i32 [ 1, %entry ], [ 0, %cond.false.i ], [ 1, %cond.false.6.i ], [ %conv, %cond.false.15.i ], [ 0, %cond.false.10.i ]
%lnot.i18 = icmp eq i32 %cond23.i, 1
%j.add3 = select i1 %lnot.i18, i32 %j, i32 %add3
ret i32 %j.add3
}
; TODO: cond23_i should be constant-folded.
define i32 @unfold5(i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z, i32 %j) nounwind {
; CHECK-LABEL: @unfold5(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[J:%.*]], 2
; CHECK-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[U:%.*]], [[V:%.*]]
; CHECK-NEXT: br i1 [[CMP_I]], label [[DOTEXIT_THREAD:%.*]], label [[COND_FALSE_I:%.*]]
; CHECK: cond.false.i:
; CHECK-NEXT: [[CMP4_I:%.*]] = icmp sgt i32 [[U]], [[V]]
; CHECK-NEXT: br i1 [[CMP4_I]], label [[DOTEXIT_THREAD]], label [[COND_FALSE_6_I:%.*]]
; CHECK: cond.false.6.i:
; CHECK-NEXT: [[CMP8_I:%.*]] = icmp slt i32 [[W:%.*]], [[X:%.*]]
; CHECK-NEXT: br i1 [[CMP8_I]], label [[DOTEXIT_THREAD]], label [[COND_FALSE_10_I:%.*]]
; CHECK: cond.false.10.i:
; CHECK-NEXT: [[CMP13_I:%.*]] = icmp sgt i32 [[W]], [[X]]
; CHECK-NEXT: br i1 [[CMP13_I]], label [[TMP0:%.*]], label [[COND_FALSE_15_I:%.*]]
; CHECK: cond.false.15.i:
; CHECK-NEXT: [[CMP19_I:%.*]] = icmp sge i32 [[Y:%.*]], [[Z:%.*]]
; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP19_I]] to i32
; CHECK-NEXT: br label [[DOTEXIT_THREAD]]
; CHECK: 0:
; CHECK-NEXT: [[COND23_I:%.*]] = phi i32 [ 7, [[COND_FALSE_10_I]] ]
; CHECK-NEXT: [[LNOT_I18:%.*]] = icmp sgt i32 [[COND23_I]], 5
; CHECK-NEXT: br label [[DOTEXIT_THREAD]]
; CHECK: .exit.thread:
; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ [[J]], [[TMP0]] ], [ [[CONV]], [[COND_FALSE_15_I]] ], [ 1, [[COND_FALSE_6_I]] ], [ 3, [[COND_FALSE_I]] ], [ 2, [[ENTRY:%.*]] ]
; CHECK-NEXT: ret i32 [[TMP1]]
;
entry:
%add3 = add nsw i32 %j, 2
%cmp.i = icmp slt i32 %u, %v
br i1 %cmp.i, label %.exit, label %cond.false.i
cond.false.i: ; preds = %entry
%cmp4.i = icmp sgt i32 %u, %v
br i1 %cmp4.i, label %.exit, label %cond.false.6.i
cond.false.6.i: ; preds = %cond.false.i
%cmp8.i = icmp slt i32 %w, %x
br i1 %cmp8.i, label %.exit, label %cond.false.10.i
cond.false.10.i: ; preds = %cond.false.6.i
%cmp13.i = icmp sgt i32 %w, %x
br i1 %cmp13.i, label %.exit, label %cond.false.15.i
cond.false.15.i: ; preds = %cond.false.10.i
%cmp19.i = icmp sge i32 %y, %z
%conv = zext i1 %cmp19.i to i32
br label %.exit
.exit: ; preds = %entry, %cond.false.i, %cond.false.6.i, %cond.false.10.i, %cond.false.15.i
%cond23.i = phi i32 [ 2, %entry ], [ 3, %cond.false.i ], [ 1, %cond.false.6.i ], [ %conv, %cond.false.15.i ], [ 7, %cond.false.10.i ]
%lnot.i18 = icmp sgt i32 %cond23.i, 5
%j.add3 = select i1 %lnot.i18, i32 %j, i32 %cond23.i
ret i32 %j.add3
}
define i32 @TryToUnfoldSelectInCurrBB(i1 %b, i1 %ui, i32 %s, i1 %x) {
; CHECK-LABEL: @TryToUnfoldSelectInCurrBB(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 [[B:%.*]], label [[IF_END_THREAD:%.*]], label [[IF_END:%.*]]
; CHECK: if.end:
; CHECK-NEXT: [[COND_FR:%.*]] = freeze i1 [[X:%.*]]
; CHECK-NEXT: br i1 [[COND_FR]], label [[TMP0:%.*]], label [[IF_END_THREAD]]
; CHECK: 0:
; CHECK-NEXT: br label [[IF_END_THREAD]]
; CHECK: if.end.thread:
; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ [[S:%.*]], [[TMP0]] ], [ 42, [[IF_END]] ], [ 42, [[ENTRY:%.*]] ]
; CHECK-NEXT: ret i32 [[TMP1]]
;
entry:
br i1 %b, label %if.end, label %if.else
if.else:
br label %if.end
if.end:
%v = phi i1 [ %x, %if.else ], [ false, %entry ]
%v1 = select i1 %v, i32 %s, i32 42
ret i32 %v1
}