[InstCombine] canonicalize icmp predicate feeding select

This canonicalization was suggested in D33172 as a way to make InstCombine behavior more uniform. 
We have this transform for icmp+br, so unless there's some reason that icmp+select should be 
treated differently, we should do the same thing here.

The benefit comes from increasing the chances of creating identical instructions. This is shown in
the tests in logical-select.ll (PR32791). InstCombine doesn't fold those directly, but EarlyCSE 
can simplify the identical cmps, and then InstCombine can fold the selects together.

The possible regression for the tests in select.ll raises questions about poison/undef:
http://lists.llvm.org/pipermail/llvm-dev/2017-May/113261.html

...but that transform is just as likely to be triggered by this canonicalization as it is to be 
missed, so we're just pointing out a commutation deficiency in the pattern matching:
https://reviews.llvm.org/rL228409

Differential Revision: https://reviews.llvm.org/D34242


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@306435 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Sanjay Patel 2017-06-27 17:53:22 +00:00
parent c02a794b92
commit 32f1f18b64
9 changed files with 85 additions and 63 deletions

View File

@ -1167,6 +1167,23 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
if (Instruction *I = canonicalizeSelectToShuffle(SI))
return I;
// Canonicalize a one-use integer compare with a non-canonical predicate by
// inverting the predicate and swapping the select operands. This matches a
// compare canonicalization for conditional branches.
// The compare is rewritten in place below, so the one-use restriction keeps
// the inverted predicate from being seen by any user other than this select.
// TODO: Should we do the same for FP compares?
CmpInst::Predicate Pred;
if (match(CondVal, m_OneUse(m_ICmp(Pred, m_Value(), m_Value()))) &&
!isCanonicalPredicate(Pred)) {
// Swap true/false values and condition.
CmpInst *Cond = cast<CmpInst>(CondVal);
Cond->setPredicate(CmpInst::getInversePredicate(Pred));
SI.setOperand(1, FalseVal);
SI.setOperand(2, TrueVal);
// NOTE(review): presumably exchanges the select's profile weights so they
// stay aligned with the operands swapped above -- confirm.
SI.swapProfMetadata();
// Put the modified compare back on the worklist.
Worklist.Add(Cond);
return &SI;
}
if (SelType->getScalarType()->isIntegerTy(1) &&
TrueVal->getType() == CondVal->getType()) {
if (match(TrueVal, m_One())) {

View File

@ -150,8 +150,8 @@ define i32 @test_simplify13(i32 %x) {
; ALL-LABEL: @test_simplify13(
; ALL-NEXT: [[CTTZ:%.*]] = call i32 @llvm.cttz.i32(i32 %x, i1 true)
; ALL-NEXT: [[TMP1:%.*]] = add nuw nsw i32 [[CTTZ]], 1
; ALL-NEXT: [[TMP2:%.*]] = icmp ne i32 %x, 0
; ALL-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 0
; ALL-NEXT: [[TMP2:%.*]] = icmp eq i32 %x, 0
; ALL-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 0, i32 [[TMP1]]
; ALL-NEXT: ret i32 [[TMP3]]
;
%ret = call i32 @ffs(i32 %x)
@ -166,8 +166,8 @@ define i32 @test_simplify14(i32 %x) {
; TARGET-LABEL: @test_simplify14(
; TARGET-NEXT: [[CTTZ:%.*]] = call i32 @llvm.cttz.i32(i32 %x, i1 true)
; TARGET-NEXT: [[TMP1:%.*]] = add nuw nsw i32 [[CTTZ]], 1
; TARGET-NEXT: [[TMP2:%.*]] = icmp ne i32 %x, 0
; TARGET-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 0
; TARGET-NEXT: [[TMP2:%.*]] = icmp eq i32 %x, 0
; TARGET-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 0, i32 [[TMP1]]
; TARGET-NEXT: ret i32 [[TMP3]]
;
%ret = call i32 @ffsl(i32 %x)
@ -183,8 +183,8 @@ define i32 @test_simplify15(i64 %x) {
; TARGET-NEXT: [[CTTZ:%.*]] = call i64 @llvm.cttz.i64(i64 %x, i1 true)
; TARGET-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[CTTZ]], 1
; TARGET-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
; TARGET-NEXT: [[TMP3:%.*]] = icmp ne i64 %x, 0
; TARGET-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 0
; TARGET-NEXT: [[TMP3:%.*]] = icmp eq i64 %x, 0
; TARGET-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 0, i32 [[TMP2]]
; TARGET-NEXT: ret i32 [[TMP4]]
;
%ret = call i32 @ffsll(i64 %x)

View File

@ -2423,8 +2423,8 @@ define i32 @f7(i32 %a, i32 %b) {
; CHECK-LABEL: @f7(
; CHECK-NEXT: [[CMP_UNSHIFTED:%.*]] = xor i32 %a, %b
; CHECK-NEXT: [[CMP_MASK:%.*]] = and i32 [[CMP_UNSHIFTED]], 511
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[CMP:%.*]].mask, 0
; CHECK-NEXT: [[S:%.*]] = select i1 [[CMP]], i32 10000, i32 0
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[CMP_MASK]], 0
; CHECK-NEXT: [[S:%.*]] = select i1 [[CMP]], i32 0, i32 10000
; CHECK-NEXT: ret i32 [[S]]
;
%sext = shl i32 %a, 23

View File

@ -62,19 +62,15 @@ define i32 @poo(i32 %a, i32 %b, i32 %c, i32 %d) {
ret i32 %t3
}
; TODO: For the next 4 tests, are there potential canonicalizations and/or folds for these
; in InstCombine? Independent of that, tests like this that may not show any transforms
; still have value because they can help identify conflicting canonicalization rules that
; lead to infinite looping.
; PR32791 - https://bugs.llvm.org/show_bug.cgi?id=32791
; Fold two selects with inverted predicates and zero operands.
; The 2nd compare/select are canonicalized, so CSE and another round of instcombine or some other pass will fold this.
define i32 @fold_inverted_icmp_preds(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: @fold_inverted_icmp_preds(
; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 %a, %b
; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CMP1]], i32 %c, i32 0
; CHECK-NEXT: [[CMP2:%.*]] = icmp sge i32 %a, %b
; CHECK-NEXT: [[SEL2:%.*]] = select i1 [[CMP2]], i32 %d, i32 0
; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 %a, %b
; CHECK-NEXT: [[SEL2:%.*]] = select i1 [[CMP2]], i32 0, i32 %d
; CHECK-NEXT: [[OR:%.*]] = or i32 [[SEL1]], [[SEL2]]
; CHECK-NEXT: ret i32 [[OR]]
;
@ -86,12 +82,14 @@ define i32 @fold_inverted_icmp_preds(i32 %a, i32 %b, i32 %c, i32 %d) {
ret i32 %or
}
; The 2nd compare/select are canonicalized, so CSE and another round of instcombine or some other pass will fold this.
define i32 @fold_inverted_icmp_preds_reverse(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: @fold_inverted_icmp_preds_reverse(
; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 %a, %b
; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CMP1]], i32 0, i32 %c
; CHECK-NEXT: [[CMP2:%.*]] = icmp sge i32 %a, %b
; CHECK-NEXT: [[SEL2:%.*]] = select i1 [[CMP2]], i32 0, i32 %d
; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 %a, %b
; CHECK-NEXT: [[SEL2:%.*]] = select i1 [[CMP2]], i32 %d, i32 0
; CHECK-NEXT: [[OR:%.*]] = or i32 [[SEL1]], [[SEL2]]
; CHECK-NEXT: ret i32 [[OR]]
;
@ -103,6 +101,8 @@ define i32 @fold_inverted_icmp_preds_reverse(i32 %a, i32 %b, i32 %c, i32 %d) {
ret i32 %or
}
; TODO: Should fcmp have the same sort of predicate canonicalization as icmp?
define i32 @fold_inverted_fcmp_preds(float %a, float %b, i32 %c, i32 %d) {
; CHECK-LABEL: @fold_inverted_fcmp_preds(
; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt float %a, %b
@ -120,10 +120,12 @@ define i32 @fold_inverted_fcmp_preds(float %a, float %b, i32 %c, i32 %d) {
ret i32 %or
}
; The 2nd compare/select are canonicalized, so CSE and another round of instcombine or some other pass will fold this.
define <2 x i32> @fold_inverted_icmp_vector_preds(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x i32> %d) {
; CHECK-LABEL: @fold_inverted_icmp_vector_preds(
; CHECK-NEXT: [[CMP1:%.*]] = icmp ne <2 x i32> %a, %b
; CHECK-NEXT: [[SEL1:%.*]] = select <2 x i1> [[CMP1]], <2 x i32> %c, <2 x i32> zeroinitializer
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq <2 x i32> %a, %b
; CHECK-NEXT: [[SEL1:%.*]] = select <2 x i1> [[CMP1]], <2 x i32> zeroinitializer, <2 x i32> %c
; CHECK-NEXT: [[CMP2:%.*]] = icmp eq <2 x i32> %a, %b
; CHECK-NEXT: [[SEL2:%.*]] = select <2 x i1> [[CMP2]], <2 x i32> %d, <2 x i32> zeroinitializer
; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[SEL1]], [[SEL2]]

View File

@ -93,14 +93,15 @@ define i32 @max_of_nots(i32 %x, i32 %y) {
; negative test case (i.e. can not simplify) : ABS(MIN(NOT x,y))
define i32 @abs_of_min_of_not(i32 %x, i32 %y) {
; CHECK-LABEL: @abs_of_min_of_not(
; CHECK-NEXT: xor
; CHECK-NEXT: add
; CHECK-NEXT: icmp sge
; CHECK-NEXT: select
; CHECK-NEXT: icmp sgt
; CHECK-NEXT: sub
; CHECK-NEXT: select
; CHECK-NEXT: ret
; CHECK-NEXT: [[XORD:%.*]] = xor i32 %x, -1
; CHECK-NEXT: [[YADD:%.*]] = add i32 %y, 2
; CHECK-NEXT: [[COND_I:%.*]] = icmp slt i32 [[YADD]], [[XORD]]
; CHECK-NEXT: [[MIN:%.*]] = select i1 [[COND_I]], i32 [[YADD]], i32 [[XORD]]
; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[MIN]], -1
; CHECK-NEXT: [[SUB:%.*]] = sub i32 0, [[MIN]]
; CHECK-NEXT: [[ABS:%.*]] = select i1 [[CMP2]], i32 [[MIN]], i32 [[SUB]]
; CHECK-NEXT: ret i32 [[ABS]]
;
%xord = xor i32 %x, -1
%yadd = add i32 %y, 2

View File

@ -104,10 +104,10 @@ define i32 @select_icmp_ne_0_and_32_or_4096(i32 %x, i32 %y) {
define i8 @select_icmp_ne_0_and_1073741824_or_8(i32 %x, i8 %y) {
; CHECK-LABEL: @select_icmp_ne_0_and_1073741824_or_8(
; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 1073741824
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
; CHECK-NEXT: [[OR:%.*]] = or i8 [[Y:%.*]], 8
; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i8 [[Y]], i8 [[OR]]
; CHECK-NEXT: [[AND:%.*]] = and i32 %x, 1073741824
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
; CHECK-NEXT: [[OR:%.*]] = or i8 %y, 8
; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i8 [[OR]], i8 %y
; CHECK-NEXT: ret i8 [[SELECT]]
;
%and = and i32 %x, 1073741824
@ -119,10 +119,10 @@ define i8 @select_icmp_ne_0_and_1073741824_or_8(i32 %x, i8 %y) {
define i32 @select_icmp_ne_0_and_8_or_1073741824(i8 %x, i32 %y) {
; CHECK-LABEL: @select_icmp_ne_0_and_8_or_1073741824(
; CHECK-NEXT: [[AND:%.*]] = and i8 [[X:%.*]], 8
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8 [[AND]], 0
; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], 1073741824
; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[OR]]
; CHECK-NEXT: [[AND:%.*]] = and i8 %x, 8
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[AND]], 0
; CHECK-NEXT: [[OR:%.*]] = or i32 %y, 1073741824
; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[OR]], i32 %y
; CHECK-NEXT: ret i32 [[SELECT]]
;
%and = and i8 %x, 8
@ -271,8 +271,8 @@ define i32 @test65(i64 %x) {
define i32 @test66(i64 %x) {
; CHECK-LABEL: @test66(
; CHECK-NEXT: [[TMP1:%.*]] = and i64 %x, 4294967296
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 40, i32 42
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 0
; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 42, i32 40
; CHECK-NEXT: ret i32 [[TMP3]]
;
%1 = and i64 %x, 4294967296
@ -376,10 +376,10 @@ define i32 @no_shift_xor_multiuse_or(i32 %x, i32 %y) {
define i32 @shift_xor_multiuse_or(i32 %x, i32 %y) {
; CHECK-LABEL: @shift_xor_multiuse_or(
; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], 2048
; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[OR]]
; CHECK-NEXT: [[AND:%.*]] = and i32 %x, 4096
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
; CHECK-NEXT: [[OR:%.*]] = or i32 %y, 2048
; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[OR]], i32 %y
; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[OR]]
; CHECK-NEXT: ret i32 [[RES]]
;
@ -430,11 +430,11 @@ define i32 @no_shift_no_xor_multiuse_cmp(i32 %x, i32 %y, i32 %z, i32 %w) {
define i32 @no_shift_xor_multiuse_cmp(i32 %x, i32 %y, i32 %z, i32 %w) {
; CHECK-LABEL: @no_shift_xor_multiuse_cmp(
; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
; CHECK-NEXT: [[AND:%.*]] = and i32 %x, 4096
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[AND]], 4096
; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[Y:%.*]]
; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], %y
; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 %w, i32 %z
; CHECK-NEXT: [[RES:%.*]] = mul i32 [[TMP2]], [[SELECT2]]
; CHECK-NEXT: ret i32 [[RES]]
;

View File

@ -1220,12 +1220,13 @@ entry:
}
define i32 @test_select_select0(i32 %a, i32 %r0, i32 %r1, i32 %v1, i32 %v2) {
; CHECK-LABEL: @test_select_select0(
; CHECK: %[[C0:.*]] = icmp sge i32 %a, %v1
; CHECK-NEXT: %[[C1:.*]] = icmp slt i32 %a, %v2
; CHECK-NEXT: %[[C:.*]] = and i1 %[[C1]], %[[C0]]
; CHECK-NEXT: %[[SEL:.*]] = select i1 %[[C]], i32 %r0, i32 %r1
; CHECK-NEXT: ret i32 %[[SEL]]
; CHECK-LABEL: @test_select_select0(
; CHECK-NEXT: [[C0:%.*]] = icmp slt i32 %a, %v1
; CHECK-NEXT: [[S0:%.*]] = select i1 [[C0]], i32 %r1, i32 %r0
; CHECK-NEXT: [[C1:%.*]] = icmp slt i32 %a, %v2
; CHECK-NEXT: [[S1:%.*]] = select i1 [[C1]], i32 [[S0]], i32 %r1
; CHECK-NEXT: ret i32 [[S1]]
;
%c0 = icmp sge i32 %a, %v1
%s0 = select i1 %c0, i32 %r0, i32 %r1
%c1 = icmp slt i32 %a, %v2
@ -1234,12 +1235,13 @@ define i32 @test_select_select0(i32 %a, i32 %r0, i32 %r1, i32 %v1, i32 %v2) {
}
define i32 @test_select_select1(i32 %a, i32 %r0, i32 %r1, i32 %v1, i32 %v2) {
; CHECK-LABEL: @test_select_select1(
; CHECK: %[[C0:.*]] = icmp sge i32 %a, %v1
; CHECK-NEXT: %[[C1:.*]] = icmp slt i32 %a, %v2
; CHECK-NEXT: %[[C:.*]] = or i1 %[[C1]], %[[C0]]
; CHECK-NEXT: %[[SEL:.*]] = select i1 %[[C]], i32 %r0, i32 %r1
; CHECK-NEXT: ret i32 %[[SEL]]
; CHECK-LABEL: @test_select_select1(
; CHECK-NEXT: [[C0:%.*]] = icmp slt i32 %a, %v1
; CHECK-NEXT: [[S0:%.*]] = select i1 [[C0]], i32 %r1, i32 %r0
; CHECK-NEXT: [[C1:%.*]] = icmp slt i32 %a, %v2
; CHECK-NEXT: [[S1:%.*]] = select i1 [[C1]], i32 %r0, i32 [[S0]]
; CHECK-NEXT: ret i32 [[S1]]
;
%c0 = icmp sge i32 %a, %v1
%s0 = select i1 %c0, i32 %r0, i32 %r1
%c1 = icmp slt i32 %a, %v2

View File

@ -18,7 +18,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
;CHECK-LABEL: @function0(
;CHECK: load <4 x i32>
;CHECK: icmp sle <4 x i32>
;CHECK: icmp sgt <4 x i32>
;CHECK: mul <4 x i32>
;CHECK: add <4 x i32>
;CHECK: select <4 x i1>

View File

@ -244,7 +244,7 @@ for.end:
; SGE -> SLT
; Turn this into a min reduction (select inputs are reversed).
; CHECK-LABEL: @sge_min_red(
; CHECK: icmp sge <2 x i32>
; CHECK: icmp slt <2 x i32>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: icmp slt <2 x i32>
@ -273,7 +273,7 @@ for.end:
; SLE -> SGT
; Turn this into a max reduction (select inputs are reversed).
; CHECK-LABEL: @sle_min_red(
; CHECK: icmp sle <2 x i32>
; CHECK: icmp sgt <2 x i32>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: icmp sgt <2 x i32>
@ -302,7 +302,7 @@ for.end:
; UGE -> ULT
; Turn this into a min reduction (select inputs are reversed).
; CHECK-LABEL: @uge_min_red(
; CHECK: icmp uge <2 x i32>
; CHECK: icmp ult <2 x i32>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: icmp ult <2 x i32>
@ -331,7 +331,7 @@ for.end:
; ULE -> UGT
; Turn this into a max reduction (select inputs are reversed).
; CHECK-LABEL: @ule_min_red(
; CHECK: icmp ule <2 x i32>
; CHECK: icmp ugt <2 x i32>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: icmp ugt <2 x i32>