[InstCombine] morph an existing instruction instead of creating a new one

One potential way to make InstCombine (very slightly?) faster is to recycle instructions when possible instead of creating new ones. It's not explicitly stated AFAIK, but we don't consider this an "InstSimplify". We could, however, make a new layer to house transforms like this if that makes InstCombine more manageable (just throwing out an idea; not sure how much opportunity is actually here). Differential Revision: https://reviews.llvm.org/D31863 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@300067 91177308-0d34-0410-b5e6-96231b3b80d8
2025-03-04 08:37:45 +00:00 · 2017-04-12 15:11:33 +00:00 · 2017-04-12 15:11:33 +00:00 · e930b092c4
commit e930b092c4
parent df5e51ce8f
4 changed files with 12 additions and 13 deletions
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@ -2408,13 +2408,12 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
    }
  }

-  if (Constant *RHS = dyn_cast<Constant>(Op1)) {
-    if (RHS->isAllOnesValue() && Op0->hasOneUse())
-      // xor (cmp A, B), true = not (cmp A, B) = !cmp A, B
-      if (CmpInst *CI = dyn_cast<CmpInst>(Op0))
-        return CmpInst::Create(CI->getOpcode(),
-                               CI->getInversePredicate(),
-                               CI->getOperand(0), CI->getOperand(1));
+  // xor (cmp A, B), true = not (cmp A, B) = !cmp A, B
+  ICmpInst::Predicate Pred;
+  if (match(Op0, m_OneUse(m_Cmp(Pred, m_Value(), m_Value()))) &&
+      match(Op1, m_AllOnes())) {
+    cast<CmpInst>(Op0)->setPredicate(CmpInst::getInversePredicate(Pred));
+    return replaceInstUsesWith(I, Op0);
  }

  if (ConstantInt *RHSC = dyn_cast<ConstantInt>(Op1)) {
--- a/test/Transforms/InstCombine/select-cmp-br.ll
+++ b/test/Transforms/InstCombine/select-cmp-br.ll
@ -15,8 +15,8 @@ define void @test1(%C* %arg) {
 ; CHECK-NEXT:    [[M:%.*]] = load i64*, i64** [[TMP]], align 8
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[C]], %C* [[ARG]], i64 1, i32 0, i32 0
 ; CHECK-NEXT:    [[N:%.*]] = load i64*, i64** [[TMP1]], align 8
-; CHECK-NEXT:    [[TMP71:%.*]] = icmp eq %C* [[ARG]], null
 ; CHECK-NEXT:    [[NOT_TMP5:%.*]] = icmp ne i64* [[M]], [[N]]
+; CHECK-NEXT:    [[TMP71:%.*]] = icmp eq %C* [[ARG]], null
 ; CHECK-NEXT:    [[TMP7:%.*]] = or i1 [[TMP71]], [[NOT_TMP5]]
 ; CHECK-NEXT:    br i1 [[TMP7]], label [[BB10:%.*]], label [[BB8:%.*]]
 ; CHECK:       bb:
@ -115,8 +115,8 @@ define void @test3(%C* %arg) {
 ; CHECK-NEXT:    [[M:%.*]] = load i64*, i64** [[TMP]], align 8
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[C]], %C* [[ARG]], i64 1, i32 0, i32 0
 ; CHECK-NEXT:    [[N:%.*]] = load i64*, i64** [[TMP1]], align 8
-; CHECK-NEXT:    [[TMP71:%.*]] = icmp eq %C* [[ARG]], null
 ; CHECK-NEXT:    [[NOT_TMP5:%.*]] = icmp ne i64* [[M]], [[N]]
+; CHECK-NEXT:    [[TMP71:%.*]] = icmp eq %C* [[ARG]], null
 ; CHECK-NEXT:    [[TMP7:%.*]] = or i1 [[TMP71]], [[NOT_TMP5]]
 ; CHECK-NEXT:    br i1 [[TMP7]], label [[BB10:%.*]], label [[BB8:%.*]]
 ; CHECK:       bb:
--- a/test/Transforms/LoopVectorize/if-conversion.ll
+++ b/test/Transforms/LoopVectorize/if-conversion.ll
@ -18,9 +18,9 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3

 ;CHECK-LABEL: @function0(
 ;CHECK: load <4 x i32>
+;CHECK: icmp sle <4 x i32>
 ;CHECK: mul <4 x i32>
 ;CHECK: add <4 x i32>
-;CHECK: icmp sle <4 x i32>
 ;CHECK: select <4 x i1>
 ;CHECK: ret i32
 define i32 @function0(i32* nocapture %a, i32* nocapture %b, i32 %start, i32 %end) nounwind uwtable ssp {
@ -71,8 +71,8 @@ for.end:

 ;CHECK-LABEL: @reduction_func(
 ;CHECK: load <4 x i32>
-;CHECK: add <4 x i32>
 ;CHECK: icmp slt <4 x i32>
+;CHECK: add <4 x i32>
 ;CHECK: select <4 x i1>
 ;CHECK: ret i32
 define i32 @reduction_func(i32* nocapture %A, i32 %n) nounwind uwtable readonly ssp {
--- a/test/Transforms/SimplifyCFG/merge-cond-stores.ll
+++ b/test/Transforms/SimplifyCFG/merge-cond-stores.ll
@ -5,8 +5,8 @@
 define void @test_simple(i32* %p, i32 %a, i32 %b) {
 ; CHECK-LABEL: @test_simple(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[X2:%.*]] = icmp eq i32 [[B:%.*]], 0
 ; CHECK-NEXT:    [[TMP0:%.*]] = icmp ne i32 [[A:%.*]], 0
+; CHECK-NEXT:    [[X2:%.*]] = icmp eq i32 [[B:%.*]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = xor i1 [[X2]], true
 ; CHECK-NEXT:    [[TMP2:%.*]] = or i1 [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]]
@ -43,8 +43,8 @@ define void @test_recursive(i32* %p, i32 %a, i32 %b, i32 %c, i32 %d) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = or i32 [[B:%.*]], [[A:%.*]]
 ; CHECK-NEXT:    [[X4:%.*]] = icmp eq i32 [[D:%.*]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[TMP0]], [[C:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = xor i1 [[X4]], true
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i1 [[X4]], true
 ; CHECK-NEXT:    [[TMP4:%.*]] = or i1 [[TMP3]], [[TMP2]]
 ; CHECK-NEXT:    br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP6:%.*]]
 ; CHECK:         [[X3:%.*]] = icmp eq i32 [[C]], 0