Allow PeepholeOptimizer to fold a few more cases

The condition for clearing the list of load-folding candidates was lumped together with the "uninteresting instruction" check. This is too conservative: for example, we don't need to clear the list when encountering an IMPLICIT_DEF. The list is now cleared only for stores, calls, and instructions with unmodeled side effects. Differential Revision: http://reviews.llvm.org/D11591 llvm-svn: 244577
2024-12-02 00:16:25 +00:00 · 2015-08-11 08:19:43 +00:00 · 2015-08-11 08:19:43 +00:00 · e5fcd53d38
commit e5fcd53d38
parent 6d16ba7233
3 changed files with 14 additions and 18 deletions
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@ -1236,14 +1236,13 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {

      // If there exists an instruction which belongs to the following
      // categories, we will discard the load candidates.
+      if (MI->mayStore() || MI->isCall() || MI->hasUnmodeledSideEffects())
+        FoldAsLoadDefCandidates.clear();
+
      if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() ||
          MI->isKill() || MI->isInlineAsm() ||
-          MI->hasUnmodeledSideEffects()) {
-        FoldAsLoadDefCandidates.clear();
+          MI->hasUnmodeledSideEffects())
        continue;
-      }
-      if (MI->mayStore() || MI->isCall())
-        FoldAsLoadDefCandidates.clear();

      if ((isUncoalescableCopy(*MI) &&
           optimizeUncoalescableCopy(MI, LocalMIs)) ||
--- a/test/CodeGen/X86/avx-cvt.ll
+++ b/test/CodeGen/X86/avx-cvt.ll
@ -113,8 +113,7 @@ define float @funcD(i64* nocapture %e) nounwind uwtable readonly ssp {
 define void @fpext() nounwind uwtable {
 ; CHECK-LABEL: fpext:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vcvtss2sd -{{[0-9]+}}(%rsp), %xmm0, %xmm0
 ; CHECK-NEXT:    vmovsd %xmm0, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    retq
  %f = alloca float, align 4
--- a/test/CodeGen/X86/shift-bmi2.ll
+++ b/test/CodeGen/X86/shift-bmi2.ll
@ -30,11 +30,10 @@ entry:
  %x = load i32, i32* %p
  %shl = shl i32 %x, %shamt
 ; BMI2: shl32p
-; Source order scheduling prevents folding, rdar:14208996.
-; BMI2: shlxl %{{.+}}, %{{.+}}, %{{.+}}
+; BMI2: shlxl %{{.+}}, ({{.+}}), %{{.+}}
 ; BMI2: ret
 ; BMI264: shl32p
-; BMI264: shlxl %{{.+}}, %{{.+}}, %{{.+}}
+; BMI264: shlxl %{{.+}}, ({{.+}}), %{{.+}}
 ; BMI264: ret
  ret i32 %shl
 }
@ -75,7 +74,7 @@ entry:
  %x = load i64, i64* %p
  %shl = shl i64 %x, %shamt
 ; BMI264: shl64p
-; BMI264: shlxq %{{.+}}, %{{.+}}, %{{.+}}
+; BMI264: shlxq %{{.+}}, ({{.+}}), %{{.+}}
 ; BMI264: ret
  ret i64 %shl
 }
@ -107,11 +106,10 @@ entry:
  %x = load i32, i32* %p
  %shl = lshr i32 %x, %shamt
 ; BMI2: lshr32p
-; Source order scheduling prevents folding, rdar:14208996.
-; BMI2: shrxl %{{.+}}, %{{.+}}, %{{.+}}
+; BMI2: shrxl %{{.+}}, ({{.+}}), %{{.+}}
 ; BMI2: ret
 ; BMI264: lshr32p
-; BMI264: shrxl %{{.+}}, %{{.+}}, %{{.+}}
+; BMI264: shrxl %{{.+}}, ({{.+}}), %{{.+}}
 ; BMI264: ret
  ret i32 %shl
 }
@ -130,7 +128,7 @@ entry:
  %x = load i64, i64* %p
  %shl = lshr i64 %x, %shamt
 ; BMI264: lshr64p
-; BMI264: shrxq %{{.+}}, %{{.+}}, %{{.+}}
+; BMI264: shrxq %{{.+}}, ({{.+}}), %{{.+}}
 ; BMI264: ret
  ret i64 %shl
 }
@ -153,10 +151,10 @@ entry:
  %shl = ashr i32 %x, %shamt
 ; BMI2: ashr32p
 ; Source order scheduling prevents folding, rdar:14208996.
-; BMI2: sarxl %{{.+}}, %{{.+}}, %{{.+}}
+; BMI2: sarxl %{{.+}}, ({{.+}}), %{{.+}}
 ; BMI2: ret
 ; BMI264: ashr32p
-; BMI264: sarxl %{{.+}}, %{{.+}}, %{{.+}}
+; BMI264: sarxl %{{.+}}, ({{.+}}), %{{.+}}
 ; BMI264: ret
  ret i32 %shl
 }
@ -175,7 +173,7 @@ entry:
  %x = load i64, i64* %p
  %shl = ashr i64 %x, %shamt
 ; BMI264: ashr64p
-; BMI264: sarxq %{{.+}}, %{{.+}}, %{{.+}}
+; BMI264: sarxq %{{.+}}, ({{.+}}), %{{.+}}
 ; BMI264: ret
  ret i64 %shl
 }