From 1f504230b9860bd739ef4e6b5ace45fd82623d75 Mon Sep 17 00:00:00 2001 From: Joel Galenson Date: Fri, 19 Jan 2018 17:46:27 +0000 Subject: [PATCH] [ARM] Fix perf regression in compare optimization. Fix a performance regression caused by r322737. While trying to make it easier to replace compares with existing adds and subtracts, I accidentally stopped it from doing so in some cases. This should fix that. I'm also fixing another potential bug in that commit. Differential Revision: https://reviews.llvm.org/D42263 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@322972 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMBaseInstrInfo.cpp | 5 ++- .../ARM/overflow-intrinsic-optimizations.ll | 32 +++++++++++++++++++ 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 9981b0586d6..45724df9619 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -2736,7 +2736,7 @@ bool ARMBaseInstrInfo::optimizeCompareInstr( } I = CmpInstr; E = MI; - } else { + } else if (E != B) { // Allow the loop below to search E (which was initially MI). Since MI and // SubAdd have different tests, even if that instruction could not be MI, it // could still potentially be SubAdd. @@ -2763,8 +2763,7 @@ bool ARMBaseInstrInfo::optimizeCompareInstr( return false; if (I == B) - // The 'and' is below the comparison instruction. - return false; + break; } // Return false if no candidates exist. diff --git a/test/CodeGen/ARM/overflow-intrinsic-optimizations.ll b/test/CodeGen/ARM/overflow-intrinsic-optimizations.ll index 2bfd18720bc..cff5b8998e4 100644 --- a/test/CodeGen/ARM/overflow-intrinsic-optimizations.ll +++ b/test/CodeGen/ARM/overflow-intrinsic-optimizations.ll @@ -197,6 +197,38 @@ cont1: declare void @external_fn(...) local_unnamed_addr #0 +define i32 @are_equal(i32* nocapture readonly %a1, i32* nocapture readonly %a2, i32 %n) local_unnamed_addr #0 { +; CHECK-LABEL: are_equal +; CHECK: subs r{{[0-9]+}}, r{{[0-9]+}}, #1 +; CHECK-NEXT: bne +entry: + %tobool7 = icmp eq i32 %n, 0 + br i1 %tobool7, label %while.end, label %land.rhs.preheader + +land.rhs.preheader: + br label %land.rhs + +while.cond: + %tobool = icmp eq i32 %dec9, 0 + br i1 %tobool, label %while.end, label %land.rhs + +land.rhs: + %dec9.in = phi i32 [ %dec9, %while.cond ], [ %n, %land.rhs.preheader ] + %dec9 = add nsw i32 %dec9.in, -1 + %arrayidx = getelementptr inbounds i32, i32* %a1, i32 %dec9 + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds i32, i32* %a2, i32 %dec9 + %1 = load i32, i32* %arrayidx1, align 4 + %cmp = icmp eq i32 %0, %1 + br i1 %cmp, label %while.cond, label %while.end + +while.end: + %n.addr.0.lcssa = phi i32 [ 0, %entry ], [ 0, %while.cond ], [ %dec9.in, %land.rhs ] + %cmp2 = icmp slt i32 %n.addr.0.lcssa, 1 + %conv = zext i1 %cmp2 to i32 + ret i32 %conv +} + declare void @llvm.trap() #2 declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) #1 declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) #1