InstSimplify: Eliminate fabs on known positive

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@291624 91177308-0d34-0410-b5e6-96231b3b80d8
2025-02-14 15:57:47 +00:00 · 2017-01-11 00:33:24 +00:00 · 2017-01-11 00:33:24 +00:00 · 1f4353fb62
commit 1f4353fb62
parent a618f4d4ce
6 changed files with 212 additions and 32 deletions
--- a/include/llvm/Analysis/ValueTracking.h
+++ b/include/llvm/Analysis/ValueTracking.h
@ -169,8 +169,12 @@ template <typename T> class ArrayRef;

  /// Return true if we can prove that the specified FP value is either a NaN or
  /// never less than 0.0.
-  bool CannotBeOrderedLessThanZero(const Value *V, const TargetLibraryInfo *TLI,
-                                   unsigned Depth = 0);
+  /// If \p IncludeNeg0 is false, -0.0 is considered less than 0.0.
+  bool CannotBeOrderedLessThanZero(const Value *V, const TargetLibraryInfo *TLI);
+
+  /// \returns true if we can prove that the specified FP value has a 0 sign
+  /// bit.
+  bool SignBitMustBeZero(const Value *V, const TargetLibraryInfo *TLI);

  /// If the specified value can be set by repeating the same byte in memory,
  /// return the i8 value that it is represented with. This is true for all i8
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@ -4308,10 +4308,21 @@ static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd,
    return nullptr;

  // Unary Ops
-  if (NumOperands == 1)
-    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(*ArgBegin))
+  if (NumOperands == 1) {
+    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(*ArgBegin)) {
      if (II->getIntrinsicID() == IID)
        return II;
+    }
+
+    switch (IID) {
+    case Intrinsic::fabs: {
+      if (SignBitMustBeZero(*ArgBegin, Q.TLI))
+        return *ArgBegin;
+    }
+    default:
+      break;
+    }
+  }

  return nullptr;
 }
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@ -2580,51 +2580,70 @@ bool llvm::CannotBeNegativeZero(const Value *V, const TargetLibraryInfo *TLI,
  return false;
 }

-bool llvm::CannotBeOrderedLessThanZero(const Value *V,
-                                       const TargetLibraryInfo *TLI,
-                                       unsigned Depth) {
-  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V))
-    return !CFP->getValueAPF().isNegative() || CFP->getValueAPF().isZero();
+/// If \p SignBitOnly is true, test for a known 0 sign bit rather than a
+/// standard ordered compare. e.g. make -0.0 olt 0.0 be true because of the sign
+/// bit despite comparing equal.
+static bool cannotBeOrderedLessThanZeroImpl(const Value *V,
+                                            const TargetLibraryInfo *TLI,
+                                            bool SignBitOnly,
+                                            unsigned Depth) {
+  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
+    return !CFP->getValueAPF().isNegative() ||
+           (!SignBitOnly && CFP->getValueAPF().isZero());
+  }

  if (Depth == MaxDepth)
-    return false;  // Limit search depth.
+    return false; // Limit search depth.

  const Operator *I = dyn_cast<Operator>(V);
-  if (!I) return false;
+  if (!I)
+    return false;

  switch (I->getOpcode()) {
-  default: break;
+  default:
+    break;
  // Unsigned integers are always nonnegative.
  case Instruction::UIToFP:
    return true;
  case Instruction::FMul:
    // x*x is always non-negative or a NaN.
-    if (I->getOperand(0) == I->getOperand(1))
+    if (I->getOperand(0) == I->getOperand(1) &&
+        (!SignBitOnly || cast<FPMathOperator>(I)->hasNoNaNs()))
      return true;
+
    LLVM_FALLTHROUGH;
  case Instruction::FAdd:
  case Instruction::FDiv:
  case Instruction::FRem:
-    return CannotBeOrderedLessThanZero(I->getOperand(0), TLI, Depth + 1) &&
-           CannotBeOrderedLessThanZero(I->getOperand(1), TLI, Depth + 1);
+    return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
+                                           Depth + 1) &&
+           cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
+                                           Depth + 1);
  case Instruction::Select:
-    return CannotBeOrderedLessThanZero(I->getOperand(1), TLI, Depth + 1) &&
-           CannotBeOrderedLessThanZero(I->getOperand(2), TLI, Depth + 1);
+    return cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
+                                           Depth + 1) &&
+           cannotBeOrderedLessThanZeroImpl(I->getOperand(2), TLI, SignBitOnly,
+                                           Depth + 1);
  case Instruction::FPExt:
  case Instruction::FPTrunc:
    // Widening/narrowing never change sign.
-    return CannotBeOrderedLessThanZero(I->getOperand(0), TLI, Depth + 1);
+    return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
+                                           Depth + 1);
  case Instruction::Call:
    Intrinsic::ID IID = getIntrinsicForCallSite(cast<CallInst>(I), TLI);
    switch (IID) {
    default:
      break;
    case Intrinsic::maxnum:
-      return CannotBeOrderedLessThanZero(I->getOperand(0), TLI, Depth + 1) ||
-             CannotBeOrderedLessThanZero(I->getOperand(1), TLI, Depth + 1);
+      return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
+                                             Depth + 1) ||
+             cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
+                                             Depth + 1);
    case Intrinsic::minnum:
-      return CannotBeOrderedLessThanZero(I->getOperand(0), TLI, Depth + 1) &&
-             CannotBeOrderedLessThanZero(I->getOperand(1), TLI, Depth + 1);
+      return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
+                                             Depth + 1) &&
+             cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
+                                             Depth + 1);
    case Intrinsic::exp:
    case Intrinsic::exp2:
    case Intrinsic::fabs:
@ -2636,18 +2655,30 @@ bool llvm::CannotBeOrderedLessThanZero(const Value *V,
        if (CI->getBitWidth() <= 64 && CI->getSExtValue() % 2u == 0)
          return true;
      }
-      return CannotBeOrderedLessThanZero(I->getOperand(0), TLI, Depth + 1);
+      return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
+                                             Depth + 1);
    case Intrinsic::fma:
    case Intrinsic::fmuladd:
      // x*x+y is non-negative if y is non-negative.
      return I->getOperand(0) == I->getOperand(1) &&
-             CannotBeOrderedLessThanZero(I->getOperand(2), TLI, Depth + 1);
+             (!SignBitOnly || cast<FPMathOperator>(I)->hasNoNaNs()) &&
+             cannotBeOrderedLessThanZeroImpl(I->getOperand(2), TLI, SignBitOnly,
+                                             Depth + 1);
    }
    break;
  }
  return false;
 }

+bool llvm::CannotBeOrderedLessThanZero(const Value *V,
+                                       const TargetLibraryInfo *TLI) {
+  return cannotBeOrderedLessThanZeroImpl(V, TLI, false, 0);
+}
+
+bool llvm::SignBitMustBeZero(const Value *V, const TargetLibraryInfo *TLI) {
+  return cannotBeOrderedLessThanZeroImpl(V, TLI, true, 0);
+}
+
 /// If the specified value can be set by repeating the same byte in memory,
 /// return the i8 value that it is represented with.  This is
 /// true for all i8 values obviously, but is also true for i32 0, i32 -1,
--- a/test/Transforms/InstCombine/fabs.ll
+++ b/test/Transforms/InstCombine/fabs.ll
@ -5,6 +5,8 @@
 declare float @fabsf(float)
 declare double @fabs(double)
 declare fp128 @fabsl(fp128)
+declare float @llvm.fma.f32(float, float, float)
+declare float @llvm.fmuladd.f32(float, float, float)

 define float @square_fabs_call_f32(float %x) {
  %mul = fmul float %x, %x
@ -80,7 +82,6 @@ define fp128 @square_fabs_intrinsic_f128(fp128 %x) {
 ; CHECK-NEXT: ret fp128 %fabsl
 }

-; TODO: This should be able to elimnated the fabs
 define float @square_nnan_fabs_intrinsic_f32(float %x) {
  %mul = fmul nnan float %x, %x
  %fabsf = call float @llvm.fabs.f32(float %mul)
@ -88,8 +89,7 @@ define float @square_nnan_fabs_intrinsic_f32(float %x) {

 ; CHECK-LABEL: square_nnan_fabs_intrinsic_f32(
 ; CHECK-NEXT: %mul = fmul nnan float %x, %x
-; CHECK-NEXT: %fabsf = call float @llvm.fabs.f32(float %mul)
-; CHECK-NEXT: ret float %fabsf
+; CHECK-NEXT: ret float %mul
 }

 ; Shrinking a library call to a smaller type should not be inhibited by nor inhibit the square optimization.
@ -170,3 +170,47 @@ define float @fabs_select_var_constant_negative(i32 %c, float %x) {
  %fabs = call float @llvm.fabs.f32(float %select)
  ret float %fabs
 }
+
+; The fabs cannot be eliminated because %x may be a NaN
+define float @square_fma_fabs_intrinsic_f32(float %x) {
+  %fma = call float @llvm.fma.f32(float %x, float %x, float 1.0)
+  %fabsf = call float @llvm.fabs.f32(float %fma)
+  ret float %fabsf
+
+; CHECK-LABEL: @square_fma_fabs_intrinsic_f32(
+; CHECK-NEXT: %fma = call float @llvm.fma.f32(float %x, float %x, float 1.000000e+00)
+; CHECK-NEXT: %fabsf = call float @llvm.fabs.f32(float %fma)
+; CHECK-NEXT: ret float %fabsf
+}
+
+; The fabs cannot be eliminated because %x may be a NaN
+define float @square_nnan_fma_fabs_intrinsic_f32(float %x) {
+  %fma = call nnan float @llvm.fma.f32(float %x, float %x, float 1.0)
+  %fabsf = call float @llvm.fabs.f32(float %fma)
+  ret float %fabsf
+
+; CHECK-LABEL: @square_nnan_fma_fabs_intrinsic_f32(
+; CHECK-NEXT: %fma = call nnan float @llvm.fma.f32(float %x, float %x, float 1.000000e+00)
+; CHECK-NEXT: ret float %fma
+}
+
+define float @square_fmuladd_fabs_intrinsic_f32(float %x) {
+  %fmuladd = call float @llvm.fmuladd.f32(float %x, float %x, float 1.0)
+  %fabsf = call float @llvm.fabs.f32(float %fmuladd)
+  ret float %fabsf
+
+; CHECK-LABEL: @square_fmuladd_fabs_intrinsic_f32(
+; CHECK-NEXT: %fmuladd = call float @llvm.fmuladd.f32(float %x, float %x, float 1.000000e+00)
+; CHECK-NEXT: %fabsf = call float @llvm.fabs.f32(float %fmuladd)
+; CHECK-NEXT: ret float %fabsf
+}
+
+define float @square_nnan_fmuladd_fabs_intrinsic_f32(float %x) {
+  %fmuladd = call nnan float @llvm.fmuladd.f32(float %x, float %x, float 1.0)
+  %fabsf = call float @llvm.fabs.f32(float %fmuladd)
+  ret float %fabsf
+
+; CHECK-LABEL: @square_nnan_fmuladd_fabs_intrinsic_f32(
+; CHECK-NEXT: %fmuladd = call nnan float @llvm.fmuladd.f32(float %x, float %x, float 1.000000e+00)
+; CHECK-NEXT: ret float %fmuladd
+}
--- a/test/Transforms/InstCombine/fast-math.ll
+++ b/test/Transforms/InstCombine/fast-math.ll
@ -241,7 +241,7 @@ define float @fmul2(float %f1) {
 ; X/C1 * C2 => X * (C2/C1) is disabled if X/C1 has multiple uses
@fmul2_external = external global float
 define float @fmul2_disable(float %f1) {
-  %div = fdiv fast float 1.000000e+00, %f1 
+  %div = fdiv fast float 1.000000e+00, %f1
  store float %div, float* @fmul2_external
  %mul = fmul fast float %div, 2.000000e+00
  ret float %mul
@ -672,8 +672,7 @@ define double @sqrt_intrinsic_arg_4th(double %x) {

 ; CHECK-LABEL: sqrt_intrinsic_arg_4th(
 ; CHECK-NEXT: %mul = fmul fast double %x, %x
-; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %mul)
-; CHECK-NEXT: ret double %fabs
+; CHECK-NEXT: ret double %mul
 }

 define double @sqrt_intrinsic_arg_5th(double %x) {
@ -685,9 +684,8 @@ define double @sqrt_intrinsic_arg_5th(double %x) {

 ; CHECK-LABEL: sqrt_intrinsic_arg_5th(
 ; CHECK-NEXT: %mul = fmul fast double %x, %x
-; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %mul)
 ; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %x)
-; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
+; CHECK-NEXT: %1 = fmul fast double %mul, %sqrt1
 ; CHECK-NEXT: ret double %1
 }

--- a/test/Transforms/InstSimplify/floating-point-arithmetic.ll
+++ b/test/Transforms/InstSimplify/floating-point-arithmetic.ll
@ -103,3 +103,95 @@ define float @PR22688(float %x) {
  ret float %7
 }

+declare float @llvm.fabs.f32(float)
+
+; CHECK-LABEL: @fabs_select_positive_constants(
+; CHECK: %select = select i1 %cmp, float 1.000000e+00, float 2.000000e+00
+; CHECK-NEXT: ret float %select
+define float @fabs_select_positive_constants(i32 %c) {
+  %cmp = icmp eq i32 %c, 0
+  %select = select i1 %cmp, float 1.0, float 2.0
+  %fabs = call float @llvm.fabs.f32(float %select)
+  ret float %fabs
+}
+
+; CHECK-LABEL: @fabs_select_constant_variable(
+; CHECK: %select = select i1 %cmp, float 1.000000e+00, float %x
+; CHECK-NEXT: %fabs = call float @llvm.fabs.f32(float %select)
+define float @fabs_select_constant_variable(i32 %c, float %x) {
+  %cmp = icmp eq i32 %c, 0
+  %select = select i1 %cmp, float 1.0, float %x
+  %fabs = call float @llvm.fabs.f32(float %select)
+  ret float %fabs
+}
+
+; CHECK-LABEL: @fabs_select_neg0_pos0(
+; CHECK: %select = select i1 %cmp, float -0.000000e+00, float 0.000000e+00
+; CHECK: %fabs = call float @llvm.fabs.f32(float %select)
+; CHECK-NEXT: ret float %fabs
+define float @fabs_select_neg0_pos0(float addrspace(1)* %out, i32 %c) {
+  %cmp = icmp eq i32 %c, 0
+  %select = select i1 %cmp, float -0.0, float 0.0
+  %fabs = call float @llvm.fabs.f32(float %select)
+  ret float %fabs
+}
+
+; CHECK-LABEL: @fabs_select_neg0_neg1(
+; CHECK: %select = select i1 %cmp, float -0.000000e+00, float -1.000000e+00
+; CHECK: %fabs = call float @llvm.fabs.f32(float %select)
+define float @fabs_select_neg0_neg1(float addrspace(1)* %out, i32 %c) {
+  %cmp = icmp eq i32 %c, 0
+  %select = select i1 %cmp, float -0.0, float -1.0
+  %fabs = call float @llvm.fabs.f32(float %select)
+  ret float %fabs
+}
+
+; CHECK-LABEL: @fabs_select_nan_nan(
+; CHECK: %select = select i1 %cmp, float 0x7FF8000000000000, float 0x7FF8000100000000
+; CHECK-NEXT: ret float %select
+define float @fabs_select_nan_nan(float addrspace(1)* %out, i32 %c) {
+  %cmp = icmp eq i32 %c, 0
+  %select = select i1 %cmp, float 0x7FF8000000000000, float 0x7FF8000100000000
+  %fabs = call float @llvm.fabs.f32(float %select)
+  ret float %fabs
+}
+
+; CHECK-LABEL: @fabs_select_negnan_nan(
+; CHECK: %select = select i1 %cmp, float 0xFFF8000000000000, float 0x7FF8000000000000
+; CHECK: %fabs = call float @llvm.fabs.f32(float %select)
+define float @fabs_select_negnan_nan(float addrspace(1)* %out, i32 %c) {
+  %cmp = icmp eq i32 %c, 0
+  %select = select i1 %cmp, float 0xFFF8000000000000, float 0x7FF8000000000000
+  %fabs = call float @llvm.fabs.f32(float %select)
+  ret float %fabs
+}
+
+; CHECK-LABEL: @fabs_select_negnan_negnan(
+; CHECK:  %select = select i1 %cmp, float 0xFFF8000000000000, float 0x7FF8000100000000
+; CHECK: %fabs = call float @llvm.fabs.f32(float %select)
+define float @fabs_select_negnan_negnan(float addrspace(1)* %out, i32 %c) {
+  %cmp = icmp eq i32 %c, 0
+  %select = select i1 %cmp, float 0xFFF8000000000000, float 0x7FF8000100000000
+  %fabs = call float @llvm.fabs.f32(float %select)
+  ret float %fabs
+}
+
+; CHECK-LABEL: @fabs_select_negnan_negzero(
+; CHECK: %select = select i1 %cmp, float 0xFFF8000000000000, float -0.000000e+00
+; CHECK: %fabs = call float @llvm.fabs.f32(float %select)
+define float @fabs_select_negnan_negzero(float addrspace(1)* %out, i32 %c) {
+  %cmp = icmp eq i32 %c, 0
+  %select = select i1 %cmp, float 0xFFF8000000000000, float -0.0
+  %fabs = call float @llvm.fabs.f32(float %select)
+  ret float %fabs
+}
+
+; CHECK-LABEL: @fabs_select_negnan_zero(
+; CHECK: %select = select i1 %cmp, float 0xFFF8000000000000, float 0.000000e+00
+; CHECK: %fabs = call float @llvm.fabs.f32(float %select)
+define float @fabs_select_negnan_zero(float addrspace(1)* %out, i32 %c) {
+  %cmp = icmp eq i32 %c, 0
+  %select = select i1 %cmp, float 0xFFF8000000000000, float 0.0
+  %fabs = call float @llvm.fabs.f32(float %select)
+  ret float %fabs
+}