[InstCombine] Enable more reassociations using FMF 'reassoc' + 'nsz'

Reassociation of math ops in some contexts (especially vector contexts) has generally only been happening when the 'fast' FMF was set. This enables reassoication when only the finer grained controls 'reassoc' and 'nsz' are set. Differential Revision: https://reviews.llvm.org/D47335 llvm-svn: 333221
2024-12-13 22:58:50 +00:00 · 2018-05-24 20:16:43 +00:00 · 2018-05-24 20:16:43 +00:00 · 74ca133d49
commit 74ca133d49
parent 094abf2283
2 changed files with 142 additions and 6 deletions
--- a/lib/IR/Instruction.cpp
+++ b/lib/IR/Instruction.cpp
@ -602,7 +602,8 @@ bool Instruction::isAssociative() const {
  switch (Opcode) {
  case FMul:
  case FAdd:
-    return cast<FPMathOperator>(this)->isFast();
+    return cast<FPMathOperator>(this)->hasAllowReassoc() &&
+           cast<FPMathOperator>(this)->hasNoSignedZeros();
  default:
    return false;
  }
--- a/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll
+++ b/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll
@ -1,10 +1,145 @@
 ; RUN: opt < %s -instcombine -S | FileCheck %s
-; CHECK: mul
-; CHECK: mul

-define <4 x float> @test(<4 x float> %V) {
-        %Y = fmul <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >                ; <<4 x float>> [#uses=1]
-        %Z = fmul <4 x float> %Y, < float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00 >               ; <<4 x float>> [#uses=1]
+; (V * C1) * C2 => V * (C1 * C2)
+; Verify this doesn't fold when no fast-math-flags are specified
+define <4 x float> @test_fmul(<4 x float> %V) {
+; CHECK-LABEL: @test_fmul(
+; CHECK-NEXT:     [[TMP1:%.*]] = fmul <4 x float> [[V:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
+; CHECK-NEXT:     [[TMP2:%.*]] = fmul <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00>
+; CHECK-NEXT:     ret <4 x float> [[TMP2]]
+        %Y = fmul <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
+        %Z = fmul <4 x float> %Y, < float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00 >
        ret <4 x float> %Z
 }

+; (V * C1) * C2 => V * (C1 * C2)
+; Verify this folds with 'fast'
+define <4 x float> @test_fmul_fast(<4 x float> %V) {
+; CHECK-LABEL: @test_fmul_fast(
+; CHECK-NEXT:     [[TMP1:%.*]] = fmul fast <4 x float> [[V:%.*]], <float 1.000000e+00, float 4.000000e+05, float -9.000000e+00, float 1.600000e+01>
+; CHECK-NEXT:     ret <4 x float> [[TMP1]]
+        %Y = fmul fast <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
+        %Z = fmul fast <4 x float> %Y, < float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00 >
+        ret <4 x float> %Z
+}
+
+; (V * C1) * C2 => V * (C1 * C2)
+; Verify this folds with 'reassoc' and 'nsz' ('nsz' not technically required)
+define <4 x float> @test_fmul_reassoc_nsz(<4 x float> %V) {
+; CHECK-LABEL: @test_fmul_reassoc_nsz(
+; CHECK-NEXT:     [[TMP1:%.*]] = fmul reassoc nsz <4 x float> [[V:%.*]], <float 1.000000e+00, float 4.000000e+05, float -9.000000e+00, float 1.600000e+01>
+; CHECK-NEXT:     ret <4 x float> [[TMP1]]
+        %Y = fmul reassoc nsz <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
+        %Z = fmul reassoc nsz <4 x float> %Y, < float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00 >
+        ret <4 x float> %Z
+}
+
+; (V * C1) * C2 => V * (C1 * C2)
+; TODO: This doesn't require 'nsz'.  It should fold to V * { 1.0, 4.0e+05, -9.0, 16.0 }
+define <4 x float> @test_fmul_reassoc(<4 x float> %V) {
+; CHECK-LABEL: @test_fmul_reassoc(
+; CHECK-NEXT:     [[TMP1:%.*]] = fmul reassoc <4 x float> [[V:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
+; CHECK-NEXT:     [[TMP2:%.*]] = fmul reassoc <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00>
+; CHECK-NEXT:     ret <4 x float> [[TMP2]]
+        %Y = fmul reassoc <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
+        %Z = fmul reassoc <4 x float> %Y, < float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00 >
+        ret <4 x float> %Z
+}
+
+; (V + C1) + C2 => V + (C1 + C2)
+; Verify this doesn't fold when no fast-math-flags are specified
+define <4 x float> @test_fadd(<4 x float> %V) {
+; CHECK-LABEL: @test_fadd(
+; CHECK-NEXT:     [[TMP1:%.*]] = fadd <4 x float> [[V:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
+; CHECK-NEXT:     [[TMP2:%.*]] = fadd <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00>
+; CHECK-NEXT:     ret <4 x float> [[TMP2]]
+        %Y = fadd <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
+        %Z = fadd <4 x float> %Y, < float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00 >
+        ret <4 x float> %Z
+}
+
+; (V + C1) + C2 => V + (C1 + C2)
+; Verify this folds with 'fast'
+define <4 x float> @test_fadd_fast(<4 x float> %V) {
+; CHECK-LABEL: @test_fadd_fast(
+; CHECK-NEXT:     [[TMP1:%.*]] = fadd fast <4 x float> [[V:%.*]], <float 2.000000e+00, float 4.000000e+00, float 0.000000e+00, float 8.000000e+00>
+; CHECK-NEXT:     ret <4 x float> [[TMP1]]
+        %Y = fadd fast <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
+        %Z = fadd fast <4 x float> %Y, < float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00 >
+        ret <4 x float> %Z
+}
+
+; (V + C1) + C2 => V + (C1 + C2)
+; Verify this folds with 'reassoc' and 'nsz' ('nsz' not technically required)
+define <4 x float> @test_fadd_reassoc_nsz(<4 x float> %V) {
+; CHECK-LABEL: @test_fadd_reassoc_nsz(
+; CHECK-NEXT:     [[TMP1:%.*]] = fadd reassoc nsz <4 x float> [[V:%.*]], <float 2.000000e+00, float 4.000000e+00, float 0.000000e+00, float 8.000000e+00>
+; CHECK-NEXT:     ret <4 x float> [[TMP1]]
+        %Y = fadd reassoc nsz <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
+        %Z = fadd reassoc nsz <4 x float> %Y, < float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00 >
+        ret <4 x float> %Z
+}
+
+; (V + C1) + C2 => V + (C1 + C2)
+; TODO: This doesn't require 'nsz'.  It should fold to V + { 2.0, 4.0, 0.0, 8.0 }
+define <4 x float> @test_fadd_reassoc(<4 x float> %V) {
+; CHECK-LABEL: @test_fadd_reassoc(
+; CHECK-NEXT:     [[TMP1:%.*]] = fadd reassoc <4 x float> [[V:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
+; CHECK-NEXT:     [[TMP2:%.*]] = fadd reassoc <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00>
+; CHECK-NEXT:     ret <4 x float> [[TMP2]]
+        %Y = fadd reassoc <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
+        %Z = fadd reassoc <4 x float> %Y, < float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00 >
+        ret <4 x float> %Z
+}
+
+; ( A + C1 ) + ( B + -C1 )
+; Verify this doesn't fold when no fast-math-flags are specified
+define <4 x float> @test_fadds_cancel_(<4 x float> %A, <4 x float> %B) {
+; CHECK-LABEL: @test_fadds_cancel_(
+; CHECK-NEXT:     [[TMP1:%.*]] = fadd <4 x float> [[A:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
+; CHECK-NEXT:     [[TMP2:%.*]] = fadd <4 x float> [[B:%.*]], <float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00>
+; CHECK-NEXT:     [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:     ret <4 x float> [[TMP3]]
+        %X = fadd <4 x float> %A, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
+        %Y = fadd <4 x float> %B, < float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00 >
+        %Z = fadd <4 x float> %X, %Y
+        ret <4 x float> %Z
+}
+
+; ( A + C1 ) + ( B + -C1 )
+; Verify this folds to 'A + B' with 'fast'
+define <4 x float> @test_fadds_cancel_fast(<4 x float> %A, <4 x float> %B) {
+; CHECK-LABEL: @test_fadds_cancel_fast(
+; CHECK-NEXT:     [[TMP1:%.*]] = fadd fast <4 x float> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:     ret <4 x float> [[TMP1]]
+        %X = fadd fast <4 x float> %A, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
+        %Y = fadd fast <4 x float> %B, < float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00 >
+        %Z = fadd fast <4 x float> %X, %Y
+        ret <4 x float> %Z
+}
+
+; ( A + C1 ) + ( B + -C1 )
+; Verify this folds to 'A + B' with 'reassoc' and 'nsz' ('nsz' is required)
+define <4 x float> @test_fadds_cancel_reassoc_nsz(<4 x float> %A, <4 x float> %B) {
+; CHECK-LABEL: @test_fadds_cancel_reassoc_nsz(
+; CHECK-NEXT:     [[TMP1:%.*]] = fadd reassoc nsz <4 x float> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:     ret <4 x float> [[TMP1]]
+        %X = fadd reassoc nsz <4 x float> %A, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
+        %Y = fadd reassoc nsz <4 x float> %B, < float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00 >
+        %Z = fadd reassoc nsz <4 x float> %X, %Y
+        ret <4 x float> %Z
+}
+
+; ( A + C1 ) + ( B + -C1 )
+; Verify the fold is not done with only 'reassoc' ('nsz' is required).
+define <4 x float> @test_fadds_cancel_reassoc(<4 x float> %A, <4 x float> %B) {
+; CHECK-LABEL: @test_fadds_cancel_reassoc(
+; CHECK-NEXT:     [[TMP1:%.*]] = fadd reassoc <4 x float> [[A:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
+; CHECK-NEXT:     [[TMP2:%.*]] = fadd reassoc <4 x float> [[B:%.*]], <float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00>
+; CHECK-NEXT:     [[TMP3:%.*]] = fadd reassoc <4 x float> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:     ret <4 x float> [[TMP3]]
+        %X = fadd reassoc <4 x float> %A, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
+        %Y = fadd reassoc <4 x float> %B, < float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00 >
+        %Z = fadd reassoc <4 x float> %X, %Y
+        ret <4 x float> %Z
+}