[InstCombine] Enable more reassociations using FMF 'reassoc' + 'nsz'

Reassociation of math ops in some contexts (especially vector contexts)
has generally only been happening when the 'fast' FMF was set.  This
enables reassociation when only the finer-grained controls 'reassoc' and
'nsz' are set.

Differential Revision: https://reviews.llvm.org/D47335

llvm-svn: 333221
This commit is contained in:
Warren Ristow 2018-05-24 20:16:43 +00:00
parent 094abf2283
commit 74ca133d49
2 changed files with 142 additions and 6 deletions

View File

@ -602,7 +602,8 @@ bool Instruction::isAssociative() const {
switch (Opcode) {
case FMul:
case FAdd:
return cast<FPMathOperator>(this)->isFast();
return cast<FPMathOperator>(this)->hasAllowReassoc() &&
cast<FPMathOperator>(this)->hasNoSignedZeros();
default:
return false;
}

View File

@ -1,10 +1,145 @@
; RUN: opt < %s -instcombine -S | FileCheck %s
; CHECK: mul
; CHECK: mul
define <4 x float> @test(<4 x float> %V) {
%Y = fmul <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 > ; <<4 x float>> [#uses=1]
%Z = fmul <4 x float> %Y, < float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00 > ; <<4 x float>> [#uses=1]
; (V * C1) * C2 => V * (C1 * C2)
; Verify this doesn't fold when no fast-math-flags are specified
; Baseline case: neither fmul carries any fast-math flag, so the two
; multiplies by constant vectors are expected to come out of instcombine
; unchanged, in the same order and with the same constants.
define <4 x float> @test_fmul(<4 x float> %V) {
; CHECK-LABEL: @test_fmul(
; CHECK-NEXT: [[TMP1:%.*]] = fmul <4 x float> [[V:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
; CHECK-NEXT: [[TMP2:%.*]] = fmul <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00>
; CHECK-NEXT: ret <4 x float> [[TMP2]]
%Y = fmul <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
%Z = fmul <4 x float> %Y, < float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00 >
ret <4 x float> %Z
}
; (V * C1) * C2 => V * (C1 * C2)
; Verify this folds with 'fast'
; Both multiplies are 'fast', so they should collapse into a single fmul
; whose constant is the lane-wise product of the two input constants:
;   1*1 = 1, 2*2e5 = 4e5, 3*(-3) = -9, 4*4 = 16.
; The surviving instruction is expected to keep the 'fast' flag.
define <4 x float> @test_fmul_fast(<4 x float> %V) {
; CHECK-LABEL: @test_fmul_fast(
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast <4 x float> [[V:%.*]], <float 1.000000e+00, float 4.000000e+05, float -9.000000e+00, float 1.600000e+01>
; CHECK-NEXT: ret <4 x float> [[TMP1]]
%Y = fmul fast <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
%Z = fmul fast <4 x float> %Y, < float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00 >
ret <4 x float> %Z
}
; (V * C1) * C2 => V * (C1 * C2)
; Verify this folds with 'reassoc' and 'nsz' ('nsz' not technically required)
; Same fold as the 'fast' variant, but using only the two finer-grained flags
; that Instruction::isAssociative() tests for on fmul/fadd. The merged
; constant is the lane-wise product (1, 4e5, -9, 16) and the result is
; expected to carry exactly 'reassoc nsz'.
define <4 x float> @test_fmul_reassoc_nsz(<4 x float> %V) {
; CHECK-LABEL: @test_fmul_reassoc_nsz(
; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc nsz <4 x float> [[V:%.*]], <float 1.000000e+00, float 4.000000e+05, float -9.000000e+00, float 1.600000e+01>
; CHECK-NEXT: ret <4 x float> [[TMP1]]
%Y = fmul reassoc nsz <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
%Z = fmul reassoc nsz <4 x float> %Y, < float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00 >
ret <4 x float> %Z
}
; (V * C1) * C2 => V * (C1 * C2)
; TODO: This doesn't require 'nsz'. It should fold to V * { 1.0, 4.0e+05, -9.0, 16.0 }
; The expected output below pins the current (unfolded) behavior, not the
; desired one: Instruction::isAssociative() demands both 'reassoc' and 'nsz'
; for fmul, so with 'reassoc' alone the two multiplies are left in place.
; Update the expectations here when the TODO above is addressed.
define <4 x float> @test_fmul_reassoc(<4 x float> %V) {
; CHECK-LABEL: @test_fmul_reassoc(
; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc <4 x float> [[V:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
; CHECK-NEXT: [[TMP2:%.*]] = fmul reassoc <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00>
; CHECK-NEXT: ret <4 x float> [[TMP2]]
%Y = fmul reassoc <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
%Z = fmul reassoc <4 x float> %Y, < float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00 >
ret <4 x float> %Z
}
; (V + C1) + C2 => V + (C1 + C2)
; Verify this doesn't fold when no fast-math-flags are specified
; Baseline case for fadd: with no fast-math flags on either add, both
; additions of constant vectors are expected to survive instcombine as-is.
define <4 x float> @test_fadd(<4 x float> %V) {
; CHECK-LABEL: @test_fadd(
; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[V:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00>
; CHECK-NEXT: ret <4 x float> [[TMP2]]
%Y = fadd <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
%Z = fadd <4 x float> %Y, < float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00 >
ret <4 x float> %Z
}
; (V + C1) + C2 => V + (C1 + C2)
; Verify this folds with 'fast'
; Both adds are 'fast', so they should merge into one fadd whose constant is
; the lane-wise sum of the two input constants:
;   1+1 = 2, 2+2 = 4, 3+(-3) = 0, 4+4 = 8.
; The surviving instruction is expected to keep the 'fast' flag.
define <4 x float> @test_fadd_fast(<4 x float> %V) {
; CHECK-LABEL: @test_fadd_fast(
; CHECK-NEXT: [[TMP1:%.*]] = fadd fast <4 x float> [[V:%.*]], <float 2.000000e+00, float 4.000000e+00, float 0.000000e+00, float 8.000000e+00>
; CHECK-NEXT: ret <4 x float> [[TMP1]]
%Y = fadd fast <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
%Z = fadd fast <4 x float> %Y, < float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00 >
ret <4 x float> %Z
}
; (V + C1) + C2 => V + (C1 + C2)
; Verify this folds with 'reassoc' and 'nsz' ('nsz' not technically required)
; Same fold as the 'fast' variant, but using only the two finer-grained flags
; that Instruction::isAssociative() tests for on fmul/fadd. The merged
; constant is the lane-wise sum (2, 4, 0, 8) and the result is expected to
; carry exactly 'reassoc nsz'.
define <4 x float> @test_fadd_reassoc_nsz(<4 x float> %V) {
; CHECK-LABEL: @test_fadd_reassoc_nsz(
; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc nsz <4 x float> [[V:%.*]], <float 2.000000e+00, float 4.000000e+00, float 0.000000e+00, float 8.000000e+00>
; CHECK-NEXT: ret <4 x float> [[TMP1]]
%Y = fadd reassoc nsz <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
%Z = fadd reassoc nsz <4 x float> %Y, < float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00 >
ret <4 x float> %Z
}
; (V + C1) + C2 => V + (C1 + C2)
; TODO: This doesn't require 'nsz'. It should fold to V + { 2.0, 4.0, 0.0, 8.0 }
; The expected output below pins the current (unfolded) behavior, not the
; desired one: Instruction::isAssociative() demands both 'reassoc' and 'nsz'
; for fadd, so with 'reassoc' alone the two adds are left in place.
; Update the expectations here when the TODO above is addressed.
define <4 x float> @test_fadd_reassoc(<4 x float> %V) {
; CHECK-LABEL: @test_fadd_reassoc(
; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc <4 x float> [[V:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
; CHECK-NEXT: [[TMP2:%.*]] = fadd reassoc <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00>
; CHECK-NEXT: ret <4 x float> [[TMP2]]
%Y = fadd reassoc <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
%Z = fadd reassoc <4 x float> %Y, < float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00 >
ret <4 x float> %Z
}
; ( A + C1 ) + ( B + -C1 )
; Verify this doesn't fold when no fast-math-flags are specified
; Baseline case for the cancellation pattern: the constant added to %B is the
; lane-wise negation of the one added to %A, but with no fast-math flags all
; three fadds are expected to be preserved.
define <4 x float> @test_fadds_cancel_(<4 x float> %A, <4 x float> %B) {
; CHECK-LABEL: @test_fadds_cancel_(
; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[B:%.*]], <float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00>
; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret <4 x float> [[TMP3]]
%X = fadd <4 x float> %A, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
%Y = fadd <4 x float> %B, < float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00 >
%Z = fadd <4 x float> %X, %Y
ret <4 x float> %Z
}
; ( A + C1 ) + ( B + -C1 )
; Verify this folds to 'A + B' with 'fast'
; The two constant vectors are exact lane-wise negations of each other, so
; once the adds are reassociated the constants cancel and only a single
; 'fast' fadd of %A and %B should remain.
define <4 x float> @test_fadds_cancel_fast(<4 x float> %A, <4 x float> %B) {
; CHECK-LABEL: @test_fadds_cancel_fast(
; CHECK-NEXT: [[TMP1:%.*]] = fadd fast <4 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: ret <4 x float> [[TMP1]]
%X = fadd fast <4 x float> %A, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
%Y = fadd fast <4 x float> %B, < float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00 >
%Z = fadd fast <4 x float> %X, %Y
ret <4 x float> %Z
}
; ( A + C1 ) + ( B + -C1 )
; Verify this folds to 'A + B' with 'reassoc' and 'nsz' ('nsz' is required)
; Reassociating gives (A + B) + (C1 + -C1), i.e. (A + B) + 0.0 in every lane.
; Dropping that "+ 0.0" is what needs 'nsz': (-0.0) + 0.0 yields +0.0, so
; X + 0.0 => X is only valid when the sign of a zero result may be ignored.
; The single remaining fadd is expected to carry exactly 'reassoc nsz'.
define <4 x float> @test_fadds_cancel_reassoc_nsz(<4 x float> %A, <4 x float> %B) {
; CHECK-LABEL: @test_fadds_cancel_reassoc_nsz(
; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc nsz <4 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: ret <4 x float> [[TMP1]]
%X = fadd reassoc nsz <4 x float> %A, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
%Y = fadd reassoc nsz <4 x float> %B, < float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00 >
%Z = fadd reassoc nsz <4 x float> %X, %Y
ret <4 x float> %Z
}
; ( A + C1 ) + ( B + -C1 )
; Verify the fold is not done with only 'reassoc' ('nsz' is required).
; Unlike the single-chain 'reassoc'-only tests in this file, leaving this one
; unfolded is the intended result rather than a TODO: cancelling the constants
; would leave (A + B) + 0.0, and removing the "+ 0.0" is not sign-of-zero
; safe without 'nsz'. All three fadds are therefore expected to remain, each
; still tagged 'reassoc'.
define <4 x float> @test_fadds_cancel_reassoc(<4 x float> %A, <4 x float> %B) {
; CHECK-LABEL: @test_fadds_cancel_reassoc(
; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc <4 x float> [[A:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
; CHECK-NEXT: [[TMP2:%.*]] = fadd reassoc <4 x float> [[B:%.*]], <float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00>
; CHECK-NEXT: [[TMP3:%.*]] = fadd reassoc <4 x float> [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret <4 x float> [[TMP3]]
%X = fadd reassoc <4 x float> %A, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
%Y = fadd reassoc <4 x float> %B, < float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00 >
%Z = fadd reassoc <4 x float> %X, %Y
ret <4 x float> %Z
}