mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-12-13 22:58:50 +00:00
[InstCombine] Enable more reassociations using FMF 'reassoc' + 'nsz'
Reassociation of math ops in some contexts (especially vector contexts) has generally only been happening when the 'fast' FMF was set. This enables reassociation when only the finer-grained controls 'reassoc' and 'nsz' are set. Differential Revision: https://reviews.llvm.org/D47335 llvm-svn: 333221
This commit is contained in:
parent
094abf2283
commit
74ca133d49
@ -602,7 +602,8 @@ bool Instruction::isAssociative() const {
|
||||
switch (Opcode) {
|
||||
case FMul:
|
||||
case FAdd:
|
||||
return cast<FPMathOperator>(this)->isFast();
|
||||
return cast<FPMathOperator>(this)->hasAllowReassoc() &&
|
||||
cast<FPMathOperator>(this)->hasNoSignedZeros();
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
@ -1,10 +1,145 @@
|
||||
; RUN: opt < %s -instcombine -S | FileCheck %s
|
||||
; CHECK: mul
|
||||
; CHECK: mul
|
||||
|
||||
define <4 x float> @test(<4 x float> %V) {
|
||||
%Y = fmul <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 > ; <<4 x float>> [#uses=1]
|
||||
%Z = fmul <4 x float> %Y, < float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00 > ; <<4 x float>> [#uses=1]
|
||||
; (V * C1) * C2 => V * (C1 * C2)
|
||||
; Verify this doesn't fold when no fast-math-flags are specified
|
||||
define <4 x float> @test_fmul(<4 x float> %V) {
|
||||
; CHECK-LABEL: @test_fmul(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fmul <4 x float> [[V:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = fmul <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00>
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP2]]
|
||||
%Y = fmul <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
|
||||
%Z = fmul <4 x float> %Y, < float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00 >
|
||||
ret <4 x float> %Z
|
||||
}
|
||||
|
||||
; (V * C1) * C2 => V * (C1 * C2)
|
||||
; Verify this folds with 'fast'
|
||||
define <4 x float> @test_fmul_fast(<4 x float> %V) {
|
||||
; CHECK-LABEL: @test_fmul_fast(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast <4 x float> [[V:%.*]], <float 1.000000e+00, float 4.000000e+05, float -9.000000e+00, float 1.600000e+01>
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP1]]
|
||||
%Y = fmul fast <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
|
||||
%Z = fmul fast <4 x float> %Y, < float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00 >
|
||||
ret <4 x float> %Z
|
||||
}
|
||||
|
||||
; (V * C1) * C2 => V * (C1 * C2)
|
||||
; Verify this folds with 'reassoc' and 'nsz' ('nsz' not technically required)
|
||||
define <4 x float> @test_fmul_reassoc_nsz(<4 x float> %V) {
|
||||
; CHECK-LABEL: @test_fmul_reassoc_nsz(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc nsz <4 x float> [[V:%.*]], <float 1.000000e+00, float 4.000000e+05, float -9.000000e+00, float 1.600000e+01>
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP1]]
|
||||
%Y = fmul reassoc nsz <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
|
||||
%Z = fmul reassoc nsz <4 x float> %Y, < float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00 >
|
||||
ret <4 x float> %Z
|
||||
}
|
||||
|
||||
; (V * C1) * C2 => V * (C1 * C2)
|
||||
; TODO: This doesn't require 'nsz'. It should fold to V * { 1.0, 4.0e+05, -9.0, 16.0 }
|
||||
define <4 x float> @test_fmul_reassoc(<4 x float> %V) {
|
||||
; CHECK-LABEL: @test_fmul_reassoc(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc <4 x float> [[V:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = fmul reassoc <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00>
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP2]]
|
||||
%Y = fmul reassoc <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
|
||||
%Z = fmul reassoc <4 x float> %Y, < float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00 >
|
||||
ret <4 x float> %Z
|
||||
}
|
||||
|
||||
; (V + C1) + C2 => V + (C1 + C2)
|
||||
; Verify this doesn't fold when no fast-math-flags are specified
|
||||
define <4 x float> @test_fadd(<4 x float> %V) {
|
||||
; CHECK-LABEL: @test_fadd(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[V:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00>
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP2]]
|
||||
%Y = fadd <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
|
||||
%Z = fadd <4 x float> %Y, < float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00 >
|
||||
ret <4 x float> %Z
|
||||
}
|
||||
|
||||
; (V + C1) + C2 => V + (C1 + C2)
|
||||
; Verify this folds with 'fast'
|
||||
define <4 x float> @test_fadd_fast(<4 x float> %V) {
|
||||
; CHECK-LABEL: @test_fadd_fast(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fadd fast <4 x float> [[V:%.*]], <float 2.000000e+00, float 4.000000e+00, float 0.000000e+00, float 8.000000e+00>
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP1]]
|
||||
%Y = fadd fast <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
|
||||
%Z = fadd fast <4 x float> %Y, < float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00 >
|
||||
ret <4 x float> %Z
|
||||
}
|
||||
|
||||
; (V + C1) + C2 => V + (C1 + C2)
|
||||
; Verify this folds with 'reassoc' and 'nsz' ('nsz' not technically required)
|
||||
define <4 x float> @test_fadd_reassoc_nsz(<4 x float> %V) {
|
||||
; CHECK-LABEL: @test_fadd_reassoc_nsz(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc nsz <4 x float> [[V:%.*]], <float 2.000000e+00, float 4.000000e+00, float 0.000000e+00, float 8.000000e+00>
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP1]]
|
||||
%Y = fadd reassoc nsz <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
|
||||
%Z = fadd reassoc nsz <4 x float> %Y, < float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00 >
|
||||
ret <4 x float> %Z
|
||||
}
|
||||
|
||||
; (V + C1) + C2 => V + (C1 + C2)
|
||||
; TODO: This doesn't require 'nsz'. It should fold to V + { 2.0, 4.0, 0.0, 8.0 }
|
||||
define <4 x float> @test_fadd_reassoc(<4 x float> %V) {
|
||||
; CHECK-LABEL: @test_fadd_reassoc(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc <4 x float> [[V:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = fadd reassoc <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00>
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP2]]
|
||||
%Y = fadd reassoc <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
|
||||
%Z = fadd reassoc <4 x float> %Y, < float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00 >
|
||||
ret <4 x float> %Z
|
||||
}
|
||||
|
||||
; ( A + C1 ) + ( B + -C1 )
|
||||
; Verify this doesn't fold when no fast-math-flags are specified
|
||||
define <4 x float> @test_fadds_cancel_(<4 x float> %A, <4 x float> %B) {
|
||||
; CHECK-LABEL: @test_fadds_cancel_(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[B:%.*]], <float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]]
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP3]]
|
||||
%X = fadd <4 x float> %A, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
|
||||
%Y = fadd <4 x float> %B, < float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00 >
|
||||
%Z = fadd <4 x float> %X, %Y
|
||||
ret <4 x float> %Z
|
||||
}
|
||||
|
||||
; ( A + C1 ) + ( B + -C1 )
|
||||
; Verify this folds to 'A + B' with 'fast'
|
||||
define <4 x float> @test_fadds_cancel_fast(<4 x float> %A, <4 x float> %B) {
|
||||
; CHECK-LABEL: @test_fadds_cancel_fast(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fadd fast <4 x float> [[A:%.*]], [[B:%.*]]
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP1]]
|
||||
%X = fadd fast <4 x float> %A, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
|
||||
%Y = fadd fast <4 x float> %B, < float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00 >
|
||||
%Z = fadd fast <4 x float> %X, %Y
|
||||
ret <4 x float> %Z
|
||||
}
|
||||
|
||||
; ( A + C1 ) + ( B + -C1 )
|
||||
; Verify this folds to 'A + B' with 'reassoc' and 'nsz' ('nsz' is required)
|
||||
define <4 x float> @test_fadds_cancel_reassoc_nsz(<4 x float> %A, <4 x float> %B) {
|
||||
; CHECK-LABEL: @test_fadds_cancel_reassoc_nsz(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc nsz <4 x float> [[A:%.*]], [[B:%.*]]
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP1]]
|
||||
%X = fadd reassoc nsz <4 x float> %A, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
|
||||
%Y = fadd reassoc nsz <4 x float> %B, < float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00 >
|
||||
%Z = fadd reassoc nsz <4 x float> %X, %Y
|
||||
ret <4 x float> %Z
|
||||
}
|
||||
|
||||
; ( A + C1 ) + ( B + -C1 )
|
||||
; Verify the fold is not done with only 'reassoc' ('nsz' is required).
|
||||
define <4 x float> @test_fadds_cancel_reassoc(<4 x float> %A, <4 x float> %B) {
|
||||
; CHECK-LABEL: @test_fadds_cancel_reassoc(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc <4 x float> [[A:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = fadd reassoc <4 x float> [[B:%.*]], <float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = fadd reassoc <4 x float> [[TMP1]], [[TMP2]]
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP3]]
|
||||
%X = fadd reassoc <4 x float> %A, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
|
||||
%Y = fadd reassoc <4 x float> %B, < float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00 >
|
||||
%Z = fadd reassoc <4 x float> %X, %Y
|
||||
ret <4 x float> %Z
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user