mirror of
https://github.com/RPCSX/llvm.git
synced 2025-01-06 12:04:52 +00:00
ab43652716
Summary: The affected transforms all implicitly use associativity of addition, for which we usually require unsafe math to be enabled. The "Aggressive" flag is only meant to convey information about the performance of the fused ops relative to a fmul+fadd sequence. Fixes Bug 31626. Reviewers: spatel, hfinkel, mehdi_amini, arsenm, tstellarAMD Subscribers: jholewinski, nemanjai, wdng, llvm-commits Differential Revision: https://reviews.llvm.org/D28675 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@293635 91177308-0d34-0410-b5e6-96231b3b80d8
40 lines
1.3 KiB
LLVM
40 lines
1.3 KiB
LLVM
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -fp-contract=fast | FileCheck %s -check-prefix=CHECK
|
|
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -fp-contract=fast -enable-unsafe-fp-math | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-UNSAFE
|
|
|
|
; t1_f32: single-precision (x * y + u * v) + z.
; With -enable-unsafe-fp-math (the CHECK-UNSAFE prefix) the whole expression is
; expected to lower to two fma.rn.f32 instructions. The default run only
; requires that the function body reaches its return.
; The label check below pins every following check to this function's PTX, so
; a match cannot be satisfied by code emitted for a later function.
define ptx_device float @t1_f32(float %x, float %y, float %z,
                                float %u, float %v) {
; CHECK-LABEL: t1_f32(
; CHECK-UNSAFE: fma.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}};
; CHECK-UNSAFE: fma.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}};
; CHECK: ret;
  %a = fmul float %x, %y
  %b = fmul float %u, %v
  %c = fadd float %a, %b
  %d = fadd float %c, %z
  ret float %d
}
|
|
|
|
; t1_f64: double-precision (x * y + u * v) + z.
; With -enable-unsafe-fp-math (the CHECK-UNSAFE prefix) the whole expression is
; expected to lower to two fma.rn.f64 instructions. The default run only
; requires that the function body reaches its return.
; The label check below pins every following check to this function's PTX, so
; a match cannot be satisfied by code emitted for a neighbouring function.
define ptx_device double @t1_f64(double %x, double %y, double %z,
                                 double %u, double %v) {
; CHECK-LABEL: t1_f64(
; CHECK-UNSAFE: fma.rn.f64 %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}};
; CHECK-UNSAFE: fma.rn.f64 %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}};
; CHECK: ret;
  %a = fmul double %x, %y
  %b = fmul double %u, %v
  %c = fadd double %a, %b
  %d = fadd double %c, %z
  ret double %d
}
|
|
|
|
; two_choices: computes p + p * p where p = val1 * val2.
; The product p feeds both the second multiply and the add, so the add has two
; multiply operands it could be fused with. The checks accept a single mul.f64
; followed by an fma.rn.f64, and reject a second mul.f64 appearing between them.
define double @two_choices(double %val1, double %val2) {
; CHECK-LABEL: two_choices(
; CHECK: mul.f64
; CHECK-NOT: mul.f64
; CHECK: fma.rn.f64
  %prod = fmul double %val1, %val2
  %prod_sq = fmul double %prod, %prod
  %sum = fadd double %prod, %prod_sq

  ret double %sum
}
|
|
|