mirror of
https://github.com/RPCSX/llvm.git
synced 2025-01-21 03:37:47 +00:00
PTX: Adjust rounding modes
Rounding-mode changes:
* Rounding modes for fp add, mul, and sub now use .rn.
* float -> int rounding now correctly uses .rzi, not .rni.
* 32-bit fdiv for sm13 uses div.rn (instead of div.approx).
* 32-bit fdiv for sm10 now uses div (instead of div.approx).

Approx is not IEEE 754 compatible (and should instead be optionally enabled by a flag to the backend). The .rn rounding modifier is the PTX default anyway, but it's better to be explicit. All of these modifiers should be selectable through functions such as __fmul_rz, but support for that still needs to be added to the backend.

Patch by Dan Bailey

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@133253 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
f1b7e94add
commit
707fd44038
@ -584,24 +584,39 @@ defm REM : INT3<"rem", urem>;
|
||||
defm FNEG : PTX_FLOAT_2OP<"neg", fneg>;
|
||||
|
||||
// Standard Binary Operations
|
||||
defm FADD : PTX_FLOAT_3OP<"add", fadd>;
|
||||
defm FSUB : PTX_FLOAT_3OP<"sub", fsub>;
|
||||
defm FMUL : PTX_FLOAT_3OP<"mul", fmul>;
|
||||
defm FADD : PTX_FLOAT_3OP<"add.rn", fadd>;
|
||||
defm FSUB : PTX_FLOAT_3OP<"sub.rn", fsub>;
|
||||
defm FMUL : PTX_FLOAT_3OP<"mul.rn", fmul>;
|
||||
|
||||
// TODO: Allow user selection of rounding modes for fdiv.
|
||||
// For division, we need to have f32 and f64 differently.
|
||||
// For f32, we just always use .approx since it is supported on all hardware
|
||||
// for PTX 1.4+, which is our minimum target.
|
||||
def FDIVrr32 : InstPTX<(outs RegF32:$d),
|
||||
// For floating-point division:
|
||||
// SM13+ defaults to .rn for f32 and f64,
|
||||
// SM10 must *not* provide a rounding modifier.
|
||||
|
||||
// TODO:
|
||||
// - Allow user selection of rounding modes for fdiv
|
||||
// - Add support for -prec-div=false (.approx)
|
||||
|
||||
def FDIVrr32SM13 : InstPTX<(outs RegF32:$d),
|
||||
(ins RegF32:$a, RegF32:$b),
|
||||
"div.approx.f32\t$d, $a, $b",
|
||||
[(set RegF32:$d, (fdiv RegF32:$a, RegF32:$b))]>;
|
||||
def FDIVri32 : InstPTX<(outs RegF32:$d),
|
||||
"div.rn.f32\t$d, $a, $b",
|
||||
[(set RegF32:$d, (fdiv RegF32:$a, RegF32:$b))]>,
|
||||
Requires<[SupportsSM13]>;
|
||||
def FDIVri32SM13 : InstPTX<(outs RegF32:$d),
|
||||
(ins RegF32:$a, f32imm:$b),
|
||||
"div.approx.f32\t$d, $a, $b",
|
||||
[(set RegF32:$d, (fdiv RegF32:$a, fpimm:$b))]>;
|
||||
"div.rn.f32\t$d, $a, $b",
|
||||
[(set RegF32:$d, (fdiv RegF32:$a, fpimm:$b))]>,
|
||||
Requires<[SupportsSM13]>;
|
||||
def FDIVrr32SM10 : InstPTX<(outs RegF32:$d),
|
||||
(ins RegF32:$a, RegF32:$b),
|
||||
"div.f32\t$d, $a, $b",
|
||||
[(set RegF32:$d, (fdiv RegF32:$a, RegF32:$b))]>,
|
||||
Requires<[DoesNotSupportSM13]>;
|
||||
def FDIVri32SM10 : InstPTX<(outs RegF32:$d),
|
||||
(ins RegF32:$a, f32imm:$b),
|
||||
"div.f32\t$d, $a, $b",
|
||||
[(set RegF32:$d, (fdiv RegF32:$a, fpimm:$b))]>,
|
||||
Requires<[DoesNotSupportSM13]>;
|
||||
|
||||
// For f64, we must specify a rounding modifier for SM13+ but *not* for SM10.
|
||||
def FDIVrr64SM13 : InstPTX<(outs RegF64:$d),
|
||||
(ins RegF64:$a, RegF64:$b),
|
||||
"div.rn.f64\t$d, $a, $b",
|
||||
@ -825,11 +840,11 @@ def CVT_pred_u64
|
||||
[(set RegPred:$d, (trunc RegI64:$a))]>;
|
||||
|
||||
def CVT_pred_f32
|
||||
: InstPTX<(outs RegPred:$d), (ins RegF32:$a), "cvt.rni.pred.f32\t$d, $a",
|
||||
: InstPTX<(outs RegPred:$d), (ins RegF32:$a), "cvt.rzi.pred.f32\t$d, $a",
|
||||
[(set RegPred:$d, (fp_to_uint RegF32:$a))]>;
|
||||
|
||||
def CVT_pred_f64
|
||||
: InstPTX<(outs RegPred:$d), (ins RegF64:$a), "cvt.rni.pred.f64\t$d, $a",
|
||||
: InstPTX<(outs RegPred:$d), (ins RegF64:$a), "cvt.rzi.pred.f64\t$d, $a",
|
||||
[(set RegPred:$d, (fp_to_uint RegF64:$a))]>;
|
||||
|
||||
// Conversion to u16
|
||||
@ -847,11 +862,11 @@ def CVT_u16_u64
|
||||
[(set RegI16:$d, (trunc RegI64:$a))]>;
|
||||
|
||||
def CVT_u16_f32
|
||||
: InstPTX<(outs RegI16:$d), (ins RegF32:$a), "cvt.rni.u16.f32\t$d, $a",
|
||||
: InstPTX<(outs RegI16:$d), (ins RegF32:$a), "cvt.rzi.u16.f32\t$d, $a",
|
||||
[(set RegI16:$d, (fp_to_uint RegF32:$a))]>;
|
||||
|
||||
def CVT_u16_f64
|
||||
: InstPTX<(outs RegI16:$d), (ins RegF64:$a), "cvt.rni.u16.f64\t$d, $a",
|
||||
: InstPTX<(outs RegI16:$d), (ins RegF64:$a), "cvt.rzi.u16.f64\t$d, $a",
|
||||
[(set RegI16:$d, (fp_to_uint RegF64:$a))]>;
|
||||
|
||||
// Conversion to u32
|
||||
@ -869,11 +884,11 @@ def CVT_u32_u64
|
||||
[(set RegI32:$d, (trunc RegI64:$a))]>;
|
||||
|
||||
def CVT_u32_f32
|
||||
: InstPTX<(outs RegI32:$d), (ins RegF32:$a), "cvt.rni.u32.f32\t$d, $a",
|
||||
: InstPTX<(outs RegI32:$d), (ins RegF32:$a), "cvt.rzi.u32.f32\t$d, $a",
|
||||
[(set RegI32:$d, (fp_to_uint RegF32:$a))]>;
|
||||
|
||||
def CVT_u32_f64
|
||||
: InstPTX<(outs RegI32:$d), (ins RegF64:$a), "cvt.rni.u32.f64\t$d, $a",
|
||||
: InstPTX<(outs RegI32:$d), (ins RegF64:$a), "cvt.rzi.u32.f64\t$d, $a",
|
||||
[(set RegI32:$d, (fp_to_uint RegF64:$a))]>;
|
||||
|
||||
// Conversion to u64
|
||||
@ -891,11 +906,11 @@ def CVT_u64_u32
|
||||
[(set RegI64:$d, (zext RegI32:$a))]>;
|
||||
|
||||
def CVT_u64_f32
|
||||
: InstPTX<(outs RegI64:$d), (ins RegF32:$a), "cvt.rni.u64.f32\t$d, $a",
|
||||
: InstPTX<(outs RegI64:$d), (ins RegF32:$a), "cvt.rzi.u64.f32\t$d, $a",
|
||||
[(set RegI64:$d, (fp_to_uint RegF32:$a))]>;
|
||||
|
||||
def CVT_u64_f64
|
||||
: InstPTX<(outs RegI64:$d), (ins RegF64:$a), "cvt.rni.u64.f64\t$d, $a",
|
||||
: InstPTX<(outs RegI64:$d), (ins RegF64:$a), "cvt.rzi.u64.f64\t$d, $a",
|
||||
[(set RegI64:$d, (fp_to_uint RegF64:$a))]>;
|
||||
|
||||
// Conversion to f32
|
||||
|
@ -22,14 +22,14 @@ define ptx_device i64 @t1_u64(i64 %x, i64 %y) {
|
||||
}
|
||||
|
||||
define ptx_device float @t1_f32(float %x, float %y) {
|
||||
; CHECK: add.f32 r0, r1, r2
|
||||
; CHECK: add.rn.f32 r0, r1, r2
|
||||
; CHECK-NEXT: ret;
|
||||
%z = fadd float %x, %y
|
||||
ret float %z
|
||||
}
|
||||
|
||||
define ptx_device double @t1_f64(double %x, double %y) {
|
||||
; CHECK: add.f64 rd0, rd1, rd2
|
||||
; CHECK: add.rn.f64 rd0, rd1, rd2
|
||||
; CHECK-NEXT: ret;
|
||||
%z = fadd double %x, %y
|
||||
ret double %z
|
||||
@ -57,14 +57,14 @@ define ptx_device i64 @t2_u64(i64 %x) {
|
||||
}
|
||||
|
||||
define ptx_device float @t2_f32(float %x) {
|
||||
; CHECK: add.f32 r0, r1, 0F3F800000;
|
||||
; CHECK: add.rn.f32 r0, r1, 0F3F800000;
|
||||
; CHECK-NEXT: ret;
|
||||
%z = fadd float %x, 1.0
|
||||
ret float %z
|
||||
}
|
||||
|
||||
define ptx_device double @t2_f64(double %x) {
|
||||
; CHECK: add.f64 rd0, rd1, 0D3FF0000000000000;
|
||||
; CHECK: add.rn.f64 rd0, rd1, 0D3FF0000000000000;
|
||||
; CHECK-NEXT: ret;
|
||||
%z = fadd double %x, 1.0
|
||||
ret double %z
|
||||
|
@ -31,7 +31,7 @@ define ptx_device i32 @cvt_pred_i64(i64 %x, i1 %y) {
|
||||
}
|
||||
|
||||
define ptx_device i32 @cvt_pred_f32(float %x, i1 %y) {
|
||||
; CHECK: cvt.rni.pred.f32 p0, r1;
|
||||
; CHECK: cvt.rzi.pred.f32 p0, r1;
|
||||
; CHECK: ret;
|
||||
%a = fptoui float %x to i1
|
||||
%b = and i1 %a, %y
|
||||
@ -40,7 +40,7 @@ define ptx_device i32 @cvt_pred_f32(float %x, i1 %y) {
|
||||
}
|
||||
|
||||
define ptx_device i32 @cvt_pred_f64(double %x, i1 %y) {
|
||||
; CHECK: cvt.rni.pred.f64 p0, rd1;
|
||||
; CHECK: cvt.rzi.pred.f64 p0, rd1;
|
||||
; CHECK: ret;
|
||||
%a = fptoui double %x to i1
|
||||
%b = and i1 %a, %y
|
||||
@ -72,14 +72,14 @@ define ptx_device i16 @cvt_i16_i64(i64 %x) {
|
||||
}
|
||||
|
||||
define ptx_device i16 @cvt_i16_f32(float %x) {
|
||||
; CHECK: cvt.rni.u16.f32 rh0, r1;
|
||||
; CHECK: cvt.rzi.u16.f32 rh0, r1;
|
||||
; CHECK: ret;
|
||||
%a = fptoui float %x to i16
|
||||
ret i16 %a
|
||||
}
|
||||
|
||||
define ptx_device i16 @cvt_i16_f64(double %x) {
|
||||
; CHECK: cvt.rni.u16.f64 rh0, rd1;
|
||||
; CHECK: cvt.rzi.u16.f64 rh0, rd1;
|
||||
; CHECK: ret;
|
||||
%a = fptoui double %x to i16
|
||||
ret i16 %a
|
||||
@ -109,14 +109,14 @@ define ptx_device i32 @cvt_i32_i64(i64 %x) {
|
||||
}
|
||||
|
||||
define ptx_device i32 @cvt_i32_f32(float %x) {
|
||||
; CHECK: cvt.rni.u32.f32 r0, r1;
|
||||
; CHECK: cvt.rzi.u32.f32 r0, r1;
|
||||
; CHECK: ret;
|
||||
%a = fptoui float %x to i32
|
||||
ret i32 %a
|
||||
}
|
||||
|
||||
define ptx_device i32 @cvt_i32_f64(double %x) {
|
||||
; CHECK: cvt.rni.u32.f64 r0, rd1;
|
||||
; CHECK: cvt.rzi.u32.f64 r0, rd1;
|
||||
; CHECK: ret;
|
||||
%a = fptoui double %x to i32
|
||||
ret i32 %a
|
||||
@ -146,14 +146,14 @@ define ptx_device i64 @cvt_i64_i32(i32 %x) {
|
||||
}
|
||||
|
||||
define ptx_device i64 @cvt_i64_f32(float %x) {
|
||||
; CHECK: cvt.rni.u64.f32 rd0, r1;
|
||||
; CHECK: cvt.rzi.u64.f32 rd0, r1;
|
||||
; CHECK: ret;
|
||||
%a = fptoui float %x to i64
|
||||
ret i64 %a
|
||||
}
|
||||
|
||||
define ptx_device i64 @cvt_i64_f64(double %x) {
|
||||
; CHECK: cvt.rni.u64.f64 rd0, rd1;
|
||||
; CHECK: cvt.rzi.u64.f64 rd0, rd1;
|
||||
; CHECK: ret;
|
||||
%a = fptoui double %x to i64
|
||||
ret i64 %a
|
||||
|
@ -1,7 +1,7 @@
|
||||
; RUN: llc < %s -march=ptx32 -mattr=+sm10 | FileCheck %s
|
||||
|
||||
define ptx_device float @t1_f32(float %x, float %y) {
|
||||
; CHECK: div.approx.f32 r0, r1, r2;
|
||||
; CHECK: div.f32 r0, r1, r2;
|
||||
; CHECK-NEXT: ret;
|
||||
%a = fdiv float %x, %y
|
||||
ret float %a
|
||||
|
@ -1,7 +1,7 @@
|
||||
; RUN: llc < %s -march=ptx32 -mattr=+sm13 | FileCheck %s
|
||||
|
||||
define ptx_device float @t1_f32(float %x, float %y) {
|
||||
; CHECK: div.approx.f32 r0, r1, r2;
|
||||
; CHECK: div.rn.f32 r0, r1, r2;
|
||||
; CHECK-NEXT: ret;
|
||||
%a = fdiv float %x, %y
|
||||
ret float %a
|
||||
|
@ -11,28 +11,28 @@
|
||||
;}
|
||||
|
||||
define ptx_device float @t1_f32(float %x, float %y) {
|
||||
; CHECK: mul.f32 r0, r1, r2
|
||||
; CHECK: mul.rn.f32 r0, r1, r2
|
||||
; CHECK-NEXT: ret;
|
||||
%z = fmul float %x, %y
|
||||
ret float %z
|
||||
}
|
||||
|
||||
define ptx_device double @t1_f64(double %x, double %y) {
|
||||
; CHECK: mul.f64 rd0, rd1, rd2
|
||||
; CHECK: mul.rn.f64 rd0, rd1, rd2
|
||||
; CHECK-NEXT: ret;
|
||||
%z = fmul double %x, %y
|
||||
ret double %z
|
||||
}
|
||||
|
||||
define ptx_device float @t2_f32(float %x) {
|
||||
; CHECK: mul.f32 r0, r1, 0F40A00000;
|
||||
; CHECK: mul.rn.f32 r0, r1, 0F40A00000;
|
||||
; CHECK-NEXT: ret;
|
||||
%z = fmul float %x, 5.0
|
||||
ret float %z
|
||||
}
|
||||
|
||||
define ptx_device double @t2_f64(double %x) {
|
||||
; CHECK: mul.f64 rd0, rd1, 0D4014000000000000;
|
||||
; CHECK: mul.rn.f64 rd0, rd1, 0D4014000000000000;
|
||||
; CHECK-NEXT: ret;
|
||||
%z = fmul double %x, 5.0
|
||||
ret double %z
|
||||
|
@ -22,14 +22,14 @@ define ptx_device i64 @t1_u64(i64 %x, i64 %y) {
|
||||
}
|
||||
|
||||
define ptx_device float @t1_f32(float %x, float %y) {
|
||||
; CHECK: sub.f32 r0, r1, r2
|
||||
; CHECK: sub.rn.f32 r0, r1, r2
|
||||
; CHECK-NEXT: ret;
|
||||
%z = fsub float %x, %y
|
||||
ret float %z
|
||||
}
|
||||
|
||||
define ptx_device double @t1_f64(double %x, double %y) {
|
||||
; CHECK: sub.f64 rd0, rd1, rd2
|
||||
; CHECK: sub.rn.f64 rd0, rd1, rd2
|
||||
; CHECK-NEXT: ret;
|
||||
%z = fsub double %x, %y
|
||||
ret double %z
|
||||
@ -57,14 +57,14 @@ define ptx_device i64 @t2_u64(i64 %x) {
|
||||
}
|
||||
|
||||
define ptx_device float @t2_f32(float %x) {
|
||||
; CHECK: add.f32 r0, r1, 0FBF800000;
|
||||
; CHECK: add.rn.f32 r0, r1, 0FBF800000;
|
||||
; CHECK-NEXT: ret;
|
||||
%z = fsub float %x, 1.0
|
||||
ret float %z
|
||||
}
|
||||
|
||||
define ptx_device double @t2_f64(double %x) {
|
||||
; CHECK: add.f64 rd0, rd1, 0DBFF0000000000000;
|
||||
; CHECK: add.rn.f64 rd0, rd1, 0DBFF0000000000000;
|
||||
; CHECK-NEXT: ret;
|
||||
%z = fsub double %x, 1.0
|
||||
ret double %z
|
||||
|
Loading…
x
Reference in New Issue
Block a user