mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-25 12:04:36 +00:00
AMDGPU: Implement f16 fcanonicalize
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@290300 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
15a4f4e4ce
commit
0bb2ef4a14
@ -391,6 +391,7 @@ int TWO_PI = 0x40c90fdb;
|
|||||||
int PI = 0x40490fdb;
|
int PI = 0x40490fdb;
|
||||||
int TWO_PI_INV = 0x3e22f983;
|
int TWO_PI_INV = 0x3e22f983;
|
||||||
int FP_UINT_MAX_PLUS_1 = 0x4f800000; // 1 << 32 in floating point encoding
|
int FP_UINT_MAX_PLUS_1 = 0x4f800000; // 1 << 32 in floating point encoding
|
||||||
|
int FP16_ONE = 0x3C00;
|
||||||
int FP32_ONE = 0x3f800000;
|
int FP32_ONE = 0x3f800000;
|
||||||
int FP32_NEG_ONE = 0xbf800000;
|
int FP32_NEG_ONE = 0xbf800000;
|
||||||
int FP64_ONE = 0x3ff0000000000000;
|
int FP64_ONE = 0x3ff0000000000000;
|
||||||
|
@ -3648,6 +3648,9 @@ SDValue SITargetLowering::performFCanonicalizeCombine(
|
|||||||
|
|
||||||
if (VT == MVT::f64 && !Subtarget->hasFP64Denormals())
|
if (VT == MVT::f64 && !Subtarget->hasFP64Denormals())
|
||||||
return DAG.getConstantFP(0.0, SDLoc(N), VT);
|
return DAG.getConstantFP(0.0, SDLoc(N), VT);
|
||||||
|
|
||||||
|
if (VT == MVT::f16 && !Subtarget->hasFP16Denormals())
|
||||||
|
return DAG.getConstantFP(0.0, SDLoc(N), VT);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (C.isNaN()) {
|
if (C.isNaN()) {
|
||||||
|
@ -1021,6 +1021,11 @@ defm : BFMPatterns <i32, S_BFM_B32, S_MOV_B32>;
|
|||||||
|
|
||||||
def : BFEPattern <V_BFE_U32, S_MOV_B32>;
|
def : BFEPattern <V_BFE_U32, S_MOV_B32>;
|
||||||
|
|
||||||
|
def : Pat<
|
||||||
|
(fcanonicalize f16:$src),
|
||||||
|
(V_MUL_F16_e64 0, (i32 CONST.FP16_ONE), 0, $src, 0, 0)
|
||||||
|
>;
|
||||||
|
|
||||||
def : Pat<
|
def : Pat<
|
||||||
(fcanonicalize f32:$src),
|
(fcanonicalize f32:$src),
|
||||||
(V_MUL_F32_e64 0, (i32 CONST.FP32_ONE), 0, $src, 0, 0)
|
(V_MUL_F32_e64 0, (i32 CONST.FP32_ONE), 0, $src, 0, 0)
|
||||||
|
172
test/CodeGen/AMDGPU/fcanonicalize.f16.ll
Normal file
172
test/CodeGen/AMDGPU/fcanonicalize.f16.ll
Normal file
@ -0,0 +1,172 @@
|
|||||||
|
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||||
|
|
||||||
|
declare half @llvm.canonicalize.f16(half) #0
|
||||||
|
|
||||||
|
; GCN-LABEL: {{^}}v_test_canonicalize_var_f16:
|
||||||
|
; GCN: v_mul_f16_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}}
|
||||||
|
; GCN: buffer_store_short [[REG]]
|
||||||
|
define void @v_test_canonicalize_var_f16(half addrspace(1)* %out) #1 {
|
||||||
|
%val = load half, half addrspace(1)* %out
|
||||||
|
%canonicalized = call half @llvm.canonicalize.f16(half %val)
|
||||||
|
store half %canonicalized, half addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; GCN-LABEL: {{^}}s_test_canonicalize_var_f16:
|
||||||
|
; GCN: v_mul_f16_e64 [[REG:v[0-9]+]], 1.0, {{s[0-9]+}}
|
||||||
|
; GCN: buffer_store_short [[REG]]
|
||||||
|
define void @s_test_canonicalize_var_f16(half addrspace(1)* %out, i16 zeroext %val.arg) #1 {
|
||||||
|
%val = bitcast i16 %val.arg to half
|
||||||
|
%canonicalized = call half @llvm.canonicalize.f16(half %val)
|
||||||
|
store half %canonicalized, half addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; GCN-LABEL: {{^}}test_fold_canonicalize_p0_f16:
|
||||||
|
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
|
||||||
|
; GCN: buffer_store_short [[REG]]
|
||||||
|
define void @test_fold_canonicalize_p0_f16(half addrspace(1)* %out) #1 {
|
||||||
|
%canonicalized = call half @llvm.canonicalize.f16(half 0.0)
|
||||||
|
store half %canonicalized, half addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; GCN-LABEL: {{^}}test_fold_canonicalize_n0_f16:
|
||||||
|
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffff8000{{$}}
|
||||||
|
; GCN: buffer_store_short [[REG]]
|
||||||
|
define void @test_fold_canonicalize_n0_f16(half addrspace(1)* %out) #1 {
|
||||||
|
%canonicalized = call half @llvm.canonicalize.f16(half -0.0)
|
||||||
|
store half %canonicalized, half addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; GCN-LABEL: {{^}}test_fold_canonicalize_p1_f16:
|
||||||
|
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3c00{{$}}
|
||||||
|
; GCN: buffer_store_short [[REG]]
|
||||||
|
define void @test_fold_canonicalize_p1_f16(half addrspace(1)* %out) #1 {
|
||||||
|
%canonicalized = call half @llvm.canonicalize.f16(half 1.0)
|
||||||
|
store half %canonicalized, half addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; GCN-LABEL: {{^}}test_fold_canonicalize_n1_f16:
|
||||||
|
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffffbc00{{$}}
|
||||||
|
; GCN: buffer_store_short [[REG]]
|
||||||
|
define void @test_fold_canonicalize_n1_f16(half addrspace(1)* %out) #1 {
|
||||||
|
%canonicalized = call half @llvm.canonicalize.f16(half -1.0)
|
||||||
|
store half %canonicalized, half addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; GCN-LABEL: {{^}}test_fold_canonicalize_literal_f16:
|
||||||
|
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x4c00{{$}}
|
||||||
|
; GCN: buffer_store_short [[REG]]
|
||||||
|
define void @test_fold_canonicalize_literal_f16(half addrspace(1)* %out) #1 {
|
||||||
|
%canonicalized = call half @llvm.canonicalize.f16(half 16.0)
|
||||||
|
store half %canonicalized, half addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal0_f16:
|
||||||
|
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
|
||||||
|
; GCN: buffer_store_short [[REG]]
|
||||||
|
define void @test_no_denormals_fold_canonicalize_denormal0_f16(half addrspace(1)* %out) #1 {
|
||||||
|
%canonicalized = call half @llvm.canonicalize.f16(half 0xH03FF)
|
||||||
|
store half %canonicalized, half addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal0_f16:
|
||||||
|
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3ff{{$}}
|
||||||
|
; GCN: buffer_store_short [[REG]]
|
||||||
|
define void @test_denormals_fold_canonicalize_denormal0_f16(half addrspace(1)* %out) #3 {
|
||||||
|
%canonicalized = call half @llvm.canonicalize.f16(half 0xH03FF)
|
||||||
|
store half %canonicalized, half addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal1_f16:
|
||||||
|
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
|
||||||
|
; GCN: buffer_store_short [[REG]]
|
||||||
|
define void @test_no_denormals_fold_canonicalize_denormal1_f16(half addrspace(1)* %out) #1 {
|
||||||
|
%canonicalized = call half @llvm.canonicalize.f16(half 0xH83FF)
|
||||||
|
store half %canonicalized, half addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal1_f16:
|
||||||
|
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffff83ff{{$}}
|
||||||
|
; GCN: buffer_store_short [[REG]]
|
||||||
|
define void @test_denormals_fold_canonicalize_denormal1_f16(half addrspace(1)* %out) #3 {
|
||||||
|
%canonicalized = call half @llvm.canonicalize.f16(half 0xH83FF)
|
||||||
|
store half %canonicalized, half addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; NOTE(review): 0xH7C00 is the IEEE half-precision encoding of +infinity
; (exponent all ones, zero mantissa), not a quiet NaN; a qNaN would be 0xH7E00.
; The "qnan" name does not match the constant being canonicalized -- confirm intent.
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_f16:
|
||||||
|
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7c00{{$}}
|
||||||
|
; GCN: buffer_store_short [[REG]]
|
||||||
|
define void @test_fold_canonicalize_qnan_f16(half addrspace(1)* %out) #1 {
|
||||||
|
%canonicalized = call half @llvm.canonicalize.f16(half 0xH7C00)
|
||||||
|
store half %canonicalized, half addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg1_f16:
|
||||||
|
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
|
||||||
|
; GCN: buffer_store_short [[REG]]
|
||||||
|
define void @test_fold_canonicalize_qnan_value_neg1_f16(half addrspace(1)* %out) #1 {
|
||||||
|
%canonicalized = call half @llvm.canonicalize.f16(half bitcast (i16 -1 to half))
|
||||||
|
store half %canonicalized, half addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg2_f16:
|
||||||
|
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
|
||||||
|
; GCN: buffer_store_short [[REG]]
|
||||||
|
define void @test_fold_canonicalize_qnan_value_neg2_f16(half addrspace(1)* %out) #1 {
|
||||||
|
%canonicalized = call half @llvm.canonicalize.f16(half bitcast (i16 -2 to half))
|
||||||
|
store half %canonicalized, half addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; GCN-LABEL: {{^}}test_fold_canonicalize_snan0_value_f16:
|
||||||
|
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
|
||||||
|
; GCN: buffer_store_short [[REG]]
|
||||||
|
define void @test_fold_canonicalize_snan0_value_f16(half addrspace(1)* %out) #1 {
|
||||||
|
%canonicalized = call half @llvm.canonicalize.f16(half 0xH7C01)
|
||||||
|
store half %canonicalized, half addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; GCN-LABEL: {{^}}test_fold_canonicalize_snan1_value_f16:
|
||||||
|
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
|
||||||
|
; GCN: buffer_store_short [[REG]]
|
||||||
|
define void @test_fold_canonicalize_snan1_value_f16(half addrspace(1)* %out) #1 {
|
||||||
|
%canonicalized = call half @llvm.canonicalize.f16(half 0xH7DFF)
|
||||||
|
store half %canonicalized, half addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; GCN-LABEL: {{^}}test_fold_canonicalize_snan2_value_f16:
|
||||||
|
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
|
||||||
|
; GCN: buffer_store_short [[REG]]
|
||||||
|
define void @test_fold_canonicalize_snan2_value_f16(half addrspace(1)* %out) #1 {
|
||||||
|
%canonicalized = call half @llvm.canonicalize.f16(half 0xHFDFF)
|
||||||
|
store half %canonicalized, half addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; GCN-LABEL: {{^}}test_fold_canonicalize_snan3_value_f16:
|
||||||
|
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
|
||||||
|
; GCN: buffer_store_short [[REG]]
|
||||||
|
define void @test_fold_canonicalize_snan3_value_f16(half addrspace(1)* %out) #1 {
|
||||||
|
%canonicalized = call half @llvm.canonicalize.f16(half 0xHFC01)
|
||||||
|
store half %canonicalized, half addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
attributes #0 = { nounwind readnone }
|
||||||
|
attributes #1 = { nounwind }
|
||||||
|
; Denormals disabled for both f16 and f64 (counterpart of #3, which enables both).
attributes #2 = { nounwind "target-features"="-fp16-denormals,-fp64-denormals" }
|
||||||
|
attributes #3 = { nounwind "target-features"="+fp16-denormals,+fp64-denormals" }
|
Loading…
x
Reference in New Issue
Block a user