mirror of
https://github.com/RPCSX/llvm.git
synced 2024-11-28 14:10:41 +00:00
AMDGPU: Add fract intrinsic
Remove the broken patterns that matched it. The SI workaround for the buggy instruction was being expanded from a pattern that matched (x - floor(x)), a fold that is only valid with unsafe math. The v_fract problems are also present on CI (Sea Islands) hardware, not only on SI. Remove the workarounds and only select fract under unsafe math or from the intrinsic.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@271078 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
6be2be5352
commit
14cb586d5e
@ -129,6 +129,13 @@ def int_amdgcn_frexp_exp : Intrinsic<
|
||||
[llvm_i32_ty], [llvm_anyfloat_ty], [IntrNoMem]
|
||||
>;
|
||||
|
||||
// v_fract is buggy on SI/CI. It mishandles infinities, may return 1.0
|
||||
// and always uses rtz, so is not suitable for implementing the OpenCL
|
||||
// fract function. It should be ok on VI.
|
||||
def int_amdgcn_fract : Intrinsic<
|
||||
[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
|
||||
>;
|
||||
|
||||
def int_amdgcn_class : Intrinsic<
|
||||
[llvm_i1_ty], [llvm_anyfloat_ty, llvm_i32_ty], [IntrNoMem]
|
||||
>;
|
||||
|
@ -258,25 +258,6 @@ defm FLAT_ATOMIC_FMAX_X2 : FLAT_ATOMIC <
|
||||
|
||||
} // End SubtargetPredicate = isCI, VIAssemblerPredicate = DisableInst, DisableVIDecoder = 1
|
||||
|
||||
let Predicates = [isCI] in {
|
||||
|
||||
// Convert (x - floor(x)) to fract(x)
|
||||
def : Pat <
|
||||
(f32 (fsub (f32 (VOP3Mods f32:$x, i32:$mods)),
|
||||
(f32 (ffloor (f32 (VOP3Mods f32:$x, i32:$mods)))))),
|
||||
(V_FRACT_F32_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE)
|
||||
>;
|
||||
|
||||
// Convert (x + (-floor(x))) to fract(x)
|
||||
def : Pat <
|
||||
(f64 (fadd (f64 (VOP3Mods f64:$x, i32:$mods)),
|
||||
(f64 (fneg (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))))))),
|
||||
(V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE)
|
||||
>;
|
||||
|
||||
} // End Predicates = [isCI]
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Flat Patterns
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -1656,6 +1656,10 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
||||
case Intrinsic::amdgcn_ldexp:
|
||||
return DAG.getNode(AMDGPUISD::LDEXP, DL, VT,
|
||||
Op.getOperand(1), Op.getOperand(2));
|
||||
|
||||
case Intrinsic::amdgcn_fract:
|
||||
return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
|
||||
|
||||
case Intrinsic::amdgcn_class:
|
||||
return DAG.getNode(AMDGPUISD::FP_CLASS, DL, VT,
|
||||
Op.getOperand(1), Op.getOperand(2));
|
||||
|
@ -1367,7 +1367,7 @@ defm V_FREXP_MANT_F64 : VOP1Inst <vop1<0x3d, 0x31>, "v_frexp_mant_f64",
|
||||
>;
|
||||
|
||||
defm V_FRACT_F64 : VOP1Inst <vop1<0x3e, 0x32>, "v_fract_f64",
|
||||
VOP_F64_F64
|
||||
VOP_F64_F64, AMDGPUfract
|
||||
>;
|
||||
} // End SchedRW = [WriteDoubleAdd]
|
||||
|
||||
@ -2469,7 +2469,22 @@ let Predicates = [UnsafeFPMath] in {
|
||||
|
||||
def : RsqPat<V_RSQ_F32_e32, f32>;
|
||||
def : RsqPat<V_RSQ_F64_e32, f64>;
|
||||
}
|
||||
|
||||
// Convert (x - floor(x)) to fract(x)
|
||||
def : Pat <
|
||||
(f32 (fsub (f32 (VOP3Mods f32:$x, i32:$mods)),
|
||||
(f32 (ffloor (f32 (VOP3Mods f32:$x, i32:$mods)))))),
|
||||
(V_FRACT_F32_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE)
|
||||
>;
|
||||
|
||||
// Convert (x + (-floor(x))) to fract(x)
|
||||
def : Pat <
|
||||
(f64 (fadd (f64 (VOP3Mods f64:$x, i32:$mods)),
|
||||
(f64 (fneg (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))))))),
|
||||
(V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE)
|
||||
>;
|
||||
|
||||
} // End Predicates = [UnsafeFPMath]
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// VOP2 Patterns
|
||||
@ -3549,21 +3564,6 @@ let Predicates = [isSI] in {
|
||||
// The workaround for the V_FRACT bug is:
|
||||
// fract(x) = isnan(x) ? x : min(V_FRACT(x), 0.99999999999999999)
|
||||
|
||||
// Convert (x + (-floor(x))) to fract(x)
|
||||
def : Pat <
|
||||
(f64 (fadd (f64 (VOP3Mods f64:$x, i32:$mods)),
|
||||
(f64 (fneg (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))))))),
|
||||
(V_CNDMASK_B64_PSEUDO
|
||||
(V_MIN_F64
|
||||
SRCMODS.NONE,
|
||||
(V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE),
|
||||
SRCMODS.NONE,
|
||||
(V_MOV_B64_PSEUDO 0x3fefffffffffffff),
|
||||
DSTCLAMP.NONE, DSTOMOD.NONE),
|
||||
$x,
|
||||
(V_CMP_CLASS_F64_e64 SRCMODS.NONE, $x, 3/*NaN*/))
|
||||
>;
|
||||
|
||||
// Convert floor(x) to (x - fract(x))
|
||||
def : Pat <
|
||||
(f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))),
|
||||
|
@ -1,20 +1,32 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=FUNC %s
|
||||
|
||||
; RUN: llc -march=amdgcn -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-UNSAFE -check-prefix=SI-UNSAFE -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-UNSAFE -check-prefix=VI-UNSAFE -check-prefix=FUNC %s
|
||||
|
||||
declare double @llvm.fabs.f64(double) #0
|
||||
declare double @llvm.floor.f64(double) #0
|
||||
|
||||
; FUNC-LABEL: {{^}}fract_f64:
|
||||
; GCN-DAG: v_fract_f64_e32 [[FRC:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]]
|
||||
; SI-DAG: v_fract_f64_e32 [[FRC:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]]
|
||||
; SI-DAG: v_mov_b32_e32 v[[UPLO:[0-9]+]], -1
|
||||
; SI-DAG: v_mov_b32_e32 v[[UPHI:[0-9]+]], 0x3fefffff
|
||||
; SI-DAG: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], v{{\[}}[[UPLO]]:[[UPHI]]], [[FRC]]
|
||||
; SI-DAG: v_cmp_class_f64_e64 [[COND:s\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO]]:[[HI]]], 3
|
||||
; SI: v_cndmask_b32_e64 v[[RESLO:[0-9]+]], v[[MINLO]], v[[LO]], [[COND]]
|
||||
; SI: v_cndmask_b32_e64 v[[RESHI:[0-9]+]], v[[MINHI]], v[[HI]], [[COND]]
|
||||
; SI: buffer_store_dwordx2 v{{\[}}[[RESLO]]:[[RESHI]]]
|
||||
; CI: buffer_store_dwordx2 [[FRC]]
|
||||
; SI: v_add_f64 [[SUB0:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO]]:[[HI]]{{\]}}, -v{{\[}}[[RESLO]]:[[RESHI]]{{\]}}
|
||||
; SI: v_add_f64 [[FRACT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO]]:[[HI]]{{\]}}, -[[SUB0]]
|
||||
|
||||
; CI: buffer_load_dwordx2 [[X:v\[[0-9]+:[0-9]+\]]]
|
||||
; CI: v_floor_f64_e32 [[FLOORX:v\[[0-9]+:[0-9]+\]]], [[X]]
|
||||
; CI: v_add_f64 [[FRACT:v\[[0-9]+:[0-9]+\]]], [[X]], -[[FLOORX]]
|
||||
|
||||
; GCN-UNSAFE: buffer_load_dwordx2 [[X:v\[[0-9]+:[0-9]+\]]]
|
||||
; GCN-UNSAFE: v_fract_f64_e32 [[FRACT:v\[[0-9]+:[0-9]+\]]], [[X]]
|
||||
|
||||
; GCN: buffer_store_dwordx2 [[FRACT]]
|
||||
define void @fract_f64(double addrspace(1)* %out, double addrspace(1)* %src) #1 {
|
||||
%x = load double, double addrspace(1)* %src
|
||||
%floor.x = call double @llvm.floor.f64(double %x)
|
||||
@ -24,15 +36,24 @@ define void @fract_f64(double addrspace(1)* %out, double addrspace(1)* %src) #1
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}fract_f64_neg:
|
||||
; GCN-DAG: v_fract_f64_e64 [[FRC:v\[[0-9]+:[0-9]+\]]], -v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]]
|
||||
; SI-DAG: v_fract_f64_e64 [[FRC:v\[[0-9]+:[0-9]+\]]], -v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]]
|
||||
; SI-DAG: v_mov_b32_e32 v[[UPLO:[0-9]+]], -1
|
||||
; SI-DAG: v_mov_b32_e32 v[[UPHI:[0-9]+]], 0x3fefffff
|
||||
; SI-DAG: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], v{{\[}}[[UPLO]]:[[UPHI]]], [[FRC]]
|
||||
; SI-DAG: v_cmp_class_f64_e64 [[COND:s\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO]]:[[HI]]], 3
|
||||
; SI: v_cndmask_b32_e64 v[[RESLO:[0-9]+]], v[[MINLO]], v[[LO]], [[COND]]
|
||||
; SI: v_cndmask_b32_e64 v[[RESHI:[0-9]+]], v[[MINHI]], v[[HI]], [[COND]]
|
||||
; SI: buffer_store_dwordx2 v{{\[}}[[RESLO]]:[[RESHI]]]
|
||||
; CI: buffer_store_dwordx2 [[FRC]]
|
||||
; SI: v_add_f64 [[SUB0:v\[[0-9]+:[0-9]+\]]], -v{{\[}}[[LO]]:[[HI]]{{\]}}, -v{{\[}}[[RESLO]]:[[RESHI]]{{\]}}
|
||||
; SI: v_add_f64 [[FRACT:v\[[0-9]+:[0-9]+\]]], -v{{\[}}[[LO]]:[[HI]]{{\]}}, -[[SUB0]]
|
||||
|
||||
; CI: buffer_load_dwordx2 [[X:v\[[0-9]+:[0-9]+\]]]
|
||||
; CI: v_floor_f64_e64 [[FLOORX:v\[[0-9]+:[0-9]+\]]], -[[X]]
|
||||
; CI: v_add_f64 [[FRACT:v\[[0-9]+:[0-9]+\]]], -[[X]], -[[FLOORX]]
|
||||
|
||||
; GCN-UNSAFE: buffer_load_dwordx2 [[X:v\[[0-9]+:[0-9]+\]]]
|
||||
; GCN-UNSAFE: v_fract_f64_e64 [[FRACT:v\[[0-9]+:[0-9]+\]]], -[[X]]
|
||||
|
||||
; GCN: buffer_store_dwordx2 [[FRACT]]
|
||||
define void @fract_f64_neg(double addrspace(1)* %out, double addrspace(1)* %src) #1 {
|
||||
%x = load double, double addrspace(1)* %src
|
||||
%neg.x = fsub double -0.0, %x
|
||||
@ -43,15 +64,24 @@ define void @fract_f64_neg(double addrspace(1)* %out, double addrspace(1)* %src)
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}fract_f64_neg_abs:
|
||||
; GCN-DAG: v_fract_f64_e64 [[FRC:v\[[0-9]+:[0-9]+\]]], -|v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]]|
|
||||
; SI-DAG: v_fract_f64_e64 [[FRC:v\[[0-9]+:[0-9]+\]]], -|v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]]|
|
||||
; SI-DAG: v_mov_b32_e32 v[[UPLO:[0-9]+]], -1
|
||||
; SI-DAG: v_mov_b32_e32 v[[UPHI:[0-9]+]], 0x3fefffff
|
||||
; SI-DAG: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], v{{\[}}[[UPLO]]:[[UPHI]]], [[FRC]]
|
||||
; SI-DAG: v_cmp_class_f64_e64 [[COND:s\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO]]:[[HI]]], 3
|
||||
; SI: v_cndmask_b32_e64 v[[RESLO:[0-9]+]], v[[MINLO]], v[[LO]], [[COND]]
|
||||
; SI: v_cndmask_b32_e64 v[[RESHI:[0-9]+]], v[[MINHI]], v[[HI]], [[COND]]
|
||||
; SI: buffer_store_dwordx2 v{{\[}}[[RESLO]]:[[RESHI]]]
|
||||
; CI: buffer_store_dwordx2 [[FRC]]
|
||||
; SI: v_add_f64 [[SUB0:v\[[0-9]+:[0-9]+\]]], -|v{{\[}}[[LO]]:[[HI]]{{\]}}|, -v{{\[}}[[RESLO]]:[[RESHI]]{{\]}}
|
||||
; SI: v_add_f64 [[FRACT:v\[[0-9]+:[0-9]+\]]], -|v{{\[}}[[LO]]:[[HI]]{{\]}}|, -[[SUB0]]
|
||||
|
||||
; CI: buffer_load_dwordx2 [[X:v\[[0-9]+:[0-9]+\]]]
|
||||
; CI: v_floor_f64_e64 [[FLOORX:v\[[0-9]+:[0-9]+\]]], -|[[X]]|
|
||||
; CI: v_add_f64 [[FRACT:v\[[0-9]+:[0-9]+\]]], -|[[X]]|, -[[FLOORX]]
|
||||
|
||||
; GCN-UNSAFE: buffer_load_dwordx2 [[X:v\[[0-9]+:[0-9]+\]]]
|
||||
; GCN-UNSAFE: v_fract_f64_e64 [[FRACT:v\[[0-9]+:[0-9]+\]]], -|[[X]]|
|
||||
|
||||
; GCN: buffer_store_dwordx2 [[FRACT]]
|
||||
define void @fract_f64_neg_abs(double addrspace(1)* %out, double addrspace(1)* %src) #1 {
|
||||
%x = load double, double addrspace(1)* %src
|
||||
%abs.x = call double @llvm.fabs.f64(double %x)
|
||||
@ -62,5 +92,20 @@ define void @fract_f64_neg_abs(double addrspace(1)* %out, double addrspace(1)* %
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}multi_use_floor_fract_f64:
|
||||
; VI-UNSAFE: buffer_load_dwordx2 [[X:v\[[0-9]+:[0-9]+\]]]
|
||||
; VI-UNSAFE-DAG: v_floor_f64_e32 [[FLOOR:v\[[0-9]+:[0-9]+\]]], [[X]]
|
||||
; VI-UNSAFE-DAG: v_fract_f64_e32 [[FRACT:v\[[0-9]+:[0-9]+\]]], [[X]]
|
||||
; VI-UNSAFE: buffer_store_dwordx2 [[FLOOR]]
|
||||
; VI-UNSAFE: buffer_store_dwordx2 [[FRACT]]
|
||||
define void @multi_use_floor_fract_f64(double addrspace(1)* %out, double addrspace(1)* %src) #1 {
|
||||
%x = load double, double addrspace(1)* %src
|
||||
%floor.x = call double @llvm.floor.f64(double %x)
|
||||
%fract = fsub double %x, %floor.x
|
||||
store volatile double %floor.x, double addrspace(1)* %out
|
||||
store volatile double %fract, double addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind readnone }
|
||||
attributes #1 = { nounwind }
|
||||
|
@ -1,18 +1,19 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=FUNC %s
|
||||
; XUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-SAFE -check-prefix=GCN -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-SAFE -check-prefix=GCN -check-prefix=CI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-SAFE -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN-UNSAFE -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN-UNSAFE -check-prefix=GCN %s
|
||||
|
||||
declare float @llvm.fabs.f32(float) #0
|
||||
declare float @llvm.floor.f32(float) #0
|
||||
|
||||
; FUNC-LABEL: {{^}}fract_f32:
|
||||
; CI: v_fract_f32_e32 [[RESULT:v[0-9]+]], [[INPUT:v[0-9]+]]
|
||||
; SI: v_floor_f32_e32 [[FLR:v[0-9]+]], [[INPUT:v[0-9]+]]
|
||||
; SI: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[FLR]], [[INPUT]]
|
||||
; GCN: buffer_store_dword [[RESULT]]
|
||||
; GCN-LABEL: {{^}}fract_f32:
|
||||
; GCN-SAFE: v_floor_f32_e32 [[FLR:v[0-9]+]], [[INPUT:v[0-9]+]]
|
||||
; GCN-SAFE: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[FLR]], [[INPUT]]
|
||||
|
||||
; XEG: FRACT
|
||||
; GCN-UNSAFE: v_fract_f32_e32 [[RESULT:v[0-9]+]], [[INPUT:v[0-9]+]]
|
||||
|
||||
; GCN: buffer_store_dword [[RESULT]]
|
||||
define void @fract_f32(float addrspace(1)* %out, float addrspace(1)* %src) #1 {
|
||||
%x = load float, float addrspace(1)* %src
|
||||
%floor.x = call float @llvm.floor.f32(float %x)
|
||||
@ -21,13 +22,13 @@ define void @fract_f32(float addrspace(1)* %out, float addrspace(1)* %src) #1 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}fract_f32_neg:
|
||||
; CI: v_fract_f32_e64 [[RESULT:v[0-9]+]], -[[INPUT:v[0-9]+]]
|
||||
; SI: v_floor_f32_e64 [[FLR:v[0-9]+]], -[[INPUT:v[0-9]+]]
|
||||
; SI: v_sub_f32_e64 [[RESULT:v[0-9]+]], -[[INPUT]], [[FLR]]
|
||||
; GCN: buffer_store_dword [[RESULT]]
|
||||
; GCN-LABEL: {{^}}fract_f32_neg:
|
||||
; GCN-SAFE: v_floor_f32_e64 [[FLR:v[0-9]+]], -[[INPUT:v[0-9]+]]
|
||||
; GCN-SAFE: v_sub_f32_e64 [[RESULT:v[0-9]+]], -[[INPUT]], [[FLR]]
|
||||
|
||||
; XEG: FRACT
|
||||
; GCN-UNSAFE: v_fract_f32_e64 [[RESULT:v[0-9]+]], -[[INPUT:v[0-9]+]]
|
||||
|
||||
; GCN: buffer_store_dword [[RESULT]]
|
||||
define void @fract_f32_neg(float addrspace(1)* %out, float addrspace(1)* %src) #1 {
|
||||
%x = load float, float addrspace(1)* %src
|
||||
%x.neg = fsub float -0.0, %x
|
||||
@ -37,13 +38,13 @@ define void @fract_f32_neg(float addrspace(1)* %out, float addrspace(1)* %src) #
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}fract_f32_neg_abs:
|
||||
; CI: v_fract_f32_e64 [[RESULT:v[0-9]+]], -|[[INPUT:v[0-9]+]]|
|
||||
; SI: v_floor_f32_e64 [[FLR:v[0-9]+]], -|[[INPUT:v[0-9]+]]|
|
||||
; SI: v_sub_f32_e64 [[RESULT:v[0-9]+]], -|[[INPUT]]|, [[FLR]]
|
||||
; GCN: buffer_store_dword [[RESULT]]
|
||||
; GCN-LABEL: {{^}}fract_f32_neg_abs:
|
||||
; GCN-SAFE: v_floor_f32_e64 [[FLR:v[0-9]+]], -|[[INPUT:v[0-9]+]]|
|
||||
; GCN-SAFE: v_sub_f32_e64 [[RESULT:v[0-9]+]], -|[[INPUT]]|, [[FLR]]
|
||||
|
||||
; XEG: FRACT
|
||||
; GCN-UNSAFE: v_fract_f32_e64 [[RESULT:v[0-9]+]], -|[[INPUT:v[0-9]+]]|
|
||||
|
||||
; GCN: buffer_store_dword [[RESULT]]
|
||||
define void @fract_f32_neg_abs(float addrspace(1)* %out, float addrspace(1)* %src) #1 {
|
||||
%x = load float, float addrspace(1)* %src
|
||||
%abs.x = call float @llvm.fabs.f32(float %x)
|
||||
@ -54,5 +55,20 @@ define void @fract_f32_neg_abs(float addrspace(1)* %out, float addrspace(1)* %sr
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}multi_use_floor_fract_f32:
|
||||
; GCN-UNSAFE-DAG: v_floor_f32_e32 [[FLOOR:v[0-9]+]], [[INPUT:v[0-9]+]]
|
||||
; GCN-UNSAFE-DAG: v_fract_f32_e32 [[FRACT:v[0-9]+]], [[INPUT:v[0-9]+]]
|
||||
|
||||
; GCN-UNSAFE: buffer_store_dword [[FLOOR]]
|
||||
; GCN-UNSAFE: buffer_store_dword [[FRACT]]
|
||||
define void @multi_use_floor_fract_f32(float addrspace(1)* %out, float addrspace(1)* %src) #1 {
|
||||
%x = load float, float addrspace(1)* %src
|
||||
%floor.x = call float @llvm.floor.f32(float %x)
|
||||
%fract = fsub float %x, %floor.x
|
||||
store volatile float %floor.x, float addrspace(1)* %out
|
||||
store volatile float %fract, float addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind readnone }
|
||||
attributes #1 = { nounwind }
|
||||
|
24
test/CodeGen/AMDGPU/llvm.amdgcn.fract.ll
Normal file
24
test/CodeGen/AMDGPU/llvm.amdgcn.fract.ll
Normal file
@ -0,0 +1,24 @@
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
|
||||
|
||||
declare float @llvm.amdgcn.fract.f32(float) #0
|
||||
declare double @llvm.amdgcn.fract.f64(double) #0
|
||||
|
||||
; GCN-LABEL: {{^}}v_fract_f32:
|
||||
; GCN: v_fract_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
|
||||
define void @v_fract_f32(float addrspace(1)* %out, float %src) #1 {
|
||||
%fract = call float @llvm.amdgcn.fract.f32(float %src)
|
||||
store float %fract, float addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_fract_f64:
|
||||
; GCN: v_fract_f64_e32 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
|
||||
define void @v_fract_f64(double addrspace(1)* %out, double %src) #1 {
|
||||
%fract = call double @llvm.amdgcn.fract.f64(double %src)
|
||||
store double %fract, double addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind readnone }
|
||||
attributes #1 = { nounwind }
|
Loading…
Reference in New Issue
Block a user