mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-10 05:41:40 +00:00
[AMDGPU] Restrict v_cndmask_b32 abs/neg modifiers to f32
Summary: D64497 allowed abs/neg source modifiers on v_cndmask_b32 but it doesn't make any sense to apply them to f16 operands; they would interpret the bits of the value as an f32, giving nonsensical results. This patch restricts them to f32 operands. Reviewers: arsenm, hakzsam Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D64636 llvm-svn: 365904
This commit is contained in:
parent
409092918d
commit
393984bc1e
@ -230,6 +230,7 @@ private:
|
||||
bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
|
||||
|
||||
bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
|
||||
bool SelectVOP3Mods_f32(SDValue In, SDValue &Src, SDValue &SrcMods) const;
|
||||
bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods) const;
|
||||
bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
|
||||
bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
|
||||
@ -2285,6 +2286,15 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
|
||||
return isNoNanSrc(Src);
|
||||
}
|
||||
|
||||
/// Match a VOP3 source operand, folding source modifiers only for f32 values.
///
/// If \p In has type f32 this simply defers to SelectVOP3Mods. For every other
/// value type \p In is forwarded unchanged as \p Src and \p SrcMods is set to
/// a zero i32 target constant, i.e. no modifiers are applied.
///
/// \param In      The operand being matched.
/// \param Src     [out] The value to use as the instruction source.
/// \param SrcMods [out] The source-modifier immediate for \p Src.
/// \returns the result of SelectVOP3Mods for f32 inputs, true otherwise.
bool AMDGPUDAGToDAGISel::SelectVOP3Mods_f32(SDValue In, SDValue &Src,
                                            SDValue &SrcMods) const {
  if (In.getValueType() == MVT::f32)
    return SelectVOP3Mods(In, Src, SrcMods);

  // Non-f32 operand: abs/neg on v_cndmask_b32 would treat the bits as an f32,
  // so pass the value through as-is with an empty modifier mask.
  Src = In;
  SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
  return true;
}
|
||||
|
||||
bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
|
||||
if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
|
||||
return false;
|
||||
|
@ -1140,6 +1140,8 @@ def VOP3Mods : ComplexPattern<untyped, 2, "SelectVOP3Mods">;
|
||||
def VOP3NoMods : ComplexPattern<untyped, 1, "SelectVOP3NoMods">;
|
||||
// VOP3Mods, but the input source is known to never be NaN.
|
||||
def VOP3Mods_nnan : ComplexPattern<fAny, 2, "SelectVOP3Mods_NNaN">;
|
||||
// VOP3Mods, but source modifiers are only folded for f32 operands; operands of
// any other type are still matched, with no modifiers applied.
|
||||
def VOP3Mods_f32 : ComplexPattern<fAny, 2, "SelectVOP3Mods_f32">;
|
||||
|
||||
def VOP3OMods : ComplexPattern<untyped, 3, "SelectVOP3OMods">;
|
||||
|
||||
|
@ -791,8 +791,8 @@ def : FMADModsPat<V_MAD_F16, AMDGPUfmad_ftz, f16> {
|
||||
|
||||
multiclass SelectPat <ValueType vt> {
|
||||
def : GCNPat <
|
||||
(vt (select i1:$src0, (VOP3Mods vt:$src1, i32:$src1_mods),
|
||||
(VOP3Mods vt:$src2, i32:$src2_mods))),
|
||||
(vt (select i1:$src0, (VOP3Mods_f32 vt:$src1, i32:$src1_mods),
|
||||
(VOP3Mods_f32 vt:$src2, i32:$src2_mods))),
|
||||
(V_CNDMASK_B32_e64 $src2_mods, $src2, $src1_mods, $src1, $src0)
|
||||
>;
|
||||
}
|
||||
|
@ -3,6 +3,9 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-flat-for-global,+WavefrontSize64 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10 %s
|
||||
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
declare half @llvm.fabs.f16(half)
|
||||
declare float @llvm.fabs.f32(float)
|
||||
declare double @llvm.fabs.f64(double)
|
||||
|
||||
; GCN-LABEL: {{^}}v_cnd_nan_nosgpr:
|
||||
; GCN: v_cmp_eq_u32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0
|
||||
@ -416,5 +419,50 @@ define amdgpu_kernel void @fcmp_k0_vgprX_select_k1_vgprZ_f32_cond_use_x2(float a
|
||||
ret void
|
||||
}
|
||||
|
||||
; The abs/neg source modifiers of v_cndmask_b32 are only valid for f32 operands;
; the f16 and f64 selects below must be emitted without them.
|
||||
|
||||
; GCN-LABEL: {{^}}v_cndmask_abs_neg_f16:
|
||||
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}},
|
||||
; Loads a half from %fptr at the index given by the workitem id, computes both
; its fabs and its fneg, selects fabs when %c != 0 (fneg otherwise) and stores
; the selected half to %out.
define amdgpu_kernel void @v_cndmask_abs_neg_f16(half addrspace(1)* %out, i32 %c, half addrspace(1)* %fptr) #0 {
|
||||
%idx = call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%f.gep = getelementptr half, half addrspace(1)* %fptr, i32 %idx
|
||||
%f = load half, half addrspace(1)* %f.gep
|
||||
%f.abs = call half @llvm.fabs.f16(half %f)
|
||||
%f.neg = fneg half %f
|
||||
%setcc = icmp ne i32 %c, 0
|
||||
%select = select i1 %setcc, half %f.abs, half %f.neg
|
||||
store half %select, half addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_cndmask_abs_neg_f32:
|
||||
; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, -v{{[0-9]+}}, |v{{[0-9]+}}|,
|
||||
; Loads a float from %fptr at the index given by the workitem id, computes both
; its fabs and its fneg, selects fabs when %c != 0 (fneg otherwise) and stores
; the selected float to %out.
define amdgpu_kernel void @v_cndmask_abs_neg_f32(float addrspace(1)* %out, i32 %c, float addrspace(1)* %fptr) #0 {
|
||||
%idx = call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%f.gep = getelementptr float, float addrspace(1)* %fptr, i32 %idx
|
||||
%f = load float, float addrspace(1)* %f.gep
|
||||
%f.abs = call float @llvm.fabs.f32(float %f)
|
||||
%f.neg = fneg float %f
|
||||
%setcc = icmp ne i32 %c, 0
|
||||
%select = select i1 %setcc, float %f.abs, float %f.neg
|
||||
store float %select, float addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_cndmask_abs_neg_f64:
|
||||
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}},
|
||||
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}},
|
||||
; Loads a double from %fptr at the index given by the workitem id, computes both
; its fabs and its fneg, selects fabs when %c != 0 (fneg otherwise) and stores
; the selected double to %out.
define amdgpu_kernel void @v_cndmask_abs_neg_f64(double addrspace(1)* %out, i32 %c, double addrspace(1)* %fptr) #0 {
|
||||
%idx = call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%f.gep = getelementptr double, double addrspace(1)* %fptr, i32 %idx
|
||||
%f = load double, double addrspace(1)* %f.gep
|
||||
%f.abs = call double @llvm.fabs.f64(double %f)
|
||||
%f.neg = fneg double %f
|
||||
%setcc = icmp ne i32 %c, 0
|
||||
%select = select i1 %setcc, double %f.abs, double %f.neg
|
||||
store double %select, double addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readnone }
|
||||
|
Loading…
Reference in New Issue
Block a user