AMDGPU: Generalize matching of v_med3_f32

I think this is safe as long as all inputs are known to never
be NaNs.

Also add an intrinsic for fmed3 to be able to handle all safe
math cases.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@293598 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Matt Arsenault 2017-01-31 03:07:46 +00:00
parent 6a569f5700
commit 3b595d2304
8 changed files with 807 additions and 6 deletions

View File

@ -206,6 +206,11 @@ def int_amdgcn_class : Intrinsic<
[llvm_i1_ty], [llvm_anyfloat_ty, llvm_i32_ty], [IntrNoMem]
>;
// Three-operand floating-point intrinsic exposed as __builtin_amdgcn_fmed3.
// The result type is overloaded (llvm_anyfloat_ty) and all three operands
// are constrained to match it; the intrinsic is marked IntrNoMem.
def int_amdgcn_fmed3 : GCCBuiltin<"__builtin_amdgcn_fmed3">,
Intrinsic<[llvm_anyfloat_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]
>;
def int_amdgcn_cubeid : GCCBuiltin<"__builtin_amdgcn_cubeid">,
Intrinsic<[llvm_float_ty],
[llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]

View File

@ -80,6 +80,7 @@ public:
private:
SDValue foldFrameIndex(SDValue N) const;
/// Returns true when \p N may be treated as never being NaN.
bool isNoNanSrc(SDValue N) const;
bool isInlineImmediate(const SDNode *N) const;
bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
const R600InstrInfo *TII);
@ -143,6 +144,8 @@ private:
bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
bool SelectSMRDBufferSgpr(SDValue Addr, SDValue &Offset) const;
bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
/// Like SelectVOP3Mods, but only succeeds when the selected \p Src passes
/// isNoNanSrc.
bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
@ -188,6 +191,17 @@ bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
return SelectionDAGISel::runOnMachineFunction(MF);
}
/// Decide whether \p N may be treated as never producing a NaN.
///
/// The answer is "yes" when NaNs are disabled globally through the target
/// options. Otherwise a binary node carrying fast-math flags is judged purely
/// by its no-NaNs flag, and every other node is deferred to
/// SelectionDAG::isKnownNeverNaN.
bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
  if (TM.Options.NoNaNsFPMath)
    return true;

  // TODO: Move into isKnownNeverNaN
  const auto *FlaggedOp = dyn_cast<BinaryWithFlagsSDNode>(N);
  if (!FlaggedOp)
    return CurDAG->isKnownNeverNaN(N);

  // For flagged binary nodes the flag alone decides the result.
  return FlaggedOp->Flags.hasNoNaNs();
}
bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
const SIInstrInfo *TII
= static_cast<const SISubtarget *>(Subtarget)->getInstrInfo();
@ -1569,6 +1583,12 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
return true;
}
/// VOP3 source-modifier selection restricted to sources that are never NaN.
///
/// \p Src and \p SrcMods are filled in by SelectVOP3Mods (its own return
/// value is not consulted); the match then succeeds only if the resulting
/// \p Src satisfies isNoNanSrc.
bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
                                             SDValue &SrcMods) const {
  SelectVOP3Mods(In, Src, SrcMods);

  // The NaN query is made on the source produced by the call above.
  const bool NeverNaN = isNoNanSrc(Src);
  return NeverNaN;
}
bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src,
SDValue &SrcMods) const {
bool Res = SelectVOP3Mods(In, Src, SrcMods);

View File

@ -635,6 +635,8 @@ def smax_oneuse : HasOneUseBinOp<smax>;
def smin_oneuse : HasOneUseBinOp<smin>;
def umax_oneuse : HasOneUseBinOp<umax>;
def umin_oneuse : HasOneUseBinOp<umin>;
// HasOneUseBinOp wrappers for fminnum/fmaxnum, alongside the integer
// min/max wrappers defined just before them.
def fminnum_oneuse : HasOneUseBinOp<fminnum>;
def fmaxnum_oneuse : HasOneUseBinOp<fmaxnum>;
} // Properties = [SDNPCommutative, SDNPAssociative]
def sub_oneuse : HasOneUseBinOp<sub>;

View File

@ -2795,6 +2795,9 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(AMDGPUISD::SETCC, DL, VT, Op.getOperand(1),
Op.getOperand(2), DAG.getCondCode(CCOpcode));
}
case Intrinsic::amdgcn_fmed3:
return DAG.getNode(AMDGPUISD::FMED3, DL, VT,
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
case Intrinsic::amdgcn_fmul_legacy:
return DAG.getNode(AMDGPUISD::FMUL_LEGACY, DL, VT,
Op.getOperand(1), Op.getOperand(2));

View File

@ -603,6 +603,9 @@ def VOP3Mods0Clamp0OMod : ComplexPattern<untyped, 4, "SelectVOP3Mods0Clamp0OMod"
def VOP3Mods : ComplexPattern<untyped, 2, "SelectVOP3Mods">;
def VOP3NoMods : ComplexPattern<untyped, 2, "SelectVOP3NoMods">;
// VOP3Mods, but the input source is known to never be NaN.
// Produces two operands and is implemented by SelectVOP3Mods_NNaN.
def VOP3Mods_nnan : ComplexPattern<fAny, 2, "SelectVOP3Mods_NNaN">;
//===----------------------------------------------------------------------===//
// SI assembler operands
//===----------------------------------------------------------------------===//

View File

@ -1125,6 +1125,20 @@ def : SHA256MaPattern <V_BFI_B32, V_XOR_B32_e64>;
def : IntMed3Pat<V_MED3_I32, smax, smax_oneuse, smin_oneuse>;
def : IntMed3Pat<V_MED3_U32, umax, umax_oneuse, umin_oneuse>;
// This matches 16 permutations of
// max(min(x, y), min(max(x, y), z))
//
// Every source goes through VOP3Mods_nnan, and the three inner min/max
// nodes use the *_oneuse fragments; only the outermost fmaxnum is
// unrestricted. The selected instruction gets no clamp and no output
// modifier.
class FPMed3Pat<ValueType vt,
Instruction med3Inst> : Pat<
(fmaxnum (fminnum_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods),
(VOP3Mods_nnan vt:$src1, i32:$src1_mods)),
(fminnum_oneuse (fmaxnum_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods),
(VOP3Mods_nnan vt:$src1, i32:$src1_mods)),
(vt (VOP3Mods_nnan vt:$src2, i32:$src2_mods)))),
(med3Inst $src0_mods, $src0, $src1_mods, $src1, $src2_mods, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)
>;
// Only the f32 form is instantiated here.
def : FPMed3Pat<f32, V_MED3_F32>;
// Undo sub x, c -> add x, -c canonicalization since c is more likely
// an inline immediate than -c.

View File

@ -1,12 +1,6 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=NOSNAN -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mattr=+fp-exceptions -verify-machineinstrs < %s | FileCheck -check-prefix=SNAN -check-prefix=GCN %s
declare i32 @llvm.amdgcn.workitem.id.x() #0
declare float @llvm.minnum.f32(float, float) #0
declare float @llvm.maxnum.f32(float, float) #0
declare double @llvm.minnum.f64(double, double) #0
declare double @llvm.maxnum.f64(double, double) #0
; GCN-LABEL: {{^}}v_test_nnan_input_fmed3_r_i_i_f32:
; GCN: v_add_f32_e32 [[ADD:v[0-9]+]], 1.0, v{{[0-9]+}}
; GCN: v_med3_f32 v{{[0-9]+}}, [[ADD]], 2.0, 4.0
@ -165,6 +159,738 @@ define void @v_test_legacy_fmed3_r_i_i_f32(float addrspace(1)* %out, float addrs
ret void
}
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat0_srcmod0:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, -[[A]], [[B]], [[C]]
; max(min(-a, b), min(max(-a, b), c)): the first input is negated with
; fsub -0.0 before it feeds both the min and the max.
define void @v_test_global_nnans_med3_f32_pat0_srcmod0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
%gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
%gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
%outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
%a = load volatile float, float addrspace(1)* %gep0
%b = load volatile float, float addrspace(1)* %gep1
%c = load volatile float, float addrspace(1)* %gep2
%a.fneg = fsub float -0.0, %a
%tmp0 = call float @llvm.minnum.f32(float %a.fneg, float %b)
%tmp1 = call float @llvm.maxnum.f32(float %a.fneg, float %b)
%tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
%med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
store float %med3, float addrspace(1)* %outgep
ret void
}
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat0_srcmod1:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], -[[B]], [[C]]
; max(min(a, -b), min(max(a, -b), c)): the second input is negated with
; fsub -0.0 before it feeds both the min and the max.
define void @v_test_global_nnans_med3_f32_pat0_srcmod1(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
%gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
%gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
%outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
%a = load volatile float, float addrspace(1)* %gep0
%b = load volatile float, float addrspace(1)* %gep1
%c = load volatile float, float addrspace(1)* %gep2
%b.fneg = fsub float -0.0, %b
%tmp0 = call float @llvm.minnum.f32(float %a, float %b.fneg)
%tmp1 = call float @llvm.maxnum.f32(float %a, float %b.fneg)
%tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
%med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
store float %med3, float addrspace(1)* %outgep
ret void
}
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat0_srcmod2:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], -[[C]]
; max(min(a, b), min(max(a, b), -c)): only the third input is negated
; (fsub -0.0) before it feeds the inner min.
define void @v_test_global_nnans_med3_f32_pat0_srcmod2(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
%gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
%gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
%outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
%a = load volatile float, float addrspace(1)* %gep0
%b = load volatile float, float addrspace(1)* %gep1
%c = load volatile float, float addrspace(1)* %gep2
%c.fneg = fsub float -0.0, %c
%tmp0 = call float @llvm.minnum.f32(float %a, float %b)
%tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
%tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.fneg)
%med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
store float %med3, float addrspace(1)* %outgep
ret void
}
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat0_srcmod012:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, -[[A]], |[[B]]|, -|[[C]]|
; A different modifier on each input: -a, |b| and -|c| are fed into
; max(min(x, y), min(max(x, y), z)).
define void @v_test_global_nnans_med3_f32_pat0_srcmod012(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
%gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
%gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
%outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
%a = load volatile float, float addrspace(1)* %gep0
%b = load volatile float, float addrspace(1)* %gep1
%c = load volatile float, float addrspace(1)* %gep2
%a.fneg = fsub float -0.0, %a
%b.fabs = call float @llvm.fabs.f32(float %b)
%c.fabs = call float @llvm.fabs.f32(float %c)
%c.fabs.fneg = fsub float -0.0, %c.fabs
%tmp0 = call float @llvm.minnum.f32(float %a.fneg, float %b.fabs)
%tmp1 = call float @llvm.maxnum.f32(float %a.fneg, float %b.fabs)
%tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.fabs.fneg)
%med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
store float %med3, float addrspace(1)* %outgep
ret void
}
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat0_negabs012:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, -|[[A]]|, -|[[B]]|, -|[[C]]|
; All three inputs carry both modifiers: -|a|, -|b| and -|c| are fed into
; max(min(x, y), min(max(x, y), z)).
define void @v_test_global_nnans_med3_f32_pat0_negabs012(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
%gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
%gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
%outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
%a = load volatile float, float addrspace(1)* %gep0
%b = load volatile float, float addrspace(1)* %gep1
%c = load volatile float, float addrspace(1)* %gep2
%a.fabs = call float @llvm.fabs.f32(float %a)
%a.fabs.fneg = fsub float -0.0, %a.fabs
%b.fabs = call float @llvm.fabs.f32(float %b)
%b.fabs.fneg = fsub float -0.0, %b.fabs
%c.fabs = call float @llvm.fabs.f32(float %c)
%c.fabs.fneg = fsub float -0.0, %c.fabs
%tmp0 = call float @llvm.minnum.f32(float %a.fabs.fneg, float %b.fabs.fneg)
%tmp1 = call float @llvm.maxnum.f32(float %a.fabs.fneg, float %b.fabs.fneg)
%tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.fabs.fneg)
%med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
store float %med3, float addrspace(1)* %outgep
ret void
}
; GCN-LABEL: {{^}}v_nnan_inputs_med3_f32_pat0:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN-DAG: v_add_f32_e32 [[A_ADD:v[0-9]+]], 1.0, [[A]]
; GCN-DAG: v_add_f32_e32 [[B_ADD:v[0-9]+]], 2.0, [[B]]
; GCN-DAG: v_add_f32_e32 [[C_ADD:v[0-9]+]], 4.0, [[C]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[A_ADD]], [[B_ADD]], [[C_ADD]]
; Each med3 input is the result of an fadd carrying the nnan flag, so the
; no-NaN property comes from the instruction flags on the inputs.
; NOTE(review): attribute group #1 is defined outside this hunk; presumably it
; does not enable global no-NaNs - confirm.
define void @v_nnan_inputs_med3_f32_pat0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
%gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
%gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
%outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
%a = load volatile float, float addrspace(1)* %gep0
%b = load volatile float, float addrspace(1)* %gep1
%c = load volatile float, float addrspace(1)* %gep2
%a.nnan = fadd nnan float %a, 1.0
%b.nnan = fadd nnan float %b, 2.0
%c.nnan = fadd nnan float %c, 4.0
%tmp0 = call float @llvm.minnum.f32(float %a.nnan, float %b.nnan)
%tmp1 = call float @llvm.maxnum.f32(float %a.nnan, float %b.nnan)
%tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.nnan)
%med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
store float %med3, float addrspace(1)* %outgep
ret void
}
; 16 combinations
; 0: max(min(x, y), min(max(x, y), z))
; 1: max(min(x, y), min(max(y, x), z))
; 2: max(min(x, y), min(z, max(x, y)))
; 3: max(min(x, y), min(z, max(y, x)))
; 4: max(min(y, x), min(max(x, y), z))
; 5: max(min(y, x), min(max(y, x), z))
; 6: max(min(y, x), min(z, max(x, y)))
; 7: max(min(y, x), min(z, max(y, x)))
;
; + commute outermost max
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat0:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]]
; Permutation 0: max(min(a, b), min(max(a, b), c)).
define void @v_test_global_nnans_med3_f32_pat0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
%gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
%gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
%outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
%a = load volatile float, float addrspace(1)* %gep0
%b = load volatile float, float addrspace(1)* %gep1
%c = load volatile float, float addrspace(1)* %gep2
%tmp0 = call float @llvm.minnum.f32(float %a, float %b)
%tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
%tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
%med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
store float %med3, float addrspace(1)* %outgep
ret void
}
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat1:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]]
; Permutation 1: max(min(a, b), min(max(b, a), c)).
define void @v_test_global_nnans_med3_f32_pat1(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
%gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
%gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
%outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
%a = load volatile float, float addrspace(1)* %gep0
%b = load volatile float, float addrspace(1)* %gep1
%c = load volatile float, float addrspace(1)* %gep2
%tmp0 = call float @llvm.minnum.f32(float %a, float %b)
%tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
%tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
%med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
store float %med3, float addrspace(1)* %outgep
ret void
}
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat2:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]]
; Permutation 2: max(min(a, b), min(c, max(a, b))).
define void @v_test_global_nnans_med3_f32_pat2(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
%gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
%gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
%outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
%a = load volatile float, float addrspace(1)* %gep0
%b = load volatile float, float addrspace(1)* %gep1
%c = load volatile float, float addrspace(1)* %gep2
%tmp0 = call float @llvm.minnum.f32(float %a, float %b)
%tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
%tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
%med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
store float %med3, float addrspace(1)* %outgep
ret void
}
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat3:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]]
; Permutation 3: max(min(a, b), min(c, max(b, a))).
define void @v_test_global_nnans_med3_f32_pat3(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
%gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
%gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
%outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
%a = load volatile float, float addrspace(1)* %gep0
%b = load volatile float, float addrspace(1)* %gep1
%c = load volatile float, float addrspace(1)* %gep2
%tmp0 = call float @llvm.minnum.f32(float %a, float %b)
%tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
%tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
%med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
store float %med3, float addrspace(1)* %outgep
ret void
}
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat4:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]
; Permutation 4: max(min(b, a), min(max(a, b), c)).
; The min takes (b, a) while the max takes (a, b), and the inner min has the
; max as its first operand, so this case is distinct from permutations 5-7.
define void @v_test_global_nnans_med3_f32_pat4(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
%gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
%gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
%outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
%a = load volatile float, float addrspace(1)* %gep0
%b = load volatile float, float addrspace(1)* %gep1
%c = load volatile float, float addrspace(1)* %gep2
%tmp0 = call float @llvm.minnum.f32(float %b, float %a)
%tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
%tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
%med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
store float %med3, float addrspace(1)* %outgep
ret void
}
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat5:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]
; Permutation 5: max(min(b, a), min(max(b, a), c)).
define void @v_test_global_nnans_med3_f32_pat5(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
%gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
%gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
%outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
%a = load volatile float, float addrspace(1)* %gep0
%b = load volatile float, float addrspace(1)* %gep1
%c = load volatile float, float addrspace(1)* %gep2
%tmp0 = call float @llvm.minnum.f32(float %b, float %a)
%tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
%tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
%med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
store float %med3, float addrspace(1)* %outgep
ret void
}
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat6:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]
; Permutation 6: max(min(b, a), min(c, max(a, b))).
define void @v_test_global_nnans_med3_f32_pat6(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
%gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
%gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
%outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
%a = load volatile float, float addrspace(1)* %gep0
%b = load volatile float, float addrspace(1)* %gep1
%c = load volatile float, float addrspace(1)* %gep2
%tmp0 = call float @llvm.minnum.f32(float %b, float %a)
%tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
%tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
%med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
store float %med3, float addrspace(1)* %outgep
ret void
}
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat7:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]
; Permutation 7: max(min(b, a), min(c, max(b, a))).
define void @v_test_global_nnans_med3_f32_pat7(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
%gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
%gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
%outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
%a = load volatile float, float addrspace(1)* %gep0
%b = load volatile float, float addrspace(1)* %gep1
%c = load volatile float, float addrspace(1)* %gep2
%tmp0 = call float @llvm.minnum.f32(float %b, float %a)
%tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
%tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
%med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
store float %med3, float addrspace(1)* %outgep
ret void
}
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat8:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]]
; Permutation 0 with the outermost max commuted:
; max(min(max(a, b), c), min(a, b)).
define void @v_test_global_nnans_med3_f32_pat8(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
%gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
%gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
%outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
%a = load volatile float, float addrspace(1)* %gep0
%b = load volatile float, float addrspace(1)* %gep1
%c = load volatile float, float addrspace(1)* %gep2
%tmp0 = call float @llvm.minnum.f32(float %a, float %b)
%tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
%tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
%med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
store float %med3, float addrspace(1)* %outgep
ret void
}
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat9:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]
; Permutation 1 with the outermost max commuted:
; max(min(max(b, a), c), min(a, b)).
define void @v_test_global_nnans_med3_f32_pat9(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
%gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
%gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
%outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
%a = load volatile float, float addrspace(1)* %gep0
%b = load volatile float, float addrspace(1)* %gep1
%c = load volatile float, float addrspace(1)* %gep2
%tmp0 = call float @llvm.minnum.f32(float %a, float %b)
%tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
%tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
%med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
store float %med3, float addrspace(1)* %outgep
ret void
}
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat10:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]]
; Permutation 2 with the outermost max commuted:
; max(min(c, max(a, b)), min(a, b)).
define void @v_test_global_nnans_med3_f32_pat10(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
%gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
%gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
%outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
%a = load volatile float, float addrspace(1)* %gep0
%b = load volatile float, float addrspace(1)* %gep1
%c = load volatile float, float addrspace(1)* %gep2
%tmp0 = call float @llvm.minnum.f32(float %a, float %b)
%tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
%tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
%med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
store float %med3, float addrspace(1)* %outgep
ret void
}
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat11:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]
; Permutation 3 with the outermost max commuted:
; max(min(c, max(b, a)), min(a, b)).
define void @v_test_global_nnans_med3_f32_pat11(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
%gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
%gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
%outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
%a = load volatile float, float addrspace(1)* %gep0
%b = load volatile float, float addrspace(1)* %gep1
%c = load volatile float, float addrspace(1)* %gep2
%tmp0 = call float @llvm.minnum.f32(float %a, float %b)
%tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
%tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
%med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
store float %med3, float addrspace(1)* %outgep
ret void
}
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat12:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]]
; Permutation 4 with the outermost max commuted:
; max(min(max(a, b), c), min(b, a)).
; The expected operand order follows the inner max operands (a, b), as in the
; other commuted-outer cases that use max(a, b) (pat8, pat10, pat14).
; NOTE(review): expected order was derived from those sibling tests, not from
; an llc run - confirm.
define void @v_test_global_nnans_med3_f32_pat12(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
%gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
%gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
%outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
%a = load volatile float, float addrspace(1)* %gep0
%b = load volatile float, float addrspace(1)* %gep1
%c = load volatile float, float addrspace(1)* %gep2
%tmp0 = call float @llvm.minnum.f32(float %b, float %a)
%tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
%tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
%med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
store float %med3, float addrspace(1)* %outgep
ret void
}
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat13:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]
; Permutation 5 with the outermost max commuted:
; max(min(max(b, a), c), min(b, a)).
define void @v_test_global_nnans_med3_f32_pat13(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
%gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
%gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
%outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
%a = load volatile float, float addrspace(1)* %gep0
%b = load volatile float, float addrspace(1)* %gep1
%c = load volatile float, float addrspace(1)* %gep2
%tmp0 = call float @llvm.minnum.f32(float %b, float %a)
%tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
%tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
%med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
store float %med3, float addrspace(1)* %outgep
ret void
}
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat14:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]]
; Permutation 6 with the outermost max commuted:
; max(min(c, max(a, b)), min(b, a)).
define void @v_test_global_nnans_med3_f32_pat14(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
%gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
%gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
%outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
%a = load volatile float, float addrspace(1)* %gep0
%b = load volatile float, float addrspace(1)* %gep1
%c = load volatile float, float addrspace(1)* %gep2
%tmp0 = call float @llvm.minnum.f32(float %b, float %a)
%tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
%tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
%med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
store float %med3, float addrspace(1)* %outgep
ret void
}
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat15:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]
; Permutation 7 with the outermost max commuted:
; max(min(c, max(b, a)), min(b, a)).
define void @v_test_global_nnans_med3_f32_pat15(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
%gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
%gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
%outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
%a = load volatile float, float addrspace(1)* %gep0
%b = load volatile float, float addrspace(1)* %gep1
%c = load volatile float, float addrspace(1)* %gep2
%tmp0 = call float @llvm.minnum.f32(float %b, float %a)
%tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
%tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
%med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
store float %med3, float addrspace(1)* %outgep
ret void
}
; ---------------------------------------------------------------------
; Negative patterns
; ---------------------------------------------------------------------
; Negative test: the median-of-three expression is present, but the inner
; minnum result (%tmp0) is additionally written out by a volatile store, so it
; has two uses. The function uses attribute group #1. The check lines expect
; the individual v_min_f32 / v_max_f32 instructions to appear.
; GCN-LABEL: {{^}}v_test_safe_med3_f32_pat0_multi_use0:
; GCN: v_min_f32
; GCN: v_max_f32
; GCN: v_min_f32
; GCN: v_max_f32
define void @v_test_safe_med3_f32_pat0_multi_use0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #1 {
; Index every buffer by the workitem id.
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
%gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
%gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
%outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
%a = load volatile float, float addrspace(1)* %gep0
%b = load volatile float, float addrspace(1)* %gep1
%c = load volatile float, float addrspace(1)* %gep2
%tmp0 = call float @llvm.minnum.f32(float %a, float %b)
; Extra use of the inner minnum.
store volatile float %tmp0, float addrspace(1)* undef
%tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
%tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
%med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
store float %med3, float addrspace(1)* %outgep
ret void
}
; Negative test: the median-of-three expression is present, but the inner
; maxnum result (%tmp1) is additionally written out by a volatile store, so it
; has two uses. The function uses attribute group #1. No v_med3_f32 may be
; emitted between this label and the next one.
; GCN-LABEL: {{^}}v_test_safe_med3_f32_pat0_multi_use1:
; GCN-NOT: v_med3_f32
define void @v_test_safe_med3_f32_pat0_multi_use1(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #1 {
  ; Index every buffer by the workitem id.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
  %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
  %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load volatile float, float addrspace(1)* %gep0
  %b = load volatile float, float addrspace(1)* %gep1
  %c = load volatile float, float addrspace(1)* %gep2
  %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
  %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
  ; Extra use of the inner maxnum.
  store volatile float %tmp1, float addrspace(1)* undef
  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
  store float %med3, float addrspace(1)* %outgep
  ret void
}
; Negative test: the median-of-three expression is present, but the outer
; minnum result (%tmp2) is additionally written out by a volatile store, so it
; has two uses. The function uses attribute group #1. No v_med3_f32 may be
; emitted between this label and the next one.
; GCN-LABEL: {{^}}v_test_safe_med3_f32_pat0_multi_use2:
; GCN-NOT: v_med3_f32
define void @v_test_safe_med3_f32_pat0_multi_use2(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #1 {
  ; Index every buffer by the workitem id.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
  %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
  %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load volatile float, float addrspace(1)* %gep0
  %b = load volatile float, float addrspace(1)* %gep1
  %c = load volatile float, float addrspace(1)* %gep2
  %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
  %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
  ; Extra use of the outer minnum.
  store volatile float %tmp2, float addrspace(1)* undef
  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
  store float %med3, float addrspace(1)* %outgep
  ret void
}
; Negative test: the plain median-of-three expression
;   maxnum(minnum(%a, %b), minnum(maxnum(%a, %b), %c))
; on three loaded values, in a function using attribute group #1
; ("no-nans-fp-math"="false"). The inputs carry no no-NaN information, so no
; v_med3_f32 may be emitted between this label and the next one.
; GCN-LABEL: {{^}}v_test_safe_med3_f32_pat0:
; GCN-NOT: v_med3_f32
define void @v_test_safe_med3_f32_pat0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #1 {
  ; Index every buffer by the workitem id.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
  %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
  %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load volatile float, float addrspace(1)* %gep0
  %b = load volatile float, float addrspace(1)* %gep1
  %c = load volatile float, float addrspace(1)* %gep2
  %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
  %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
  store float %med3, float addrspace(1)* %outgep
  ret void
}
; Negative test: each input of the median-of-three expression is produced by
; an fadd, but the fadd feeding the first input (%a.nnan) lacks the nnan flag
; while the other two have it. The function uses attribute group #1
; ("no-nans-fp-math"="false"), so no v_med3_f32 may be emitted between this
; label and the next one.
; GCN-LABEL: {{^}}v_nnan_inputs_missing0_med3_f32_pat0:
; GCN-NOT: v_med3_f32
define void @v_nnan_inputs_missing0_med3_f32_pat0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #1 {
  ; Index every buffer by the workitem id.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
  %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
  %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load volatile float, float addrspace(1)* %gep0
  %b = load volatile float, float addrspace(1)* %gep1
  %c = load volatile float, float addrspace(1)* %gep2
  ; Despite its name, %a.nnan is computed WITHOUT the nnan flag.
  %a.nnan = fadd float %a, 1.0
  %b.nnan = fadd nnan float %b, 2.0
  %c.nnan = fadd nnan float %c, 4.0
  %tmp0 = call float @llvm.minnum.f32(float %a.nnan, float %b.nnan)
  %tmp1 = call float @llvm.maxnum.f32(float %a.nnan, float %b.nnan)
  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.nnan)
  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
  store float %med3, float addrspace(1)* %outgep
  ret void
}
; Negative test: each input of the median-of-three expression is produced by
; an fadd, but the fadd feeding the second input (%b.nnan) lacks the nnan flag
; while the other two have it. The function uses attribute group #1
; ("no-nans-fp-math"="false"), so no v_med3_f32 may be emitted between this
; label and the next one.
; GCN-LABEL: {{^}}v_nnan_inputs_missing1_med3_f32_pat0:
; GCN-NOT: v_med3_f32
define void @v_nnan_inputs_missing1_med3_f32_pat0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #1 {
  ; Index every buffer by the workitem id.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
  %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
  %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load volatile float, float addrspace(1)* %gep0
  %b = load volatile float, float addrspace(1)* %gep1
  %c = load volatile float, float addrspace(1)* %gep2
  %a.nnan = fadd nnan float %a, 1.0
  ; Despite its name, %b.nnan is computed WITHOUT the nnan flag.
  %b.nnan = fadd float %b, 2.0
  %c.nnan = fadd nnan float %c, 4.0
  %tmp0 = call float @llvm.minnum.f32(float %a.nnan, float %b.nnan)
  %tmp1 = call float @llvm.maxnum.f32(float %a.nnan, float %b.nnan)
  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.nnan)
  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
  store float %med3, float addrspace(1)* %outgep
  ret void
}
; Negative test: each input of the median-of-three expression is produced by
; an fadd, but the fadd feeding the third input (%c.nnan) lacks the nnan flag
; while the other two have it. The function uses attribute group #1
; ("no-nans-fp-math"="false"), so no v_med3_f32 may be emitted between this
; label and the next one.
; GCN-LABEL: {{^}}v_nnan_inputs_missing2_med3_f32_pat0:
; GCN-NOT: v_med3_f32
define void @v_nnan_inputs_missing2_med3_f32_pat0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #1 {
  ; Index every buffer by the workitem id.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
  %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
  %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load volatile float, float addrspace(1)* %gep0
  %b = load volatile float, float addrspace(1)* %gep1
  %c = load volatile float, float addrspace(1)* %gep2
  %a.nnan = fadd nnan float %a, 1.0
  %b.nnan = fadd nnan float %b, 2.0
  ; Despite its name, %c.nnan is computed WITHOUT the nnan flag.
  %c.nnan = fadd float %c, 4.0
  %tmp0 = call float @llvm.minnum.f32(float %a.nnan, float %b.nnan)
  %tmp1 = call float @llvm.maxnum.f32(float %a.nnan, float %b.nnan)
  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.nnan)
  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
  store float %med3, float addrspace(1)* %outgep
  ret void
}
; Negative test under attribute group #2: the inner minnum takes the negated
; value of %a (%a.fneg) while the inner maxnum takes plain %a, so the two inner
; calls do not share the same first operand. The check lines expect separate
; v_min_f32 / v_max_f32 instructions.
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat0_srcmod0_mismatch:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_min_f32
; GCN: v_max_f32
; GCN: v_min_f32
; GCN: v_max_f32
define void @v_test_global_nnans_med3_f32_pat0_srcmod0_mismatch(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
; Index every buffer by the workitem id.
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
%gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
%gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
%outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
%a = load volatile float, float addrspace(1)* %gep0
%b = load volatile float, float addrspace(1)* %gep1
%c = load volatile float, float addrspace(1)* %gep2
; fneg of %a, spelled as (-0.0 - %a).
%a.fneg = fsub float -0.0, %a
; minnum uses the negated %a, maxnum the un-negated %a: this is the mismatch.
%tmp0 = call float @llvm.minnum.f32(float %a.fneg, float %b)
%tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
%tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
%med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
store float %med3, float addrspace(1)* %outgep
ret void
}
; A lone minnum(maxnum(a, b), c) is only half of the median-of-three
; expression, so it is not sufficient: the check lines expect a plain
; v_max_f32 followed by a v_min_f32 on its result.
; GCN-LABEL: {{^}}v_test_global_nnans_min_max_f32:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], [[B]], [[A]]
; GCN: v_min_f32_e32 v{{[0-9]+}}, [[C]], [[MAX]]
define void @v_test_global_nnans_min_max_f32(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
  ; Per-workitem element addresses.
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %a.addr = getelementptr float, float addrspace(1)* %aptr, i32 %lane
  %b.addr = getelementptr float, float addrspace(1)* %bptr, i32 %lane
  %c.addr = getelementptr float, float addrspace(1)* %cptr, i32 %lane
  %out.addr = getelementptr float, float addrspace(1)* %out, i32 %lane

  ; Loads are matched in this order as A, B and C.
  %a = load volatile float, float addrspace(1)* %a.addr
  %b = load volatile float, float addrspace(1)* %b.addr
  %c = load volatile float, float addrspace(1)* %c.addr

  %ab.max = call float @llvm.maxnum.f32(float %a, float %b)
  %result = call float @llvm.minnum.f32(float %ab.max, float %c)
  store float %result, float addrspace(1)* %out.addr
  ret void
}
; Intrinsic declarations; all of them use attribute group #0.
declare i32 @llvm.amdgcn.workitem.id.x() #0
declare float @llvm.fabs.f32(float) #0
declare float @llvm.minnum.f32(float, float) #0
declare float @llvm.maxnum.f32(float, float) #0
declare double @llvm.minnum.f64(double, double) #0
declare double @llvm.maxnum.f64(double, double) #0
; #0: attributes of the intrinsic declarations above.
attributes #0 = { nounwind readnone }
; #1: test functions compiled with "no-nans-fp-math"="false".
attributes #1 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="false" }
; #2: test functions compiled with "no-nans-fp-math"="true"; unsafe-fp-math
; stays disabled.
attributes #2 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" }

View File

@ -0,0 +1,28 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; Direct call of the llvm.amdgcn.fmed3.f32 intrinsic on three float
; arguments; the check line expects one v_med3_f32 with an SGPR first source
; and VGPR second and third sources.
; GCN-LABEL: {{^}}test_fmed3:
; GCN: v_med3_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define void @test_fmed3(float addrspace(1)* %out, float %src0, float %src1, float %src2) #1 {
  %med3 = call float @llvm.amdgcn.fmed3.f32(float %src0, float %src1, float %src2)
  store float %med3, float addrspace(1)* %out
  ret void
}
; Calls llvm.amdgcn.fmed3.f32 with fneg(%src0), fabs(%src1) and
; fneg(fabs(%src2)); the check line expects the negate / absolute-value
; operations to appear as source modifiers on the v_med3_f32 operands.
; GCN-LABEL: {{^}}test_fmed3_srcmods:
; GCN: v_med3_f32 v{{[0-9]+}}, -s{{[0-9]+}}, |v{{[0-9]+}}|, -|v{{[0-9]+}}|
define void @test_fmed3_srcmods(float addrspace(1)* %out, float %src0, float %src1, float %src2) #1 {
  ; fneg is spelled as (-0.0 - x).
  %neg.src0 = fsub float -0.0, %src0
  %abs.src1 = call float @llvm.fabs.f32(float %src1)
  %abs.src2 = call float @llvm.fabs.f32(float %src2)
  %neg.abs.src2 = fsub float -0.0, %abs.src2
  %med3 = call float @llvm.amdgcn.fmed3.f32(float %neg.src0, float %abs.src1, float %neg.abs.src2)
  store float %med3, float addrspace(1)* %out
  ret void
}
; Intrinsic declarations; both use attribute group #0.
declare float @llvm.amdgcn.fmed3.f32(float, float, float) #0
declare float @llvm.fabs.f32(float) #0
; #0: attributes of the intrinsic declarations above.
attributes #0 = { nounwind readnone }
; #1: attributes of the test functions.
attributes #1 = { nounwind }