mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-09 05:47:13 +00:00
AMDGPU: Additional pattern for i16 median3 matching
min(max(a, b), max(min(a, b), c))

Differential Revision: https://reviews.llvm.org/D54494

llvm-svn: 346886
This commit is contained in:
parent
d61070716c
commit
646515eb13
@ -1650,20 +1650,33 @@ class FP16Med3Pat<ValueType vt,
|
||||
(med3Inst $src0_mods, $src0, $src1_mods, $src1, $src2_mods, $src2, DSTCLAMP.NONE)
|
||||
>;
|
||||
|
||||
class Int16Med3Pat<Instruction med3Inst,
|
||||
multiclass Int16Med3Pat<Instruction med3Inst,
|
||||
SDPatternOperator min,
|
||||
SDPatternOperator max,
|
||||
SDPatternOperator max_oneuse,
|
||||
SDPatternOperator min_oneuse,
|
||||
ValueType vt = i32> : GCNPat<
|
||||
ValueType vt = i16> {
|
||||
// This matches 16 permutations of
|
||||
// max(min(x, y), min(max(x, y), z))
|
||||
def : GCNPat <
|
||||
(max (min_oneuse vt:$src0, vt:$src1),
|
||||
(min_oneuse (max_oneuse vt:$src0, vt:$src1), vt:$src2)),
|
||||
(med3Inst SRCMODS.NONE, $src0, SRCMODS.NONE, $src1, SRCMODS.NONE, $src2, DSTCLAMP.NONE)
|
||||
>;
|
||||
|
||||
// This matches 16 permutations of
|
||||
// min(max(a, b), max(min(a, b), c))
|
||||
def : GCNPat <
|
||||
(min (max_oneuse vt:$src0, vt:$src1),
|
||||
(max_oneuse (min_oneuse vt:$src0, vt:$src1), vt:$src2)),
|
||||
(med3Inst SRCMODS.NONE, $src0, SRCMODS.NONE, $src1, SRCMODS.NONE, $src2, DSTCLAMP.NONE)
|
||||
>;
|
||||
}
|
||||
|
||||
def : FPMed3Pat<f32, V_MED3_F32>;
|
||||
|
||||
let OtherPredicates = [isGFX9] in {
|
||||
def : FP16Med3Pat<f16, V_MED3_F16>;
|
||||
def : Int16Med3Pat<V_MED3_I16, smax, smax_oneuse, smin_oneuse, i16>;
|
||||
def : Int16Med3Pat<V_MED3_U16, umax, umax_oneuse, umin_oneuse, i16>;
|
||||
defm : Int16Med3Pat<V_MED3_I16, smin, smax, smax_oneuse, smin_oneuse>;
|
||||
defm : Int16Med3Pat<V_MED3_U16, umin, umax, umax_oneuse, umin_oneuse>;
|
||||
} // End OtherPredicates = [isGFX9]
|
||||
|
@ -681,6 +681,28 @@ bb:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test for the i16 expression min(max(x, y), max(min(x, y), z)) written with
; the smin16/smax16 helper calls below; the check line that follows expects a
; single v_med3_i16 instruction.
; NOTE(review): smin16/smax16 are defined outside this hunk; presumably they
; are signed i16 min/max helpers -- confirm against the rest of the test file.
; GCN-LABEL: {{^}}v_test_smed3_i16_pat_1:
|
||||
; GFX9: v_med3_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
|
||||
|
||||
; %arg is not referenced in the body; only %a.ptr (inputs) and %out (result)
; are used.
define amdgpu_kernel void @v_test_smed3_i16_pat_1(i16 addrspace(1)* %arg, i16 addrspace(1)* %out, i16 addrspace(1)* %a.ptr) #1 {
|
||||
bb:
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
; The three inputs are read from %a.ptr at element offsets tid, tid + 3 and
; tid + 8; the result is written to %out at element offset tid.
%gep0 = getelementptr inbounds i16, i16 addrspace(1)* %a.ptr, i32 %tid
|
||||
%gep1 = getelementptr inbounds i16, i16 addrspace(1)* %gep0, i32 3
|
||||
%gep2 = getelementptr inbounds i16, i16 addrspace(1)* %gep0, i32 8
|
||||
%out.gep = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid
|
||||
%x = load i16, i16 addrspace(1)* %gep0
|
||||
%y = load i16, i16 addrspace(1)* %gep1
|
||||
%z = load i16, i16 addrspace(1)* %gep2
|
||||
|
||||
; tmp3 = smin16(smax16(x, y), smax16(smin16(x, y), z))
%tmp0 = call i16 @smin16(i16 %x, i16 %y)
|
||||
%tmp1 = call i16 @smax16(i16 %x, i16 %y)
|
||||
%tmp2 = call i16 @smax16(i16 %tmp0, i16 %z)
|
||||
%tmp3 = call i16 @smin16(i16 %tmp1, i16 %tmp2)
|
||||
store i16 %tmp3, i16 addrspace(1)* %out.gep
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind readnone }
|
||||
attributes #1 = { nounwind }
|
||||
attributes #2 = { nounwind readnone alwaysinline }
|
||||
|
@ -716,6 +716,27 @@ bb:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test for the i16 expression min(max(x, y), max(min(x, y), z)) written with
; the umin16/umax16 helper calls below; the check line that follows expects a
; single v_med3_u16 instruction.
; NOTE(review): umin16/umax16 are defined outside this hunk; presumably they
; are unsigned i16 min/max helpers -- confirm against the rest of the test file.
; GCN-LABEL: {{^}}v_test_umed3_i16_pat_1:
|
||||
; GFX9: v_med3_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
|
||||
; %arg is not referenced in the body; only %a.ptr (inputs) and %out (result)
; are used.
define amdgpu_kernel void @v_test_umed3_i16_pat_1(i16 addrspace(1)* %arg, i16 addrspace(1)* %out, i16 addrspace(1)* %a.ptr) #1 {
|
||||
bb:
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
; The three inputs are read from %a.ptr at element offsets tid, tid + 3 and
; tid + 8; the result is written to %out at element offset tid.
%gep0 = getelementptr inbounds i16, i16 addrspace(1)* %a.ptr, i32 %tid
|
||||
%gep1 = getelementptr inbounds i16, i16 addrspace(1)* %gep0, i32 3
|
||||
%gep2 = getelementptr inbounds i16, i16 addrspace(1)* %gep0, i32 8
|
||||
%out.gep = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid
|
||||
%x = load i16, i16 addrspace(1)* %gep0
|
||||
%y = load i16, i16 addrspace(1)* %gep1
|
||||
%z = load i16, i16 addrspace(1)* %gep2
|
||||
|
||||
; tmp3 = umin16(umax16(x, y), umax16(umin16(x, y), z))
%tmp0 = call i16 @umin16(i16 %x, i16 %y)
|
||||
%tmp1 = call i16 @umax16(i16 %x, i16 %y)
|
||||
%tmp2 = call i16 @umax16(i16 %tmp0, i16 %z)
|
||||
%tmp3 = call i16 @umin16(i16 %tmp1, i16 %tmp2)
|
||||
store i16 %tmp3, i16 addrspace(1)* %out.gep
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind readnone }
|
||||
attributes #1 = { nounwind }
|
||||
attributes #2 = { nounwind readnone alwaysinline }
|
||||
|
Loading…
x
Reference in New Issue
Block a user