mirror of
https://github.com/RPCSX/llvm.git
synced 2024-12-12 22:26:14 +00:00
702b589510
Summary:
Multi-dword constant loads generated unnecessary moves from SGPRs into VGPRs, increasing the code size and VGPR pressure. These moves are now folded away.

Note that this lack of operand folding was not a problem for VMEM loads, because COPY nodes from VReg_Nnn to VGPR32 are eliminated by the register coalescer.

Some tests are updated; note that the fsub.ll test explicitly checks that the move is elided.

With the IR generated by current Mesa, the changes are obviously relatively minor:

7063 shaders in 3531 tests
Totals:
SGPRS: 351872 -> 352560 (0.20 %)
VGPRS: 199984 -> 200732 (0.37 %)
Code Size: 9876968 -> 9881112 (0.04 %) bytes
LDS: 91 -> 91 (0.00 %) blocks
Scratch: 1779712 -> 1767424 (-0.69 %) bytes per wave
Wait states: 295164 -> 295337 (0.06 %)

Totals from affected shaders:
SGPRS: 65784 -> 66472 (1.05 %)
VGPRS: 38064 -> 38812 (1.97 %)
Code Size: 1993828 -> 1997972 (0.21 %) bytes
LDS: 42 -> 42 (0.00 %) blocks
Scratch: 795648 -> 783360 (-1.54 %) bytes per wave
Wait states: 54026 -> 54199 (0.32 %)

Reviewers: tstellarAMD, arsenm, mareko
Subscribers: arsenm, llvm-commits
Differential Revision: http://reviews.llvm.org/D15875
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@257074 91177308-0d34-0410-b5e6-96231b3b80d8
187 lines
8.0 KiB
LLVM
187 lines
8.0 KiB
LLVM
; Codegen test for the "legacy" fmin select patterns (fcmp + select).
;
; Check prefixes used in this file, as wired up by the llc invocations below
; (the stray `|` separator lines that were interleaved here were not valid
; IR and have been dropped):
;   SI-SAFE   - amdgcn / SI with default FP options
;   SI-NONAN  - amdgcn / SI with -enable-no-nans-fp-math -enable-unsafe-fp-math
;   SI        - shared by both amdgcn invocations
;   EG        - r600 / redwood
;   FUNC      - shared by all three invocations (function labels)

; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -enable-no-nans-fp-math -enable-unsafe-fp-math -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI-NONAN -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

; FIXME: Should replace unsafe-fp-math with no signed zeros.

; Intrinsic whose result the tests below use as the element index into %in
; (presumably the thread id in the x dimension, going by its name).
declare i32 @llvm.r600.read.tidig.x() #1

; Legacy-min pattern built from two lanes of an `inreg` vector argument:
;   %r3 = (%r0 uge %r1) ? %r1 : %r0
; The default amdgcn run expects the legacy min (in its _e64 form here); the
; run with -enable-no-nans-fp-math / -enable-unsafe-fp-math expects the plain
; v_min_f32, and the r600 run expects a MIN.
; The label check uses the anchored form already used by the vector tests in
; this file instead of the bare `@name` spelling.
; FUNC-LABEL: {{^}}test_fmin_legacy_f32:
; EG: MIN *
; SI-SAFE: v_min_legacy_f32_e64
; SI-NONAN: v_min_f32_e64
define void @test_fmin_legacy_f32(<4 x float> addrspace(1)* %out, <4 x float> inreg %reg0) #0 {
  %r0 = extractelement <4 x float> %reg0, i32 0
  %r1 = extractelement <4 x float> %reg0, i32 1
  %r2 = fcmp uge float %r0, %r1
  ; Picks %r1 when the compare holds, %r0 otherwise.
  %r3 = select i1 %r2, float %r1, float %r0
  ; Only lane 0 of the stored vector is defined; the rest stays undef.
  %vec = insertelement <4 x float> undef, float %r3, i32 0
  store <4 x float> %vec, <4 x float> addrspace(1)* %out, align 16
  ret void
}

; Unordered less-or-equal select over two adjacent loaded floats:
;   %val = (%a ule %b) ? %a : %b, with %a = in[tid] and %b = in[tid + 1].
; Under the default amdgcn run the checks pin the legacy min with the second
; load (B) as its first source operand; the no-NaNs run expects the plain
; v_min_f32 instead.
; The label check uses the anchored form already used by the vector tests in
; this file instead of the bare `@name` spelling.
; FUNC-LABEL: {{^}}test_fmin_legacy_ule_f32:
; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
define void @test_fmin_legacy_ule_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x() #1
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  ; One float past %gep.0; this is the load the checks expect at offset:4.
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1

  %a = load float, float addrspace(1)* %gep.0, align 4
  %b = load float, float addrspace(1)* %gep.1, align 4

  %cmp = fcmp ule float %a, %b
  %val = select i1 %cmp, float %a, float %b
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; Ordered less-or-equal select over two adjacent loaded floats:
;   %val = (%a ole %b) ? %a : %b, with %a = in[tid] and %b = in[tid + 1].
; Under the default amdgcn run the checks pin the legacy min with the first
; load (A) as its first source operand; the no-NaNs run expects the plain
; v_min_f32 instead.
; The label is anchored and terminated with the colon because the bare
; `@name` spelling of this function is also a textual prefix of
; test_fmin_legacy_ole_f32_multi_use further down.
; FUNC-LABEL: {{^}}test_fmin_legacy_ole_f32:
; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
define void @test_fmin_legacy_ole_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x() #1
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  ; One float past %gep.0; this is the load the checks expect at offset:4.
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1

  %a = load float, float addrspace(1)* %gep.0, align 4
  %b = load float, float addrspace(1)* %gep.1, align 4

  %cmp = fcmp ole float %a, %b
  %val = select i1 %cmp, float %a, float %b
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; Ordered less-than select over two adjacent loaded floats:
;   %val = (%a olt %b) ? %a : %b, with %a = in[tid] and %b = in[tid + 1].
; Under the default amdgcn run the checks pin the legacy min with the first
; load (A) as its first source operand; the no-NaNs run expects the plain
; v_min_f32 instead.
; The label check uses the anchored form already used by the vector tests in
; this file instead of the bare `@name` spelling.
; FUNC-LABEL: {{^}}test_fmin_legacy_olt_f32:
; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
define void @test_fmin_legacy_olt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x() #1
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  ; One float past %gep.0; this is the load the checks expect at offset:4.
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1

  %a = load float, float addrspace(1)* %gep.0, align 4
  %b = load float, float addrspace(1)* %gep.1, align 4

  %cmp = fcmp olt float %a, %b
  %val = select i1 %cmp, float %a, float %b
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; Unordered less-than select over two adjacent loaded floats:
;   %val = (%a ult %b) ? %a : %b, with %a = in[tid] and %b = in[tid + 1].
; Under the default amdgcn run the checks pin the legacy min with the second
; load (B) as its first source operand; the no-NaNs run expects the plain
; v_min_f32 instead.
; The label check uses the anchored form already used by the vector tests in
; this file instead of the bare `@name` spelling.
; FUNC-LABEL: {{^}}test_fmin_legacy_ult_f32:
; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
define void @test_fmin_legacy_ult_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x() #1
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  ; One float past %gep.0; this is the load the checks expect at offset:4.
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1

  %a = load float, float addrspace(1)* %gep.0, align 4
  %b = load float, float addrspace(1)* %gep.1, align 4

  %cmp = fcmp ult float %a, %b
  %val = select i1 %cmp, float %a, float %b
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; Same unordered less-than select as the scalar ult test, but expressed on
; <1 x float> values. The checks expect the same two dword loads and the same
; min instructions as the scalar form.
; FUNC-LABEL: {{^}}test_fmin_legacy_ult_v1f32:
; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
define void @test_fmin_legacy_ult_v1f32(<1 x float> addrspace(1)* %out, <1 x float> addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x() #1
  %gep.0 = getelementptr <1 x float>, <1 x float> addrspace(1)* %in, i32 %tid
  ; Next <1 x float> element after %gep.0.
  %gep.1 = getelementptr <1 x float>, <1 x float> addrspace(1)* %gep.0, i32 1

  ; Unlike the scalar tests, these loads and the store carry no explicit align.
  %a = load <1 x float>, <1 x float> addrspace(1)* %gep.0
  %b = load <1 x float>, <1 x float> addrspace(1)* %gep.1

  %cmp = fcmp ult <1 x float> %a, %b
  %val = select <1 x i1> %cmp, <1 x float> %a, <1 x float> %b
  store <1 x float> %val, <1 x float> addrspace(1)* %out
  ret void
}

; Unordered less-than select on <2 x float> values. The checks expect two
; dwordx2 loads followed by one min instruction per lane (two in total):
; the legacy min under the default amdgcn run, the plain v_min_f32 under the
; no-NaNs run. Operand order is not pinned here.
; FUNC-LABEL: {{^}}test_fmin_legacy_ult_v2f32:
; SI: buffer_load_dwordx2
; SI: buffer_load_dwordx2
; SI-SAFE: v_min_legacy_f32_e32
; SI-SAFE: v_min_legacy_f32_e32

; SI-NONAN: v_min_f32_e32
; SI-NONAN: v_min_f32_e32
define void @test_fmin_legacy_ult_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x() #1
  %gep.0 = getelementptr <2 x float>, <2 x float> addrspace(1)* %in, i32 %tid
  ; Next <2 x float> element after %gep.0.
  %gep.1 = getelementptr <2 x float>, <2 x float> addrspace(1)* %gep.0, i32 1

  %a = load <2 x float>, <2 x float> addrspace(1)* %gep.0
  %b = load <2 x float>, <2 x float> addrspace(1)* %gep.1

  %cmp = fcmp ult <2 x float> %a, %b
  %val = select <2 x i1> %cmp, <2 x float> %a, <2 x float> %b
  store <2 x float> %val, <2 x float> addrspace(1)* %out
  ret void
}

; Unordered less-than select on <3 x float> values. The checks expect one min
; instruction per lane (three in total): the legacy min under the default
; amdgcn run, the plain v_min_f32 under the no-NaNs run. Neither the loads
; nor the operand order are checked here.
; FUNC-LABEL: {{^}}test_fmin_legacy_ult_v3f32:
; SI-SAFE: v_min_legacy_f32_e32
; SI-SAFE: v_min_legacy_f32_e32
; SI-SAFE: v_min_legacy_f32_e32

; SI-NONAN: v_min_f32_e32
; SI-NONAN: v_min_f32_e32
; SI-NONAN: v_min_f32_e32
define void @test_fmin_legacy_ult_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x() #1
  %gep.0 = getelementptr <3 x float>, <3 x float> addrspace(1)* %in, i32 %tid
  ; Next <3 x float> element after %gep.0.
  %gep.1 = getelementptr <3 x float>, <3 x float> addrspace(1)* %gep.0, i32 1

  %a = load <3 x float>, <3 x float> addrspace(1)* %gep.0
  %b = load <3 x float>, <3 x float> addrspace(1)* %gep.1

  %cmp = fcmp ult <3 x float> %a, %b
  %val = select <3 x i1> %cmp, <3 x float> %a, <3 x float> %b
  store <3 x float> %val, <3 x float> addrspace(1)* %out
  ret void
}

; Negative test: the compare result %cmp feeds both the select and a second
; store, so the checks require that no v_min instruction is emitted at all.
; Instead they expect an explicit v_cmp_le_f32 immediately followed by a
; v_cndmask_b32, with no v_min anywhere up to s_endpgm.
; The label check uses the anchored form already used by the vector tests in
; this file instead of the bare `@name` spelling.
; FUNC-LABEL: {{^}}test_fmin_legacy_ole_f32_multi_use:
; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI-NOT: v_min
; SI: v_cmp_le_f32
; SI-NEXT: v_cndmask_b32
; SI-NOT: v_min
; SI: s_endpgm
define void @test_fmin_legacy_ole_f32_multi_use(float addrspace(1)* %out0, i1 addrspace(1)* %out1, float addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x() #1
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  ; One float past %gep.0; this is the load the checks expect at offset:4.
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1

  %a = load float, float addrspace(1)* %gep.0, align 4
  %b = load float, float addrspace(1)* %gep.1, align 4

  %cmp = fcmp ole float %a, %b
  %val0 = select i1 %cmp, float %a, float %b
  store float %val0, float addrspace(1)* %out0, align 4
  ; Second use of %cmp: the raw i1 compare result is written out as well.
  store i1 %cmp, i1 addrspace(1)* %out1
  ret void
}

; Attribute groups referenced above: #0 is attached to every test function,
; #1 to the intrinsic declaration and to each of its call sites.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }