mirror of
https://github.com/RPCSX/llvm.git
synced 2024-11-25 20:59:51 +00:00
f15dfe4eb4
R600 doesn't need to do any scheduling on the SelectionDAG now that it has a very good MachineScheduler. Also, using the VLIW SelectionDAG scheduler was having a major impact on compile times. For example, with the phatk kernel, here are the LLVM IR to machine code compile times. With Sched::VLIW: Total Compile Time: 1.4890 Seconds (User + System); SelectionDAG Instruction Scheduling: 1.1670 Seconds (User + System). With Sched::Source: Total Compile Time: 0.3330 Seconds (User + System); SelectionDAG Instruction Scheduling: 0.0070 Seconds (User + System). The code output was identical with both schedulers. This may not be true for all programs, but it gives me confidence that there won't be much reduction, if any, in code quality by using Sched::Source. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188215 91177308-0d34-0410-b5e6-96231b3b80d8
42 lines
1.3 KiB
LLVM
42 lines
1.3 KiB
LLVM
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s

; Scalar case: both operands come from @llvm.R600.load.input (indices 0 and 1),
; are multiplied with a single float fmul, and the product is passed to
; @llvm.AMDGPU.store.output.
;
; The function name is matched with a label directive so that the MUL_IEEE
; pattern below can only match inside this function's output. The pattern
; expects one MUL_IEEE whose three operands are all T<n>.<channel> registers.
; CHECK-LABEL: @fmul_f32
; CHECK: MUL_IEEE * {{T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}

define void @fmul_f32() {
  %lhs = call float @llvm.R600.load.input(i32 0)
  %rhs = call float @llvm.R600.load.input(i32 1)
  %product = fmul float %lhs, %rhs
  call void @llvm.AMDGPU.store.output(float %product, i32 0)
  ret void
}

; Intrinsics called by @fmul_f32 above. The input intrinsic is declared
; readnone; the output intrinsic carries no attributes.
declare float @llvm.R600.load.input(i32) readnone

declare void @llvm.AMDGPU.store.output(float, i32)

; <2 x float> case: the by-value arguments %a and %b are multiplied and the
; result is stored through %out (address space 1).
;
; The function name is matched with a label directive so the patterns below
; are confined to this function's output. Two MUL_IEEE lines are expected
; (presumably one per vector lane); only the first operand is required to be
; a T<n>.<channel> register.
; CHECK-LABEL: @fmul_v2f32
; CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW]}}
; CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW]}}
define void @fmul_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
entry:
  %result = fmul <2 x float> %a, %b
  store <2 x float> %result, <2 x float> addrspace(1)* %out
  ret void
}

; <4 x float> case: %a is loaded from %in and %b from the next <4 x float>
; element after it (getelementptr index 1); their product is stored through
; %out. All three pointers are in address space 1.
;
; The function name is matched with a label directive so the patterns below
; are confined to this function's output. Four MUL_IEEE lines are expected
; (presumably one per vector lane); the first two operands of each must be
; T<n>.<channel> registers.
; CHECK-LABEL: @fmul_v4f32
; CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}

define void @fmul_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
  %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
  %a = load <4 x float> addrspace(1)* %in
  %b = load <4 x float> addrspace(1)* %b_ptr
  %result = fmul <4 x float> %a, %b
  store <4 x float> %result, <4 x float> addrspace(1)* %out
  ret void
}