mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-01 01:43:57 +00:00
fb163aaea6
When dealing with complex<float>, and similar structures with two single-precision floating-point numbers, especially when such things are being passed around by value, we'll sometimes end up loading both float values by extracting them from one 64-bit integer load. It looks like this:

  t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
  t16: i64 = srl t13, Constant:i32<32>
  t17: i32 = truncate t16
  t18: f32 = bitcast t17
  t19: i32 = truncate t13
  t20: f32 = bitcast t19

The problem, especially on cores before the P8, where those bitcasts aren't legal (and get expanded via the stack), is that it would have been better to use two floating-point loads directly. Here we add a target-specific DAGCombine to do just that. In short, we turn:

  ld 3, 0(5)
  stw 3, -8(1)
  rldicl 3, 3, 32, 32
  stw 3, -4(1)
  lfs 3, -4(1)
  lfs 0, -8(1)

into:

  lfs 3, 4(5)
  lfs 0, 0(5)

llvm-svn: 264988
61 lines
2.0 KiB
LLVM
61 lines
2.0 KiB
LLVM
; Regression test: when both 32-bit halves of a single 64-bit integer load are
; only ever reinterpreted as floats, the PowerPC backend is expected to emit two
; single-precision floating-point loads instead of an integer load followed by
; shifts and stores.
; The lit line below pipes this file through llc and verifies the generated
; assembly with FileCheck.
; RUN: llc < %s | FileCheck %s
|
|
; Leading "E" in the layout string selects big-endian byte order.
target datalayout = "E-m:e-i64:64-n32:64"
|
|
; 64-bit PowerPC, Linux (vendor field: bgq).
target triple = "powerpc64-bgq-linux"
|
|
|
|
; Loads one i64 from %ref.tmp, reinterprets its upper and lower 32 bits as two
; floats, combines them with %v0 / %v1, and stores a difference and a sum of the
; cross products through the two float pointers.
;
; Because both halves of the i64 are used only as floats, the FileCheck
; directives at the end of the body expect two lfs loads (offsets 4 and 0 from
; register 5) and forbid a 64-bit ld from 0(5), any stw, and any rldicl between
; the function label and those loads.
define void @_Z4testSt7complexIfE(float %v0, float %v1, i64* %ref.tmp, float* %_M_value.realp.i.i, float* %_M_value.imagp.i.i) {
entry:
  ; A single 8-byte load covers both 32-bit fields.
  %packed = load i64, i64* %ref.tmp, align 8

  ; Upper 32 bits of the loaded value, viewed as a float.
  %hi.shifted = lshr i64 %packed, 32
  %hi.bits = trunc i64 %hi.shifted to i32
  %hi.f32 = bitcast i32 %hi.bits to float

  ; Lower 32 bits of the loaded value, viewed as a float.
  %lo.bits = trunc i64 %packed to i32
  %lo.f32 = bitcast i32 %lo.bits to float

  ; sum = hi * v1 + lo * v0
  %hi.v1 = fmul fast float %hi.f32, %v1
  %lo.v0 = fmul fast float %lo.f32, %v0
  %sum = fadd fast float %hi.v1, %lo.v0

  ; diff = hi * v0 - lo * v1
  %hi.v0 = fmul fast float %hi.f32, %v0
  %lo.v1 = fmul fast float %lo.f32, %v1
  %diff = fsub fast float %hi.v0, %lo.v1

  store float %diff, float* %_M_value.realp.i.i, align 4
  store float %sum, float* %_M_value.imagp.i.i, align 4
  ret void

; CHECK-LABEL: @_Z4testSt7complexIfE
; CHECK-NOT: ld {{[0-9]+}}, 0(5)
; CHECK-NOT: stw
; CHECK-NOT: rldicl
; CHECK-DAG: lfs {{[0-9]+}}, 4(5)
; CHECK-DAG: lfs {{[0-9]+}}, 0(5)
; CHECK: blr
}
|
|
|
|
; Indexed variant of the split-load test: the i64 is loaded from
; %ref.tmp + 1 element (8 bytes), and that computed pointer %r is also the
; function's return value.
;
; As both halves of the loaded i64 are used only as floats, the FileCheck
; directives at the end of the body expect an update-form lfsu at 8(5) plus an
; lfs at 4(5), and forbid ld / ldu from 8(5), any stw, and any rldicl between
; the function label and those loads.
;
; The label directive names this function in full (with the _idx suffix) so the
; checks are anchored to this function's output rather than to whatever text
; merely starts with the first function's name.
define i64* @_Z4testSt7complexIfE_idx(float %v0, float %v1, i64* %ref.tmp, float* %_M_value.realp.i.i, float* %_M_value.imagp.i.i) {
entry:
  ; Address of the second i64 element; kept live by the return below.
  %r = getelementptr i64, i64* %ref.tmp, i64 1
  %v2 = load i64, i64* %r, align 8

  ; Upper 32 bits of the loaded value, viewed as a float.
  %v3 = lshr i64 %v2, 32
  %v4 = trunc i64 %v3 to i32
  %v5 = bitcast i32 %v4 to float

  ; Lower 32 bits of the loaded value, viewed as a float.
  %v6 = trunc i64 %v2 to i32
  %v7 = bitcast i32 %v6 to float

  ; %mul_i.i.i = %v5 * %v1 + %v7 * %v0
  %mul_ad.i.i = fmul fast float %v5, %v1
  %mul_bc.i.i = fmul fast float %v7, %v0
  %mul_i.i.i = fadd fast float %mul_ad.i.i, %mul_bc.i.i

  ; %mul_r.i.i = %v5 * %v0 - %v7 * %v1
  %mul_ac.i.i = fmul fast float %v5, %v0
  %mul_bd.i.i = fmul fast float %v7, %v1
  %mul_r.i.i = fsub fast float %mul_ac.i.i, %mul_bd.i.i

  store float %mul_r.i.i, float* %_M_value.realp.i.i, align 4
  store float %mul_i.i.i, float* %_M_value.imagp.i.i, align 4
  ret i64* %r

; CHECK-LABEL: @_Z4testSt7complexIfE_idx
; CHECK-NOT: ld {{[0-9]+}}, 8(5)
; CHECK-NOT: ldu {{[0-9]+}}, 8(5)
; CHECK-NOT: stw
; CHECK-NOT: rldicl
; CHECK-DAG: lfsu {{[0-9]+}}, 8(5)
; CHECK-DAG: lfs {{[0-9]+}}, 4(5)
; CHECK: blr
}
|
|
|