mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-12-13 22:58:50 +00:00
AMDGPU: Convert test cases to the dimension-aware intrinsics
Summary: Also explicitly port over some tests in llvm.amdgcn.image.* that were missing. Some tests are removed because they no longer apply (i.e. explicitly testing building an address vector via insertelement). This is in preparation for the eventual removal of the old-style intrinsics. Some additional notes: - constant-address-space-32bit.ll: change some GCN-NEXT to GCN because the instruction schedule was subtly altered - insert_vector_elt.ll: the old test didn't actually test anything, because %tmp1 was not used; remove the load, because it doesn't work (Because of the amdgpu_ps calling convention? In any case, it's orthogonal to what the test claims to be testing.) Change-Id: Idfa99b6512ad139e755e82b8b89548ab08f0afcf Reviewers: arsenm, rampitec Subscribers: MatzeB, qcolombet, kzhuravl, wdng, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits Differential Revision: https://reviews.llvm.org/D48018 llvm-svn: 335229
This commit is contained in:
parent
d9213dadcb
commit
f58aee7cea
@ -1,105 +1,105 @@
|
||||
;RUN: opt -mtriple=amdgcn-mesa-mesa3d -analyze -divergence %s | FileCheck %s
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.swap.i32(
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(
|
||||
define float @image_atomic_swap(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.image.atomic.swap.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
|
||||
%orig = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.add.i32(
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(
|
||||
define float @image_atomic_add(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.image.atomic.add.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
|
||||
%orig = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.sub.i32(
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(
|
||||
define float @image_atomic_sub(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.image.atomic.sub.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
|
||||
%orig = call i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.smin.i32(
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(
|
||||
define float @image_atomic_smin(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.image.atomic.smin.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
|
||||
%orig = call i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.umin.i32(
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(
|
||||
define float @image_atomic_umin(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.image.atomic.umin.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
|
||||
%orig = call i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.smax.i32(
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(
|
||||
define float @image_atomic_smax(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.image.atomic.smax.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
|
||||
%orig = call i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.umax.i32(
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(
|
||||
define float @image_atomic_umax(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.image.atomic.umax.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
|
||||
%orig = call i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.and.i32(
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(
|
||||
define float @image_atomic_and(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.image.atomic.and.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
|
||||
%orig = call i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.or.i32(
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(
|
||||
define float @image_atomic_or(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.image.atomic.or.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
|
||||
%orig = call i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.xor.i32(
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(
|
||||
define float @image_atomic_xor(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.image.atomic.xor.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
|
||||
%orig = call i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.inc.i32(
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(
|
||||
define float @image_atomic_inc(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.image.atomic.inc.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
|
||||
%orig = call i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.dec.i32(
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(
|
||||
define float @image_atomic_dec(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.image.atomic.dec.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
|
||||
%orig = call i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.cmpswap.i32(
|
||||
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(
|
||||
define float @image_atomic_cmpswap(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data, i32 inreg %cmp) #0 {
|
||||
main_body:
|
||||
%orig = call i32 @llvm.amdgcn.image.atomic.cmpswap.i32(i32 %data, i32 %cmp, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
|
||||
%orig = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %data, i32 %cmp, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
|
||||
%r = bitcast i32 %orig to float
|
||||
ret float %r
|
||||
}
|
||||
@ -112,19 +112,19 @@ main_body:
|
||||
ret float %r
|
||||
}
|
||||
|
||||
declare i32 @llvm.amdgcn.image.atomic.swap.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
|
||||
declare i32 @llvm.amdgcn.image.atomic.add.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
|
||||
declare i32 @llvm.amdgcn.image.atomic.sub.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
|
||||
declare i32 @llvm.amdgcn.image.atomic.smin.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
|
||||
declare i32 @llvm.amdgcn.image.atomic.umin.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
|
||||
declare i32 @llvm.amdgcn.image.atomic.smax.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
|
||||
declare i32 @llvm.amdgcn.image.atomic.umax.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
|
||||
declare i32 @llvm.amdgcn.image.atomic.and.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
|
||||
declare i32 @llvm.amdgcn.image.atomic.or.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
|
||||
declare i32 @llvm.amdgcn.image.atomic.xor.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
|
||||
declare i32 @llvm.amdgcn.image.atomic.inc.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
|
||||
declare i32 @llvm.amdgcn.image.atomic.dec.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
|
||||
declare i32 @llvm.amdgcn.image.atomic.cmpswap.i32(i32, i32, i32, <8 x i32>,i1, i1, i1) #0
|
||||
declare i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
|
||||
declare i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
|
||||
declare i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
|
||||
declare i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
|
||||
declare i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
|
||||
declare i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
|
||||
declare i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
|
||||
declare i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
|
||||
declare i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
|
||||
declare i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
|
||||
declare i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
|
||||
declare i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
|
||||
declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #0
|
||||
|
||||
declare i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #0
|
||||
|
||||
|
@ -1,13 +1,13 @@
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
; GCN-LABEL: {{^}}adjust_writemask_crash_0_nochain:
|
||||
; GCN: image_get_lod v0, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x2
|
||||
; GCN: image_get_lod v0, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x2
|
||||
; GCN-NOT: v1
|
||||
; GCN-NOT: v0
|
||||
; GCN: buffer_store_dword v0
|
||||
define amdgpu_ps void @adjust_writemask_crash_0_nochain() #0 {
|
||||
main_body:
|
||||
%tmp = call <2 x float> @llvm.amdgcn.image.getlod.v2f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
|
||||
%tmp1 = bitcast <2 x float> %tmp to <2 x i32>
|
||||
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
%tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
|
||||
@ -17,13 +17,13 @@ main_body:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}adjust_writemask_crash_1_nochain:
|
||||
; GCN: image_get_lod v0, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x1
|
||||
; GCN: image_get_lod v0, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x1
|
||||
; GCN-NOT: v1
|
||||
; GCN-NOT: v0
|
||||
; GCN: buffer_store_dword v0
|
||||
define amdgpu_ps void @adjust_writemask_crash_1_nochain() #0 {
|
||||
main_body:
|
||||
%tmp = call <2 x float> @llvm.amdgcn.image.getlod.v2f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
|
||||
%tmp1 = bitcast <2 x float> %tmp to <2 x i32>
|
||||
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> <i32 1, i32 0, i32 undef, i32 undef>
|
||||
%tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
|
||||
@ -33,13 +33,13 @@ main_body:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}adjust_writemask_crash_0_chain:
|
||||
; GCN: image_sample v0, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x2
|
||||
; GCN: image_sample v0, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x2
|
||||
; GCN-NOT: v1
|
||||
; GCN-NOT: v0
|
||||
; GCN: buffer_store_dword v0
|
||||
define amdgpu_ps void @adjust_writemask_crash_0_chain() #0 {
|
||||
main_body:
|
||||
%tmp = call <2 x float> @llvm.amdgcn.image.sample.v2f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
|
||||
%tmp1 = bitcast <2 x float> %tmp to <2 x i32>
|
||||
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
%tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
|
||||
@ -49,13 +49,13 @@ main_body:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}adjust_writemask_crash_1_chain:
|
||||
; GCN: image_sample v0, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x1
|
||||
; GCN: image_sample v0, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x1
|
||||
; GCN-NOT: v1
|
||||
; GCN-NOT: v0
|
||||
; GCN: buffer_store_dword v0
|
||||
define amdgpu_ps void @adjust_writemask_crash_1_chain() #0 {
|
||||
main_body:
|
||||
%tmp = call <2 x float> @llvm.amdgcn.image.sample.v2f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
|
||||
%tmp1 = bitcast <2 x float> %tmp to <2 x i32>
|
||||
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> <i32 1, i32 0, i32 undef, i32 undef>
|
||||
%tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
|
||||
@ -66,7 +66,7 @@ main_body:
|
||||
|
||||
define amdgpu_ps void @adjust_writemask_crash_0_v4() #0 {
|
||||
main_body:
|
||||
%tmp = call <4 x float> @llvm.amdgcn.image.getlod.v4f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 5, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp = call <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32 5, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
|
||||
%tmp1 = bitcast <4 x float> %tmp to <4 x i32>
|
||||
%tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
%tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
|
||||
@ -76,9 +76,9 @@ main_body:
|
||||
}
|
||||
|
||||
|
||||
declare <2 x float> @llvm.amdgcn.image.sample.v2f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
|
||||
declare <2 x float> @llvm.amdgcn.image.getlod.v2f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.getlod.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
|
||||
declare <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readonly }
|
||||
|
@ -7,7 +7,7 @@
|
||||
define amdgpu_ps float @main(float %arg0, float %arg1) #0 {
|
||||
bb:
|
||||
%tmp = fptosi float %arg0 to i32
|
||||
%tmp1 = call <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32> undef, <8 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp1 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 undef, <8 x i32> undef, i32 0, i32 0)
|
||||
%tmp2.f = extractelement <4 x float> %tmp1, i32 0
|
||||
%tmp2 = bitcast float %tmp2.f to i32
|
||||
%tmp3 = and i32 %tmp, 7
|
||||
@ -21,7 +21,7 @@ bb:
|
||||
}
|
||||
|
||||
declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #2
|
||||
declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readnone }
|
||||
|
@ -204,7 +204,7 @@ define amdgpu_vs i32 @load_i32_hifffffff0(i32 addrspace(6)* inreg %p) #4 {
|
||||
; GCN: v_readfirstlane_b32
|
||||
; GCN-NEXT: v_readfirstlane_b32
|
||||
; SI: s_nop
|
||||
; GCN-NEXT: s_load_dwordx8
|
||||
; GCN: s_load_dwordx8
|
||||
; GCN-NEXT: s_load_dwordx4
|
||||
; GCN: image_sample
|
||||
define amdgpu_ps <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @load_sampler([0 x <4 x i32>] addrspace(6)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(6)* inreg noalias dereferenceable(18446744073709551615), [0 x <4 x i32>] addrspace(6)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(6)* inreg noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #5 {
|
||||
@ -219,7 +219,7 @@ main_body:
|
||||
%29 = bitcast [0 x <8 x i32>] addrspace(6)* %1 to [0 x <4 x i32>] addrspace(6)*
|
||||
%30 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(6)* %29, i32 0, i32 %28, !amdgpu.uniform !0
|
||||
%31 = load <4 x i32>, <4 x i32> addrspace(6)* %30, align 16, !invariant.load !0
|
||||
%32 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> zeroinitializer, <8 x i32> %26, <4 x i32> %31, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #8
|
||||
%32 = call nsz <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float 0.0, <8 x i32> %26, <4 x i32> %31, i1 0, i32 0, i32 0) #8
|
||||
%33 = extractelement <4 x float> %32, i32 0
|
||||
%34 = extractelement <4 x float> %32, i32 1
|
||||
%35 = extractelement <4 x float> %32, i32 2
|
||||
@ -238,7 +238,7 @@ main_body:
|
||||
; GCN: v_readfirstlane_b32
|
||||
; GCN-NEXT: v_readfirstlane_b32
|
||||
; SI: s_nop
|
||||
; GCN-NEXT: s_load_dwordx8
|
||||
; GCN: s_load_dwordx8
|
||||
; GCN-NEXT: s_load_dwordx4
|
||||
; GCN: image_sample
|
||||
define amdgpu_ps <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @load_sampler_nouniform([0 x <4 x i32>] addrspace(6)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(6)* inreg noalias dereferenceable(18446744073709551615), [0 x <4 x i32>] addrspace(6)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(6)* inreg noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #5 {
|
||||
@ -253,7 +253,7 @@ main_body:
|
||||
%29 = bitcast [0 x <8 x i32>] addrspace(6)* %1 to [0 x <4 x i32>] addrspace(6)*
|
||||
%30 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(6)* %29, i32 0, i32 %28
|
||||
%31 = load <4 x i32>, <4 x i32> addrspace(6)* %30, align 16, !invariant.load !0
|
||||
%32 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> zeroinitializer, <8 x i32> %26, <4 x i32> %31, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #8
|
||||
%32 = call nsz <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float 0.0, <8 x i32> %26, <4 x i32> %31, i1 0, i32 0, i32 0) #8
|
||||
%33 = extractelement <4 x float> %32, i32 0
|
||||
%34 = extractelement <4 x float> %32, i32 1
|
||||
%35 = extractelement <4 x float> %32, i32 2
|
||||
@ -272,7 +272,7 @@ main_body:
|
||||
declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #6
|
||||
|
||||
; Function Attrs: nounwind readonly
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #7
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #7
|
||||
|
||||
|
||||
!0 = !{}
|
||||
|
@ -44,7 +44,7 @@ if:
|
||||
|
||||
else:
|
||||
%c = fmul float %v, 3.0
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %c, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
|
||||
%v.else = extractelement <4 x float> %tex, i32 0
|
||||
br label %end
|
||||
|
||||
@ -55,7 +55,7 @@ end:
|
||||
}
|
||||
|
||||
declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind writeonly }
|
||||
|
@ -25,17 +25,18 @@ define dllexport amdgpu_cs void @_amdgpu_cs_main(i32 inreg %arg, i32 inreg %arg1
|
||||
%tmp10 = getelementptr [4294967295 x i8], [4294967295 x i8] addrspace(4)* %tmp8, i64 0, i64 32
|
||||
%tmp11 = bitcast i8 addrspace(4)* %tmp10 to <8 x i32> addrspace(4)*, !amdgpu.uniform !0
|
||||
%tmp12 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp11, align 16
|
||||
%tmp13 = shufflevector <3 x i32> %tmp9, <3 x i32> undef, <2 x i32> <i32 0, i32 1>
|
||||
%tmp14 = call <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32> %tmp13, <8 x i32> %tmp12, i32 15, i1 false, i1 false, i1 false, i1 false) #0
|
||||
%tmp13.0 = extractelement <3 x i32> %tmp9, i32 0
|
||||
%tmp13.1 = extractelement <3 x i32> %tmp9, i32 1
|
||||
%tmp14 = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 %tmp13.0, i32 %tmp13.1, <8 x i32> %tmp12, i32 0, i32 0) #0
|
||||
%tmp15 = inttoptr i64 %tmp7 to <8 x i32> addrspace(4)*
|
||||
%tmp16 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp15, align 16
|
||||
call void @llvm.amdgcn.image.store.v4f32.v2i32.v8i32(<4 x float> %tmp14, <2 x i32> %tmp13, <8 x i32> %tmp16, i32 15, i1 false, i1 false, i1 false, i1 false) #0
|
||||
call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> %tmp14, i32 15, i32 %tmp13.0, i32 %tmp13.1, <8 x i32> %tmp16, i32 0, i32 0) #0
|
||||
%tmp17 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp15, align 16
|
||||
%tmp18 = call <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32> %tmp13, <8 x i32> %tmp17, i32 15, i1 false, i1 false, i1 false, i1 false) #0
|
||||
; dmask is 15 (0xf), the same value as the old-style call this line replaces and as the other image calls in this function.
%tmp18 = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 %tmp13.0, i32 %tmp13.1, <8 x i32> %tmp17, i32 0, i32 0) #0
|
||||
%tmp19 = getelementptr [4294967295 x i8], [4294967295 x i8] addrspace(4)* %tmp8, i64 0, i64 64
|
||||
%tmp20 = bitcast i8 addrspace(4)* %tmp19 to <8 x i32> addrspace(4)*, !amdgpu.uniform !0
|
||||
%tmp21 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp20, align 16
|
||||
call void @llvm.amdgcn.image.store.v4f32.v2i32.v8i32(<4 x float> %tmp18, <2 x i32> %tmp13, <8 x i32> %tmp21, i32 15, i1 false, i1 false, i1 false, i1 false) #0
|
||||
call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> %tmp18, i32 15, i32 %tmp13.0, i32 %tmp13.1, <8 x i32> %tmp21, i32 0, i32 0) #0
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -43,10 +44,10 @@ define dllexport amdgpu_cs void @_amdgpu_cs_main(i32 inreg %arg, i32 inreg %arg1
|
||||
declare i64 @llvm.amdgcn.s.getpc() #1
|
||||
|
||||
; Function Attrs: nounwind readonly
|
||||
declare <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #2
|
||||
declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #2
|
||||
|
||||
; Function Attrs: nounwind writeonly
|
||||
declare void @llvm.amdgcn.image.store.v4f32.v2i32.v8i32(<4 x float>, <2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #3
|
||||
declare void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #3
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readnone speculatable }
|
||||
|
@ -75,10 +75,9 @@ define amdgpu_kernel void @insertelement_v3f32_3(<3 x float> addrspace(1)* %out,
|
||||
|
||||
; GCN-LABEL: {{^}}insertelement_to_sgpr:
|
||||
; GCN-NOT: v_readfirstlane
|
||||
define amdgpu_ps <4 x float> @insertelement_to_sgpr() nounwind {
|
||||
%tmp = load <4 x i32>, <4 x i32> addrspace(2)* undef
|
||||
%tmp1 = insertelement <4 x i32> %tmp, i32 0, i32 0
|
||||
%tmp2 = call <4 x float> @llvm.amdgcn.image.gather4.lz.v4f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 false, i1 false, i1 false, i1 false, i1 true)
|
||||
define amdgpu_ps <4 x float> @insertelement_to_sgpr(<4 x i32> inreg %samp) nounwind {
|
||||
%tmp1 = insertelement <4 x i32> %samp, i32 0, i32 0
|
||||
%tmp2 = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32 1, float undef, float undef, <8 x i32> undef, <4 x i32> %tmp1, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %tmp2
|
||||
}
|
||||
|
||||
@ -474,7 +473,7 @@ define amdgpu_kernel void @dynamic_insertelement_v8f64(<8 x double> addrspace(1)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare <4 x float> @llvm.amdgcn.image.gather4.lz.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readnone }
|
||||
|
@ -1,3 +1,4 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_swap_1d:
|
||||
|
@ -1,6 +1,6 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
|
||||
|
||||
; GCN-LABEL: {{^}}load_1d:
|
||||
; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm{{$}}
|
||||
@ -370,6 +370,46 @@ main_body:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Calls the 1D getresinfo intrinsic with a first operand of 0 (the dmask, going
; by the test name). The checks below require that no "image" text appears
; between the function label and the shader epilog comment.
; GCN-LABEL: {{^}}getresinfo_dmask0:
|
||||
; GCN-NOT: image
|
||||
; GCN: ; return to shader part epilog
|
||||
define amdgpu_ps <4 x float> @getresinfo_dmask0(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %mip) #0 {
|
||||
main_body:
|
||||
%r = call <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32 0, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
|
||||
ret <4 x float> %r
|
||||
}
|
||||
|
||||
; Ideally, the register allocator would avoid the wait here
|
||||
;
; Sequence under test: store %arg3 through descriptor %arg, load from %arg1 at
; the same coordinate %arg4, then store the loaded data through %arg2.
; The expcnt wait between the first store and the load is only checked under
; the SI prefix; the vmcnt wait before the final store is checked for all runs.
|
||||
; GCN-LABEL: {{^}}image_store_wait:
|
||||
; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm
|
||||
; SI: s_waitcnt expcnt(0)
|
||||
; GCN: image_load v[0:3], v4, s[8:15] dmask:0xf unorm
|
||||
; GCN: s_waitcnt vmcnt(0)
|
||||
; GCN: image_store v[0:3], v4, s[16:23] dmask:0xf unorm
|
||||
define amdgpu_ps void @image_store_wait(<8 x i32> inreg %arg, <8 x i32> inreg %arg1, <8 x i32> inreg %arg2, <4 x float> %arg3, i32 %arg4) #0 {
|
||||
main_body:
|
||||
call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %arg3, i32 15, i32 %arg4, <8 x i32> %arg, i32 0, i32 0)
|
||||
%data = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %arg4, <8 x i32> %arg1, i32 0, i32 0)
|
||||
call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %data, i32 15, i32 %arg4, <8 x i32> %arg2, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI won't merge ds memory operations, because of the signed offset bug, so
|
||||
; we only have check lines for VI.
; Two stores of 0.0 to %lds (element 0 and element 4) are placed on either side
; of a 2D image load. The VI checks expect a single ds_write2_b32 with offset1:4
; that writes the same zero register twice.
|
||||
; VI-LABEL: image_load_mmo
|
||||
; VI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
|
||||
; VI: ds_write2_b32 v{{[0-9]+}}, [[ZERO]], [[ZERO]] offset1:4
|
||||
define amdgpu_ps float @image_load_mmo(<8 x i32> inreg %rsrc, float addrspace(3)* %lds, <2 x i32> %c) #0 {
|
||||
store float 0.000000e+00, float addrspace(3)* %lds
|
||||
%c0 = extractelement <2 x i32> %c, i32 0
|
||||
%c1 = extractelement <2 x i32> %c, i32 1
|
||||
%tex = call float @llvm.amdgcn.image.load.2d.f32.i32(i32 15, i32 %c0, i32 %c1, <8 x i32> %rsrc, i32 0, i32 0)
|
||||
%tmp2 = getelementptr float, float addrspace(3)* %lds, i32 4
|
||||
store float 0.000000e+00, float addrspace(3)* %tmp2
|
||||
ret float %tex
|
||||
}
|
||||
|
||||
declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
|
||||
@ -412,6 +452,7 @@ declare <4 x float> @llvm.amdgcn.image.getresinfo.2dmsaa.v4f32.i32(i32, i32, <8
|
||||
declare <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
|
||||
|
||||
declare float @llvm.amdgcn.image.load.1d.f32.i32(i32, i32, <8 x i32>, i32, i32) #1
|
||||
declare float @llvm.amdgcn.image.load.2d.f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
|
||||
declare <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32, i32, <8 x i32>, i32, i32) #1
|
||||
declare void @llvm.amdgcn.image.store.1d.f32.i32(float, i32, i32, <8 x i32>, i32, i32) #0
|
||||
declare void @llvm.amdgcn.image.store.1d.v2f32.i32(<2 x float>, i32, i32, <8 x i32>, i32, i32) #0
|
||||
|
@ -1,3 +1,4 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
|
||||
|
||||
; GCN-LABEL: {{^}}gather4_2d:
|
||||
|
118
test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.o.dim.ll
Normal file
118
test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.o.dim.ll
Normal file
@ -0,0 +1,118 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
|
||||
|
||||
; GCN-LABEL: {{^}}gather4_o_2d:
|
||||
; GCN: image_gather4_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
|
||||
define amdgpu_ps <4 x float> @gather4_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}gather4_c_o_2d:
|
||||
; GCN: image_gather4_c_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
|
||||
define amdgpu_ps <4 x float> @gather4_c_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}gather4_cl_o_2d:
|
||||
; GCN: image_gather4_cl_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
|
||||
define amdgpu_ps <4 x float> @gather4_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %clamp) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}gather4_c_cl_o_2d:
|
||||
; GCN: image_gather4_c_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1{{$}}
|
||||
define amdgpu_ps <4 x float> @gather4_c_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %clamp) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}gather4_b_o_2d:
|
||||
; GCN: image_gather4_b_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
|
||||
define amdgpu_ps <4 x float> @gather4_b_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %s, float %t) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}gather4_c_b_o_2d:
|
||||
; GCN: image_gather4_c_b_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1{{$}}
|
||||
define amdgpu_ps <4 x float> @gather4_c_b_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %zcompare, float %s, float %t) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}gather4_b_cl_o_2d:
|
||||
; GCN: image_gather4_b_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1{{$}}
|
||||
define amdgpu_ps <4 x float> @gather4_b_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %s, float %t, float %clamp) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}gather4_c_b_cl_o_2d:
|
||||
; GCN: image_gather4_c_b_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1{{$}}
|
||||
define amdgpu_ps <4 x float> @gather4_c_b_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}gather4_l_o_2d:
|
||||
; GCN: image_gather4_l_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
|
||||
define amdgpu_ps <4 x float> @gather4_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %lod) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}gather4_c_l_o_2d:
|
||||
; GCN: image_gather4_c_l_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1{{$}}
|
||||
define amdgpu_ps <4 x float> @gather4_c_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %lod) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}gather4_lz_o_2d:
|
||||
; GCN: image_gather4_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
|
||||
define amdgpu_ps <4 x float> @gather4_lz_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}gather4_c_lz_o_2d:
|
||||
; GCN: image_gather4_c_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
|
||||
define amdgpu_ps <4 x float> @gather4_c_lz_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
declare <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
|
||||
declare <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
|
||||
declare <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
|
||||
declare <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readonly }
|
||||
attributes #2 = { nounwind readnone }
|
@ -1,3 +1,4 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
|
||||
|
||||
; GCN-LABEL: {{^}}sample_1d:
|
||||
@ -400,6 +401,95 @@ main_body:
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}adjust_writemask_sample_0:
|
||||
; GCN: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1{{$}}
|
||||
define amdgpu_ps float @adjust_writemask_sample_0(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
|
||||
main_body:
|
||||
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
%elt0 = extractelement <4 x float> %r, i32 0
|
||||
ret float %elt0
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}adjust_writemask_sample_01:
|
||||
; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x3{{$}}
|
||||
define amdgpu_ps <2 x float> @adjust_writemask_sample_01(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
|
||||
main_body:
|
||||
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
%out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 0, i32 1>
|
||||
ret <2 x float> %out
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}adjust_writemask_sample_012:
|
||||
; GCN: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0x7{{$}}
|
||||
define amdgpu_ps <3 x float> @adjust_writemask_sample_012(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
|
||||
main_body:
|
||||
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
%out = shufflevector <4 x float> %r, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
|
||||
ret <3 x float> %out
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}adjust_writemask_sample_12:
|
||||
; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6{{$}}
|
||||
define amdgpu_ps <2 x float> @adjust_writemask_sample_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
|
||||
main_body:
|
||||
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
%out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 1, i32 2>
|
||||
ret <2 x float> %out
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}adjust_writemask_sample_03:
|
||||
; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x9{{$}}
|
||||
define amdgpu_ps <2 x float> @adjust_writemask_sample_03(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
|
||||
main_body:
|
||||
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
%out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 0, i32 3>
|
||||
ret <2 x float> %out
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}adjust_writemask_sample_13:
|
||||
; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa{{$}}
|
||||
define amdgpu_ps <2 x float> @adjust_writemask_sample_13(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
|
||||
main_body:
|
||||
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
%out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 1, i32 3>
|
||||
ret <2 x float> %out
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}adjust_writemask_sample_123:
|
||||
; GCN: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0xe{{$}}
|
||||
define amdgpu_ps <3 x float> @adjust_writemask_sample_123(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
|
||||
main_body:
|
||||
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
%out = shufflevector <4 x float> %r, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
|
||||
ret <3 x float> %out
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}adjust_writemask_sample_none_enabled:
|
||||
; GCN-NOT: image
|
||||
define amdgpu_ps <4 x float> @adjust_writemask_sample_none_enabled(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
|
||||
main_body:
|
||||
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %r
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}adjust_writemask_sample_123_to_12:
|
||||
; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6{{$}}
|
||||
define amdgpu_ps <2 x float> @adjust_writemask_sample_123_to_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
|
||||
main_body:
|
||||
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 14, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
%out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 0, i32 1>
|
||||
ret <2 x float> %out
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}adjust_writemask_sample_013_to_13:
|
||||
; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa{{$}}
|
||||
define amdgpu_ps <2 x float> @adjust_writemask_sample_013_to_13(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
|
||||
main_body:
|
||||
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 11, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
%out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 1, i32 2>
|
||||
ret <2 x float> %out
|
||||
}
|
||||
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
|
371
test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.o.dim.ll
Normal file
371
test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.o.dim.ll
Normal file
@ -0,0 +1,371 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
|
||||
|
||||
; GCN-LABEL: {{^}}sample_o_1d:
|
||||
; GCN: image_sample_o v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_o_2d:
|
||||
; GCN: image_sample_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.o.2d.v4f32.f32(i32 15, i32 %offset, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_c_o_1d:
|
||||
; GCN: image_sample_c_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_c_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_c_o_2d:
|
||||
; GCN: image_sample_c_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_c_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.c.o.2d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_cl_o_1d:
|
||||
; GCN: image_sample_cl_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_cl_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %clamp) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_cl_o_2d:
|
||||
; GCN: image_sample_cl_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %clamp) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.cl.o.2d.v4f32.f32(i32 15, i32 %offset, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_c_cl_o_1d:
|
||||
; GCN: image_sample_c_cl_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_c_cl_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %clamp) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_c_cl_o_2d:
|
||||
; GCN: image_sample_c_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_c_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %clamp) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.o.2d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_b_o_1d:
|
||||
; GCN: image_sample_b_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_b_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %s) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_b_o_2d:
|
||||
; GCN: image_sample_b_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_b_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %s, float %t) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.b.o.2d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_c_b_o_1d:
|
||||
; GCN: image_sample_c_b_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_c_b_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %zcompare, float %s) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_c_b_o_2d:
|
||||
; GCN: image_sample_c_b_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_c_b_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %zcompare, float %s, float %t) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_b_cl_o_1d:
|
||||
; GCN: image_sample_b_cl_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_b_cl_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %s, float %clamp) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_b_cl_o_2d:
|
||||
; GCN: image_sample_b_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_b_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %s, float %t, float %clamp) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.o.2d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_c_b_cl_o_1d:
|
||||
; GCN: image_sample_c_b_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_c_b_cl_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %zcompare, float %s, float %clamp) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_c_b_cl_o_2d:
|
||||
; GCN: image_sample_c_b_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_c_b_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.2d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_d_o_1d:
|
||||
; GCN: image_sample_d_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_d_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %dsdh, float %dsdv, float %s) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.d.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_d_o_2d:
|
||||
; GCN: image_sample_d_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_d_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.d.o.2d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_c_d_o_1d:
|
||||
; GCN: image_sample_c_d_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_c_d_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_c_d_o_2d:
|
||||
; GCN: image_sample_c_d_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_c_d_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.o.2d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_d_cl_o_1d:
|
||||
; GCN: image_sample_d_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_d_cl_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_d_cl_o_2d:
|
||||
; GCN: image_sample_d_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_d_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.o.2d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_c_d_cl_o_1d:
|
||||
; GCN: image_sample_c_d_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_c_d_cl_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_c_d_cl_o_2d:
|
||||
; GCN: image_sample_c_d_cl_o v[0:3], v[0:15], s[0:7], s[8:11] dmask:0xf{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_c_d_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.2d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_cd_o_1d:
|
||||
; GCN: image_sample_cd_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_cd_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %dsdh, float %dsdv, float %s) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.cd.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_cd_o_2d:
|
||||
; GCN: image_sample_cd_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_cd_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.cd.o.2d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_c_cd_o_1d:
|
||||
; GCN: image_sample_c_cd_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_c_cd_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_c_cd_o_2d:
|
||||
; GCN: image_sample_c_cd_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_c_cd_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.o.2d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_cd_cl_o_1d:
|
||||
; GCN: image_sample_cd_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_cd_cl_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_cd_cl_o_2d:
|
||||
; GCN: image_sample_cd_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_cd_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.2d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_c_cd_cl_o_1d:
|
||||
; GCN: image_sample_c_cd_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
|
||||
; sample_c_cd_cl_o_1d: amdgpu_ps test wrapper around
; @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32.
; Forwards i32 15, then the six non-inreg arguments (%offset, %zcompare,
; %dsdh, %dsdv, %s, %clamp), then %rsrc and %samp and the constants
; i1 0, i32 0, i32 0, and returns the <4 x float> result unchanged.
; The check line above expects one image_sample_c_cd_cl_o with dmask:0xf and
; address registers v[0:7]. NOTE(review): i32 15 is presumably the dmask
; operand - confirm.
define amdgpu_ps <4 x float> @sample_c_cd_cl_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_c_cd_cl_o_2d:
|
||||
; GCN: image_sample_c_cd_cl_o v[0:3], v[0:15], s[0:7], s[8:11] dmask:0xf{{$}}
|
||||
; sample_c_cd_cl_o_2d: amdgpu_ps test wrapper around
; @llvm.amdgcn.image.sample.c.cd.cl.o.2d.v4f32.f32.f32.
; Forwards i32 15, then the nine non-inreg arguments (%offset, %zcompare,
; %dsdh, %dtdh, %dsdv, %dtdv, %s, %t, %clamp), then %rsrc and %samp and the
; constants i1 0, i32 0, i32 0, and returns the <4 x float> result unchanged.
; The check line above expects one image_sample_c_cd_cl_o with dmask:0xf and
; address registers v[0:15] (wider than the v[0:7] expected for the variants
; with eight or fewer address values). NOTE(review): i32 15 is presumably the
; dmask operand - confirm.
define amdgpu_ps <4 x float> @sample_c_cd_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.2d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_l_o_1d:
|
||||
; GCN: image_sample_l_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
|
||||
; sample_l_o_1d: amdgpu_ps test wrapper around
; @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32.
; Forwards i32 15, then the three non-inreg arguments (%offset, %s, %lod),
; then %rsrc and %samp and the constants i1 0, i32 0, i32 0, and returns the
; <4 x float> result unchanged.
; The check line above expects one image_sample_l_o with dmask:0xf and
; address registers v[0:3]. NOTE(review): i32 15 is presumably the dmask
; operand - confirm.
define amdgpu_ps <4 x float> @sample_l_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %lod) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_l_o_2d:
|
||||
; GCN: image_sample_l_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
|
||||
; sample_l_o_2d: amdgpu_ps test wrapper around
; @llvm.amdgcn.image.sample.l.o.2d.v4f32.f32.
; Forwards i32 15, then the four non-inreg arguments (%offset, %s, %t, %lod),
; then %rsrc and %samp and the constants i1 0, i32 0, i32 0, and returns the
; <4 x float> result unchanged.
; The check line above expects one image_sample_l_o with dmask:0xf and
; address registers v[0:3]. NOTE(review): i32 15 is presumably the dmask
; operand - confirm.
define amdgpu_ps <4 x float> @sample_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %lod) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_c_l_o_1d:
|
||||
; GCN: image_sample_c_l_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
|
||||
; sample_c_l_o_1d: amdgpu_ps test wrapper around
; @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32.
; Forwards i32 15, then the four non-inreg arguments (%offset, %zcompare, %s,
; %lod), then %rsrc and %samp and the constants i1 0, i32 0, i32 0, and
; returns the <4 x float> result unchanged.
; The check line above expects one image_sample_c_l_o with dmask:0xf and
; address registers v[0:3]. NOTE(review): i32 15 is presumably the dmask
; operand - confirm.
define amdgpu_ps <4 x float> @sample_c_l_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %lod) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_c_l_o_2d:
|
||||
; GCN: image_sample_c_l_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
|
||||
; sample_c_l_o_2d: amdgpu_ps test wrapper around
; @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f32.
; Forwards i32 15, then the five non-inreg arguments (%offset, %zcompare, %s,
; %t, %lod), then %rsrc and %samp and the constants i1 0, i32 0, i32 0, and
; returns the <4 x float> result unchanged.
; The check line above expects one image_sample_c_l_o with dmask:0xf and
; address registers v[0:7]. NOTE(review): i32 15 is presumably the dmask
; operand - confirm.
define amdgpu_ps <4 x float> @sample_c_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %lod) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_lz_o_1d:
|
||||
; GCN: image_sample_lz_o v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}}
|
||||
; sample_lz_o_1d: amdgpu_ps test wrapper around
; @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32.
; Forwards i32 15, then the two non-inreg arguments (%offset, %s), then %rsrc
; and %samp and the constants i1 0, i32 0, i32 0, and returns the
; <4 x float> result unchanged.
; The check line above expects one image_sample_lz_o with dmask:0xf and
; address registers v[0:1]. NOTE(review): i32 15 is presumably the dmask
; operand - confirm.
define amdgpu_ps <4 x float> @sample_lz_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_lz_o_2d:
|
||||
; GCN: image_sample_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
|
||||
; sample_lz_o_2d: amdgpu_ps test wrapper around
; @llvm.amdgcn.image.sample.lz.o.2d.v4f32.f32.
; Forwards i32 15, then the three non-inreg arguments (%offset, %s, %t), then
; %rsrc and %samp and the constants i1 0, i32 0, i32 0, and returns the
; <4 x float> result unchanged.
; The check line above expects one image_sample_lz_o with dmask:0xf and
; address registers v[0:3]. NOTE(review): i32 15 is presumably the dmask
; operand - confirm.
define amdgpu_ps <4 x float> @sample_lz_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.lz.o.2d.v4f32.f32(i32 15, i32 %offset, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_c_lz_o_1d:
|
||||
; GCN: image_sample_c_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
|
||||
; sample_c_lz_o_1d: amdgpu_ps test wrapper around
; @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32.
; Forwards i32 15, then the three non-inreg arguments (%offset, %zcompare,
; %s), then %rsrc and %samp and the constants i1 0, i32 0, i32 0, and returns
; the <4 x float> result unchanged.
; The check line above expects one image_sample_c_lz_o with dmask:0xf and
; address registers v[0:3]. NOTE(review): i32 15 is presumably the dmask
; operand - confirm.
define amdgpu_ps <4 x float> @sample_c_lz_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_c_lz_o_2d:
|
||||
; GCN: image_sample_c_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
|
||||
; sample_c_lz_o_2d: amdgpu_ps test wrapper around
; @llvm.amdgcn.image.sample.c.lz.o.2d.v4f32.f32.
; Forwards i32 15, then the four non-inreg arguments (%offset, %zcompare, %s,
; %t), then %rsrc and %samp and the constants i1 0, i32 0, i32 0, and returns
; the <4 x float> result unchanged.
; The check line above expects one image_sample_c_lz_o with dmask:0xf and
; address registers v[0:3]. NOTE(review): i32 15 is presumably the dmask
; operand - confirm.
define amdgpu_ps <4 x float> @sample_c_lz_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.2d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; Declarations for the sample.o, sample.c.o, sample.cl.o and sample.c.cl.o
; intrinsics in their 1d and 2d forms. Every signature has the same layout:
; two leading i32 operands, one to four float operands, an <8 x i32>, a
; <4 x i32>, then i1, i32, i32; each returns <4 x float> and uses attribute
; group #1.
declare <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32(i32, i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.o.2d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.c.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.cl.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.c.cl.o.1d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.c.cl.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
|
||||
; Declarations for the sample.b.o, sample.c.b.o, sample.b.cl.o and
; sample.c.b.cl.o intrinsics in their 1d and 2d forms (names carry the
; .v4f32.f32.f32 suffix). Every signature has two leading i32 operands, two
; to five float operands, an <8 x i32>, a <4 x i32>, then i1, i32, i32; each
; returns <4 x float> and uses attribute group #1.
declare <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.b.o.2d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.b.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.b.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
|
||||
; Declarations for the sample.d.o, sample.c.d.o, sample.d.cl.o and
; sample.c.d.cl.o intrinsics in their 1d and 2d forms (names carry the
; .v4f32.f32.f32 suffix). Every signature has two leading i32 operands, three
; to eight float operands, an <8 x i32>, a <4 x i32>, then i1, i32, i32; each
; returns <4 x float> and uses attribute group #1.
declare <4 x float> @llvm.amdgcn.image.sample.d.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.d.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.c.d.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.c.d.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.d.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
|
||||
; Declarations for the sample.cd.o, sample.c.cd.o, sample.cd.cl.o and
; sample.c.cd.cl.o intrinsics in their 1d and 2d forms (names carry the
; .v4f32.f32.f32 suffix). Every signature has two leading i32 operands, three
; to eight float operands, an <8 x i32>, a <4 x i32>, then i1, i32, i32; each
; returns <4 x float> and uses attribute group #1.
declare <4 x float> @llvm.amdgcn.image.sample.cd.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.cd.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
|
||||
; Declarations for the sample.l.o, sample.c.l.o, sample.lz.o and
; sample.c.lz.o intrinsics in their 1d and 2d forms. Every signature has two
; leading i32 operands, one to four float operands, an <8 x i32>, a
; <4 x i32>, then i1, i32, i32; each returns <4 x float> and uses attribute
; group #1.
declare <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32(i32, i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.lz.o.2d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.c.lz.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
|
||||
; Attribute groups for this test file. Group #1 (nounwind readonly) is the
; one attached to the image-sample intrinsic declarations above.
; NOTE(review): groups #0 and #2 are not referenced in the nearby
; declarations; confirm they are still used elsewhere in the file.
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readonly }
|
||||
attributes #2 = { nounwind readnone }
|
@ -26,7 +26,7 @@ define amdgpu_ps float @test2() #0 {
|
||||
%live = call i1 @llvm.amdgcn.ps.live()
|
||||
%live.32 = zext i1 %live to i32
|
||||
%live.32.bc = bitcast i32 %live.32 to float
|
||||
%t = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %live.32.bc, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%t = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %live.32.bc, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
|
||||
%r = extractelement <4 x float> %t, i32 0
|
||||
ret float %r
|
||||
}
|
||||
@ -49,13 +49,13 @@ dead:
|
||||
end:
|
||||
%tc = phi i32 [ %in, %entry ], [ %tc.dead, %dead ]
|
||||
%tc.bc = bitcast i32 %tc to float
|
||||
%t = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %tc.bc, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
|
||||
%t = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tc.bc, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) #0
|
||||
%r = extractelement <4 x float> %t, i32 0
|
||||
ret float %r
|
||||
}
|
||||
|
||||
declare i1 @llvm.amdgcn.ps.live() #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readnone }
|
||||
|
@ -8,9 +8,9 @@
|
||||
; CHECK-NEXT: image_store
|
||||
; CHECK-NEXT: s_endpgm
|
||||
define amdgpu_ps void @test1(<8 x i32> inreg %rsrc, <4 x float> %d0, <4 x float> %d1, i32 %c0, i32 %c1) {
|
||||
call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %d0, i32 %c0, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 1, i1 0)
|
||||
call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %d0, i32 15, i32 %c0, <8 x i32> %rsrc, i32 0, i32 0)
|
||||
call void @llvm.amdgcn.s.waitcnt(i32 3840) ; 0xf00
|
||||
call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %d1, i32 %c1, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 1, i1 0)
|
||||
call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %d1, i32 15, i32 %c1, <8 x i32> %rsrc, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -24,17 +24,17 @@ define amdgpu_ps void @test1(<8 x i32> inreg %rsrc, <4 x float> %d0, <4 x float>
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0){{$}}
|
||||
; CHECK-NEXT: image_store
|
||||
define amdgpu_ps void @test2(<8 x i32> inreg %rsrc, i32 %c) {
|
||||
%t = call <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32 %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
|
||||
%t = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %c, <8 x i32> %rsrc, i32 0, i32 0)
|
||||
call void @llvm.amdgcn.s.waitcnt(i32 3840) ; 0xf00
|
||||
%c.1 = mul i32 %c, 2
|
||||
call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %t, i32 %c.1, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
|
||||
call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %t, i32 15, i32 %c.1, <8 x i32> %rsrc, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.amdgcn.s.waitcnt(i32) #0
|
||||
|
||||
declare <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32, <8 x i32>, i32, i1, i1, i1, i1) #1
|
||||
declare void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float>, i32, <8 x i32>, i32, i1, i1, i1, i1) #0
|
||||
declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #1
|
||||
declare void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float>, i32, i32, <8 x i32>, i32, i32) #0
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readonly }
|
||||
|
@ -10,8 +10,8 @@ target triple = "amdgcn--amdpal"
|
||||
|
||||
define dllexport amdgpu_ps <2 x float> @_amdgpu_ps_main(i32 inreg, i32 inreg, i32 inreg, i32 inreg, <2 x float>, <2 x float>, <2 x float>, <3 x float>, <2 x float>, <2 x float>, <2 x float>, float, float, float, float, float, i32, i32, i32, i32) local_unnamed_addr {
|
||||
.entry:
|
||||
%res = call <2 x float> @llvm.amdgcn.image.sample.l.v2f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%res = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
|
||||
ret <2 x float> %res
|
||||
}
|
||||
|
||||
declare <2 x float> @llvm.amdgcn.image.sample.l.v2f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1)
|
||||
declare <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32)
|
||||
|
@ -81,13 +81,8 @@ main_body:
|
||||
%j.f.i4 = bitcast i32 %j.i2 to float
|
||||
%p1.i5 = call float @llvm.amdgcn.interp.p1(float %i.f.i3, i32 2, i32 1, i32 %arg3) #1
|
||||
%p2.i6 = call float @llvm.amdgcn.interp.p2(float %p1.i5, float %j.f.i4, i32 2, i32 1, i32 %arg3) #1
|
||||
%tmp45 = bitcast float %p2.i to i32
|
||||
%tmp46 = bitcast float %p2.i24 to i32
|
||||
%tmp47 = insertelement <2 x i32> undef, i32 %tmp45, i32 0
|
||||
%tmp48 = insertelement <2 x i32> %tmp47, i32 %tmp46, i32 1
|
||||
%tmp39.bc = bitcast <4 x i32> %tmp39 to <4 x i32>
|
||||
%a.bc.i = bitcast <2 x i32> %tmp48 to <2 x float>
|
||||
%tmp1 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %a.bc.i, <8 x i32> %tmp37, <4 x i32> %tmp39.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp1 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %p2.i, float %p2.i24, <8 x i32> %tmp37, <4 x i32> %tmp39.bc, i1 0, i32 0, i32 0)
|
||||
%tmp50 = extractelement <4 x float> %tmp1, i32 2
|
||||
%tmp51 = call float @llvm.fabs.f32(float %tmp50)
|
||||
%tmp52 = fmul float %p2.i18, %p2.i18
|
||||
@ -240,14 +235,14 @@ entry:
|
||||
br i1 %tmp27, label %if, label %else
|
||||
|
||||
if: ; preds = %entry
|
||||
%tmp1 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> <float 0x36D6000000000000, float 0x36DA000000000000>, <8 x i32> %tmp24, <4 x i32> %tmp26.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp1 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 0x36D6000000000000, float 0x36DA000000000000, <8 x i32> %tmp24, <4 x i32> %tmp26.bc, i1 0, i32 0, i32 0)
|
||||
%val.if.0 = extractelement <4 x float> %tmp1, i32 0
|
||||
%val.if.1 = extractelement <4 x float> %tmp1, i32 1
|
||||
%val.if.2 = extractelement <4 x float> %tmp1, i32 2
|
||||
br label %endif
|
||||
|
||||
else: ; preds = %entry
|
||||
%tmp2 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> <float 0x36C4000000000000, float 0x36CC000000000000>, <8 x i32> %tmp24, <4 x i32> %tmp26.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp2 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 0x36C4000000000000, float 0x36CC000000000000, <8 x i32> %tmp24, <4 x i32> %tmp26.bc, i1 0, i32 0, i32 0)
|
||||
%val.else.0 = extractelement <4 x float> %tmp2, i32 0
|
||||
%val.else.1 = extractelement <4 x float> %tmp2, i32 1
|
||||
%val.else.2 = extractelement <4 x float> %tmp2, i32 2
|
||||
@ -352,24 +347,18 @@ bb:
|
||||
br i1 %tmp36, label %bb38, label %bb80
|
||||
|
||||
bb38: ; preds = %bb
|
||||
%tmp52 = bitcast float %p2.i to i32
|
||||
%tmp53 = bitcast float %p2.i6 to i32
|
||||
%tmp54 = insertelement <2 x i32> undef, i32 %tmp52, i32 0
|
||||
%tmp55 = insertelement <2 x i32> %tmp54, i32 %tmp53, i32 1
|
||||
%tmp56 = bitcast <8 x i32> %tmp26 to <8 x i32>
|
||||
%a.bc.i = bitcast <2 x i32> %tmp55 to <2 x float>
|
||||
%tmp2 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %a.bc.i, <8 x i32> %tmp56, <4 x i32> %tmp28, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp2 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %p2.i, float %p2.i6, <8 x i32> %tmp56, <4 x i32> %tmp28, i1 0, i32 0, i32 0)
|
||||
br label %bb71
|
||||
|
||||
bb80: ; preds = %bb
|
||||
%tmp81 = bitcast float %p2.i to i32
|
||||
%tmp82 = bitcast float %p2.i6 to i32
|
||||
%tmp82.2 = add i32 %tmp82, 1
|
||||
%tmp83 = insertelement <2 x i32> undef, i32 %tmp81, i32 0
|
||||
%tmp84 = insertelement <2 x i32> %tmp83, i32 %tmp82.2, i32 1
|
||||
%tmp83 = bitcast i32 %tmp81 to float
|
||||
%tmp84 = bitcast i32 %tmp82.2 to float
|
||||
%tmp85 = bitcast <8 x i32> %tmp26 to <8 x i32>
|
||||
%a.bc.i1 = bitcast <2 x i32> %tmp84 to <2 x float>
|
||||
%tmp3 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %a.bc.i1, <8 x i32> %tmp85, <4 x i32> %tmp28, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp3 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %tmp83, float %tmp84, <8 x i32> %tmp85, <4 x i32> %tmp28, i1 0, i32 0, i32 0)
|
||||
br label %bb71
|
||||
|
||||
bb71: ; preds = %bb80, %bb38
|
||||
@ -387,7 +376,7 @@ bb:
|
||||
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
|
||||
%tmp7 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(4)* %arg, i32 0, i32 %tid
|
||||
%tmp8 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp7, align 32, !tbaa !0
|
||||
%tmp = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> <float 7.500000e-01, float 2.500000e-01>, <8 x i32> %tmp8, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 7.500000e-01, float 2.500000e-01, <8 x i32> %tmp8, <4 x i32> undef, i1 0, i32 0, i32 0)
|
||||
%tmp10 = extractelement <4 x float> %tmp, i32 0
|
||||
%tmp12 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float undef, float %tmp10)
|
||||
call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp12, <2 x half> undef, i1 true, i1 true) #0
|
||||
@ -402,7 +391,7 @@ bb:
|
||||
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
|
||||
%tmp7 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(4)* %arg, i32 0, i32 %tid
|
||||
%tmp8 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp7, align 16, !tbaa !0
|
||||
%tmp = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> <float 7.500000e-01, float 2.500000e-01>, <8 x i32> undef, <4 x i32> %tmp8, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 7.500000e-01, float 2.500000e-01, <8 x i32> undef, <4 x i32> %tmp8, i1 0, i32 0, i32 0)
|
||||
%tmp10 = extractelement <4 x float> %tmp, i32 0
|
||||
%tmp12 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %tmp10, float undef)
|
||||
call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp12, <2 x half> undef, i1 true, i1 true) #0
|
||||
@ -419,7 +408,7 @@ declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
|
||||
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
|
||||
declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
|
||||
declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
|
||||
declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
|
@ -34,14 +34,9 @@ main_body:
|
||||
%j.f.i4 = bitcast i32 %j.i2 to float
|
||||
%p1.i5 = call float @llvm.amdgcn.interp.p1(float %i.f.i3, i32 1, i32 0, i32 %arg5) #1
|
||||
%p2.i6 = call float @llvm.amdgcn.interp.p2(float %p1.i5, float %j.f.i4, i32 1, i32 0, i32 %arg5) #1
|
||||
%tmp27 = bitcast float %p2.i to i32
|
||||
%tmp28 = bitcast float %p2.i6 to i32
|
||||
%tmp29 = insertelement <2 x i32> undef, i32 %tmp27, i32 0
|
||||
%tmp30 = insertelement <2 x i32> %tmp29, i32 %tmp28, i32 1
|
||||
%tmp22.bc = bitcast <32 x i8> %tmp22 to <8 x i32>
|
||||
%tmp24.bc = bitcast <16 x i8> %tmp24 to <4 x i32>
|
||||
%tmp30.bc = bitcast <2 x i32> %tmp30 to <2 x float>
|
||||
%tmp31 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %tmp30.bc, <8 x i32> %tmp22.bc, <4 x i32> %tmp24.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp31 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %p2.i, float %p2.i6, <8 x i32> %tmp22.bc, <4 x i32> %tmp24.bc, i1 0, i32 0, i32 0)
|
||||
|
||||
%tmp32 = extractelement <4 x float> %tmp31, i32 0
|
||||
%tmp33 = extractelement <4 x float> %tmp31, i32 1
|
||||
@ -57,7 +52,7 @@ declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1
|
||||
declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
|
||||
declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
|
||||
declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readnone }
|
||||
|
@ -324,14 +324,6 @@ main_body:
|
||||
%tmp213 = fmul float %tmp205, %tmp191
|
||||
%tmp214 = fmul float %tmp206, %tmp191
|
||||
%tmp215 = fmul float -1.000000e+00, %tmp191
|
||||
%tmp216 = bitcast float %tmp135 to i32
|
||||
%tmp217 = bitcast float %tmp181 to i32
|
||||
%tmp218 = bitcast float %tmp136 to i32
|
||||
%tmp219 = bitcast float %tmp182 to i32
|
||||
%tmp220 = insertelement <8 x i32> undef, i32 %tmp216, i32 0
|
||||
%tmp221 = insertelement <8 x i32> %tmp220, i32 %tmp217, i32 1
|
||||
%tmp222 = insertelement <8 x i32> %tmp221, i32 %tmp218, i32 2
|
||||
%tmp223 = insertelement <8 x i32> %tmp222, i32 %tmp219, i32 3
|
||||
br label %LOOP
|
||||
|
||||
LOOP: ; preds = %ENDIF, %main_body
|
||||
@ -358,14 +350,7 @@ IF: ; preds = %LOOP
|
||||
br label %LOOP65
|
||||
|
||||
ENDIF: ; preds = %LOOP
|
||||
%tmp237 = bitcast float %temp28.0 to i32
|
||||
%tmp238 = bitcast float %temp29.0 to i32
|
||||
%tmp239 = insertelement <8 x i32> %tmp223, i32 %tmp237, i32 4
|
||||
%tmp240 = insertelement <8 x i32> %tmp239, i32 %tmp238, i32 5
|
||||
%tmp241 = insertelement <8 x i32> %tmp240, i32 undef, i32 6
|
||||
%tmp242 = insertelement <8 x i32> %tmp241, i32 undef, i32 7
|
||||
%tmp242.bc = bitcast <8 x i32> %tmp242 to <8 x float>
|
||||
%tmp243 = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float> %tmp242.bc, <8 x i32> %tmp61, <4 x i32> %tmp63.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp243 = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %tmp135, float %tmp181, float %tmp136, float %tmp182, float %temp28.0, float %temp29.0, <8 x i32> %tmp61, <4 x i32> %tmp63.bc, i1 0, i32 0, i32 0)
|
||||
%tmp244 = extractelement <4 x float> %tmp243, i32 3
|
||||
%tmp245 = fcmp oge float %temp30.0, %tmp244
|
||||
%tmp246 = sext i1 %tmp245 to i32
|
||||
@ -396,65 +381,20 @@ LOOP65: ; preds = %ENDIF66, %IF
|
||||
br i1 %tmp262, label %IF67, label %ENDIF66
|
||||
|
||||
IF67: ; preds = %LOOP65
|
||||
%tmp263 = bitcast float %tmp135 to i32
|
||||
%tmp264 = bitcast float %tmp181 to i32
|
||||
%tmp265 = bitcast float %tmp136 to i32
|
||||
%tmp266 = bitcast float %tmp182 to i32
|
||||
%tmp267 = bitcast float %temp28.1 to i32
|
||||
%tmp268 = bitcast float %temp29.1 to i32
|
||||
%tmp269 = insertelement <8 x i32> undef, i32 %tmp263, i32 0
|
||||
%tmp270 = insertelement <8 x i32> %tmp269, i32 %tmp264, i32 1
|
||||
%tmp271 = insertelement <8 x i32> %tmp270, i32 %tmp265, i32 2
|
||||
%tmp272 = insertelement <8 x i32> %tmp271, i32 %tmp266, i32 3
|
||||
%tmp273 = insertelement <8 x i32> %tmp272, i32 %tmp267, i32 4
|
||||
%tmp274 = insertelement <8 x i32> %tmp273, i32 %tmp268, i32 5
|
||||
%tmp275 = insertelement <8 x i32> %tmp274, i32 undef, i32 6
|
||||
%tmp276 = insertelement <8 x i32> %tmp275, i32 undef, i32 7
|
||||
%tmp67.bc = bitcast <4 x i32> %tmp67 to <4 x i32>
|
||||
%tmp276.bc = bitcast <8 x i32> %tmp276 to <8 x float>
|
||||
%tmp277 = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float> %tmp276.bc, <8 x i32> %tmp65, <4 x i32> %tmp67.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp277 = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %tmp135, float %tmp181, float %tmp136, float %tmp182, float %temp28.1, float %temp29.1, <8 x i32> %tmp65, <4 x i32> %tmp67.bc, i1 0, i32 0, i32 0)
|
||||
%tmp278 = extractelement <4 x float> %tmp277, i32 0
|
||||
%tmp279 = extractelement <4 x float> %tmp277, i32 1
|
||||
%tmp280 = extractelement <4 x float> %tmp277, i32 2
|
||||
%tmp281 = extractelement <4 x float> %tmp277, i32 3
|
||||
%tmp282 = fmul float %tmp281, %tmp46
|
||||
%tmp283 = bitcast float %tmp135 to i32
|
||||
%tmp284 = bitcast float %tmp181 to i32
|
||||
%tmp285 = bitcast float %tmp136 to i32
|
||||
%tmp286 = bitcast float %tmp182 to i32
|
||||
%tmp287 = bitcast float %temp28.1 to i32
|
||||
%tmp288 = bitcast float %temp29.1 to i32
|
||||
%tmp289 = insertelement <8 x i32> undef, i32 %tmp283, i32 0
|
||||
%tmp290 = insertelement <8 x i32> %tmp289, i32 %tmp284, i32 1
|
||||
%tmp291 = insertelement <8 x i32> %tmp290, i32 %tmp285, i32 2
|
||||
%tmp292 = insertelement <8 x i32> %tmp291, i32 %tmp286, i32 3
|
||||
%tmp293 = insertelement <8 x i32> %tmp292, i32 %tmp287, i32 4
|
||||
%tmp294 = insertelement <8 x i32> %tmp293, i32 %tmp288, i32 5
|
||||
%tmp295 = insertelement <8 x i32> %tmp294, i32 undef, i32 6
|
||||
%tmp296 = insertelement <8 x i32> %tmp295, i32 undef, i32 7
|
||||
%tmp83.bc = bitcast <4 x i32> %tmp83 to <4 x i32>
|
||||
%tmp296.bc = bitcast <8 x i32> %tmp296 to <8 x float>
|
||||
%tmp297 = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float> %tmp296.bc, <8 x i32> %tmp81, <4 x i32> %tmp83.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp297 = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %tmp135, float %tmp181, float %tmp136, float %tmp182, float %temp28.1, float %temp29.1, <8 x i32> %tmp81, <4 x i32> %tmp83.bc, i1 0, i32 0, i32 0)
|
||||
%tmp298 = extractelement <4 x float> %tmp297, i32 0
|
||||
%tmp299 = extractelement <4 x float> %tmp297, i32 1
|
||||
%tmp300 = extractelement <4 x float> %tmp297, i32 2
|
||||
%tmp301 = bitcast float %tmp135 to i32
|
||||
%tmp302 = bitcast float %tmp181 to i32
|
||||
%tmp303 = bitcast float %tmp136 to i32
|
||||
%tmp304 = bitcast float %tmp182 to i32
|
||||
%tmp305 = bitcast float %temp28.1 to i32
|
||||
%tmp306 = bitcast float %temp29.1 to i32
|
||||
%tmp307 = insertelement <8 x i32> undef, i32 %tmp301, i32 0
|
||||
%tmp308 = insertelement <8 x i32> %tmp307, i32 %tmp302, i32 1
|
||||
%tmp309 = insertelement <8 x i32> %tmp308, i32 %tmp303, i32 2
|
||||
%tmp310 = insertelement <8 x i32> %tmp309, i32 %tmp304, i32 3
|
||||
%tmp311 = insertelement <8 x i32> %tmp310, i32 %tmp305, i32 4
|
||||
%tmp312 = insertelement <8 x i32> %tmp311, i32 %tmp306, i32 5
|
||||
%tmp313 = insertelement <8 x i32> %tmp312, i32 undef, i32 6
|
||||
%tmp314 = insertelement <8 x i32> %tmp313, i32 undef, i32 7
|
||||
%tmp79.bc = bitcast <4 x i32> %tmp79 to <4 x i32>
|
||||
%tmp314.bc = bitcast <8 x i32> %tmp314 to <8 x float>
|
||||
%tmp315 = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float> %tmp314.bc, <8 x i32> %tmp77, <4 x i32> %tmp79.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp315 = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %tmp135, float %tmp181, float %tmp136, float %tmp182, float %temp28.1, float %temp29.1, <8 x i32> %tmp77, <4 x i32> %tmp79.bc, i1 0, i32 0, i32 0)
|
||||
%tmp316 = extractelement <4 x float> %tmp315, i32 0
|
||||
%tmp317 = extractelement <4 x float> %tmp315, i32 1
|
||||
%tmp318 = extractelement <4 x float> %tmp315, i32 2
|
||||
@ -470,22 +410,7 @@ IF67: ; preds = %LOOP65
|
||||
%tmp328 = fadd float %tmp278, %tmp323
|
||||
%tmp329 = fadd float %tmp279, %tmp325
|
||||
%tmp330 = fadd float %tmp280, %tmp327
|
||||
%tmp331 = bitcast float %tmp135 to i32
|
||||
%tmp332 = bitcast float %tmp181 to i32
|
||||
%tmp333 = bitcast float %tmp136 to i32
|
||||
%tmp334 = bitcast float %tmp182 to i32
|
||||
%tmp335 = bitcast float %temp28.1 to i32
|
||||
%tmp336 = bitcast float %temp29.1 to i32
|
||||
%tmp337 = insertelement <8 x i32> undef, i32 %tmp331, i32 0
|
||||
%tmp338 = insertelement <8 x i32> %tmp337, i32 %tmp332, i32 1
|
||||
%tmp339 = insertelement <8 x i32> %tmp338, i32 %tmp333, i32 2
|
||||
%tmp340 = insertelement <8 x i32> %tmp339, i32 %tmp334, i32 3
|
||||
%tmp341 = insertelement <8 x i32> %tmp340, i32 %tmp335, i32 4
|
||||
%tmp342 = insertelement <8 x i32> %tmp341, i32 %tmp336, i32 5
|
||||
%tmp343 = insertelement <8 x i32> %tmp342, i32 undef, i32 6
|
||||
%tmp344 = insertelement <8 x i32> %tmp343, i32 undef, i32 7
|
||||
%tmp344.bc = bitcast <8 x i32> %tmp344 to <8 x float>
|
||||
%tmp345 = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float> %tmp344.bc, <8 x i32> %tmp61, <4 x i32> %tmp63.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp345 = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %tmp135, float %tmp181, float %tmp136, float %tmp182, float %temp28.1, float %temp29.1, <8 x i32> %tmp61, <4 x i32> %tmp63.bc, i1 0, i32 0, i32 0)
|
||||
%tmp346 = extractelement <4 x float> %tmp345, i32 0
|
||||
%tmp347 = extractelement <4 x float> %tmp345, i32 1
|
||||
%tmp348 = extractelement <4 x float> %tmp345, i32 2
|
||||
@ -501,23 +426,8 @@ IF67: ; preds = %LOOP65
|
||||
%tmp358 = fmul float %tmp349, %tmp357
|
||||
%tmp359 = fmul float %tmp350, %tmp357
|
||||
%tmp360 = fmul float %tmp351, %tmp357
|
||||
%tmp361 = bitcast float %tmp135 to i32
|
||||
%tmp362 = bitcast float %tmp181 to i32
|
||||
%tmp363 = bitcast float %tmp136 to i32
|
||||
%tmp364 = bitcast float %tmp182 to i32
|
||||
%tmp365 = bitcast float %temp28.1 to i32
|
||||
%tmp366 = bitcast float %temp29.1 to i32
|
||||
%tmp367 = insertelement <8 x i32> undef, i32 %tmp361, i32 0
|
||||
%tmp368 = insertelement <8 x i32> %tmp367, i32 %tmp362, i32 1
|
||||
%tmp369 = insertelement <8 x i32> %tmp368, i32 %tmp363, i32 2
|
||||
%tmp370 = insertelement <8 x i32> %tmp369, i32 %tmp364, i32 3
|
||||
%tmp371 = insertelement <8 x i32> %tmp370, i32 %tmp365, i32 4
|
||||
%tmp372 = insertelement <8 x i32> %tmp371, i32 %tmp366, i32 5
|
||||
%tmp373 = insertelement <8 x i32> %tmp372, i32 undef, i32 6
|
||||
%tmp374 = insertelement <8 x i32> %tmp373, i32 undef, i32 7
|
||||
%tmp71.bc = bitcast <4 x i32> %tmp71 to <4 x i32>
|
||||
%tmp374.bc = bitcast <8 x i32> %tmp374 to <8 x float>
|
||||
%tmp375 = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float> %tmp374.bc, <8 x i32> %tmp69, <4 x i32> %tmp71.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp375 = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %tmp135, float %tmp181, float %tmp136, float %tmp182, float %temp28.1, float %temp29.1, <8 x i32> %tmp69, <4 x i32> %tmp71.bc, i1 0, i32 0, i32 0)
|
||||
%tmp376 = extractelement <4 x float> %tmp375, i32 0
|
||||
%tmp377 = extractelement <4 x float> %tmp375, i32 1
|
||||
%tmp378 = extractelement <4 x float> %tmp375, i32 2
|
||||
@ -557,23 +467,8 @@ IF67: ; preds = %LOOP65
|
||||
%tmp412 = fadd float %tmp411, %tmp406
|
||||
%tmp413 = fmul float %tmp399, %p2.i24
|
||||
%tmp414 = fadd float %tmp413, %tmp408
|
||||
%tmp415 = bitcast float %tmp135 to i32
|
||||
%tmp416 = bitcast float %tmp181 to i32
|
||||
%tmp417 = bitcast float %tmp136 to i32
|
||||
%tmp418 = bitcast float %tmp182 to i32
|
||||
%tmp419 = bitcast float %temp28.1 to i32
|
||||
%tmp420 = bitcast float %temp29.1 to i32
|
||||
%tmp421 = insertelement <8 x i32> undef, i32 %tmp415, i32 0
|
||||
%tmp422 = insertelement <8 x i32> %tmp421, i32 %tmp416, i32 1
|
||||
%tmp423 = insertelement <8 x i32> %tmp422, i32 %tmp417, i32 2
|
||||
%tmp424 = insertelement <8 x i32> %tmp423, i32 %tmp418, i32 3
|
||||
%tmp425 = insertelement <8 x i32> %tmp424, i32 %tmp419, i32 4
|
||||
%tmp426 = insertelement <8 x i32> %tmp425, i32 %tmp420, i32 5
|
||||
%tmp427 = insertelement <8 x i32> %tmp426, i32 undef, i32 6
|
||||
%tmp428 = insertelement <8 x i32> %tmp427, i32 undef, i32 7
|
||||
%tmp87.bc = bitcast <4 x i32> %tmp87 to <4 x i32>
|
||||
%tmp428.bc = bitcast <8 x i32> %tmp428 to <8 x float>
|
||||
%tmp429 = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float> %tmp428.bc, <8 x i32> %tmp85, <4 x i32> %tmp87.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp429 = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %tmp135, float %tmp181, float %tmp136, float %tmp182, float %temp28.1, float %temp29.1, <8 x i32> %tmp85, <4 x i32> %tmp87.bc, i1 0, i32 0, i32 0)
|
||||
%tmp430 = extractelement <4 x float> %tmp429, i32 0
|
||||
%tmp431 = extractelement <4 x float> %tmp429, i32 1
|
||||
%tmp432 = extractelement <4 x float> %tmp429, i32 2
|
||||
@ -617,16 +512,8 @@ IF67: ; preds = %LOOP65
|
||||
%tmp460 = fadd float %tmp459, 1.500000e+00
|
||||
%tmp461 = fmul float %tmp454, %tmp458
|
||||
%tmp462 = fadd float %tmp461, 1.500000e+00
|
||||
%tmp463 = bitcast float %tmp462 to i32
|
||||
%tmp464 = bitcast float %tmp460 to i32
|
||||
%tmp465 = bitcast float %tmp456 to i32
|
||||
%tmp466 = insertelement <4 x i32> undef, i32 %tmp463, i32 0
|
||||
%tmp467 = insertelement <4 x i32> %tmp466, i32 %tmp464, i32 1
|
||||
%tmp468 = insertelement <4 x i32> %tmp467, i32 %tmp465, i32 2
|
||||
%tmp469 = insertelement <4 x i32> %tmp468, i32 undef, i32 3
|
||||
%tmp91.bc = bitcast <4 x i32> %tmp91 to <4 x i32>
|
||||
%tmp469.bc = bitcast <4 x i32> %tmp469 to <4 x float>
|
||||
%tmp470 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tmp469.bc, <8 x i32> %tmp89, <4 x i32> %tmp91.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
|
||||
%tmp470 = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32 15, float %tmp462, float %tmp460, float %tmp456, <8 x i32> %tmp89, <4 x i32> %tmp91.bc, i1 0, i32 0, i32 0) #0
|
||||
%tmp471 = extractelement <4 x float> %tmp470, i32 0
|
||||
%tmp472 = extractelement <4 x float> %tmp470, i32 1
|
||||
%tmp473 = extractelement <4 x float> %tmp470, i32 2
|
||||
@ -713,23 +600,8 @@ IF67: ; preds = %LOOP65
|
||||
%tmp554 = fadd float %tmp553, %tmp549
|
||||
%tmp555 = fmul float %tmp547, %tmp58
|
||||
%tmp556 = fadd float %tmp555, %tmp550
|
||||
%tmp557 = bitcast float %tmp135 to i32
|
||||
%tmp558 = bitcast float %tmp181 to i32
|
||||
%tmp559 = bitcast float %tmp136 to i32
|
||||
%tmp560 = bitcast float %tmp182 to i32
|
||||
%tmp561 = bitcast float %temp28.1 to i32
|
||||
%tmp562 = bitcast float %temp29.1 to i32
|
||||
%tmp563 = insertelement <8 x i32> undef, i32 %tmp557, i32 0
|
||||
%tmp564 = insertelement <8 x i32> %tmp563, i32 %tmp558, i32 1
|
||||
%tmp565 = insertelement <8 x i32> %tmp564, i32 %tmp559, i32 2
|
||||
%tmp566 = insertelement <8 x i32> %tmp565, i32 %tmp560, i32 3
|
||||
%tmp567 = insertelement <8 x i32> %tmp566, i32 %tmp561, i32 4
|
||||
%tmp568 = insertelement <8 x i32> %tmp567, i32 %tmp562, i32 5
|
||||
%tmp569 = insertelement <8 x i32> %tmp568, i32 undef, i32 6
|
||||
%tmp570 = insertelement <8 x i32> %tmp569, i32 undef, i32 7
|
||||
%tmp75.bc = bitcast <4 x i32> %tmp75 to <4 x i32>
|
||||
%tmp570.bc = bitcast <8 x i32> %tmp570 to <8 x float>
|
||||
%tmp571 = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float> %tmp570.bc, <8 x i32> %tmp73, <4 x i32> %tmp75.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp571 = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %tmp135, float %tmp181, float %tmp136, float %tmp182, float %temp28.1, float %temp29.1, <8 x i32> %tmp73, <4 x i32> %tmp75.bc, i1 0, i32 0, i32 0)
|
||||
%tmp572 = extractelement <4 x float> %tmp571, i32 0
|
||||
%tmp573 = extractelement <4 x float> %tmp571, i32 1
|
||||
%tmp574 = extractelement <4 x float> %tmp571, i32 2
|
||||
@ -745,14 +617,7 @@ IF67: ; preds = %LOOP65
|
||||
ret void
|
||||
|
||||
ENDIF66: ; preds = %LOOP65
|
||||
%tmp585 = bitcast float %temp28.1 to i32
|
||||
%tmp586 = bitcast float %temp29.1 to i32
|
||||
%tmp587 = insertelement <8 x i32> %tmp236, i32 %tmp585, i32 4
|
||||
%tmp588 = insertelement <8 x i32> %tmp587, i32 %tmp586, i32 5
|
||||
%tmp589 = insertelement <8 x i32> %tmp588, i32 undef, i32 6
|
||||
%tmp590 = insertelement <8 x i32> %tmp589, i32 undef, i32 7
|
||||
%tmp590.bc = bitcast <8 x i32> %tmp590 to <8 x float>
|
||||
%tmp591 = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float> %tmp590.bc, <8 x i32> %tmp61, <4 x i32> %tmp63.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp591 = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %tmp135, float %tmp181, float %tmp136, float %tmp182, float %temp28.1, float %temp29.1, <8 x i32> %tmp61, <4 x i32> %tmp63.bc, i1 0, i32 0, i32 0)
|
||||
%tmp592 = extractelement <4 x float> %tmp591, i32 3
|
||||
%tmp593 = fcmp oge float %temp30.1, %tmp592
|
||||
%tmp594 = sext i1 %tmp593 to i32
|
||||
@ -1140,13 +1005,8 @@ main_body:
|
||||
%tmp218 = fmul float %., %tmp53
|
||||
%tmp219 = fmul float %arg13, %tmp46
|
||||
%tmp220 = fmul float %tmp196, %tmp47
|
||||
%tmp221 = bitcast float %p2.i132 to i32
|
||||
%tmp222 = bitcast float %p2.i126 to i32
|
||||
%tmp223 = insertelement <2 x i32> undef, i32 %tmp221, i32 0
|
||||
%tmp224 = insertelement <2 x i32> %tmp223, i32 %tmp222, i32 1
|
||||
%tmp132.bc = bitcast <4 x i32> %tmp132 to <4 x i32>
|
||||
%tmp224.bc = bitcast <2 x i32> %tmp224 to <2 x float>
|
||||
%tmp225 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %tmp224.bc, <8 x i32> %tmp130, <4 x i32> %tmp132.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp225 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %p2.i132, float %p2.i126, <8 x i32> %tmp130, <4 x i32> %tmp132.bc, i1 0, i32 0, i32 0)
|
||||
%tmp226 = extractelement <4 x float> %tmp225, i32 0
|
||||
%tmp227 = extractelement <4 x float> %tmp225, i32 1
|
||||
%tmp228 = extractelement <4 x float> %tmp225, i32 2
|
||||
@ -1220,7 +1080,7 @@ LOOP: ; preds = %LOOP, %main_body
|
||||
%tmp281 = insertelement <4 x i32> %tmp280, i32 undef, i32 3
|
||||
%tmp148.bc = bitcast <4 x i32> %tmp148 to <4 x i32>
|
||||
%tmp281.bc = bitcast <4 x i32> %tmp281 to <4 x float>
|
||||
%tmp282 = call <4 x float> @llvm.amdgcn.image.sample.l.v4f32.v4f32.v8i32(<4 x float> %tmp281.bc, <8 x i32> %tmp146, <4 x i32> %tmp148.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp282 = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float %temp168.0, float %temp169.0, float 0.0, <8 x i32> %tmp146, <4 x i32> %tmp148.bc, i1 0, i32 0, i32 0)
|
||||
%tmp283 = extractelement <4 x float> %tmp282, i32 3
|
||||
%tmp284 = fadd float %temp168.0, %tmp273
|
||||
%tmp285 = fadd float %temp169.0, %tmp274
|
||||
@ -1279,13 +1139,8 @@ IF189: ; preds = %LOOP
|
||||
%tmp335 = fadd float %p2.i162, %tmp329
|
||||
%tmp336 = fadd float %p2.i156, %tmp331
|
||||
%tmp337 = fadd float %p2.i150, %tmp333
|
||||
%tmp338 = bitcast float %tmp334 to i32
|
||||
%tmp339 = bitcast float %tmp335 to i32
|
||||
%tmp340 = insertelement <2 x i32> undef, i32 %tmp338, i32 0
|
||||
%tmp341 = insertelement <2 x i32> %tmp340, i32 %tmp339, i32 1
|
||||
%tmp136.bc = bitcast <4 x i32> %tmp136 to <4 x i32>
|
||||
%a.bc.i = bitcast <2 x i32> %tmp341 to <2 x float>
|
||||
%tmp0 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %a.bc.i, <8 x i32> %tmp134, <4 x i32> %tmp136.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp0 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %tmp334, float %tmp335, <8 x i32> %tmp134, <4 x i32> %tmp136.bc, i1 0, i32 0, i32 0)
|
||||
%tmp343 = extractelement <4 x float> %tmp0, i32 0
|
||||
%tmp344 = extractelement <4 x float> %tmp0, i32 1
|
||||
%tmp345 = extractelement <4 x float> %tmp0, i32 2
|
||||
@ -1313,25 +1168,15 @@ IF189: ; preds = %LOOP
|
||||
%one.sub.ac.i30 = fmul float %one.sub.a.i29, %tmp353
|
||||
%mul.i31 = fmul float %tmp345, %tmp353
|
||||
%result.i32 = fadd float %mul.i31, %one.sub.ac.i30
|
||||
%tmp358 = bitcast float %tmp336 to i32
|
||||
%tmp359 = bitcast float %tmp337 to i32
|
||||
%tmp360 = insertelement <2 x i32> undef, i32 %tmp358, i32 0
|
||||
%tmp361 = insertelement <2 x i32> %tmp360, i32 %tmp359, i32 1
|
||||
%tmp152.bc = bitcast <4 x i32> %tmp152 to <4 x i32>
|
||||
%a.bc.i3 = bitcast <2 x i32> %tmp361 to <2 x float>
|
||||
%tmp1 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %a.bc.i3, <8 x i32> %tmp150, <4 x i32> %tmp152.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp1 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %tmp336, float %tmp337, <8 x i32> %tmp150, <4 x i32> %tmp152.bc, i1 0, i32 0, i32 0)
|
||||
%tmp363 = extractelement <4 x float> %tmp1, i32 2
|
||||
%tmp364 = fmul float %result.i40, %result.i
|
||||
%tmp365 = fmul float %result.i36, %result.i44
|
||||
%tmp366 = fmul float %result.i32, %result.i42
|
||||
%tmp367 = fmul float %tmp354, %tmp229
|
||||
%tmp368 = bitcast float %tmp310 to i32
|
||||
%tmp369 = bitcast float %tmp311 to i32
|
||||
%tmp370 = insertelement <2 x i32> undef, i32 %tmp368, i32 0
|
||||
%tmp371 = insertelement <2 x i32> %tmp370, i32 %tmp369, i32 1
|
||||
%tmp140.bc = bitcast <4 x i32> %tmp140 to <4 x i32>
|
||||
%a.bc.i2 = bitcast <2 x i32> %tmp371 to <2 x float>
|
||||
%tmp2 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %a.bc.i2, <8 x i32> %tmp138, <4 x i32> %tmp140.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp2 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %tmp310, float %tmp311, <8 x i32> %tmp138, <4 x i32> %tmp140.bc, i1 0, i32 0, i32 0)
|
||||
%tmp373 = extractelement <4 x float> %tmp2, i32 0
|
||||
%tmp374 = extractelement <4 x float> %tmp2, i32 1
|
||||
%tmp375 = extractelement <4 x float> %tmp2, i32 2
|
||||
@ -1343,13 +1188,8 @@ IF189: ; preds = %LOOP
|
||||
%tmp381 = icmp ne i32 %tmp380, 0
|
||||
%.224 = select i1 %tmp381, float %tmp374, float %tmp373
|
||||
%.225 = select i1 %tmp381, float %tmp376, float %tmp374
|
||||
%tmp382 = bitcast float %tmp320 to i32
|
||||
%tmp383 = bitcast float %tmp321 to i32
|
||||
%tmp384 = insertelement <2 x i32> undef, i32 %tmp382, i32 0
|
||||
%tmp385 = insertelement <2 x i32> %tmp384, i32 %tmp383, i32 1
|
||||
%tmp144.bc = bitcast <4 x i32> %tmp144 to <4 x i32>
|
||||
%a.bc.i1 = bitcast <2 x i32> %tmp385 to <2 x float>
|
||||
%tmp3 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %a.bc.i1, <8 x i32> %tmp142, <4 x i32> %tmp144.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp3 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %tmp320, float %tmp321, <8 x i32> %tmp142, <4 x i32> %tmp144.bc, i1 0, i32 0, i32 0)
|
||||
%tmp387 = extractelement <4 x float> %tmp3, i32 0
|
||||
%tmp388 = extractelement <4 x float> %tmp3, i32 1
|
||||
%tmp389 = extractelement <4 x float> %tmp3, i32 2
|
||||
@ -1442,13 +1282,8 @@ ENDIF197: ; preds = %IF198, %IF189
|
||||
%temp14.0 = phi float [ %tmp465, %IF198 ], [ %tmp457, %IF189 ]
|
||||
%temp13.0 = phi float [ %tmp464, %IF198 ], [ %tmp456, %IF189 ]
|
||||
%temp12.0 = phi float [ %tmp463, %IF198 ], [ %tmp455, %IF189 ]
|
||||
%tmp466 = bitcast float %tmp219 to i32
|
||||
%tmp467 = bitcast float %tmp220 to i32
|
||||
%tmp468 = insertelement <2 x i32> undef, i32 %tmp466, i32 0
|
||||
%tmp469 = insertelement <2 x i32> %tmp468, i32 %tmp467, i32 1
|
||||
%tmp160.bc = bitcast <4 x i32> %tmp160 to <4 x i32>
|
||||
%tmp469.bc = bitcast <2 x i32> %tmp469 to <2 x float>
|
||||
%tmp470 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %tmp469.bc, <8 x i32> %tmp158, <4 x i32> %tmp160.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp470 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %tmp219, float %tmp220, <8 x i32> %tmp158, <4 x i32> %tmp160.bc, i1 0, i32 0, i32 0)
|
||||
%tmp471 = extractelement <4 x float> %tmp470, i32 0
|
||||
%tmp472 = extractelement <4 x float> %tmp470, i32 1
|
||||
%tmp473 = extractelement <4 x float> %tmp470, i32 2
|
||||
@ -1461,13 +1296,8 @@ ENDIF197: ; preds = %IF198, %IF189
|
||||
%tmp480 = fadd float %tmp479, %tmp40
|
||||
%tmp481 = fmul float %tmp474, %tmp41
|
||||
%tmp482 = fadd float %tmp481, %tmp42
|
||||
%tmp483 = bitcast float %p2.i144 to i32
|
||||
%tmp484 = bitcast float %p2.i138 to i32
|
||||
%tmp485 = insertelement <2 x i32> undef, i32 %tmp483, i32 0
|
||||
%tmp486 = insertelement <2 x i32> %tmp485, i32 %tmp484, i32 1
|
||||
%tmp156.bc = bitcast <4 x i32> %tmp156 to <4 x i32>
|
||||
%tmp486.bc = bitcast <2 x i32> %tmp486 to <2 x float>
|
||||
%tmp487 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %tmp486.bc, <8 x i32> %tmp154, <4 x i32> %tmp156.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp487 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %p2.i144, float %p2.i138, <8 x i32> %tmp154, <4 x i32> %tmp156.bc, i1 0, i32 0, i32 0)
|
||||
%tmp488 = extractelement <4 x float> %tmp487, i32 0
|
||||
%tmp489 = extractelement <4 x float> %tmp487, i32 1
|
||||
%tmp490 = extractelement <4 x float> %tmp487, i32 2
|
||||
@ -1667,27 +1497,11 @@ ENDIF209: ; preds = %ELSE214, %ELSE211,
|
||||
%tmp651 = fadd float %tmp650, 1.000000e+00
|
||||
%max.0.i11 = call float @llvm.maxnum.f32(float %tmp651, float 0.000000e+00)
|
||||
%clamp.i12 = call float @llvm.minnum.f32(float %max.0.i11, float 1.000000e+00)
|
||||
%tmp653 = bitcast float %tmp642 to i32
|
||||
%tmp654 = bitcast float %tmp644 to i32
|
||||
%tmp655 = bitcast float 0.000000e+00 to i32
|
||||
%tmp656 = insertelement <4 x i32> undef, i32 %tmp653, i32 0
|
||||
%tmp657 = insertelement <4 x i32> %tmp656, i32 %tmp654, i32 1
|
||||
%tmp658 = insertelement <4 x i32> %tmp657, i32 %tmp655, i32 2
|
||||
%tmp659 = insertelement <4 x i32> %tmp658, i32 undef, i32 3
|
||||
%tmp128.bc = bitcast <4 x i32> %tmp128 to <4 x i32>
|
||||
%tmp659.bc = bitcast <4 x i32> %tmp659 to <4 x float>
|
||||
%tmp660 = call <4 x float> @llvm.amdgcn.image.sample.l.v4f32.v4f32.v8i32(<4 x float> %tmp659.bc, <8 x i32> %tmp126, <4 x i32> %tmp128.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp660 = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float %tmp642, float %tmp644, float 0.0, <8 x i32> %tmp126, <4 x i32> %tmp128.bc, i1 0, i32 0, i32 0)
|
||||
%tmp661 = extractelement <4 x float> %tmp660, i32 0
|
||||
%tmp662 = extractelement <4 x float> %tmp660, i32 1
|
||||
%tmp663 = bitcast float %tmp646 to i32
|
||||
%tmp664 = bitcast float %tmp648 to i32
|
||||
%tmp665 = bitcast float 0.000000e+00 to i32
|
||||
%tmp666 = insertelement <4 x i32> undef, i32 %tmp663, i32 0
|
||||
%tmp667 = insertelement <4 x i32> %tmp666, i32 %tmp664, i32 1
|
||||
%tmp668 = insertelement <4 x i32> %tmp667, i32 %tmp665, i32 2
|
||||
%tmp669 = insertelement <4 x i32> %tmp668, i32 undef, i32 3
|
||||
%tmp669.bc = bitcast <4 x i32> %tmp669 to <4 x float>
|
||||
%tmp670 = call <4 x float> @llvm.amdgcn.image.sample.l.v4f32.v4f32.v8i32(<4 x float> %tmp669.bc, <8 x i32> %tmp126, <4 x i32> %tmp128.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp670 = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float %tmp646, float %tmp648, float 0.0, <8 x i32> %tmp126, <4 x i32> %tmp128.bc, i1 0, i32 0, i32 0)
|
||||
%tmp671 = extractelement <4 x float> %tmp670, i32 0
|
||||
%tmp672 = extractelement <4 x float> %tmp670, i32 1
|
||||
%tmp673 = fsub float -0.000000e+00, %tmp662
|
||||
@ -1865,10 +1679,10 @@ declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
|
||||
declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #1
|
||||
declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
|
||||
declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.l.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
|
||||
declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
|
@ -355,7 +355,7 @@ bb7: ; preds = %bb4
|
||||
|
||||
; CHECK: [[END]]:
|
||||
; CHECK: s_endpgm
|
||||
define amdgpu_ps void @if_after_kill_block(float %arg, float %arg1, <4 x float> %arg2) #0 {
|
||||
define amdgpu_ps void @if_after_kill_block(float %arg, float %arg1, float %arg2, float %arg3) #0 {
|
||||
bb:
|
||||
%tmp = fcmp ult float %arg1, 0.000000e+00
|
||||
br i1 %tmp, label %bb3, label %bb4
|
||||
@ -365,7 +365,7 @@ bb3: ; preds = %bb
|
||||
br label %bb4
|
||||
|
||||
bb4: ; preds = %bb3, %bb
|
||||
%tmp5 = call <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v8i32(<4 x float> %arg2, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp5 = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 16, float %arg2, float %arg3, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
|
||||
%tmp6 = extractelement <4 x float> %tmp5, i32 0
|
||||
%tmp7 = fcmp une float %tmp6, 0.000000e+00
|
||||
br i1 %tmp7, label %bb8, label %bb9
|
||||
@ -378,7 +378,7 @@ bb9: ; preds = %bb4
|
||||
ret void
|
||||
}
|
||||
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare void @llvm.AMDGPU.kill(float) #0
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
|
@ -21,7 +21,7 @@ bb3: ; preds = %bb
|
||||
%tmp6 = sext i32 %tmp5 to i64
|
||||
%tmp7 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(4)* %arg, i64 0, i64 %tmp6
|
||||
%tmp8 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp7, align 32, !tbaa !0
|
||||
%tmp9 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> <float bitcast (i32 1061158912 to float), float bitcast (i32 1048576000 to float)>, <8 x i32> %tmp8, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp9 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float bitcast (i32 1061158912 to float), float bitcast (i32 1048576000 to float), <8 x i32> %tmp8, <4 x i32> undef, i1 0, i32 0, i32 0)
|
||||
%tmp10 = extractelement <4 x float> %tmp9, i32 0
|
||||
%tmp12 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %tmp10, float undef)
|
||||
call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp12, <2 x half> undef, i1 true, i1 true) #0
|
||||
@ -30,7 +30,7 @@ bb3: ; preds = %bb
|
||||
|
||||
declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
|
||||
declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
|
||||
declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
|
@ -65,7 +65,7 @@ bb7: ; preds = %bb6
|
||||
br label %bb4
|
||||
|
||||
bb9: ; preds = %bb2
|
||||
%tmp10 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp10 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
|
||||
%tmp11 = extractelement <4 x float> %tmp10, i32 1
|
||||
%tmp12 = extractelement <4 x float> %tmp10, i32 3
|
||||
br label %bb14
|
||||
@ -97,7 +97,7 @@ bb27: ; preds = %bb24
|
||||
|
||||
|
||||
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readonly }
|
||||
|
@ -34,9 +34,8 @@ bb:
|
||||
%tmp = load volatile i32, i32 addrspace(1)* undef, align 4
|
||||
%tmp1 = load volatile i32, i32 addrspace(1)* undef, align 4
|
||||
%tmp2 = insertelement <4 x i32> undef, i32 %tmp1, i32 0
|
||||
%tmp3 = insertelement <4 x i32> %tmp2, i32 %tmp1, i32 1
|
||||
%tmp3.cast = bitcast <4 x i32> %tmp3 to <4 x float>
|
||||
%tmp4 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tmp3.cast, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp3 = bitcast i32 %tmp1 to float
|
||||
%tmp4 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %tmp3, float %tmp3, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
|
||||
%tmp5 = extractelement <4 x float> %tmp4, i32 0
|
||||
%tmp6 = fmul float %tmp5, undef
|
||||
%tmp7 = fadd float %tmp6, %tmp6
|
||||
@ -84,7 +83,7 @@ define amdgpu_kernel void @partially_undef_copy() #0 {
|
||||
ret void
|
||||
}
|
||||
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readonly }
|
||||
|
@ -17,25 +17,20 @@ main_body:
|
||||
%j.f.i = bitcast i32 %j.i to float
|
||||
%p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 3, i32 4, i32 %arg6) #2
|
||||
%p2.i = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 3, i32 4, i32 %arg6) #2
|
||||
%tmp23 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp23 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float undef, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
|
||||
|
||||
%tmp24 = extractelement <4 x float> %tmp23, i32 3
|
||||
%tmp25 = fmul float %tmp24, %tmp24
|
||||
%tmp26 = fmul float %p2.i, %p2.i
|
||||
%tmp27 = fadd float %tmp26, %tmp26
|
||||
%tmp28 = bitcast float %tmp27 to i32
|
||||
%tmp29 = insertelement <4 x i32> undef, i32 %tmp28, i32 0
|
||||
%tmp30 = insertelement <4 x i32> %tmp29, i32 0, i32 1
|
||||
%tmp31 = insertelement <4 x i32> %tmp30, i32 undef, i32 2
|
||||
%tmp31.cast = bitcast <4 x i32> %tmp31 to <4 x float>
|
||||
%tmp32 = call <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v8i32(<4 x float> %tmp31.cast, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp32 = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float %tmp27, float 0.0, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
|
||||
%tmp33 = extractelement <4 x float> %tmp32, i32 0
|
||||
%tmp34 = fadd float %tmp33, %tmp33
|
||||
%tmp35 = fadd float %tmp34, %tmp34
|
||||
%tmp36 = fadd float %tmp35, %tmp35
|
||||
%tmp37 = fadd float %tmp36, %tmp36
|
||||
%tmp38 = fadd float %tmp37, %tmp37
|
||||
%tmp39 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp39 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float undef, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
|
||||
%tmp40 = extractelement <4 x float> %tmp39, i32 0
|
||||
%tmp41 = extractelement <4 x float> %tmp39, i32 1
|
||||
%tmp42 = extractelement <4 x float> %tmp39, i32 2
|
||||
@ -53,17 +48,12 @@ main_body:
|
||||
%tmp54 = insertelement <4 x i32> %tmp53, i32 %tmp51, i32 1
|
||||
%tmp55 = insertelement <4 x i32> %tmp54, i32 %tmp52, i32 2
|
||||
%tmp55.cast = bitcast <4 x i32> %tmp55 to <4 x float>
|
||||
%tmp56 = call <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v8i32(<4 x float> %tmp55.cast, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp56 = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float %tmp27, float %tmp48, float %tmp49, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
|
||||
%tmp57 = extractelement <4 x float> %tmp56, i32 0
|
||||
%tmp58 = fadd float %tmp38, %tmp57
|
||||
%tmp59 = fadd float %tmp46, %tmp46
|
||||
%tmp60 = fadd float %tmp47, %tmp47
|
||||
%tmp61 = bitcast float %tmp59 to i32
|
||||
%tmp62 = bitcast float %tmp60 to i32
|
||||
%tmp63 = insertelement <4 x i32> undef, i32 %tmp61, i32 1
|
||||
%tmp64 = insertelement <4 x i32> %tmp63, i32 %tmp62, i32 2
|
||||
%tmp64.cast = bitcast <4 x i32> %tmp64 to <4 x float>
|
||||
%tmp65 = call <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v8i32(<4 x float> %tmp64.cast, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp65 = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float undef, float %tmp59, float %tmp60, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
|
||||
%tmp66 = extractelement <4 x float> %tmp65, i32 0
|
||||
%tmp67 = fadd float %tmp58, %tmp66
|
||||
%tmp68 = fmul float %tmp67, 1.250000e-01
|
||||
@ -101,10 +91,7 @@ IF29: ; preds = %LOOP
|
||||
br label %ENDIF25
|
||||
|
||||
ENDIF28: ; preds = %LOOP
|
||||
%tmp85 = insertelement <4 x i32> %tmp72, i32 undef, i32 1
|
||||
%tmp86 = insertelement <4 x i32> %tmp85, i32 undef, i32 2
|
||||
%tmp86.cast = bitcast <4 x i32> %tmp86 to <4 x float>
|
||||
%tmp87 = call <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v8i32(<4 x float> %tmp86.cast, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp87 = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float %tmp27, float undef, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
|
||||
%tmp88 = extractelement <4 x float> %tmp87, i32 0
|
||||
%tmp89 = fadd float %tmp88, %tmp88
|
||||
br label %LOOP
|
||||
@ -114,9 +101,8 @@ declare float @llvm.minnum.f32(float, float) #1
|
||||
declare float @llvm.maxnum.f32(float, float) #1
|
||||
declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1
|
||||
declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
|
||||
|
||||
attributes #0 = { nounwind "InitialPSInputAddr"="36983" "target-cpu"="tonga" }
|
||||
attributes #1 = { nounwind readnone }
|
||||
|
@ -5,10 +5,10 @@
|
||||
;
|
||||
;CHECK-LABEL: {{^}}test1:
|
||||
;CHECK-NOT: s_wqm
|
||||
define amdgpu_ps <4 x float> @test1(<8 x i32> inreg %rsrc, <4 x i32> %c) {
|
||||
define amdgpu_ps <4 x float> @test1(<8 x i32> inreg %rsrc, i32 %c) {
|
||||
main_body:
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
|
||||
call void @llvm.amdgcn.image.store.v4f32.v4i32.v8i32(<4 x float> %tex, <4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %c, <8 x i32> %rsrc, i32 0, i32 0)
|
||||
call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %tex, i32 15, i32 %c, <8 x i32> %rsrc, i32 0, i32 0)
|
||||
ret <4 x float> %tex
|
||||
}
|
||||
|
||||
@ -30,11 +30,9 @@ main_body:
|
||||
%inst24 = extractelement <2 x float> %pos, i32 1
|
||||
%inst25 = tail call float @llvm.amdgcn.interp.p1(float %inst23, i32 0, i32 0, i32 %m0)
|
||||
%inst26 = tail call float @llvm.amdgcn.interp.p2(float %inst25, float %inst24, i32 0, i32 0, i32 %m0)
|
||||
%inst27 = insertelement <2 x float> undef, float %inst26, i32 0
|
||||
%inst28 = tail call float @llvm.amdgcn.interp.p1(float %inst23, i32 1, i32 0, i32 %m0)
|
||||
%inst29 = tail call float @llvm.amdgcn.interp.p2(float %inst28, float %inst24, i32 1, i32 0, i32 %m0)
|
||||
%inst30 = insertelement <2 x float> %inst27, float %inst29, i32 1
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %inst30, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %inst26, float %inst29, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
|
||||
ret <4 x float> %tex
|
||||
}
|
||||
|
||||
@ -49,9 +47,9 @@ main_body:
|
||||
;CHECK: store
|
||||
;CHECK-NOT: exec
|
||||
;CHECK: .size test3
|
||||
define amdgpu_ps <4 x float> @test3(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, <4 x float> %c) {
|
||||
define amdgpu_ps <4 x float> @test3(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float %c) {
|
||||
main_body:
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %c, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
|
||||
%tex.1 = bitcast <4 x float> %tex to <4 x i32>
|
||||
%tex.2 = extractelement <4 x i32> %tex.1, i32 0
|
||||
|
||||
@ -77,11 +75,9 @@ main_body:
|
||||
%inst24 = extractelement <2 x float> %pos, i32 1
|
||||
%inst25 = tail call float @llvm.amdgcn.interp.p1(float %inst23, i32 0, i32 0, i32 %m0)
|
||||
%inst26 = tail call float @llvm.amdgcn.interp.p2(float %inst25, float %inst24, i32 0, i32 0, i32 %m0)
|
||||
%inst27 = insertelement <2 x float> undef, float %inst26, i32 0
|
||||
%inst28 = tail call float @llvm.amdgcn.interp.p1(float %inst23, i32 1, i32 0, i32 %m0)
|
||||
%inst29 = tail call float @llvm.amdgcn.interp.p2(float %inst28, float %inst24, i32 1, i32 0, i32 %m0)
|
||||
%inst30 = insertelement <2 x float> %inst27, float %inst29, i32 1
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %inst30, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %inst26, float %inst29, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
|
||||
%tex.0 = extractelement <4 x float> %tex, i32 0
|
||||
%tex.1 = extractelement <4 x float> %tex, i32 1
|
||||
%tex.2 = extractelement <4 x float> %tex, i32 2
|
||||
@ -108,8 +104,9 @@ main_body:
|
||||
|
||||
call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> undef, <4 x i32> undef, i32 %c.1, i32 0, i1 0, i1 0)
|
||||
%c.1.bc = bitcast i32 %c.1 to float
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %c.1.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
|
||||
%dtex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tex, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c.1.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
|
||||
%tex0 = extractelement <4 x float> %tex, i32 0
|
||||
%dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
|
||||
ret <4 x float> %dtex
|
||||
}
|
||||
|
||||
@ -361,8 +358,9 @@ main_body:
|
||||
|
||||
IF:
|
||||
%c.bc = bitcast i32 %c to float
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %c.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
|
||||
%dtex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tex, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
|
||||
%tex0 = extractelement <4 x float> %tex, i32 0
|
||||
%dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
|
||||
%data.if = extractelement <4 x float> %dtex, i32 0
|
||||
br label %END
|
||||
|
||||
@ -403,8 +401,9 @@ main_body:
|
||||
|
||||
IF:
|
||||
%c.bc = bitcast i32 %c to float
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %c.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
|
||||
%dtex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tex, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
|
||||
%tex0 = extractelement <4 x float> %tex, i32 0
|
||||
%dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
|
||||
%data.if = extractelement <4 x float> %dtex, i32 0
|
||||
br label %END
|
||||
|
||||
@ -460,7 +459,7 @@ ELSE:
|
||||
END:
|
||||
%coord.END = phi i32 [ %coord.IF, %IF ], [ %coord.ELSE, %ELSE ]
|
||||
%coord.END.bc = bitcast i32 %coord.END to float
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %coord.END.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord.END.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
|
||||
ret <4 x float> %tex
|
||||
}
|
||||
|
||||
@ -477,8 +476,9 @@ END:
|
||||
;CHECK-DAG: store
|
||||
define amdgpu_ps float @test_control_flow_3(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, i32 %idx, float %coord) {
|
||||
main_body:
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
|
||||
%dtex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tex, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
|
||||
%tex0 = extractelement <4 x float> %tex, i32 0
|
||||
%dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
|
||||
%dtex.1 = extractelement <4 x float> %dtex, i32 0
|
||||
call void @llvm.amdgcn.buffer.store.f32(float %dtex.1, <4 x i32> undef, i32 %idx, i32 0, i1 0, i1 0)
|
||||
|
||||
@ -523,8 +523,9 @@ IF:
|
||||
br label %END
|
||||
|
||||
END:
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
|
||||
%dtex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tex, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
|
||||
%tex0 = extractelement <4 x float> %tex, i32 0
|
||||
%dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
|
||||
ret <4 x float> %dtex
|
||||
}
|
||||
|
||||
@ -545,7 +546,7 @@ END:
|
||||
;CHECK: image_sample
|
||||
define amdgpu_ps <4 x float> @test_kill_0(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, <2 x i32> %idx, <2 x float> %data, float %coord, float %coord2, float %z) {
|
||||
main_body:
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
|
||||
%idx.0 = extractelement <2 x i32> %idx, i32 0
|
||||
%data.0 = extractelement <2 x float> %data, i32 0
|
||||
call void @llvm.amdgcn.buffer.store.f32(float %data.0, <4 x i32> undef, i32 %idx.0, i32 0, i1 0, i1 0)
|
||||
@ -555,8 +556,9 @@ main_body:
|
||||
%idx.1 = extractelement <2 x i32> %idx, i32 1
|
||||
%data.1 = extractelement <2 x float> %data, i32 1
|
||||
call void @llvm.amdgcn.buffer.store.f32(float %data.1, <4 x i32> undef, i32 %idx.1, i32 0, i1 0, i1 0)
|
||||
%tex2 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %coord2, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
|
||||
%dtex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tex2, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
|
||||
%tex2 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord2, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
|
||||
%tex2.0 = extractelement <4 x float> %tex2, i32 0
|
||||
%dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex2.0, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
|
||||
%out = fadd <4 x float> %tex, %dtex
|
||||
|
||||
ret <4 x float> %out
|
||||
@ -576,8 +578,9 @@ main_body:
|
||||
; CHECK: v_cmpx_
|
||||
define amdgpu_ps <4 x float> @test_kill_1(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, i32 %idx, float %data, float %coord, float %coord2, float %z) {
|
||||
main_body:
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
|
||||
%dtex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tex, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
|
||||
%tex0 = extractelement <4 x float> %tex, i32 0
|
||||
%dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
|
||||
|
||||
call void @llvm.amdgcn.buffer.store.f32(float %data, <4 x i32> undef, i32 0, i32 0, i1 0, i1 0)
|
||||
|
||||
@ -618,7 +621,7 @@ main_body:
|
||||
; CHECK: ; return
|
||||
define amdgpu_ps <4 x float> @test_loop_vcc(<4 x float> %in) nounwind {
|
||||
entry:
|
||||
call void @llvm.amdgcn.image.store.v4f32.v4i32.v8i32(<4 x float> %in, <4 x i32> undef, <8 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0)
|
||||
call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %in, i32 15, i32 undef, <8 x i32> undef, i32 0, i32 0)
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
@ -628,7 +631,8 @@ loop:
|
||||
br i1 %cc, label %break, label %body
|
||||
|
||||
body:
|
||||
%c.next = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %c.iv, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
|
||||
%c.iv0 = extractelement <4 x float> %c.iv, i32 0
|
||||
%c.next = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c.iv0, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) #0
|
||||
%ctr.next = fadd float %ctr.iv, 2.0
|
||||
br label %loop
|
||||
|
||||
@ -669,7 +673,7 @@ entry:
|
||||
%c.gep = getelementptr [32 x i32], [32 x i32] addrspace(5)* %array, i32 0, i32 %idx
|
||||
%c = load i32, i32 addrspace(5)* %c.gep, align 4
|
||||
%c.bc = bitcast i32 %c to float
|
||||
%t = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %c.bc, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
|
||||
%t = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c.bc, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) #0
|
||||
call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %t, <4 x i32> undef, i32 0, i32 0, i1 0, i1 0)
|
||||
|
||||
ret void
|
||||
@ -687,8 +691,9 @@ entry:
|
||||
; CHECK: s_and_b64 exec, exec, [[LIVE]]
|
||||
; CHECK-NOT: exec
|
||||
define amdgpu_ps <4 x float> @test_nonvoid_return() nounwind {
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
|
||||
%dtex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tex, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) #0
|
||||
%tex0 = extractelement <4 x float> %tex, i32 0
|
||||
%dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) #0
|
||||
ret <4 x float> %dtex
|
||||
}
|
||||
|
||||
@ -700,8 +705,9 @@ define amdgpu_ps <4 x float> @test_nonvoid_return() nounwind {
|
||||
; CHECK-NOT: exec
|
||||
define amdgpu_ps <4 x float> @test_nonvoid_return_unreachable(i32 inreg %c) nounwind {
|
||||
entry:
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
|
||||
%dtex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tex, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) #0
|
||||
%tex0 = extractelement <4 x float> %tex, i32 0
|
||||
%dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) #0
|
||||
%cc = icmp sgt i32 %c, 0
|
||||
br i1 %cc, label %if, label %else
|
||||
|
||||
@ -733,11 +739,11 @@ main_body:
|
||||
br i1 %cc, label %if, label %else
|
||||
|
||||
if:
|
||||
%r.if = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float 0.0, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
|
||||
%r.if = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float 0.0, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) #0
|
||||
br label %end
|
||||
|
||||
else:
|
||||
%r.else = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> <float 0.0, float bitcast (i32 1 to float)>, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
|
||||
%r.else = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 0.0, float bitcast (i32 1 to float), <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) #0
|
||||
br label %end
|
||||
|
||||
end:
|
||||
@ -757,8 +763,9 @@ end:
|
||||
define amdgpu_ps float @test_wwm_within_wqm(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, i32 %c, i32 %z, float %data) {
|
||||
main_body:
|
||||
%c.bc = bitcast i32 %c to float
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %c.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
|
||||
%dtex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tex, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
|
||||
%tex0 = extractelement <4 x float> %tex, i32 0
|
||||
%dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
|
||||
%cmp = icmp eq i32 %z, 0
|
||||
br i1 %cmp, label %IF, label %ENDIF
|
||||
|
||||
@ -777,14 +784,13 @@ ENDIF:
|
||||
}
|
||||
|
||||
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
|
||||
declare void @llvm.amdgcn.image.store.v4f32.v4i32.v8i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
|
||||
declare void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float>, i32, i32, <8 x i32>, i32, i32) #1
|
||||
declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) #2
|
||||
declare void @llvm.amdgcn.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) #2
|
||||
declare <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #3
|
||||
declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #3
|
||||
declare float @llvm.amdgcn.buffer.load.f32(<4 x i32>, i32, i32, i1, i1) #3
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #3
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #3
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #3
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #3
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #3
|
||||
declare void @llvm.AMDGPU.kill(float) #1
|
||||
declare float @llvm.amdgcn.wqm.f32(float) #3
|
||||
declare i32 @llvm.amdgcn.wqm.i32(i32) #3
|
||||
|
Loading…
Reference in New Issue
Block a user