AMDGPU: Convert test cases to the dimension-aware intrinsics

Summary:
Also explicitly port over some tests in llvm.amdgcn.image.* that were
missing. Some tests are removed because they no longer apply (i.e.
explicitly testing building an address vector via insertelement).

This is in preparation for the eventual removal of the old-style
intrinsics.

Some additional notes:
- constant-address-space-32bit.ll: change some GCN-NEXT to GCN because
  the instruction schedule was subtly altered
- insert_vector_elt.ll: the old test didn't actually test anything,
  because %tmp1 was not used; remove the load, because it doesn't work
  (Because of the amdgpu_ps calling convention? In any case, it's
  orthogonal to what the test claims to be testing.)

Change-Id: Idfa99b6512ad139e755e82b8b89548ab08f0afcf

Reviewers: arsenm, rampitec

Subscribers: MatzeB, qcolombet, kzhuravl, wdng, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits

Differential Revision: https://reviews.llvm.org/D48018

llvm-svn: 335229
This commit is contained in:
Nicolai Haehnle 2018-06-21 13:37:19 +00:00
parent d9213dadcb
commit f58aee7cea
25 changed files with 811 additions and 400 deletions

View File

@ -1,105 +1,105 @@
;RUN: opt -mtriple=amdgcn-mesa-mesa3d -analyze -divergence %s | FileCheck %s
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.swap.i32(
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(
define float @image_atomic_swap(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
main_body:
%orig = call i32 @llvm.amdgcn.image.atomic.swap.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
%orig = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
%r = bitcast i32 %orig to float
ret float %r
}
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.add.i32(
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(
define float @image_atomic_add(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
main_body:
%orig = call i32 @llvm.amdgcn.image.atomic.add.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
%orig = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
%r = bitcast i32 %orig to float
ret float %r
}
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.sub.i32(
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(
define float @image_atomic_sub(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
main_body:
%orig = call i32 @llvm.amdgcn.image.atomic.sub.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
%orig = call i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
%r = bitcast i32 %orig to float
ret float %r
}
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.smin.i32(
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(
define float @image_atomic_smin(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
main_body:
%orig = call i32 @llvm.amdgcn.image.atomic.smin.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
%orig = call i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
%r = bitcast i32 %orig to float
ret float %r
}
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.umin.i32(
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(
define float @image_atomic_umin(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
main_body:
%orig = call i32 @llvm.amdgcn.image.atomic.umin.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
%orig = call i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
%r = bitcast i32 %orig to float
ret float %r
}
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.smax.i32(
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(
define float @image_atomic_smax(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
main_body:
%orig = call i32 @llvm.amdgcn.image.atomic.smax.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
%orig = call i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
%r = bitcast i32 %orig to float
ret float %r
}
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.umax.i32(
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(
define float @image_atomic_umax(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
main_body:
%orig = call i32 @llvm.amdgcn.image.atomic.umax.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
%orig = call i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
%r = bitcast i32 %orig to float
ret float %r
}
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.and.i32(
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(
define float @image_atomic_and(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
main_body:
%orig = call i32 @llvm.amdgcn.image.atomic.and.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
%orig = call i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
%r = bitcast i32 %orig to float
ret float %r
}
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.or.i32(
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(
define float @image_atomic_or(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
main_body:
%orig = call i32 @llvm.amdgcn.image.atomic.or.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
%orig = call i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
%r = bitcast i32 %orig to float
ret float %r
}
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.xor.i32(
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(
define float @image_atomic_xor(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
main_body:
%orig = call i32 @llvm.amdgcn.image.atomic.xor.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
%orig = call i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
%r = bitcast i32 %orig to float
ret float %r
}
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.inc.i32(
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(
define float @image_atomic_inc(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
main_body:
%orig = call i32 @llvm.amdgcn.image.atomic.inc.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
%orig = call i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
%r = bitcast i32 %orig to float
ret float %r
}
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.dec.i32(
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(
define float @image_atomic_dec(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
main_body:
%orig = call i32 @llvm.amdgcn.image.atomic.dec.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
%orig = call i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
%r = bitcast i32 %orig to float
ret float %r
}
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.cmpswap.i32(
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(
define float @image_atomic_cmpswap(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data, i32 inreg %cmp) #0 {
main_body:
%orig = call i32 @llvm.amdgcn.image.atomic.cmpswap.i32(i32 %data, i32 %cmp, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
%orig = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %data, i32 %cmp, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
%r = bitcast i32 %orig to float
ret float %r
}
@ -112,19 +112,19 @@ main_body:
ret float %r
}
declare i32 @llvm.amdgcn.image.atomic.swap.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
declare i32 @llvm.amdgcn.image.atomic.add.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
declare i32 @llvm.amdgcn.image.atomic.sub.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
declare i32 @llvm.amdgcn.image.atomic.smin.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
declare i32 @llvm.amdgcn.image.atomic.umin.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
declare i32 @llvm.amdgcn.image.atomic.smax.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
declare i32 @llvm.amdgcn.image.atomic.umax.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
declare i32 @llvm.amdgcn.image.atomic.and.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
declare i32 @llvm.amdgcn.image.atomic.or.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
declare i32 @llvm.amdgcn.image.atomic.xor.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
declare i32 @llvm.amdgcn.image.atomic.inc.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
declare i32 @llvm.amdgcn.image.atomic.dec.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
declare i32 @llvm.amdgcn.image.atomic.cmpswap.i32(i32, i32, i32, <8 x i32>,i1, i1, i1) #0
declare i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
declare i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
declare i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
declare i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
declare i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
declare i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
declare i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
declare i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
declare i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
declare i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
declare i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
declare i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #0
declare i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #0

View File

@ -1,13 +1,13 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; GCN-LABEL: {{^}}adjust_writemask_crash_0_nochain:
; GCN: image_get_lod v0, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x2
; GCN: image_get_lod v0, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x2
; GCN-NOT: v1
; GCN-NOT: v0
; GCN: buffer_store_dword v0
define amdgpu_ps void @adjust_writemask_crash_0_nochain() #0 {
main_body:
%tmp = call <2 x float> @llvm.amdgcn.image.getlod.v2f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
%tmp1 = bitcast <2 x float> %tmp to <2 x i32>
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
%tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
@ -17,13 +17,13 @@ main_body:
}
; GCN-LABEL: {{^}}adjust_writemask_crash_1_nochain:
; GCN: image_get_lod v0, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x1
; GCN: image_get_lod v0, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x1
; GCN-NOT: v1
; GCN-NOT: v0
; GCN: buffer_store_dword v0
define amdgpu_ps void @adjust_writemask_crash_1_nochain() #0 {
main_body:
%tmp = call <2 x float> @llvm.amdgcn.image.getlod.v2f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
%tmp1 = bitcast <2 x float> %tmp to <2 x i32>
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> <i32 1, i32 0, i32 undef, i32 undef>
%tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
@ -33,13 +33,13 @@ main_body:
}
; GCN-LABEL: {{^}}adjust_writemask_crash_0_chain:
; GCN: image_sample v0, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x2
; GCN: image_sample v0, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x2
; GCN-NOT: v1
; GCN-NOT: v0
; GCN: buffer_store_dword v0
define amdgpu_ps void @adjust_writemask_crash_0_chain() #0 {
main_body:
%tmp = call <2 x float> @llvm.amdgcn.image.sample.v2f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
%tmp1 = bitcast <2 x float> %tmp to <2 x i32>
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
%tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
@ -49,13 +49,13 @@ main_body:
}
; GCN-LABEL: {{^}}adjust_writemask_crash_1_chain:
; GCN: image_sample v0, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x1
; GCN: image_sample v0, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x1
; GCN-NOT: v1
; GCN-NOT: v0
; GCN: buffer_store_dword v0
define amdgpu_ps void @adjust_writemask_crash_1_chain() #0 {
main_body:
%tmp = call <2 x float> @llvm.amdgcn.image.sample.v2f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
%tmp1 = bitcast <2 x float> %tmp to <2 x i32>
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> <i32 1, i32 0, i32 undef, i32 undef>
%tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
@ -66,7 +66,7 @@ main_body:
define amdgpu_ps void @adjust_writemask_crash_0_v4() #0 {
main_body:
%tmp = call <4 x float> @llvm.amdgcn.image.getlod.v4f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 5, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp = call <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32 5, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
%tmp1 = bitcast <4 x float> %tmp to <4 x i32>
%tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
%tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
@ -76,9 +76,9 @@ main_body:
}
declare <2 x float> @llvm.amdgcn.image.sample.v2f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
declare <2 x float> @llvm.amdgcn.image.getlod.v2f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
declare <4 x float> @llvm.amdgcn.image.getlod.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
declare <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }

View File

@ -7,7 +7,7 @@
define amdgpu_ps float @main(float %arg0, float %arg1) #0 {
bb:
%tmp = fptosi float %arg0 to i32
%tmp1 = call <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32> undef, <8 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false)
%tmp1 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 undef, <8 x i32> undef, i32 0, i32 0)
%tmp2.f = extractelement <4 x float> %tmp1, i32 0
%tmp2 = bitcast float %tmp2.f to i32
%tmp3 = and i32 %tmp, 7
@ -21,7 +21,7 @@ bb:
}
declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
declare <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #2
declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }

View File

@ -204,7 +204,7 @@ define amdgpu_vs i32 @load_i32_hifffffff0(i32 addrspace(6)* inreg %p) #4 {
; GCN: v_readfirstlane_b32
; GCN-NEXT: v_readfirstlane_b32
; SI: s_nop
; GCN-NEXT: s_load_dwordx8
; GCN: s_load_dwordx8
; GCN-NEXT: s_load_dwordx4
; GCN: image_sample
define amdgpu_ps <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @load_sampler([0 x <4 x i32>] addrspace(6)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(6)* inreg noalias dereferenceable(18446744073709551615), [0 x <4 x i32>] addrspace(6)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(6)* inreg noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #5 {
@ -219,7 +219,7 @@ main_body:
%29 = bitcast [0 x <8 x i32>] addrspace(6)* %1 to [0 x <4 x i32>] addrspace(6)*
%30 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(6)* %29, i32 0, i32 %28, !amdgpu.uniform !0
%31 = load <4 x i32>, <4 x i32> addrspace(6)* %30, align 16, !invariant.load !0
%32 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> zeroinitializer, <8 x i32> %26, <4 x i32> %31, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #8
%32 = call nsz <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float 0.0, <8 x i32> %26, <4 x i32> %31, i1 0, i32 0, i32 0) #8
%33 = extractelement <4 x float> %32, i32 0
%34 = extractelement <4 x float> %32, i32 1
%35 = extractelement <4 x float> %32, i32 2
@ -238,7 +238,7 @@ main_body:
; GCN: v_readfirstlane_b32
; GCN-NEXT: v_readfirstlane_b32
; SI: s_nop
; GCN-NEXT: s_load_dwordx8
; GCN: s_load_dwordx8
; GCN-NEXT: s_load_dwordx4
; GCN: image_sample
define amdgpu_ps <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @load_sampler_nouniform([0 x <4 x i32>] addrspace(6)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(6)* inreg noalias dereferenceable(18446744073709551615), [0 x <4 x i32>] addrspace(6)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(6)* inreg noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #5 {
@ -253,7 +253,7 @@ main_body:
%29 = bitcast [0 x <8 x i32>] addrspace(6)* %1 to [0 x <4 x i32>] addrspace(6)*
%30 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(6)* %29, i32 0, i32 %28
%31 = load <4 x i32>, <4 x i32> addrspace(6)* %30, align 16, !invariant.load !0
%32 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> zeroinitializer, <8 x i32> %26, <4 x i32> %31, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #8
%32 = call nsz <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float 0.0, <8 x i32> %26, <4 x i32> %31, i1 0, i32 0, i32 0) #8
%33 = extractelement <4 x float> %32, i32 0
%34 = extractelement <4 x float> %32, i32 1
%35 = extractelement <4 x float> %32, i32 2
@ -272,7 +272,7 @@ main_body:
declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #6
; Function Attrs: nounwind readonly
declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #7
declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #7
!0 = !{}

View File

@ -44,7 +44,7 @@ if:
else:
%c = fmul float %v, 3.0
%tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %c, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
%v.else = extractelement <4 x float> %tex, i32 0
br label %end
@ -55,7 +55,7 @@ end:
}
declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) #1
declare <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
attributes #0 = { nounwind }
attributes #1 = { nounwind writeonly }

View File

@ -25,17 +25,18 @@ define dllexport amdgpu_cs void @_amdgpu_cs_main(i32 inreg %arg, i32 inreg %arg1
%tmp10 = getelementptr [4294967295 x i8], [4294967295 x i8] addrspace(4)* %tmp8, i64 0, i64 32
%tmp11 = bitcast i8 addrspace(4)* %tmp10 to <8 x i32> addrspace(4)*, !amdgpu.uniform !0
%tmp12 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp11, align 16
%tmp13 = shufflevector <3 x i32> %tmp9, <3 x i32> undef, <2 x i32> <i32 0, i32 1>
%tmp14 = call <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32> %tmp13, <8 x i32> %tmp12, i32 15, i1 false, i1 false, i1 false, i1 false) #0
%tmp13.0 = extractelement <3 x i32> %tmp9, i32 0
%tmp13.1 = extractelement <3 x i32> %tmp9, i32 1
%tmp14 = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 %tmp13.0, i32 %tmp13.1, <8 x i32> %tmp12, i32 0, i32 0) #0
%tmp15 = inttoptr i64 %tmp7 to <8 x i32> addrspace(4)*
%tmp16 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp15, align 16
call void @llvm.amdgcn.image.store.v4f32.v2i32.v8i32(<4 x float> %tmp14, <2 x i32> %tmp13, <8 x i32> %tmp16, i32 15, i1 false, i1 false, i1 false, i1 false) #0
call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> %tmp14, i32 15, i32 %tmp13.0, i32 %tmp13.1, <8 x i32> %tmp16, i32 0, i32 0) #0
%tmp17 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp15, align 16
%tmp18 = call <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32> %tmp13, <8 x i32> %tmp17, i32 15, i1 false, i1 false, i1 false, i1 false) #0
%tmp18 = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 165, i32 %tmp13.0, i32 %tmp13.1, <8 x i32> %tmp17, i32 0, i32 0) #0
%tmp19 = getelementptr [4294967295 x i8], [4294967295 x i8] addrspace(4)* %tmp8, i64 0, i64 64
%tmp20 = bitcast i8 addrspace(4)* %tmp19 to <8 x i32> addrspace(4)*, !amdgpu.uniform !0
%tmp21 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp20, align 16
call void @llvm.amdgcn.image.store.v4f32.v2i32.v8i32(<4 x float> %tmp18, <2 x i32> %tmp13, <8 x i32> %tmp21, i32 15, i1 false, i1 false, i1 false, i1 false) #0
call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> %tmp18, i32 15, i32 %tmp13.0, i32 %tmp13.1, <8 x i32> %tmp21, i32 0, i32 0) #0
ret void
}
@ -43,10 +44,10 @@ define dllexport amdgpu_cs void @_amdgpu_cs_main(i32 inreg %arg, i32 inreg %arg1
declare i64 @llvm.amdgcn.s.getpc() #1
; Function Attrs: nounwind readonly
declare <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #2
declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #2
; Function Attrs: nounwind writeonly
declare void @llvm.amdgcn.image.store.v4f32.v2i32.v8i32(<4 x float>, <2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #3
declare void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #3
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone speculatable }

View File

@ -75,10 +75,9 @@ define amdgpu_kernel void @insertelement_v3f32_3(<3 x float> addrspace(1)* %out,
; GCN-LABEL: {{^}}insertelement_to_sgpr:
; GCN-NOT: v_readfirstlane
define amdgpu_ps <4 x float> @insertelement_to_sgpr() nounwind {
%tmp = load <4 x i32>, <4 x i32> addrspace(2)* undef
%tmp1 = insertelement <4 x i32> %tmp, i32 0, i32 0
%tmp2 = call <4 x float> @llvm.amdgcn.image.gather4.lz.v4f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 false, i1 false, i1 false, i1 false, i1 true)
define amdgpu_ps <4 x float> @insertelement_to_sgpr(<4 x i32> inreg %samp) nounwind {
%tmp1 = insertelement <4 x i32> %samp, i32 0, i32 0
%tmp2 = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32 1, float undef, float undef, <8 x i32> undef, <4 x i32> %tmp1, i1 0, i32 0, i32 0)
ret <4 x float> %tmp2
}
@ -474,7 +473,7 @@ define amdgpu_kernel void @dynamic_insertelement_v8f64(<8 x double> addrspace(1)
ret void
}
declare <4 x float> @llvm.amdgcn.image.gather4.lz.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }

View File

@ -1,3 +1,4 @@
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; GCN-LABEL: {{^}}atomic_swap_1d:

View File

@ -1,6 +1,6 @@
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
; GCN-LABEL: {{^}}load_1d:
; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm{{$}}
@ -370,6 +370,46 @@ main_body:
ret void
}
; GCN-LABEL: {{^}}getresinfo_dmask0:
; GCN-NOT: image
; GCN: ; return to shader part epilog
define amdgpu_ps <4 x float> @getresinfo_dmask0(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %mip) #0 {
main_body:
%r = call <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32 0, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
ret <4 x float> %r
}
; Ideally, the register allocator would avoid the wait here
;
; GCN-LABEL: {{^}}image_store_wait:
; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm
; SI: s_waitcnt expcnt(0)
; GCN: image_load v[0:3], v4, s[8:15] dmask:0xf unorm
; GCN: s_waitcnt vmcnt(0)
; GCN: image_store v[0:3], v4, s[16:23] dmask:0xf unorm
define amdgpu_ps void @image_store_wait(<8 x i32> inreg %arg, <8 x i32> inreg %arg1, <8 x i32> inreg %arg2, <4 x float> %arg3, i32 %arg4) #0 {
main_body:
call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %arg3, i32 15, i32 %arg4, <8 x i32> %arg, i32 0, i32 0)
%data = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %arg4, <8 x i32> %arg1, i32 0, i32 0)
call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %data, i32 15, i32 %arg4, <8 x i32> %arg2, i32 0, i32 0)
ret void
}
; SI won't merge ds memory operations, because of the signed offset bug, so
; we only have check lines for VI.
; VI-LABEL: image_load_mmo
; VI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
; VI: ds_write2_b32 v{{[0-9]+}}, [[ZERO]], [[ZERO]] offset1:4
define amdgpu_ps float @image_load_mmo(<8 x i32> inreg %rsrc, float addrspace(3)* %lds, <2 x i32> %c) #0 {
store float 0.000000e+00, float addrspace(3)* %lds
%c0 = extractelement <2 x i32> %c, i32 0
%c1 = extractelement <2 x i32> %c, i32 1
%tex = call float @llvm.amdgcn.image.load.2d.f32.i32(i32 15, i32 %c0, i32 %c1, <8 x i32> %rsrc, i32 0, i32 0)
%tmp2 = getelementptr float, float addrspace(3)* %lds, i32 4
store float 0.000000e+00, float addrspace(3)* %tmp2
ret float %tex
}
declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
@ -412,6 +452,7 @@ declare <4 x float> @llvm.amdgcn.image.getresinfo.2dmsaa.v4f32.i32(i32, i32, <8
declare <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
declare float @llvm.amdgcn.image.load.1d.f32.i32(i32, i32, <8 x i32>, i32, i32) #1
declare float @llvm.amdgcn.image.load.2d.f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
declare <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32, i32, <8 x i32>, i32, i32) #1
declare void @llvm.amdgcn.image.store.1d.f32.i32(float, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.1d.v2f32.i32(<2 x float>, i32, i32, <8 x i32>, i32, i32) #0

View File

@ -1,3 +1,4 @@
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
; GCN-LABEL: {{^}}gather4_2d:

View File

@ -0,0 +1,118 @@
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
; GCN-LABEL: {{^}}gather4_o_2d:
; GCN: image_gather4_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
define amdgpu_ps <4 x float> @gather4_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; GCN-LABEL: {{^}}gather4_c_o_2d:
; GCN: image_gather4_c_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
define amdgpu_ps <4 x float> @gather4_c_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; GCN-LABEL: {{^}}gather4_cl_o_2d:
; GCN: image_gather4_cl_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
define amdgpu_ps <4 x float> @gather4_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %clamp) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; GCN-LABEL: {{^}}gather4_c_cl_o_2d:
; GCN: image_gather4_c_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1{{$}}
define amdgpu_ps <4 x float> @gather4_c_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %clamp) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; GCN-LABEL: {{^}}gather4_b_o_2d:
; GCN: image_gather4_b_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
define amdgpu_ps <4 x float> @gather4_b_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %s, float %t) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; GCN-LABEL: {{^}}gather4_c_b_o_2d:
; GCN: image_gather4_c_b_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1{{$}}
define amdgpu_ps <4 x float> @gather4_c_b_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %zcompare, float %s, float %t) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; GCN-LABEL: {{^}}gather4_b_cl_o_2d:
; GCN: image_gather4_b_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1{{$}}
define amdgpu_ps <4 x float> @gather4_b_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %s, float %t, float %clamp) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; GCN-LABEL: {{^}}gather4_c_b_cl_o_2d:
; GCN: image_gather4_c_b_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1{{$}}
define amdgpu_ps <4 x float> @gather4_c_b_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; GCN-LABEL: {{^}}gather4_l_o_2d:
; GCN: image_gather4_l_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
define amdgpu_ps <4 x float> @gather4_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %lod) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; GCN-LABEL: {{^}}gather4_c_l_o_2d:
; GCN: image_gather4_c_l_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1{{$}}
define amdgpu_ps <4 x float> @gather4_c_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %lod) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; GCN-LABEL: {{^}}gather4_lz_o_2d:
; GCN: image_gather4_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
define amdgpu_ps <4 x float> @gather4_lz_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; GCN-LABEL: {{^}}gather4_c_lz_o_2d:
; GCN: image_gather4_c_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
define amdgpu_ps <4 x float> @gather4_c_lz_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
declare <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind readnone }

View File

@ -1,3 +1,4 @@
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
; GCN-LABEL: {{^}}sample_1d:
@ -400,6 +401,95 @@ main_body:
ret <4 x float> %v
}
; GCN-LABEL: {{^}}adjust_writemask_sample_0:
; GCN: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1{{$}}
define amdgpu_ps float @adjust_writemask_sample_0(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
main_body:
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
%elt0 = extractelement <4 x float> %r, i32 0
ret float %elt0
}
; GCN-LABEL: {{^}}adjust_writemask_sample_01
; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x3{{$}}
define amdgpu_ps <2 x float> @adjust_writemask_sample_01(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
main_body:
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
%out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 0, i32 1>
ret <2 x float> %out
}
; GCN-LABEL: {{^}}adjust_writemask_sample_012
; GCN: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0x7{{$}}
define amdgpu_ps <3 x float> @adjust_writemask_sample_012(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
main_body:
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
%out = shufflevector <4 x float> %r, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
ret <3 x float> %out
}
; GCN-LABEL: {{^}}adjust_writemask_sample_12
; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6{{$}}
define amdgpu_ps <2 x float> @adjust_writemask_sample_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
main_body:
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
%out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 1, i32 2>
ret <2 x float> %out
}
; GCN-LABEL: {{^}}adjust_writemask_sample_03
; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x9{{$}}
define amdgpu_ps <2 x float> @adjust_writemask_sample_03(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
main_body:
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
%out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 0, i32 3>
ret <2 x float> %out
}
; GCN-LABEL: {{^}}adjust_writemask_sample_13
; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa{{$}}
define amdgpu_ps <2 x float> @adjust_writemask_sample_13(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
main_body:
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
%out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 1, i32 3>
ret <2 x float> %out
}
; GCN-LABEL: {{^}}adjust_writemask_sample_123
; GCN: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0xe{{$}}
define amdgpu_ps <3 x float> @adjust_writemask_sample_123(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
main_body:
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
%out = shufflevector <4 x float> %r, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
ret <3 x float> %out
}
; GCN-LABEL: {{^}}adjust_writemask_sample_none_enabled
; GCN-NOT: image
define amdgpu_ps <4 x float> @adjust_writemask_sample_none_enabled(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
main_body:
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %r
}
; GCN-LABEL: {{^}}adjust_writemask_sample_123_to_12
; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6{{$}}
define amdgpu_ps <2 x float> @adjust_writemask_sample_123_to_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
main_body:
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 14, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
%out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 0, i32 1>
ret <2 x float> %out
}
; GCN-LABEL: {{^}}adjust_writemask_sample_013_to_13
; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa{{$}}
define amdgpu_ps <2 x float> @adjust_writemask_sample_013_to_13(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
main_body:
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 11, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
%out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 1, i32 2>
ret <2 x float> %out
}
declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

View File

@ -0,0 +1,371 @@
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
; GCN-LABEL: {{^}}sample_o_1d:
; GCN: image_sample_o v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}}
define amdgpu_ps <4 x float> @sample_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; GCN-LABEL: {{^}}sample_o_2d:
; GCN: image_sample_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
define amdgpu_ps <4 x float> @sample_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.o.2d.v4f32.f32(i32 15, i32 %offset, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; GCN-LABEL: {{^}}sample_c_o_1d:
; GCN: image_sample_c_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
define amdgpu_ps <4 x float> @sample_c_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; GCN-LABEL: {{^}}sample_c_o_2d:
; GCN: image_sample_c_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
define amdgpu_ps <4 x float> @sample_c_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.o.2d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; GCN-LABEL: {{^}}sample_cl_o_1d:
; GCN: image_sample_cl_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
define amdgpu_ps <4 x float> @sample_cl_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %clamp) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; GCN-LABEL: {{^}}sample_cl_o_2d:
; GCN: image_sample_cl_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
define amdgpu_ps <4 x float> @sample_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %clamp) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.cl.o.2d.v4f32.f32(i32 15, i32 %offset, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; GCN-LABEL: {{^}}sample_c_cl_o_1d:
; GCN: image_sample_c_cl_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
define amdgpu_ps <4 x float> @sample_c_cl_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %clamp) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; GCN-LABEL: {{^}}sample_c_cl_o_2d:
; GCN: image_sample_c_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
define amdgpu_ps <4 x float> @sample_c_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %clamp) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.o.2d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; GCN-LABEL: {{^}}sample_b_o_1d:
; GCN: image_sample_b_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
define amdgpu_ps <4 x float> @sample_b_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %s) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; GCN-LABEL: {{^}}sample_b_o_2d:
; GCN: image_sample_b_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
define amdgpu_ps <4 x float> @sample_b_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %s, float %t) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.b.o.2d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; GCN-LABEL: {{^}}sample_c_b_o_1d:
; GCN: image_sample_c_b_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
define amdgpu_ps <4 x float> @sample_c_b_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %zcompare, float %s) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; GCN-LABEL: {{^}}sample_c_b_o_2d:
; GCN: image_sample_c_b_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
define amdgpu_ps <4 x float> @sample_c_b_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %zcompare, float %s, float %t) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; GCN-LABEL: {{^}}sample_b_cl_o_1d:
; GCN: image_sample_b_cl_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
define amdgpu_ps <4 x float> @sample_b_cl_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %s, float %clamp) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; GCN-LABEL: {{^}}sample_b_cl_o_2d:
; GCN: image_sample_b_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
define amdgpu_ps <4 x float> @sample_b_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %s, float %t, float %clamp) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.o.2d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; GCN-LABEL: {{^}}sample_c_b_cl_o_1d:
; GCN: image_sample_c_b_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
define amdgpu_ps <4 x float> @sample_c_b_cl_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %zcompare, float %s, float %clamp) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; GCN-LABEL: {{^}}sample_c_b_cl_o_2d:
; GCN: image_sample_c_b_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
define amdgpu_ps <4 x float> @sample_c_b_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.2d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; GCN-LABEL: {{^}}sample_d_o_1d:
; GCN: image_sample_d_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
define amdgpu_ps <4 x float> @sample_d_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %dsdh, float %dsdv, float %s) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.d.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; GCN-LABEL: {{^}}sample_d_o_2d:
; GCN: image_sample_d_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
define amdgpu_ps <4 x float> @sample_d_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.d.o.2d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; GCN-LABEL: {{^}}sample_c_d_o_1d:
; GCN: image_sample_c_d_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
define amdgpu_ps <4 x float> @sample_c_d_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; GCN-LABEL: {{^}}sample_c_d_o_2d:
; GCN: image_sample_c_d_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
define amdgpu_ps <4 x float> @sample_c_d_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.o.2d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; GCN-LABEL: {{^}}sample_d_cl_o_1d:
; GCN: image_sample_d_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
define amdgpu_ps <4 x float> @sample_d_cl_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; GCN-LABEL: {{^}}sample_d_cl_o_2d:
; GCN: image_sample_d_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
define amdgpu_ps <4 x float> @sample_d_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.o.2d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; GCN-LABEL: {{^}}sample_c_d_cl_o_1d:
; GCN: image_sample_c_d_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
define amdgpu_ps <4 x float> @sample_c_d_cl_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; GCN-LABEL: {{^}}sample_c_d_cl_o_2d:
; GCN: image_sample_c_d_cl_o v[0:3], v[0:15], s[0:7], s[8:11] dmask:0xf{{$}}
define amdgpu_ps <4 x float> @sample_c_d_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.2d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; GCN-LABEL: {{^}}sample_cd_o_1d:
; GCN: image_sample_cd_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
define amdgpu_ps <4 x float> @sample_cd_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %dsdh, float %dsdv, float %s) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.cd.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; GCN-LABEL: {{^}}sample_cd_o_2d:
; GCN: image_sample_cd_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
define amdgpu_ps <4 x float> @sample_cd_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.cd.o.2d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; GCN-LABEL: {{^}}sample_c_cd_o_1d:
; GCN: image_sample_c_cd_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
define amdgpu_ps <4 x float> @sample_c_cd_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; GCN-LABEL: {{^}}sample_c_cd_o_2d:
; GCN: image_sample_c_cd_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
define amdgpu_ps <4 x float> @sample_c_cd_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.o.2d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; GCN-LABEL: {{^}}sample_cd_cl_o_1d:
; GCN: image_sample_cd_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
define amdgpu_ps <4 x float> @sample_cd_cl_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; GCN-LABEL: {{^}}sample_cd_cl_o_2d:
; GCN: image_sample_cd_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
define amdgpu_ps <4 x float> @sample_cd_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.2d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; GCN-LABEL: {{^}}sample_c_cd_cl_o_1d:
; GCN: image_sample_c_cd_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
define amdgpu_ps <4 x float> @sample_c_cd_cl_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; GCN-LABEL: {{^}}sample_c_cd_cl_o_2d:
; GCN: image_sample_c_cd_cl_o v[0:3], v[0:15], s[0:7], s[8:11] dmask:0xf{{$}}
define amdgpu_ps <4 x float> @sample_c_cd_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.2d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; GCN-LABEL: {{^}}sample_l_o_1d:
; GCN: image_sample_l_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
define amdgpu_ps <4 x float> @sample_l_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %lod) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; GCN-LABEL: {{^}}sample_l_o_2d:
; GCN: image_sample_l_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
define amdgpu_ps <4 x float> @sample_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %lod) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; GCN-LABEL: {{^}}sample_c_l_o_1d:
; GCN: image_sample_c_l_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
define amdgpu_ps <4 x float> @sample_c_l_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %lod) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; GCN-LABEL: {{^}}sample_c_l_o_2d:
; GCN: image_sample_c_l_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
define amdgpu_ps <4 x float> @sample_c_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %lod) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; GCN-LABEL: {{^}}sample_lz_o_1d:
; GCN: image_sample_lz_o v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}}
define amdgpu_ps <4 x float> @sample_lz_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; GCN-LABEL: {{^}}sample_lz_o_2d:
; GCN: image_sample_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
define amdgpu_ps <4 x float> @sample_lz_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.lz.o.2d.v4f32.f32(i32 15, i32 %offset, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; GCN-LABEL: {{^}}sample_c_lz_o_1d:
; GCN: image_sample_c_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
define amdgpu_ps <4 x float> @sample_c_lz_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
; GCN-LABEL: {{^}}sample_c_lz_o_2d:
; GCN: image_sample_c_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
define amdgpu_ps <4 x float> @sample_c_lz_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.2d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
declare <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32(i32, i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.o.2d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cl.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cl.o.1d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cl.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.b.o.2d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.b.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.b.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cd.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cd.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32(i32, i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.lz.o.2d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.lz.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind readnone }

View File

@ -26,7 +26,7 @@ define amdgpu_ps float @test2() #0 {
%live = call i1 @llvm.amdgcn.ps.live()
%live.32 = zext i1 %live to i32
%live.32.bc = bitcast i32 %live.32 to float
%t = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %live.32.bc, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%t = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %live.32.bc, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
%r = extractelement <4 x float> %t, i32 0
ret float %r
}
@ -49,13 +49,13 @@ dead:
end:
%tc = phi i32 [ %in, %entry ], [ %tc.dead, %dead ]
%tc.bc = bitcast i32 %tc to float
%t = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %tc.bc, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
%t = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tc.bc, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) #0
%r = extractelement <4 x float> %t, i32 0
ret float %r
}
declare i1 @llvm.amdgcn.ps.live() #1
declare <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }

View File

@ -8,9 +8,9 @@
; CHECK-NEXT: image_store
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test1(<8 x i32> inreg %rsrc, <4 x float> %d0, <4 x float> %d1, i32 %c0, i32 %c1) {
call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %d0, i32 %c0, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 1, i1 0)
call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %d0, i32 15, i32 %c0, <8 x i32> %rsrc, i32 0, i32 0)
call void @llvm.amdgcn.s.waitcnt(i32 3840) ; 0xf00
call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %d1, i32 %c1, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 1, i1 0)
call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %d1, i32 15, i32 %c1, <8 x i32> %rsrc, i32 0, i32 0)
ret void
}
@ -24,17 +24,17 @@ define amdgpu_ps void @test1(<8 x i32> inreg %rsrc, <4 x float> %d0, <4 x float>
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0){{$}}
; CHECK-NEXT: image_store
define amdgpu_ps void @test2(<8 x i32> inreg %rsrc, i32 %c) {
%t = call <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32 %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
%t = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %c, <8 x i32> %rsrc, i32 0, i32 0)
call void @llvm.amdgcn.s.waitcnt(i32 3840) ; 0xf00
%c.1 = mul i32 %c, 2
call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %t, i32 %c.1, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %t, i32 15, i32 %c.1, <8 x i32> %rsrc, i32 0, i32 0)
ret void
}
declare void @llvm.amdgcn.s.waitcnt(i32) #0
declare <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32, <8 x i32>, i32, i1, i1, i1, i1) #1
declare void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float>, i32, <8 x i32>, i32, i1, i1, i1, i1) #0
declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #1
declare void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float>, i32, i32, <8 x i32>, i32, i32) #0
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }

View File

@ -10,8 +10,8 @@ target triple = "amdgcn--amdpal"
define dllexport amdgpu_ps <2 x float> @_amdgpu_ps_main(i32 inreg, i32 inreg, i32 inreg, i32 inreg, <2 x float>, <2 x float>, <2 x float>, <3 x float>, <2 x float>, <2 x float>, <2 x float>, float, float, float, float, float, i32, i32, i32, i32) local_unnamed_addr {
.entry:
%res = call <2 x float> @llvm.amdgcn.image.sample.l.v2f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false)
%res = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
ret <2 x float> %res
}
declare <2 x float> @llvm.amdgcn.image.sample.l.v2f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1)
declare <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32)

View File

@ -81,13 +81,8 @@ main_body:
%j.f.i4 = bitcast i32 %j.i2 to float
%p1.i5 = call float @llvm.amdgcn.interp.p1(float %i.f.i3, i32 2, i32 1, i32 %arg3) #1
%p2.i6 = call float @llvm.amdgcn.interp.p2(float %p1.i5, float %j.f.i4, i32 2, i32 1, i32 %arg3) #1
%tmp45 = bitcast float %p2.i to i32
%tmp46 = bitcast float %p2.i24 to i32
%tmp47 = insertelement <2 x i32> undef, i32 %tmp45, i32 0
%tmp48 = insertelement <2 x i32> %tmp47, i32 %tmp46, i32 1
%tmp39.bc = bitcast <4 x i32> %tmp39 to <4 x i32>
%a.bc.i = bitcast <2 x i32> %tmp48 to <2 x float>
%tmp1 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %a.bc.i, <8 x i32> %tmp37, <4 x i32> %tmp39.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp1 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %p2.i, float %p2.i24, <8 x i32> %tmp37, <4 x i32> %tmp39.bc, i1 0, i32 0, i32 0)
%tmp50 = extractelement <4 x float> %tmp1, i32 2
%tmp51 = call float @llvm.fabs.f32(float %tmp50)
%tmp52 = fmul float %p2.i18, %p2.i18
@ -240,14 +235,14 @@ entry:
br i1 %tmp27, label %if, label %else
if: ; preds = %entry
%tmp1 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> <float 0x36D6000000000000, float 0x36DA000000000000>, <8 x i32> %tmp24, <4 x i32> %tmp26.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp1 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 0x36D6000000000000, float 0x36DA000000000000, <8 x i32> %tmp24, <4 x i32> %tmp26.bc, i1 0, i32 0, i32 0)
%val.if.0 = extractelement <4 x float> %tmp1, i32 0
%val.if.1 = extractelement <4 x float> %tmp1, i32 1
%val.if.2 = extractelement <4 x float> %tmp1, i32 2
br label %endif
else: ; preds = %entry
%tmp2 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> <float 0x36C4000000000000, float 0x36CC000000000000>, <8 x i32> %tmp24, <4 x i32> %tmp26.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp2 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 0x36C4000000000000, float 0x36CC000000000000, <8 x i32> %tmp24, <4 x i32> %tmp26.bc, i1 0, i32 0, i32 0)
%val.else.0 = extractelement <4 x float> %tmp2, i32 0
%val.else.1 = extractelement <4 x float> %tmp2, i32 1
%val.else.2 = extractelement <4 x float> %tmp2, i32 2
@ -352,24 +347,18 @@ bb:
br i1 %tmp36, label %bb38, label %bb80
bb38: ; preds = %bb
%tmp52 = bitcast float %p2.i to i32
%tmp53 = bitcast float %p2.i6 to i32
%tmp54 = insertelement <2 x i32> undef, i32 %tmp52, i32 0
%tmp55 = insertelement <2 x i32> %tmp54, i32 %tmp53, i32 1
%tmp56 = bitcast <8 x i32> %tmp26 to <8 x i32>
%a.bc.i = bitcast <2 x i32> %tmp55 to <2 x float>
%tmp2 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %a.bc.i, <8 x i32> %tmp56, <4 x i32> %tmp28, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp2 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %p2.i, float %p2.i6, <8 x i32> %tmp56, <4 x i32> %tmp28, i1 0, i32 0, i32 0)
br label %bb71
bb80: ; preds = %bb
%tmp81 = bitcast float %p2.i to i32
%tmp82 = bitcast float %p2.i6 to i32
%tmp82.2 = add i32 %tmp82, 1
%tmp83 = insertelement <2 x i32> undef, i32 %tmp81, i32 0
%tmp84 = insertelement <2 x i32> %tmp83, i32 %tmp82.2, i32 1
%tmp83 = bitcast i32 %tmp81 to float
%tmp84 = bitcast i32 %tmp82.2 to float
%tmp85 = bitcast <8 x i32> %tmp26 to <8 x i32>
%a.bc.i1 = bitcast <2 x i32> %tmp84 to <2 x float>
%tmp3 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %a.bc.i1, <8 x i32> %tmp85, <4 x i32> %tmp28, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp3 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %tmp83, float %tmp84, <8 x i32> %tmp85, <4 x i32> %tmp28, i1 0, i32 0, i32 0)
br label %bb71
bb71: ; preds = %bb80, %bb38
@ -387,7 +376,7 @@ bb:
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
%tmp7 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(4)* %arg, i32 0, i32 %tid
%tmp8 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp7, align 32, !tbaa !0
%tmp = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> <float 7.500000e-01, float 2.500000e-01>, <8 x i32> %tmp8, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 7.500000e-01, float 2.500000e-01, <8 x i32> %tmp8, <4 x i32> undef, i1 0, i32 0, i32 0)
%tmp10 = extractelement <4 x float> %tmp, i32 0
%tmp12 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float undef, float %tmp10)
call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp12, <2 x half> undef, i1 true, i1 true) #0
@ -402,7 +391,7 @@ bb:
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
%tmp7 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(4)* %arg, i32 0, i32 %tid
%tmp8 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp7, align 16, !tbaa !0
%tmp = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> <float 7.500000e-01, float 2.500000e-01>, <8 x i32> undef, <4 x i32> %tmp8, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 7.500000e-01, float 2.500000e-01, <8 x i32> undef, <4 x i32> %tmp8, i1 0, i32 0, i32 0)
%tmp10 = extractelement <4 x float> %tmp, i32 0
%tmp12 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %tmp10, float undef)
call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp12, <2 x half> undef, i1 true, i1 true) #0
@ -419,7 +408,7 @@ declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1
attributes #0 = { nounwind }

View File

@ -34,14 +34,9 @@ main_body:
%j.f.i4 = bitcast i32 %j.i2 to float
%p1.i5 = call float @llvm.amdgcn.interp.p1(float %i.f.i3, i32 1, i32 0, i32 %arg5) #1
%p2.i6 = call float @llvm.amdgcn.interp.p2(float %p1.i5, float %j.f.i4, i32 1, i32 0, i32 %arg5) #1
%tmp27 = bitcast float %p2.i to i32
%tmp28 = bitcast float %p2.i6 to i32
%tmp29 = insertelement <2 x i32> undef, i32 %tmp27, i32 0
%tmp30 = insertelement <2 x i32> %tmp29, i32 %tmp28, i32 1
%tmp22.bc = bitcast <32 x i8> %tmp22 to <8 x i32>
%tmp24.bc = bitcast <16 x i8> %tmp24 to <4 x i32>
%tmp30.bc = bitcast <2 x i32> %tmp30 to <2 x float>
%tmp31 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %tmp30.bc, <8 x i32> %tmp22.bc, <4 x i32> %tmp24.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp31 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %p2.i, float %p2.i6, <8 x i32> %tmp22.bc, <4 x i32> %tmp24.bc, i1 0, i32 0, i32 0)
%tmp32 = extractelement <4 x float> %tmp31, i32 0
%tmp33 = extractelement <4 x float> %tmp31, i32 1
@ -57,7 +52,7 @@ declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1
declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }

View File

@ -324,14 +324,6 @@ main_body:
%tmp213 = fmul float %tmp205, %tmp191
%tmp214 = fmul float %tmp206, %tmp191
%tmp215 = fmul float -1.000000e+00, %tmp191
%tmp216 = bitcast float %tmp135 to i32
%tmp217 = bitcast float %tmp181 to i32
%tmp218 = bitcast float %tmp136 to i32
%tmp219 = bitcast float %tmp182 to i32
%tmp220 = insertelement <8 x i32> undef, i32 %tmp216, i32 0
%tmp221 = insertelement <8 x i32> %tmp220, i32 %tmp217, i32 1
%tmp222 = insertelement <8 x i32> %tmp221, i32 %tmp218, i32 2
%tmp223 = insertelement <8 x i32> %tmp222, i32 %tmp219, i32 3
br label %LOOP
LOOP: ; preds = %ENDIF, %main_body
@ -358,14 +350,7 @@ IF: ; preds = %LOOP
br label %LOOP65
ENDIF: ; preds = %LOOP
%tmp237 = bitcast float %temp28.0 to i32
%tmp238 = bitcast float %temp29.0 to i32
%tmp239 = insertelement <8 x i32> %tmp223, i32 %tmp237, i32 4
%tmp240 = insertelement <8 x i32> %tmp239, i32 %tmp238, i32 5
%tmp241 = insertelement <8 x i32> %tmp240, i32 undef, i32 6
%tmp242 = insertelement <8 x i32> %tmp241, i32 undef, i32 7
%tmp242.bc = bitcast <8 x i32> %tmp242 to <8 x float>
%tmp243 = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float> %tmp242.bc, <8 x i32> %tmp61, <4 x i32> %tmp63.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp243 = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %tmp135, float %tmp181, float %tmp136, float %tmp182, float %temp28.0, float %temp29.0, <8 x i32> %tmp61, <4 x i32> %tmp63.bc, i1 0, i32 0, i32 0)
%tmp244 = extractelement <4 x float> %tmp243, i32 3
%tmp245 = fcmp oge float %temp30.0, %tmp244
%tmp246 = sext i1 %tmp245 to i32
@ -396,65 +381,20 @@ LOOP65: ; preds = %ENDIF66, %IF
br i1 %tmp262, label %IF67, label %ENDIF66
IF67: ; preds = %LOOP65
%tmp263 = bitcast float %tmp135 to i32
%tmp264 = bitcast float %tmp181 to i32
%tmp265 = bitcast float %tmp136 to i32
%tmp266 = bitcast float %tmp182 to i32
%tmp267 = bitcast float %temp28.1 to i32
%tmp268 = bitcast float %temp29.1 to i32
%tmp269 = insertelement <8 x i32> undef, i32 %tmp263, i32 0
%tmp270 = insertelement <8 x i32> %tmp269, i32 %tmp264, i32 1
%tmp271 = insertelement <8 x i32> %tmp270, i32 %tmp265, i32 2
%tmp272 = insertelement <8 x i32> %tmp271, i32 %tmp266, i32 3
%tmp273 = insertelement <8 x i32> %tmp272, i32 %tmp267, i32 4
%tmp274 = insertelement <8 x i32> %tmp273, i32 %tmp268, i32 5
%tmp275 = insertelement <8 x i32> %tmp274, i32 undef, i32 6
%tmp276 = insertelement <8 x i32> %tmp275, i32 undef, i32 7
%tmp67.bc = bitcast <4 x i32> %tmp67 to <4 x i32>
%tmp276.bc = bitcast <8 x i32> %tmp276 to <8 x float>
%tmp277 = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float> %tmp276.bc, <8 x i32> %tmp65, <4 x i32> %tmp67.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp277 = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %tmp135, float %tmp181, float %tmp136, float %tmp182, float %temp28.1, float %temp29.1, <8 x i32> %tmp65, <4 x i32> %tmp67.bc, i1 0, i32 0, i32 0)
%tmp278 = extractelement <4 x float> %tmp277, i32 0
%tmp279 = extractelement <4 x float> %tmp277, i32 1
%tmp280 = extractelement <4 x float> %tmp277, i32 2
%tmp281 = extractelement <4 x float> %tmp277, i32 3
%tmp282 = fmul float %tmp281, %tmp46
%tmp283 = bitcast float %tmp135 to i32
%tmp284 = bitcast float %tmp181 to i32
%tmp285 = bitcast float %tmp136 to i32
%tmp286 = bitcast float %tmp182 to i32
%tmp287 = bitcast float %temp28.1 to i32
%tmp288 = bitcast float %temp29.1 to i32
%tmp289 = insertelement <8 x i32> undef, i32 %tmp283, i32 0
%tmp290 = insertelement <8 x i32> %tmp289, i32 %tmp284, i32 1
%tmp291 = insertelement <8 x i32> %tmp290, i32 %tmp285, i32 2
%tmp292 = insertelement <8 x i32> %tmp291, i32 %tmp286, i32 3
%tmp293 = insertelement <8 x i32> %tmp292, i32 %tmp287, i32 4
%tmp294 = insertelement <8 x i32> %tmp293, i32 %tmp288, i32 5
%tmp295 = insertelement <8 x i32> %tmp294, i32 undef, i32 6
%tmp296 = insertelement <8 x i32> %tmp295, i32 undef, i32 7
%tmp83.bc = bitcast <4 x i32> %tmp83 to <4 x i32>
%tmp296.bc = bitcast <8 x i32> %tmp296 to <8 x float>
%tmp297 = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float> %tmp296.bc, <8 x i32> %tmp81, <4 x i32> %tmp83.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp297 = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %tmp135, float %tmp181, float %tmp136, float %tmp182, float %temp28.1, float %temp29.1, <8 x i32> %tmp81, <4 x i32> %tmp83.bc, i1 0, i32 0, i32 0)
%tmp298 = extractelement <4 x float> %tmp297, i32 0
%tmp299 = extractelement <4 x float> %tmp297, i32 1
%tmp300 = extractelement <4 x float> %tmp297, i32 2
%tmp301 = bitcast float %tmp135 to i32
%tmp302 = bitcast float %tmp181 to i32
%tmp303 = bitcast float %tmp136 to i32
%tmp304 = bitcast float %tmp182 to i32
%tmp305 = bitcast float %temp28.1 to i32
%tmp306 = bitcast float %temp29.1 to i32
%tmp307 = insertelement <8 x i32> undef, i32 %tmp301, i32 0
%tmp308 = insertelement <8 x i32> %tmp307, i32 %tmp302, i32 1
%tmp309 = insertelement <8 x i32> %tmp308, i32 %tmp303, i32 2
%tmp310 = insertelement <8 x i32> %tmp309, i32 %tmp304, i32 3
%tmp311 = insertelement <8 x i32> %tmp310, i32 %tmp305, i32 4
%tmp312 = insertelement <8 x i32> %tmp311, i32 %tmp306, i32 5
%tmp313 = insertelement <8 x i32> %tmp312, i32 undef, i32 6
%tmp314 = insertelement <8 x i32> %tmp313, i32 undef, i32 7
%tmp79.bc = bitcast <4 x i32> %tmp79 to <4 x i32>
%tmp314.bc = bitcast <8 x i32> %tmp314 to <8 x float>
%tmp315 = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float> %tmp314.bc, <8 x i32> %tmp77, <4 x i32> %tmp79.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp315 = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %tmp135, float %tmp181, float %tmp136, float %tmp182, float %temp28.1, float %temp29.1, <8 x i32> %tmp77, <4 x i32> %tmp79.bc, i1 0, i32 0, i32 0)
%tmp316 = extractelement <4 x float> %tmp315, i32 0
%tmp317 = extractelement <4 x float> %tmp315, i32 1
%tmp318 = extractelement <4 x float> %tmp315, i32 2
@ -470,22 +410,7 @@ IF67: ; preds = %LOOP65
%tmp328 = fadd float %tmp278, %tmp323
%tmp329 = fadd float %tmp279, %tmp325
%tmp330 = fadd float %tmp280, %tmp327
%tmp331 = bitcast float %tmp135 to i32
%tmp332 = bitcast float %tmp181 to i32
%tmp333 = bitcast float %tmp136 to i32
%tmp334 = bitcast float %tmp182 to i32
%tmp335 = bitcast float %temp28.1 to i32
%tmp336 = bitcast float %temp29.1 to i32
%tmp337 = insertelement <8 x i32> undef, i32 %tmp331, i32 0
%tmp338 = insertelement <8 x i32> %tmp337, i32 %tmp332, i32 1
%tmp339 = insertelement <8 x i32> %tmp338, i32 %tmp333, i32 2
%tmp340 = insertelement <8 x i32> %tmp339, i32 %tmp334, i32 3
%tmp341 = insertelement <8 x i32> %tmp340, i32 %tmp335, i32 4
%tmp342 = insertelement <8 x i32> %tmp341, i32 %tmp336, i32 5
%tmp343 = insertelement <8 x i32> %tmp342, i32 undef, i32 6
%tmp344 = insertelement <8 x i32> %tmp343, i32 undef, i32 7
%tmp344.bc = bitcast <8 x i32> %tmp344 to <8 x float>
%tmp345 = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float> %tmp344.bc, <8 x i32> %tmp61, <4 x i32> %tmp63.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp345 = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %tmp135, float %tmp181, float %tmp136, float %tmp182, float %temp28.1, float %temp29.1, <8 x i32> %tmp61, <4 x i32> %tmp63.bc, i1 0, i32 0, i32 0)
%tmp346 = extractelement <4 x float> %tmp345, i32 0
%tmp347 = extractelement <4 x float> %tmp345, i32 1
%tmp348 = extractelement <4 x float> %tmp345, i32 2
@ -501,23 +426,8 @@ IF67: ; preds = %LOOP65
%tmp358 = fmul float %tmp349, %tmp357
%tmp359 = fmul float %tmp350, %tmp357
%tmp360 = fmul float %tmp351, %tmp357
%tmp361 = bitcast float %tmp135 to i32
%tmp362 = bitcast float %tmp181 to i32
%tmp363 = bitcast float %tmp136 to i32
%tmp364 = bitcast float %tmp182 to i32
%tmp365 = bitcast float %temp28.1 to i32
%tmp366 = bitcast float %temp29.1 to i32
%tmp367 = insertelement <8 x i32> undef, i32 %tmp361, i32 0
%tmp368 = insertelement <8 x i32> %tmp367, i32 %tmp362, i32 1
%tmp369 = insertelement <8 x i32> %tmp368, i32 %tmp363, i32 2
%tmp370 = insertelement <8 x i32> %tmp369, i32 %tmp364, i32 3
%tmp371 = insertelement <8 x i32> %tmp370, i32 %tmp365, i32 4
%tmp372 = insertelement <8 x i32> %tmp371, i32 %tmp366, i32 5
%tmp373 = insertelement <8 x i32> %tmp372, i32 undef, i32 6
%tmp374 = insertelement <8 x i32> %tmp373, i32 undef, i32 7
%tmp71.bc = bitcast <4 x i32> %tmp71 to <4 x i32>
%tmp374.bc = bitcast <8 x i32> %tmp374 to <8 x float>
%tmp375 = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float> %tmp374.bc, <8 x i32> %tmp69, <4 x i32> %tmp71.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp375 = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %tmp135, float %tmp181, float %tmp136, float %tmp182, float %temp28.1, float %temp29.1, <8 x i32> %tmp69, <4 x i32> %tmp71.bc, i1 0, i32 0, i32 0)
%tmp376 = extractelement <4 x float> %tmp375, i32 0
%tmp377 = extractelement <4 x float> %tmp375, i32 1
%tmp378 = extractelement <4 x float> %tmp375, i32 2
@ -557,23 +467,8 @@ IF67: ; preds = %LOOP65
%tmp412 = fadd float %tmp411, %tmp406
%tmp413 = fmul float %tmp399, %p2.i24
%tmp414 = fadd float %tmp413, %tmp408
%tmp415 = bitcast float %tmp135 to i32
%tmp416 = bitcast float %tmp181 to i32
%tmp417 = bitcast float %tmp136 to i32
%tmp418 = bitcast float %tmp182 to i32
%tmp419 = bitcast float %temp28.1 to i32
%tmp420 = bitcast float %temp29.1 to i32
%tmp421 = insertelement <8 x i32> undef, i32 %tmp415, i32 0
%tmp422 = insertelement <8 x i32> %tmp421, i32 %tmp416, i32 1
%tmp423 = insertelement <8 x i32> %tmp422, i32 %tmp417, i32 2
%tmp424 = insertelement <8 x i32> %tmp423, i32 %tmp418, i32 3
%tmp425 = insertelement <8 x i32> %tmp424, i32 %tmp419, i32 4
%tmp426 = insertelement <8 x i32> %tmp425, i32 %tmp420, i32 5
%tmp427 = insertelement <8 x i32> %tmp426, i32 undef, i32 6
%tmp428 = insertelement <8 x i32> %tmp427, i32 undef, i32 7
%tmp87.bc = bitcast <4 x i32> %tmp87 to <4 x i32>
%tmp428.bc = bitcast <8 x i32> %tmp428 to <8 x float>
%tmp429 = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float> %tmp428.bc, <8 x i32> %tmp85, <4 x i32> %tmp87.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp429 = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %tmp135, float %tmp181, float %tmp136, float %tmp182, float %temp28.1, float %temp29.1, <8 x i32> %tmp85, <4 x i32> %tmp87.bc, i1 0, i32 0, i32 0)
%tmp430 = extractelement <4 x float> %tmp429, i32 0
%tmp431 = extractelement <4 x float> %tmp429, i32 1
%tmp432 = extractelement <4 x float> %tmp429, i32 2
@ -617,16 +512,8 @@ IF67: ; preds = %LOOP65
%tmp460 = fadd float %tmp459, 1.500000e+00
%tmp461 = fmul float %tmp454, %tmp458
%tmp462 = fadd float %tmp461, 1.500000e+00
%tmp463 = bitcast float %tmp462 to i32
%tmp464 = bitcast float %tmp460 to i32
%tmp465 = bitcast float %tmp456 to i32
%tmp466 = insertelement <4 x i32> undef, i32 %tmp463, i32 0
%tmp467 = insertelement <4 x i32> %tmp466, i32 %tmp464, i32 1
%tmp468 = insertelement <4 x i32> %tmp467, i32 %tmp465, i32 2
%tmp469 = insertelement <4 x i32> %tmp468, i32 undef, i32 3
%tmp91.bc = bitcast <4 x i32> %tmp91 to <4 x i32>
%tmp469.bc = bitcast <4 x i32> %tmp469 to <4 x float>
%tmp470 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tmp469.bc, <8 x i32> %tmp89, <4 x i32> %tmp91.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
%tmp470 = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32 15, float %tmp462, float %tmp460, float %tmp456, <8 x i32> %tmp89, <4 x i32> %tmp91.bc, i1 0, i32 0, i32 0) #0
%tmp471 = extractelement <4 x float> %tmp470, i32 0
%tmp472 = extractelement <4 x float> %tmp470, i32 1
%tmp473 = extractelement <4 x float> %tmp470, i32 2
@ -713,23 +600,8 @@ IF67: ; preds = %LOOP65
%tmp554 = fadd float %tmp553, %tmp549
%tmp555 = fmul float %tmp547, %tmp58
%tmp556 = fadd float %tmp555, %tmp550
%tmp557 = bitcast float %tmp135 to i32
%tmp558 = bitcast float %tmp181 to i32
%tmp559 = bitcast float %tmp136 to i32
%tmp560 = bitcast float %tmp182 to i32
%tmp561 = bitcast float %temp28.1 to i32
%tmp562 = bitcast float %temp29.1 to i32
%tmp563 = insertelement <8 x i32> undef, i32 %tmp557, i32 0
%tmp564 = insertelement <8 x i32> %tmp563, i32 %tmp558, i32 1
%tmp565 = insertelement <8 x i32> %tmp564, i32 %tmp559, i32 2
%tmp566 = insertelement <8 x i32> %tmp565, i32 %tmp560, i32 3
%tmp567 = insertelement <8 x i32> %tmp566, i32 %tmp561, i32 4
%tmp568 = insertelement <8 x i32> %tmp567, i32 %tmp562, i32 5
%tmp569 = insertelement <8 x i32> %tmp568, i32 undef, i32 6
%tmp570 = insertelement <8 x i32> %tmp569, i32 undef, i32 7
%tmp75.bc = bitcast <4 x i32> %tmp75 to <4 x i32>
%tmp570.bc = bitcast <8 x i32> %tmp570 to <8 x float>
%tmp571 = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float> %tmp570.bc, <8 x i32> %tmp73, <4 x i32> %tmp75.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp571 = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %tmp135, float %tmp181, float %tmp136, float %tmp182, float %temp28.1, float %temp29.1, <8 x i32> %tmp73, <4 x i32> %tmp75.bc, i1 0, i32 0, i32 0)
%tmp572 = extractelement <4 x float> %tmp571, i32 0
%tmp573 = extractelement <4 x float> %tmp571, i32 1
%tmp574 = extractelement <4 x float> %tmp571, i32 2
@ -745,14 +617,7 @@ IF67: ; preds = %LOOP65
ret void
ENDIF66: ; preds = %LOOP65
%tmp585 = bitcast float %temp28.1 to i32
%tmp586 = bitcast float %temp29.1 to i32
%tmp587 = insertelement <8 x i32> %tmp236, i32 %tmp585, i32 4
%tmp588 = insertelement <8 x i32> %tmp587, i32 %tmp586, i32 5
%tmp589 = insertelement <8 x i32> %tmp588, i32 undef, i32 6
%tmp590 = insertelement <8 x i32> %tmp589, i32 undef, i32 7
%tmp590.bc = bitcast <8 x i32> %tmp590 to <8 x float>
%tmp591 = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float> %tmp590.bc, <8 x i32> %tmp61, <4 x i32> %tmp63.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp591 = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %tmp135, float %tmp181, float %tmp136, float %tmp182, float %temp28.1, float %temp29.1, <8 x i32> %tmp61, <4 x i32> %tmp63.bc, i1 0, i32 0, i32 0)
%tmp592 = extractelement <4 x float> %tmp591, i32 3
%tmp593 = fcmp oge float %temp30.1, %tmp592
%tmp594 = sext i1 %tmp593 to i32
@ -1140,13 +1005,8 @@ main_body:
%tmp218 = fmul float %., %tmp53
%tmp219 = fmul float %arg13, %tmp46
%tmp220 = fmul float %tmp196, %tmp47
%tmp221 = bitcast float %p2.i132 to i32
%tmp222 = bitcast float %p2.i126 to i32
%tmp223 = insertelement <2 x i32> undef, i32 %tmp221, i32 0
%tmp224 = insertelement <2 x i32> %tmp223, i32 %tmp222, i32 1
%tmp132.bc = bitcast <4 x i32> %tmp132 to <4 x i32>
%tmp224.bc = bitcast <2 x i32> %tmp224 to <2 x float>
%tmp225 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %tmp224.bc, <8 x i32> %tmp130, <4 x i32> %tmp132.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp225 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %p2.i132, float %p2.i126, <8 x i32> %tmp130, <4 x i32> %tmp132.bc, i1 0, i32 0, i32 0)
%tmp226 = extractelement <4 x float> %tmp225, i32 0
%tmp227 = extractelement <4 x float> %tmp225, i32 1
%tmp228 = extractelement <4 x float> %tmp225, i32 2
@ -1220,7 +1080,7 @@ LOOP: ; preds = %LOOP, %main_body
%tmp281 = insertelement <4 x i32> %tmp280, i32 undef, i32 3
%tmp148.bc = bitcast <4 x i32> %tmp148 to <4 x i32>
%tmp281.bc = bitcast <4 x i32> %tmp281 to <4 x float>
%tmp282 = call <4 x float> @llvm.amdgcn.image.sample.l.v4f32.v4f32.v8i32(<4 x float> %tmp281.bc, <8 x i32> %tmp146, <4 x i32> %tmp148.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp282 = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float %temp168.0, float %temp169.0, float 0.0, <8 x i32> %tmp146, <4 x i32> %tmp148.bc, i1 0, i32 0, i32 0)
%tmp283 = extractelement <4 x float> %tmp282, i32 3
%tmp284 = fadd float %temp168.0, %tmp273
%tmp285 = fadd float %temp169.0, %tmp274
@ -1279,13 +1139,8 @@ IF189: ; preds = %LOOP
%tmp335 = fadd float %p2.i162, %tmp329
%tmp336 = fadd float %p2.i156, %tmp331
%tmp337 = fadd float %p2.i150, %tmp333
%tmp338 = bitcast float %tmp334 to i32
%tmp339 = bitcast float %tmp335 to i32
%tmp340 = insertelement <2 x i32> undef, i32 %tmp338, i32 0
%tmp341 = insertelement <2 x i32> %tmp340, i32 %tmp339, i32 1
%tmp136.bc = bitcast <4 x i32> %tmp136 to <4 x i32>
%a.bc.i = bitcast <2 x i32> %tmp341 to <2 x float>
%tmp0 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %a.bc.i, <8 x i32> %tmp134, <4 x i32> %tmp136.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp0 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %tmp334, float %tmp335, <8 x i32> %tmp134, <4 x i32> %tmp136.bc, i1 0, i32 0, i32 0)
%tmp343 = extractelement <4 x float> %tmp0, i32 0
%tmp344 = extractelement <4 x float> %tmp0, i32 1
%tmp345 = extractelement <4 x float> %tmp0, i32 2
@ -1313,25 +1168,15 @@ IF189: ; preds = %LOOP
%one.sub.ac.i30 = fmul float %one.sub.a.i29, %tmp353
%mul.i31 = fmul float %tmp345, %tmp353
%result.i32 = fadd float %mul.i31, %one.sub.ac.i30
%tmp358 = bitcast float %tmp336 to i32
%tmp359 = bitcast float %tmp337 to i32
%tmp360 = insertelement <2 x i32> undef, i32 %tmp358, i32 0
%tmp361 = insertelement <2 x i32> %tmp360, i32 %tmp359, i32 1
%tmp152.bc = bitcast <4 x i32> %tmp152 to <4 x i32>
%a.bc.i3 = bitcast <2 x i32> %tmp361 to <2 x float>
%tmp1 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %a.bc.i3, <8 x i32> %tmp150, <4 x i32> %tmp152.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp1 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %tmp336, float %tmp337, <8 x i32> %tmp150, <4 x i32> %tmp152.bc, i1 0, i32 0, i32 0)
%tmp363 = extractelement <4 x float> %tmp1, i32 2
%tmp364 = fmul float %result.i40, %result.i
%tmp365 = fmul float %result.i36, %result.i44
%tmp366 = fmul float %result.i32, %result.i42
%tmp367 = fmul float %tmp354, %tmp229
%tmp368 = bitcast float %tmp310 to i32
%tmp369 = bitcast float %tmp311 to i32
%tmp370 = insertelement <2 x i32> undef, i32 %tmp368, i32 0
%tmp371 = insertelement <2 x i32> %tmp370, i32 %tmp369, i32 1
%tmp140.bc = bitcast <4 x i32> %tmp140 to <4 x i32>
%a.bc.i2 = bitcast <2 x i32> %tmp371 to <2 x float>
%tmp2 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %a.bc.i2, <8 x i32> %tmp138, <4 x i32> %tmp140.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp2 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %tmp310, float %tmp311, <8 x i32> %tmp138, <4 x i32> %tmp140.bc, i1 0, i32 0, i32 0)
%tmp373 = extractelement <4 x float> %tmp2, i32 0
%tmp374 = extractelement <4 x float> %tmp2, i32 1
%tmp375 = extractelement <4 x float> %tmp2, i32 2
@ -1343,13 +1188,8 @@ IF189: ; preds = %LOOP
%tmp381 = icmp ne i32 %tmp380, 0
%.224 = select i1 %tmp381, float %tmp374, float %tmp373
%.225 = select i1 %tmp381, float %tmp376, float %tmp374
%tmp382 = bitcast float %tmp320 to i32
%tmp383 = bitcast float %tmp321 to i32
%tmp384 = insertelement <2 x i32> undef, i32 %tmp382, i32 0
%tmp385 = insertelement <2 x i32> %tmp384, i32 %tmp383, i32 1
%tmp144.bc = bitcast <4 x i32> %tmp144 to <4 x i32>
%a.bc.i1 = bitcast <2 x i32> %tmp385 to <2 x float>
%tmp3 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %a.bc.i1, <8 x i32> %tmp142, <4 x i32> %tmp144.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp3 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %tmp320, float %tmp321, <8 x i32> %tmp142, <4 x i32> %tmp144.bc, i1 0, i32 0, i32 0)
%tmp387 = extractelement <4 x float> %tmp3, i32 0
%tmp388 = extractelement <4 x float> %tmp3, i32 1
%tmp389 = extractelement <4 x float> %tmp3, i32 2
@ -1442,13 +1282,8 @@ ENDIF197: ; preds = %IF198, %IF189
%temp14.0 = phi float [ %tmp465, %IF198 ], [ %tmp457, %IF189 ]
%temp13.0 = phi float [ %tmp464, %IF198 ], [ %tmp456, %IF189 ]
%temp12.0 = phi float [ %tmp463, %IF198 ], [ %tmp455, %IF189 ]
%tmp466 = bitcast float %tmp219 to i32
%tmp467 = bitcast float %tmp220 to i32
%tmp468 = insertelement <2 x i32> undef, i32 %tmp466, i32 0
%tmp469 = insertelement <2 x i32> %tmp468, i32 %tmp467, i32 1
%tmp160.bc = bitcast <4 x i32> %tmp160 to <4 x i32>
%tmp469.bc = bitcast <2 x i32> %tmp469 to <2 x float>
%tmp470 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %tmp469.bc, <8 x i32> %tmp158, <4 x i32> %tmp160.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp470 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %tmp219, float %tmp220, <8 x i32> %tmp158, <4 x i32> %tmp160.bc, i1 0, i32 0, i32 0)
%tmp471 = extractelement <4 x float> %tmp470, i32 0
%tmp472 = extractelement <4 x float> %tmp470, i32 1
%tmp473 = extractelement <4 x float> %tmp470, i32 2
@ -1461,13 +1296,8 @@ ENDIF197: ; preds = %IF198, %IF189
%tmp480 = fadd float %tmp479, %tmp40
%tmp481 = fmul float %tmp474, %tmp41
%tmp482 = fadd float %tmp481, %tmp42
%tmp483 = bitcast float %p2.i144 to i32
%tmp484 = bitcast float %p2.i138 to i32
%tmp485 = insertelement <2 x i32> undef, i32 %tmp483, i32 0
%tmp486 = insertelement <2 x i32> %tmp485, i32 %tmp484, i32 1
%tmp156.bc = bitcast <4 x i32> %tmp156 to <4 x i32>
%tmp486.bc = bitcast <2 x i32> %tmp486 to <2 x float>
%tmp487 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %tmp486.bc, <8 x i32> %tmp154, <4 x i32> %tmp156.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp487 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %p2.i144, float %p2.i138, <8 x i32> %tmp154, <4 x i32> %tmp156.bc, i1 0, i32 0, i32 0)
%tmp488 = extractelement <4 x float> %tmp487, i32 0
%tmp489 = extractelement <4 x float> %tmp487, i32 1
%tmp490 = extractelement <4 x float> %tmp487, i32 2
@ -1667,27 +1497,11 @@ ENDIF209: ; preds = %ELSE214, %ELSE211,
%tmp651 = fadd float %tmp650, 1.000000e+00
%max.0.i11 = call float @llvm.maxnum.f32(float %tmp651, float 0.000000e+00)
%clamp.i12 = call float @llvm.minnum.f32(float %max.0.i11, float 1.000000e+00)
%tmp653 = bitcast float %tmp642 to i32
%tmp654 = bitcast float %tmp644 to i32
%tmp655 = bitcast float 0.000000e+00 to i32
%tmp656 = insertelement <4 x i32> undef, i32 %tmp653, i32 0
%tmp657 = insertelement <4 x i32> %tmp656, i32 %tmp654, i32 1
%tmp658 = insertelement <4 x i32> %tmp657, i32 %tmp655, i32 2
%tmp659 = insertelement <4 x i32> %tmp658, i32 undef, i32 3
%tmp128.bc = bitcast <4 x i32> %tmp128 to <4 x i32>
%tmp659.bc = bitcast <4 x i32> %tmp659 to <4 x float>
%tmp660 = call <4 x float> @llvm.amdgcn.image.sample.l.v4f32.v4f32.v8i32(<4 x float> %tmp659.bc, <8 x i32> %tmp126, <4 x i32> %tmp128.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp660 = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float %tmp642, float %tmp644, float 0.0, <8 x i32> %tmp126, <4 x i32> %tmp128.bc, i1 0, i32 0, i32 0)
%tmp661 = extractelement <4 x float> %tmp660, i32 0
%tmp662 = extractelement <4 x float> %tmp660, i32 1
%tmp663 = bitcast float %tmp646 to i32
%tmp664 = bitcast float %tmp648 to i32
%tmp665 = bitcast float 0.000000e+00 to i32
%tmp666 = insertelement <4 x i32> undef, i32 %tmp663, i32 0
%tmp667 = insertelement <4 x i32> %tmp666, i32 %tmp664, i32 1
%tmp668 = insertelement <4 x i32> %tmp667, i32 %tmp665, i32 2
%tmp669 = insertelement <4 x i32> %tmp668, i32 undef, i32 3
%tmp669.bc = bitcast <4 x i32> %tmp669 to <4 x float>
%tmp670 = call <4 x float> @llvm.amdgcn.image.sample.l.v4f32.v4f32.v8i32(<4 x float> %tmp669.bc, <8 x i32> %tmp126, <4 x i32> %tmp128.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp670 = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float %tmp646, float %tmp648, float 0.0, <8 x i32> %tmp126, <4 x i32> %tmp128.bc, i1 0, i32 0, i32 0)
%tmp671 = extractelement <4 x float> %tmp670, i32 0
%tmp672 = extractelement <4 x float> %tmp670, i32 1
%tmp673 = fsub float -0.000000e+00, %tmp662
@ -1865,10 +1679,10 @@ declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #1
declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
declare <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
declare <4 x float> @llvm.amdgcn.image.sample.l.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1
attributes #0 = { nounwind }

View File

@ -355,7 +355,7 @@ bb7: ; preds = %bb4
; CHECK: [[END]]:
; CHECK: s_endpgm
define amdgpu_ps void @if_after_kill_block(float %arg, float %arg1, <4 x float> %arg2) #0 {
define amdgpu_ps void @if_after_kill_block(float %arg, float %arg1, float %arg2, float %arg3) #0 {
bb:
%tmp = fcmp ult float %arg1, 0.000000e+00
br i1 %tmp, label %bb3, label %bb4
@ -365,7 +365,7 @@ bb3: ; preds = %bb
br label %bb4
bb4: ; preds = %bb3, %bb
%tmp5 = call <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v8i32(<4 x float> %arg2, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp5 = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 16, float %arg2, float %arg3, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
%tmp6 = extractelement <4 x float> %tmp5, i32 0
%tmp7 = fcmp une float %tmp6, 0.000000e+00
br i1 %tmp7, label %bb8, label %bb9
@ -378,7 +378,7 @@ bb9: ; preds = %bb4
ret void
}
declare <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare void @llvm.AMDGPU.kill(float) #0
attributes #0 = { nounwind }

View File

@ -21,7 +21,7 @@ bb3: ; preds = %bb
%tmp6 = sext i32 %tmp5 to i64
%tmp7 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(4)* %arg, i64 0, i64 %tmp6
%tmp8 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp7, align 32, !tbaa !0
%tmp9 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> <float bitcast (i32 1061158912 to float), float bitcast (i32 1048576000 to float)>, <8 x i32> %tmp8, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp9 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float bitcast (i32 1061158912 to float), float bitcast (i32 1048576000 to float), <8 x i32> %tmp8, <4 x i32> undef, i1 0, i32 0, i32 0)
%tmp10 = extractelement <4 x float> %tmp9, i32 0
%tmp12 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %tmp10, float undef)
call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp12, <2 x half> undef, i1 true, i1 true) #0
@ -30,7 +30,7 @@ bb3: ; preds = %bb
declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1
attributes #0 = { nounwind }

View File

@ -65,7 +65,7 @@ bb7: ; preds = %bb6
br label %bb4
bb9: ; preds = %bb2
%tmp10 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp10 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
%tmp11 = extractelement <4 x float> %tmp10, i32 1
%tmp12 = extractelement <4 x float> %tmp10, i32 3
br label %bb14
@ -97,7 +97,7 @@ bb27: ; preds = %bb24
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }

View File

@ -34,9 +34,8 @@ bb:
%tmp = load volatile i32, i32 addrspace(1)* undef, align 4
%tmp1 = load volatile i32, i32 addrspace(1)* undef, align 4
%tmp2 = insertelement <4 x i32> undef, i32 %tmp1, i32 0
%tmp3 = insertelement <4 x i32> %tmp2, i32 %tmp1, i32 1
%tmp3.cast = bitcast <4 x i32> %tmp3 to <4 x float>
%tmp4 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tmp3.cast, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp3 = bitcast i32 %tmp1 to float
%tmp4 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %tmp3, float %tmp3, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
%tmp5 = extractelement <4 x float> %tmp4, i32 0
%tmp6 = fmul float %tmp5, undef
%tmp7 = fadd float %tmp6, %tmp6
@ -84,7 +83,7 @@ define amdgpu_kernel void @partially_undef_copy() #0 {
ret void
}
declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }

View File

@ -17,25 +17,20 @@ main_body:
%j.f.i = bitcast i32 %j.i to float
%p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 3, i32 4, i32 %arg6) #2
%p2.i = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 3, i32 4, i32 %arg6) #2
%tmp23 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp23 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float undef, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
%tmp24 = extractelement <4 x float> %tmp23, i32 3
%tmp25 = fmul float %tmp24, %tmp24
%tmp26 = fmul float %p2.i, %p2.i
%tmp27 = fadd float %tmp26, %tmp26
%tmp28 = bitcast float %tmp27 to i32
%tmp29 = insertelement <4 x i32> undef, i32 %tmp28, i32 0
%tmp30 = insertelement <4 x i32> %tmp29, i32 0, i32 1
%tmp31 = insertelement <4 x i32> %tmp30, i32 undef, i32 2
%tmp31.cast = bitcast <4 x i32> %tmp31 to <4 x float>
%tmp32 = call <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v8i32(<4 x float> %tmp31.cast, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp32 = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float %tmp27, float 0.0, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
%tmp33 = extractelement <4 x float> %tmp32, i32 0
%tmp34 = fadd float %tmp33, %tmp33
%tmp35 = fadd float %tmp34, %tmp34
%tmp36 = fadd float %tmp35, %tmp35
%tmp37 = fadd float %tmp36, %tmp36
%tmp38 = fadd float %tmp37, %tmp37
%tmp39 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp39 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float undef, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
%tmp40 = extractelement <4 x float> %tmp39, i32 0
%tmp41 = extractelement <4 x float> %tmp39, i32 1
%tmp42 = extractelement <4 x float> %tmp39, i32 2
@ -53,17 +48,12 @@ main_body:
%tmp54 = insertelement <4 x i32> %tmp53, i32 %tmp51, i32 1
%tmp55 = insertelement <4 x i32> %tmp54, i32 %tmp52, i32 2
%tmp55.cast = bitcast <4 x i32> %tmp55 to <4 x float>
%tmp56 = call <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v8i32(<4 x float> %tmp55.cast, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp56 = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float %tmp27, float %tmp48, float %tmp49, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
%tmp57 = extractelement <4 x float> %tmp56, i32 0
%tmp58 = fadd float %tmp38, %tmp57
%tmp59 = fadd float %tmp46, %tmp46
%tmp60 = fadd float %tmp47, %tmp47
%tmp61 = bitcast float %tmp59 to i32
%tmp62 = bitcast float %tmp60 to i32
%tmp63 = insertelement <4 x i32> undef, i32 %tmp61, i32 1
%tmp64 = insertelement <4 x i32> %tmp63, i32 %tmp62, i32 2
%tmp64.cast = bitcast <4 x i32> %tmp64 to <4 x float>
%tmp65 = call <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v8i32(<4 x float> %tmp64.cast, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp65 = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float undef, float %tmp59, float %tmp60, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
%tmp66 = extractelement <4 x float> %tmp65, i32 0
%tmp67 = fadd float %tmp58, %tmp66
%tmp68 = fmul float %tmp67, 1.250000e-01
@ -101,10 +91,7 @@ IF29: ; preds = %LOOP
br label %ENDIF25
ENDIF28: ; preds = %LOOP
%tmp85 = insertelement <4 x i32> %tmp72, i32 undef, i32 1
%tmp86 = insertelement <4 x i32> %tmp85, i32 undef, i32 2
%tmp86.cast = bitcast <4 x i32> %tmp86 to <4 x float>
%tmp87 = call <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v8i32(<4 x float> %tmp86.cast, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp87 = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float %tmp27, float undef, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
%tmp88 = extractelement <4 x float> %tmp87, i32 0
%tmp89 = fadd float %tmp88, %tmp88
br label %LOOP
@ -114,9 +101,8 @@ declare float @llvm.minnum.f32(float, float) #1
declare float @llvm.maxnum.f32(float, float) #1
declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1
declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
declare <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
declare <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
attributes #0 = { nounwind "InitialPSInputAddr"="36983" "target-cpu"="tonga" }
attributes #1 = { nounwind readnone }

View File

@ -5,10 +5,10 @@
;
;CHECK-LABEL: {{^}}test1:
;CHECK-NOT: s_wqm
define amdgpu_ps <4 x float> @test1(<8 x i32> inreg %rsrc, <4 x i32> %c) {
define amdgpu_ps <4 x float> @test1(<8 x i32> inreg %rsrc, i32 %c) {
main_body:
%tex = call <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
call void @llvm.amdgcn.image.store.v4f32.v4i32.v8i32(<4 x float> %tex, <4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
%tex = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %c, <8 x i32> %rsrc, i32 0, i32 0)
call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %tex, i32 15, i32 %c, <8 x i32> %rsrc, i32 0, i32 0)
ret <4 x float> %tex
}
@ -30,11 +30,9 @@ main_body:
%inst24 = extractelement <2 x float> %pos, i32 1
%inst25 = tail call float @llvm.amdgcn.interp.p1(float %inst23, i32 0, i32 0, i32 %m0)
%inst26 = tail call float @llvm.amdgcn.interp.p2(float %inst25, float %inst24, i32 0, i32 0, i32 %m0)
%inst27 = insertelement <2 x float> undef, float %inst26, i32 0
%inst28 = tail call float @llvm.amdgcn.interp.p1(float %inst23, i32 1, i32 0, i32 %m0)
%inst29 = tail call float @llvm.amdgcn.interp.p2(float %inst28, float %inst24, i32 1, i32 0, i32 %m0)
%inst30 = insertelement <2 x float> %inst27, float %inst29, i32 1
%tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %inst30, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
%tex = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %inst26, float %inst29, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
ret <4 x float> %tex
}
@ -49,9 +47,9 @@ main_body:
;CHECK: store
;CHECK-NOT: exec
;CHECK: .size test3
define amdgpu_ps <4 x float> @test3(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, <4 x float> %c) {
define amdgpu_ps <4 x float> @test3(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float %c) {
main_body:
%tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %c, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
%tex.1 = bitcast <4 x float> %tex to <4 x i32>
%tex.2 = extractelement <4 x i32> %tex.1, i32 0
@ -77,11 +75,9 @@ main_body:
%inst24 = extractelement <2 x float> %pos, i32 1
%inst25 = tail call float @llvm.amdgcn.interp.p1(float %inst23, i32 0, i32 0, i32 %m0)
%inst26 = tail call float @llvm.amdgcn.interp.p2(float %inst25, float %inst24, i32 0, i32 0, i32 %m0)
%inst27 = insertelement <2 x float> undef, float %inst26, i32 0
%inst28 = tail call float @llvm.amdgcn.interp.p1(float %inst23, i32 1, i32 0, i32 %m0)
%inst29 = tail call float @llvm.amdgcn.interp.p2(float %inst28, float %inst24, i32 1, i32 0, i32 %m0)
%inst30 = insertelement <2 x float> %inst27, float %inst29, i32 1
%tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %inst30, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
%tex = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %inst26, float %inst29, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
%tex.0 = extractelement <4 x float> %tex, i32 0
%tex.1 = extractelement <4 x float> %tex, i32 1
%tex.2 = extractelement <4 x float> %tex, i32 2
@ -108,8 +104,9 @@ main_body:
call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> undef, <4 x i32> undef, i32 %c.1, i32 0, i1 0, i1 0)
%c.1.bc = bitcast i32 %c.1 to float
%tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %c.1.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
%dtex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tex, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c.1.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
%tex0 = extractelement <4 x float> %tex, i32 0
%dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
ret <4 x float> %dtex
}
@ -361,8 +358,9 @@ main_body:
IF:
%c.bc = bitcast i32 %c to float
%tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %c.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
%dtex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tex, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
%tex0 = extractelement <4 x float> %tex, i32 0
%dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
%data.if = extractelement <4 x float> %dtex, i32 0
br label %END
@ -403,8 +401,9 @@ main_body:
IF:
%c.bc = bitcast i32 %c to float
%tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %c.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
%dtex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tex, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
%tex0 = extractelement <4 x float> %tex, i32 0
%dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
%data.if = extractelement <4 x float> %dtex, i32 0
br label %END
@ -460,7 +459,7 @@ ELSE:
END:
%coord.END = phi i32 [ %coord.IF, %IF ], [ %coord.ELSE, %ELSE ]
%coord.END.bc = bitcast i32 %coord.END to float
%tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %coord.END.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord.END.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
ret <4 x float> %tex
}
@ -477,8 +476,9 @@ END:
;CHECK-DAG: store
define amdgpu_ps float @test_control_flow_3(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, i32 %idx, float %coord) {
main_body:
%tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
%dtex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tex, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
%tex0 = extractelement <4 x float> %tex, i32 0
%dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
%dtex.1 = extractelement <4 x float> %dtex, i32 0
call void @llvm.amdgcn.buffer.store.f32(float %dtex.1, <4 x i32> undef, i32 %idx, i32 0, i1 0, i1 0)
@ -523,8 +523,9 @@ IF:
br label %END
END:
%tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
%dtex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tex, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
%tex0 = extractelement <4 x float> %tex, i32 0
%dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
ret <4 x float> %dtex
}
@ -545,7 +546,7 @@ END:
;CHECK: image_sample
define amdgpu_ps <4 x float> @test_kill_0(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, <2 x i32> %idx, <2 x float> %data, float %coord, float %coord2, float %z) {
main_body:
%tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
%idx.0 = extractelement <2 x i32> %idx, i32 0
%data.0 = extractelement <2 x float> %data, i32 0
call void @llvm.amdgcn.buffer.store.f32(float %data.0, <4 x i32> undef, i32 %idx.0, i32 0, i1 0, i1 0)
@ -555,8 +556,9 @@ main_body:
%idx.1 = extractelement <2 x i32> %idx, i32 1
%data.1 = extractelement <2 x float> %data, i32 1
call void @llvm.amdgcn.buffer.store.f32(float %data.1, <4 x i32> undef, i32 %idx.1, i32 0, i1 0, i1 0)
%tex2 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %coord2, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
%dtex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tex2, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
%tex2 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord2, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
%tex2.0 = extractelement <4 x float> %tex2, i32 0
%dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex2.0, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
%out = fadd <4 x float> %tex, %dtex
ret <4 x float> %out
@ -576,8 +578,9 @@ main_body:
; CHECK: v_cmpx_
define amdgpu_ps <4 x float> @test_kill_1(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, i32 %idx, float %data, float %coord, float %coord2, float %z) {
main_body:
%tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
%dtex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tex, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
%tex0 = extractelement <4 x float> %tex, i32 0
%dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
call void @llvm.amdgcn.buffer.store.f32(float %data, <4 x i32> undef, i32 0, i32 0, i1 0, i1 0)
@ -618,7 +621,7 @@ main_body:
; CHECK: ; return
define amdgpu_ps <4 x float> @test_loop_vcc(<4 x float> %in) nounwind {
entry:
call void @llvm.amdgcn.image.store.v4f32.v4i32.v8i32(<4 x float> %in, <4 x i32> undef, <8 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0)
call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %in, i32 15, i32 undef, <8 x i32> undef, i32 0, i32 0)
br label %loop
loop:
@ -628,7 +631,8 @@ loop:
br i1 %cc, label %break, label %body
body:
%c.next = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %c.iv, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
%c.iv0 = extractelement <4 x float> %c.iv, i32 0
%c.next = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c.iv0, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) #0
%ctr.next = fadd float %ctr.iv, 2.0
br label %loop
@ -669,7 +673,7 @@ entry:
%c.gep = getelementptr [32 x i32], [32 x i32] addrspace(5)* %array, i32 0, i32 %idx
%c = load i32, i32 addrspace(5)* %c.gep, align 4
%c.bc = bitcast i32 %c to float
%t = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %c.bc, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
%t = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c.bc, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) #0
call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %t, <4 x i32> undef, i32 0, i32 0, i1 0, i1 0)
ret void
@ -687,8 +691,9 @@ entry:
; CHECK: s_and_b64 exec, exec, [[LIVE]]
; CHECK-NOT: exec
define amdgpu_ps <4 x float> @test_nonvoid_return() nounwind {
%tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
%dtex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tex, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) #0
%tex0 = extractelement <4 x float> %tex, i32 0
%dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) #0
ret <4 x float> %dtex
}
@ -700,8 +705,9 @@ define amdgpu_ps <4 x float> @test_nonvoid_return() nounwind {
; CHECK-NOT: exec
define amdgpu_ps <4 x float> @test_nonvoid_return_unreachable(i32 inreg %c) nounwind {
entry:
%tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
%dtex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tex, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) #0
%tex0 = extractelement <4 x float> %tex, i32 0
%dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) #0
%cc = icmp sgt i32 %c, 0
br i1 %cc, label %if, label %else
@ -733,11 +739,11 @@ main_body:
br i1 %cc, label %if, label %else
if:
%r.if = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float 0.0, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
%r.if = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float 0.0, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) #0
br label %end
else:
%r.else = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> <float 0.0, float bitcast (i32 1 to float)>, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
%r.else = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 0.0, float bitcast (i32 1 to float), <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) #0
br label %end
end:
@ -757,8 +763,9 @@ end:
define amdgpu_ps float @test_wwm_within_wqm(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, i32 %c, i32 %z, float %data) {
main_body:
%c.bc = bitcast i32 %c to float
%tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %c.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
%dtex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tex, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
%tex0 = extractelement <4 x float> %tex, i32 0
%dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
%cmp = icmp eq i32 %z, 0
br i1 %cmp, label %IF, label %ENDIF
@ -777,14 +784,13 @@ ENDIF:
}
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
declare void @llvm.amdgcn.image.store.v4f32.v4i32.v8i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
declare void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float>, i32, i32, <8 x i32>, i32, i32) #1
declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) #2
declare void @llvm.amdgcn.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) #2
declare <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #3
declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #3
declare float @llvm.amdgcn.buffer.load.f32(<4 x i32>, i32, i32, i1, i1) #3
declare <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #3
declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #3
declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #3
declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #3
declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #3
declare void @llvm.AMDGPU.kill(float) #1
declare float @llvm.amdgcn.wqm.f32(float) #3
declare i32 @llvm.amdgcn.wqm.i32(i32) #3