mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-01-20 23:30:54 +00:00
[AMDGPU] Add 2 gfx940 mfma tests. NFC.
This commit is contained in:
parent
4308fdf83b
commit
275b0c5a5a
@ -1,5 +1,6 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX908,GFX908_A %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX90A,GFX908_A %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX940 %s
|
||||
|
||||
; GCN-LABEL: {{^}}test_mfma_loop_zeroinit:
|
||||
|
||||
@ -12,6 +13,7 @@
|
||||
; GCN: [[LOOP:.LBB[0-9_]+]]:
|
||||
; GCN-NOT: v_accvgpr
|
||||
; GFX908_A: v_mfma_f32_32x32x1f32
|
||||
; GFX940: v_mfma_f32_32x32x1_2b_f32
|
||||
; GCN-NOT: v_accvgpr
|
||||
; GCN: s_cbranch_scc1 [[LOOP]]
|
||||
|
||||
@ -46,6 +48,7 @@ exit:
|
||||
; Check that we do not use 32 temp sgprs as well.
|
||||
|
||||
; GFX908_A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x42f60000
|
||||
; GFX940: s_mov_b32 [[TMP:s[0-9]+]], 0x42f60000
|
||||
; GFX908-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
|
||||
; GFX90A: v_accvgpr_write_b32 [[LEAD:a[0-9]+]], [[TMP]]
|
||||
; GFX90A-COUNT-31: v_accvgpr_mov_b32 a{{[0-9]+}}, [[LEAD]]
|
||||
@ -53,6 +56,7 @@ exit:
|
||||
; GCN: [[LOOP:.LBB[0-9_]+]]:
|
||||
; GCN-NOT: v_accvgpr
|
||||
; GFX908_A: v_mfma_f32_32x32x1f32
|
||||
; GFX940: v_mfma_f32_32x32x1_2b_f32
|
||||
; GCN-NOT: v_accvgpr
|
||||
; GCN: s_cbranch_scc1 [[LOOP]]
|
||||
|
||||
@ -88,6 +92,7 @@ exit:
|
||||
; GCN: [[LOOP:.LBB[0-9_]+]]:
|
||||
; GCN-NOT: v_accvgpr
|
||||
; GFX908_A: v_mfma_f32_32x32x1f32
|
||||
; GFX940: v_mfma_f32_32x32x1_2b_f32
|
||||
; GCN-NOT: v_accvgpr
|
||||
; GCN: s_cbranch_scc1 [[LOOP]]
|
||||
|
||||
@ -248,10 +253,13 @@ exit:
|
||||
; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
|
||||
; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
|
||||
|
||||
; GFX940-COUNT-32: s_mov_b32 s{{[0-9]+}}, 0x4{{[0-9a-f]+}}
|
||||
; GFX940-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
|
||||
|
||||
; GCN: [[LOOP:.LBB[0-9_]+]]:
|
||||
; GCN-NOT: v_accvgpr
|
||||
; GFX908_A: v_mfma_f32_32x32x1f32
|
||||
; GFX940: v_mfma_f32_32x32x1_2b_f32
|
||||
; GCN-NOT: v_accvgpr
|
||||
; GCN: s_cbranch_scc1 [[LOOP]]
|
||||
|
||||
@ -284,6 +292,7 @@ exit:
|
||||
; GCN: [[LOOP:.LBB[0-9_]+]]:
|
||||
; GCN-NOT: v_accvgpr
|
||||
; GFX908_A: v_mfma_f32_32x32x1f32
|
||||
; GFX940: v_mfma_f32_32x32x1_2b_f32
|
||||
; GCN-NOT: v_accvgpr
|
||||
; GCN: s_cbranch_scc1 [[LOOP]]
|
||||
|
||||
@ -349,11 +358,13 @@ exit:
|
||||
; GFX908_A: v_mov_b32_e32 [[TMP:v[0-9]+]], s{{[0-9]+}}
|
||||
; GFX908-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
|
||||
; GFX90A: v_accvgpr_write_b32 [[LEAD:a[0-9]+]], [[TMP]]
|
||||
; GFX940: v_accvgpr_write_b32 [[LEAD:a[0-9]+]], s{{[0-9]+}}
|
||||
; GFX90A-COUNT-31: v_accvgpr_mov_b32 a{{[0-9]+}}, [[LEAD]]
|
||||
|
||||
; GCN: [[LOOP:.LBB[0-9_]+]]:
|
||||
; GCN-NOT: v_accvgpr
|
||||
; GFX908_A: v_mfma_f32_32x32x1f32
|
||||
; GFX940: v_mfma_f32_32x32x1_2b_f32
|
||||
; GCN-NOT: v_accvgpr
|
||||
; GCN: s_cbranch_scc1 [[LOOP]]
|
||||
|
||||
@ -416,6 +427,7 @@ exit:
|
||||
|
||||
; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v0
|
||||
; GFX908_A-DAG: v_mov_b32_e32 [[TMP:v[0-9]+]], s{{[0-9]+}}
|
||||
; GFX940-DAG: s_load_dword [[TMP:s[0-9]+]],
|
||||
; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}}
|
||||
@ -454,6 +466,7 @@ exit:
|
||||
; GCN: [[LOOP:.LBB[0-9_]+]]:
|
||||
; GCN-NOT: v_accvgpr
|
||||
; GFX908_A: v_mfma_f32_32x32x1f32
|
||||
; GFX940: v_mfma_f32_32x32x1_2b_f32
|
||||
; GCN-NOT: v_accvgpr
|
||||
; GCN: s_cbranch_scc1 [[LOOP]]
|
||||
|
||||
@ -491,11 +504,13 @@ exit:
|
||||
; GFX90A-NOT: v_accvgpr
|
||||
; GFX90A: v_mfma_f32_32x32x1f32 a[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9]+}}, 0{{$}}
|
||||
; GFX90A-NOT: v_accvgpr
|
||||
; GFX940: v_mfma_f32_32x32x1_2b_f32 a[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9]+}}, 0{{$}}
|
||||
; GCN-NOT: v_accvgpr
|
||||
|
||||
; GCN: [[LOOP:.LBB[0-9_]+]]:
|
||||
; GCN-NOT: v_accvgpr
|
||||
; GFX908_A: v_mfma_f32_32x32x1f32
|
||||
; GFX940: v_mfma_f32_32x32x1_2b_f32
|
||||
; GCN-NOT: v_accvgpr
|
||||
; GCN: s_cbranch_scc1 [[LOOP]]
|
||||
|
||||
@ -530,6 +545,7 @@ exit:
|
||||
; GFX90A-NOT: v_accvgpr
|
||||
; GFX90A: v_mfma_f32_32x32x1f32 a[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9]+}}, 0{{$}}
|
||||
; GFX90A-NOT: v_accvgpr
|
||||
; GFX940: v_mfma_f32_32x32x1_2b_f32 a[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9]+}}, 0{{$}}
|
||||
|
||||
; Check that we are using only one tmp VGPR.
|
||||
|
||||
@ -541,6 +557,7 @@ exit:
|
||||
; GCN: [[LOOP:.LBB[0-9_]+]]:
|
||||
; GCN-NOT: v_accvgpr
|
||||
; GFX908_A: v_mfma_f32_32x32x1f32
|
||||
; GFX940: v_mfma_f32_32x32x1_2b_f32
|
||||
; GCN-NOT: v_accvgpr
|
||||
; GCN: s_cbranch_scc1 [[LOOP]]
|
||||
|
||||
@ -614,6 +631,7 @@ exit:
|
||||
; GCN: [[INNER_LOOP:.LBB[0-9_]+]]:
|
||||
; GCN-NOT: v_accvgpr
|
||||
; GFX908_A: v_mfma_f32_32x32x1f32
|
||||
; GFX940: v_mfma_f32_32x32x1_2b_f32
|
||||
; GCN-NOT: v_accvgpr
|
||||
; GCN: s_cbranch_scc1 [[INNER_LOOP]]
|
||||
; GCN-NOT: v_accvgpr
|
||||
|
@ -1,5 +1,6 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GREEDY %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GREEDY %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GREEDY %s
|
||||
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GREEDY %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx90a -sgpr-regalloc=fast -vgpr-regalloc=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,FAST %s
|
||||
|
||||
@ -11,11 +12,11 @@ declare <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float, float, <16 x float>
|
||||
declare <4 x float> @llvm.amdgcn.mfma.f32.4x4x1f32(float, float, <4 x float>, i32, i32, i32)
|
||||
|
||||
; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32:
|
||||
; GREEDY: v_mfma_f32_32x32x1f32 a[0:31], v{{[0-9]+}}, v{{[0-9]+}}, a[0:31]
|
||||
; GREEDY: v_mfma_f32_32x32x1f32 a[32:63], v{{[0-9]+}}, v{{[0-9]+}}, a[0:31]
|
||||
; FAST: v_mfma_f32_32x32x1f32 a[64:95], v{{[0-9]+}}, v{{[0-9]+}}, a[64:95]
|
||||
; FAST: v_mfma_f32_32x32x1f32 a[32:63], v{{[0-9]+}}, v{{[0-9]+}}, a[64:95]
|
||||
; GCN: v_mfma_f32_32x32x1f32 a[0:31], v{{[0-9]+}}, v{{[0-9]+}}, a[0:31]
|
||||
; GREEDY: v_mfma_f32_32x32x1{{.*}} a[0:31], v{{[0-9]+}}, v{{[0-9]+}}, a[0:31]
|
||||
; GREEDY: v_mfma_f32_32x32x1{{.*}} a[32:63], v{{[0-9]+}}, v{{[0-9]+}}, a[0:31]
|
||||
; FAST: v_mfma_f32_32x32x1{{.*}} a[64:95], v{{[0-9]+}}, v{{[0-9]+}}, a[64:95]
|
||||
; FAST: v_mfma_f32_32x32x1{{.*}} a[32:63], v{{[0-9]+}}, v{{[0-9]+}}, a[64:95]
|
||||
; GCN: v_mfma_f32_32x32x1{{.*}} a[0:31], v{{[0-9]+}}, v{{[0-9]+}}, a[0:31]
|
||||
define amdgpu_kernel void @test_mfma_f32_32x32x1f32(<32 x float> addrspace(1)* %arg) #0 {
|
||||
bb:
|
||||
%in.1 = load <32 x float>, <32 x float> addrspace(1)* %arg
|
||||
@ -28,11 +29,11 @@ bb:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_mfma_f32_16x16x1f32:
|
||||
; GREEDY: v_mfma_f32_16x16x1f32 a[0:15], v{{[0-9]+}}, v{{[0-9]+}}, a[0:15]
|
||||
; GREEDY: v_mfma_f32_16x16x1f32 a[16:31], v{{[0-9]+}}, v{{[0-9]+}}, a[0:15]
|
||||
; FAST: v_mfma_f32_16x16x1f32 a[32:47], v{{[0-9]+}}, v{{[0-9]+}}, a[32:47]
|
||||
; FAST: v_mfma_f32_16x16x1f32 a[16:31], v{{[0-9]+}}, v{{[0-9]+}}, a[32:47]
|
||||
; GCN: v_mfma_f32_16x16x1f32 a[0:15], v{{[0-9]+}}, v{{[0-9]+}}, a[0:15]
|
||||
; GREEDY: v_mfma_f32_16x16x1{{.*}} a[0:15], v{{[0-9]+}}, v{{[0-9]+}}, a[0:15]
|
||||
; GREEDY: v_mfma_f32_16x16x1{{.*}} a[16:31], v{{[0-9]+}}, v{{[0-9]+}}, a[0:15]
|
||||
; FAST: v_mfma_f32_16x16x1{{.*}} a[32:47], v{{[0-9]+}}, v{{[0-9]+}}, a[32:47]
|
||||
; FAST: v_mfma_f32_16x16x1{{.*}} a[16:31], v{{[0-9]+}}, v{{[0-9]+}}, a[32:47]
|
||||
; GCN: v_mfma_f32_16x16x1{{.*}} a[0:15], v{{[0-9]+}}, v{{[0-9]+}}, a[0:15]
|
||||
define amdgpu_kernel void @test_mfma_f32_16x16x1f32(<16 x float> addrspace(1)* %arg) #0 {
|
||||
bb:
|
||||
%in.1 = load <16 x float>, <16 x float> addrspace(1)* %arg
|
||||
@ -47,11 +48,11 @@ bb:
|
||||
; This instruction allows the overlap since it only reads 4 registers.
|
||||
|
||||
; GCN-LABEL: {{^}}test_mfma_f32_4x4x1f32:
|
||||
; GREEDY: v_mfma_f32_4x4x1f32 a[0:3], v{{[0-9]+}}, v{{[0-9]+}}, a[0:3]
|
||||
; GREEDY: v_mfma_f32_4x4x1f32 a[2:5], v{{[0-9]+}}, v{{[0-9]+}}, a[0:3]
|
||||
; FAST: v_mfma_f32_4x4x1f32 a[8:11], v{{[0-9]+}}, v{{[0-9]+}}, a[0:3]
|
||||
; FAST: v_mfma_f32_4x4x1f32 a[4:7], v{{[0-9]+}}, v{{[0-9]+}}, a[8:11]
|
||||
; GCN: v_mfma_f32_4x4x1f32 a[0:3], v{{[0-9]+}}, v{{[0-9]+}}, a[0:3]
|
||||
; GREEDY: v_mfma_f32_4x4x1{{.*}} a[0:3], v{{[0-9]+}}, v{{[0-9]+}}, a[0:3]
|
||||
; GREEDY: v_mfma_f32_4x4x1{{.*}} a[2:5], v{{[0-9]+}}, v{{[0-9]+}}, a[0:3]
|
||||
; FAST: v_mfma_f32_4x4x1{{.*}} a[8:11], v{{[0-9]+}}, v{{[0-9]+}}, a[0:3]
|
||||
; FAST: v_mfma_f32_4x4x1{{.*}} a[4:7], v{{[0-9]+}}, v{{[0-9]+}}, a[8:11]
|
||||
; GCN: v_mfma_f32_4x4x1{{.*}} a[0:3], v{{[0-9]+}}, v{{[0-9]+}}, a[0:3]
|
||||
define amdgpu_kernel void @test_mfma_f32_4x4x1f32(<4 x float> addrspace(1)* %arg) #0 {
|
||||
bb:
|
||||
%in.1 = load <4 x float>, <4 x float> addrspace(1)* %arg
|
||||
|
Loading…
x
Reference in New Issue
Block a user