mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-02-10 03:13:34 +00:00
AMDGPU: Match store d16_hi instructions
llvm-svn: 313712
This commit is contained in:
parent
09613b122e
commit
fcc213fab7
@ -222,23 +222,43 @@ def COND_NULL : PatLeaf <
|
||||
// Load/Store Pattern Fragments
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class PrivateMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
|
||||
return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS;
|
||||
}]>;
|
||||
class LoadFrag <SDPatternOperator op> : PatFrag<(ops node:$ptr), (op node:$ptr)>;
|
||||
|
||||
class PrivateLoad <SDPatternOperator op> : PrivateMemOp <
|
||||
(ops node:$ptr), (op node:$ptr)
|
||||
>;
|
||||
|
||||
class PrivateStore <SDPatternOperator op> : PrivateMemOp <
|
||||
class StoreFrag<SDPatternOperator op> : PatFrag <
|
||||
(ops node:$value, node:$ptr), (op node:$value, node:$ptr)
|
||||
>;
|
||||
|
||||
def load_private : PrivateLoad <load>;
|
||||
// Store fragment that matches storing the high half of a 32-bit value:
// the stored operand is $value logically shifted right by 16 (srl), and
// `op` is the underlying (truncating) store operator applied to it.
// Takes the same (value, ptr) operands as a plain store fragment.
class StoreHi16<SDPatternOperator op> : PatFrag <
|
||||
(ops node:$value, node:$ptr), (op (srl node:$value, (i32 16)), node:$ptr)
|
||||
>;
|
||||
|
||||
// Code predicate that accepts a node only when its memory address space is
// AMDGPUASI.PRIVATE_ADDRESS. Intended to be listed as an extra parent on a
// load/store fragment def (e.g. `def store_private : StoreFrag <store>,
// PrivateAddress;`).
// NOTE(review): uses cast<MemSDNode>, so N is assumed to be a memory node.
class PrivateAddress : CodePatPred<[{
|
||||
return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS;
|
||||
}]>;
|
||||
|
||||
// Code predicate that accepts a node only when its memory address space is
// AMDGPUASI.LOCAL_ADDRESS. Listed as an extra parent on fragment defs such
// as `def local_store : StoreFrag <store>, LocalAddress;`.
// NOTE(review): uses cast<MemSDNode>, so N is assumed to be a memory node.
class LocalAddress : CodePatPred<[{
|
||||
return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
|
||||
}]>;
|
||||
|
||||
// Code predicate that accepts a node only when its memory address space is
// AMDGPUASI.GLOBAL_ADDRESS. Listed as an extra parent on fragment defs such
// as `def truncstorei16_global_hi16 : StoreHi16 <truncstorei16>, GlobalAddress;`.
// NOTE(review): uses cast<MemSDNode>, so N is assumed to be a memory node.
class GlobalAddress : CodePatPred<[{
|
||||
return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;
|
||||
}]>;
|
||||
|
||||
// Code predicate that accepts a node when its memory address space is either
// AMDGPUASI.FLAT_ADDRESS or AMDGPUASI.GLOBAL_ADDRESS.
// Despite the "Load" in its name, this predicate is also attached to store
// fragments (flat_truncstorei8_hi16 / flat_truncstorei16_hi16).
// NOTE(review): uses cast<MemSDNode>, so N is assumed to be a memory node.
class FlatLoadAddress : CodePatPred<[{
|
||||
const auto AS = cast<MemSDNode>(N)->getAddressSpace();
|
||||
return AS == AMDGPUASI.FLAT_ADDRESS ||
|
||||
AS == AMDGPUASI.GLOBAL_ADDRESS;
|
||||
}]>;
|
||||
|
||||
|
||||
def load_private : LoadFrag <load>, PrivateAddress;
|
||||
def truncstorei8_private : StoreFrag<truncstorei8>, PrivateAddress;
|
||||
def truncstorei16_private : StoreFrag <truncstorei16>, PrivateAddress;
|
||||
def store_private : StoreFrag <store>, PrivateAddress;
|
||||
|
||||
def store_private_hi16 : StoreHi16 <truncstorei16>, PrivateAddress;
|
||||
def truncstorei8_private_hi16 : StoreHi16<truncstorei8>, PrivateAddress;
|
||||
|
||||
def truncstorei8_private : PrivateStore <truncstorei8>;
|
||||
def truncstorei16_private : PrivateStore <truncstorei16>;
|
||||
def store_private : PrivateStore <store>;
|
||||
|
||||
class GlobalMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
|
||||
return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;
|
||||
@ -315,8 +335,12 @@ def sextloadi8_constant : ConstantLoad <sextloadi8>;
|
||||
def az_extloadi8_local : LocalLoad <az_extloadi8>;
|
||||
def sextloadi8_local : LocalLoad <sextloadi8>;
|
||||
|
||||
def extloadi8_private : PrivateLoad <az_extloadi8>;
|
||||
def sextloadi8_private : PrivateLoad <sextloadi8>;
|
||||
def store_local_hi16 : StoreHi16 <truncstorei16>, LocalAddress;
|
||||
def truncstorei8_local_hi16 : StoreHi16<truncstorei8>, LocalAddress;
|
||||
|
||||
|
||||
def extloadi8_private : LoadFrag <az_extloadi8>, PrivateAddress;
|
||||
def sextloadi8_private : LoadFrag <sextloadi8>, PrivateAddress;
|
||||
|
||||
def az_extloadi16 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
|
||||
return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;
|
||||
@ -331,8 +355,8 @@ def sextloadi16_constant : ConstantLoad <sextloadi16>;
|
||||
def az_extloadi16_local : LocalLoad <az_extloadi16>;
|
||||
def sextloadi16_local : LocalLoad <sextloadi16>;
|
||||
|
||||
def extloadi16_private : PrivateLoad <az_extloadi16>;
|
||||
def sextloadi16_private : PrivateLoad <sextloadi16>;
|
||||
def extloadi16_private : LoadFrag <az_extloadi16>, PrivateAddress;
|
||||
def sextloadi16_private : LoadFrag <sextloadi16>, PrivateAddress;
|
||||
|
||||
def az_extloadi32 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
|
||||
return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
|
||||
@ -347,9 +371,12 @@ def az_extloadi32_constant : ConstantLoad <az_extloadi32>;
|
||||
def truncstorei8_global : GlobalStore <truncstorei8>;
|
||||
def truncstorei16_global : GlobalStore <truncstorei16>;
|
||||
|
||||
def local_store : LocalStore <store>;
|
||||
def truncstorei8_local : LocalStore <truncstorei8>;
|
||||
def truncstorei16_local : LocalStore <truncstorei16>;
|
||||
def truncstorei8_global_hi16 : StoreHi16 <truncstorei8>, GlobalAddress;
|
||||
def truncstorei16_global_hi16 : StoreHi16 <truncstorei16>, GlobalAddress;
|
||||
|
||||
def local_store : StoreFrag <store>, LocalAddress;
|
||||
def truncstorei8_local : StoreFrag <truncstorei8>, LocalAddress;
|
||||
def truncstorei16_local : StoreFrag <truncstorei16>, LocalAddress;
|
||||
|
||||
def local_load : LocalLoad <load>;
|
||||
|
||||
|
@ -1264,6 +1264,16 @@ defm : MUBUFScratchStorePat <BUFFER_STORE_DWORD_OFFEN, BUFFER_STORE_DWORD_OFFSET
|
||||
defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX2_OFFEN, BUFFER_STORE_DWORDX2_OFFSET, v2i32, store_private>;
|
||||
defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX4_OFFEN, BUFFER_STORE_DWORDX4_OFFSET, v4i32, store_private>;
|
||||
|
||||
|
||||
let Predicates = [HasD16LoadStore] in {
|
||||
// Hiding the extract-high pattern inside the PatFrag does not seem to
|
||||
// automatically increase the pattern complexity, so raise it explicitly.
|
||||
let AddedComplexity = 1 in {
|
||||
defm : MUBUFScratchStorePat <BUFFER_STORE_SHORT_D16_HI_OFFEN, BUFFER_STORE_SHORT_D16_HI_OFFSET, i32, store_private_hi16>;
|
||||
defm : MUBUFScratchStorePat <BUFFER_STORE_BYTE_D16_HI_OFFEN, BUFFER_STORE_BYTE_D16_HI_OFFSET, i32, truncstorei8_private_hi16>;
|
||||
}
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// MTBUF Patterns
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -576,6 +576,11 @@ def : DSWritePat <DS_WRITE_B8, i16, si_truncstore_local_i8>;
|
||||
def : DSWritePat <DS_WRITE_B16, i16, si_store_local>;
|
||||
def : DSWritePat <DS_WRITE_B32, i32, si_store_local>;
|
||||
|
||||
let Predicates = [HasD16LoadStore] in {
|
||||
def : DSWritePat <DS_WRITE_B16_D16_HI, i32, store_local_hi16>;
|
||||
def : DSWritePat <DS_WRITE_B8_D16_HI, i32, truncstorei8_local_hi16>;
|
||||
}
|
||||
|
||||
let AddedComplexity = 100 in {
|
||||
|
||||
def : DSWritePat <DS_WRITE_B64, v2i32, si_store_local_align8>;
|
||||
|
@ -645,6 +645,10 @@ def flat_store : flat_st <store>;
|
||||
def flat_truncstorei8 : flat_st <truncstorei8>;
|
||||
def flat_truncstorei16 : flat_st <truncstorei16>;
|
||||
|
||||
def flat_truncstorei8_hi16 : StoreHi16<truncstorei8>, FlatLoadAddress;
|
||||
def flat_truncstorei16_hi16 : StoreHi16<truncstorei16>, FlatLoadAddress;
|
||||
|
||||
|
||||
// Patterns for global loads with no offset.
|
||||
class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : Pat <
|
||||
(vt (node (FLATOffset i64:$vaddr, i16:$offset, i1:$slc))),
|
||||
@ -752,6 +756,12 @@ def : FlatAtomicPat <FLAT_ATOMIC_XOR_X2_RTN, atomic_xor_global, i64>;
|
||||
|
||||
def : FlatStorePat <FLAT_STORE_BYTE, flat_truncstorei8, i16>;
|
||||
def : FlatStorePat <FLAT_STORE_SHORT, flat_store, i16>;
|
||||
|
||||
let Predicates = [HasD16LoadStore] in {
|
||||
def : FlatStorePat <FLAT_STORE_SHORT_D16_HI, flat_truncstorei16_hi16, i32>;
|
||||
def : FlatStorePat <FLAT_STORE_BYTE_D16_HI, flat_truncstorei8_hi16, i32>;
|
||||
}
|
||||
|
||||
} // End Predicates = [HasFlatAddressSpace]
|
||||
|
||||
let Predicates = [HasFlatGlobalInsts], AddedComplexity = 10 in {
|
||||
@ -779,6 +789,13 @@ def : FlatStoreSignedPat <GLOBAL_STORE_DWORD, global_store, i32>;
|
||||
def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX2, global_store, v2i32>;
|
||||
def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX4, global_store, v4i32>;
|
||||
|
||||
|
||||
let Predicates = [HasD16LoadStore] in {
|
||||
def : FlatStoreSignedPat <GLOBAL_STORE_SHORT_D16_HI, truncstorei16_global_hi16, i32>;
|
||||
def : FlatStoreSignedPat <GLOBAL_STORE_BYTE_D16_HI, truncstorei8_global_hi16, i32>;
|
||||
}
|
||||
|
||||
|
||||
def : FlatStoreSignedAtomicPat <GLOBAL_STORE_DWORD, global_store_atomic, i32>;
|
||||
def : FlatStoreSignedAtomicPat <GLOBAL_STORE_DWORDX2, global_store_atomic, i64>;
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=kaveri -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=CIVI -check-prefix=GCN %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=CIVI -check-prefix=GCN -check-prefix=GFX89 %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN -check-prefix=GFX89 %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=kaveri -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,CIVI %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,CIVI,GFX89 %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9 %s
|
||||
|
||||
; FIXME: Should be able to do scalar op
|
||||
; GCN-LABEL: {{^}}s_fneg_f16:
|
||||
@ -154,7 +154,8 @@ define amdgpu_kernel void @v_extract_fneg_fold_v2f16(<2 x half> addrspace(1)* %i
|
||||
; GCN-LABEL: {{^}}v_extract_fneg_no_fold_v2f16:
|
||||
; GCN: {{flat|global}}_load_dword [[VAL:v[0-9]+]]
|
||||
; GCN: v_xor_b32_e32 [[NEG:v[0-9]+]], 0x80008000, [[VAL]]
|
||||
; GCN: v_lshrrev_b32_e32 [[ELT1:v[0-9]+]], 16, [[NEG]]
|
||||
; CIVI: v_lshrrev_b32_e32 [[ELT1:v[0-9]+]], 16, [[NEG]]
|
||||
; GFX9: global_store_short_d16_hi v{{\[[0-9]+:[0-9]+\]}}, [[NEG]], off
|
||||
define amdgpu_kernel void @v_extract_fneg_no_fold_v2f16(<2 x half> addrspace(1)* %in) #0 {
|
||||
%val = load <2 x half>, <2 x half> addrspace(1)* %in
|
||||
%fneg = fsub <2 x half> <half -0.0, half -0.0>, %val
|
||||
|
@ -85,11 +85,11 @@ entry:
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}store_i24:
|
||||
; GCN: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 16
|
||||
; SIVI: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 16
|
||||
; SIVI-DAG: buffer_store_byte
|
||||
; SIVI-DAG: buffer_store_short
|
||||
|
||||
; GFX9-DAG: global_store_byte
|
||||
; GFX9-DAG: global_store_byte_d16_hi v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, off offset:2
|
||||
; GFX9-DAG: global_store_short
|
||||
|
||||
; EG: MEM_RAT MSKOR
|
||||
|
594
llvm/test/CodeGen/AMDGPU/store-hi16.ll
Normal file
594
llvm/test/CodeGen/AMDGPU/store-hi16.ll
Normal file
@ -0,0 +1,594 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
|
||||
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s
|
||||
|
||||
; GCN-LABEL: {{^}}store_global_hi_v2i16:
|
||||
; GCN: s_waitcnt
|
||||
|
||||
; GFX9-NEXT: global_store_short_d16_hi v[0:1], v2, off
|
||||
|
||||
; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
|
||||
; VI-NEXT: flat_store_short v[0:1], v2
|
||||
|
||||
; GCN-NEXT: s_waitcnt
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define void @store_global_hi_v2i16(i16 addrspace(1)* %out, i32 %arg) #0 {
|
||||
entry:
|
||||
; FIXME: ABI for pre-gfx9
|
||||
%value = bitcast i32 %arg to <2 x i16>
|
||||
%hi = extractelement <2 x i16> %value, i32 1
|
||||
store i16 %hi, i16 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}store_global_hi_v2f16:
|
||||
; GCN: s_waitcnt
|
||||
|
||||
; GFX9-NEXT: global_store_short_d16_hi v[0:1], v2, off
|
||||
|
||||
; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
|
||||
; VI-NEXT: flat_store_short v[0:1], v2
|
||||
|
||||
; GCN-NEXT: s_waitcnt
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define void @store_global_hi_v2f16(half addrspace(1)* %out, i32 %arg) #0 {
|
||||
entry:
|
||||
; FIXME: ABI for pre-gfx9
|
||||
%value = bitcast i32 %arg to <2 x half>
|
||||
%hi = extractelement <2 x half> %value, i32 1
|
||||
store half %hi, half addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}store_global_hi_i32_shift:
|
||||
; GCN: s_waitcnt
|
||||
|
||||
; GFX9-NEXT: global_store_short_d16_hi v[0:1], v2, off
|
||||
|
||||
; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
|
||||
; VI-NEXT: flat_store_short v[0:1], v2
|
||||
|
||||
; GCN-NEXT: s_waitcnt
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define void @store_global_hi_i32_shift(i16 addrspace(1)* %out, i32 %value) #0 {
|
||||
entry:
|
||||
%hi32 = lshr i32 %value, 16
|
||||
%hi = trunc i32 %hi32 to i16
|
||||
store i16 %hi, i16 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}store_global_hi_v2i16_i8:
|
||||
; GCN: s_waitcnt
|
||||
|
||||
; GFX9-NEXT: global_store_byte_d16_hi v[0:1], v2, off
|
||||
|
||||
; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
|
||||
; VI-NEXT: flat_store_byte v[0:1], v2
|
||||
|
||||
; GCN-NEXT: s_waitcnt
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define void @store_global_hi_v2i16_i8(i8 addrspace(1)* %out, i32 %arg) #0 {
|
||||
entry:
|
||||
%value = bitcast i32 %arg to <2 x i16>
|
||||
%hi = extractelement <2 x i16> %value, i32 1
|
||||
%trunc = trunc i16 %hi to i8
|
||||
store i8 %trunc, i8 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}store_global_hi_i8_shift:
|
||||
; GCN: s_waitcnt
|
||||
|
||||
; GFX9-NEXT: global_store_byte_d16_hi v[0:1], v2, off
|
||||
|
||||
; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
|
||||
; VI-NEXT: flat_store_byte v[0:1], v2
|
||||
|
||||
; GCN-NEXT: s_waitcnt
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define void @store_global_hi_i8_shift(i8 addrspace(1)* %out, i32 %value) #0 {
|
||||
entry:
|
||||
%hi32 = lshr i32 %value, 16
|
||||
%hi = trunc i32 %hi32 to i8
|
||||
store i8 %hi, i8 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}store_global_hi_v2i16_max_offset:
|
||||
; GCN: s_waitcnt
|
||||
; GFX9-NEXT: global_store_short_d16_hi v[0:1], v2, off offset:4094
|
||||
|
||||
; VI-DAG: v_add_i32_e32
|
||||
; VI-DAG: v_addc_u32_e32
|
||||
; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2
|
||||
|
||||
; VI: flat_store_short v[0:1], v2{{$}}
|
||||
; GCN-NEXT: s_waitcnt
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define void @store_global_hi_v2i16_max_offset(i16 addrspace(1)* %out, i32 %arg) #0 {
|
||||
entry:
|
||||
; FIXME: ABI for pre-gfx9
|
||||
%value = bitcast i32 %arg to <2 x i16>
|
||||
%hi = extractelement <2 x i16> %value, i32 1
|
||||
%gep = getelementptr inbounds i16, i16 addrspace(1)* %out, i64 2047
|
||||
store i16 %hi, i16 addrspace(1)* %gep
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}store_global_hi_v2i16_min_offset:
|
||||
; GCN: s_waitcnt
|
||||
; GFX9-NEXT: global_store_short_d16_hi v[0:1], v2, off offset:-4096{{$}}
|
||||
|
||||
; VI-DAG: v_add_i32_e32
|
||||
; VI-DAG: v_addc_u32_e32
|
||||
; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2
|
||||
|
||||
; VI: flat_store_short v[0:1], v{{[0-9]$}}
|
||||
; GCN-NEXT: s_waitcnt
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define void @store_global_hi_v2i16_min_offset(i16 addrspace(1)* %out, i32 %arg) #0 {
|
||||
entry:
|
||||
%value = bitcast i32 %arg to <2 x i16>
|
||||
%hi = extractelement <2 x i16> %value, i32 1
|
||||
%gep = getelementptr inbounds i16, i16 addrspace(1)* %out, i64 -2048
|
||||
store i16 %hi, i16 addrspace(1)* %gep
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}store_global_hi_v2i16_i8_max_offset:
|
||||
; GCN: s_waitcnt
|
||||
; GFX9-NEXT: global_store_byte_d16_hi v[0:1], v2, off offset:4095
|
||||
|
||||
; VI-DAG: v_add_i32_e32
|
||||
; VI-DAG: v_addc_u32_e32
|
||||
; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2
|
||||
; VI: flat_store_byte v[0:1], v{{[0-9]$}}
|
||||
|
||||
; GCN-NEXT: s_waitcnt
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define void @store_global_hi_v2i16_i8_max_offset(i8 addrspace(1)* %out, i32 %arg) #0 {
|
||||
entry:
|
||||
%value = bitcast i32 %arg to <2 x i16>
|
||||
%hi = extractelement <2 x i16> %value, i32 1
|
||||
%trunc = trunc i16 %hi to i8
|
||||
%gep = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 4095
|
||||
store i8 %trunc, i8 addrspace(1)* %gep
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}store_global_hi_v2i16_i8_min_offset:
|
||||
; GCN: s_waitcnt
|
||||
; GFX9-NEXT: global_store_byte_d16_hi v[0:1], v2, off offset:-4095
|
||||
|
||||
; VI-DAG: v_add_i32_e32
|
||||
; VI-DAG: v_addc_u32_e32
|
||||
; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2
|
||||
|
||||
; VI: flat_store_byte v[0:1], v{{[0-9]$}}
|
||||
; GCN-NEXT: s_waitcnt
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define void @store_global_hi_v2i16_i8_min_offset(i8 addrspace(1)* %out, i32 %arg) #0 {
|
||||
entry:
|
||||
%value = bitcast i32 %arg to <2 x i16>
|
||||
%hi = extractelement <2 x i16> %value, i32 1
|
||||
%trunc = trunc i16 %hi to i8
|
||||
%gep = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 -4095
|
||||
store i8 %trunc, i8 addrspace(1)* %gep
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}store_flat_hi_v2i16:
|
||||
; GCN: s_waitcnt
|
||||
|
||||
; GFX9-NEXT: flat_store_short_d16_hi v[0:1], v2{{$}}
|
||||
|
||||
; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
|
||||
; VI-NEXT: flat_store_short v[0:1], v2
|
||||
|
||||
; GCN-NEXT: s_waitcnt
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define void @store_flat_hi_v2i16(i16 addrspace(4)* %out, i32 %arg) #0 {
|
||||
entry:
|
||||
%value = bitcast i32 %arg to <2 x i16>
|
||||
%hi = extractelement <2 x i16> %value, i32 1
|
||||
store i16 %hi, i16 addrspace(4)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}store_flat_hi_v2f16:
|
||||
; GCN: s_waitcnt
|
||||
|
||||
; GFX9-NEXT: flat_store_short_d16_hi v[0:1], v2{{$}}
|
||||
|
||||
; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
|
||||
; VI-NEXT: flat_store_short v[0:1], v2
|
||||
|
||||
; GCN-NEXT: s_waitcnt
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define void @store_flat_hi_v2f16(half addrspace(4)* %out, i32 %arg) #0 {
|
||||
entry:
|
||||
%value = bitcast i32 %arg to <2 x half>
|
||||
%hi = extractelement <2 x half> %value, i32 1
|
||||
store half %hi, half addrspace(4)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}store_flat_hi_i32_shift:
|
||||
; GCN: s_waitcnt
|
||||
|
||||
; GFX9-NEXT: flat_store_short_d16_hi v[0:1], v2{{$}}
|
||||
|
||||
; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
|
||||
; VI-NEXT: flat_store_short v[0:1], v2
|
||||
|
||||
; GCN-NEXT: s_waitcnt
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define void @store_flat_hi_i32_shift(i16 addrspace(4)* %out, i32 %value) #0 {
|
||||
entry:
|
||||
%hi32 = lshr i32 %value, 16
|
||||
%hi = trunc i32 %hi32 to i16
|
||||
store i16 %hi, i16 addrspace(4)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}store_flat_hi_v2i16_i8:
|
||||
; GCN: s_waitcnt
|
||||
|
||||
; GFX9-NEXT: flat_store_byte_d16_hi v[0:1], v2{{$}}
|
||||
|
||||
; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
|
||||
; VI-NEXT: flat_store_byte v[0:1], v2
|
||||
|
||||
; GCN-NEXT: s_waitcnt
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define void @store_flat_hi_v2i16_i8(i8 addrspace(4)* %out, i32 %arg) #0 {
|
||||
entry:
|
||||
%value = bitcast i32 %arg to <2 x i16>
|
||||
%hi = extractelement <2 x i16> %value, i32 1
|
||||
%trunc = trunc i16 %hi to i8
|
||||
store i8 %trunc, i8 addrspace(4)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}store_flat_hi_i8_shift:
|
||||
; GCN: s_waitcnt
|
||||
|
||||
; GFX9-NEXT: flat_store_byte_d16_hi v[0:1], v2{{$}}
|
||||
|
||||
; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
|
||||
; VI-NEXT: flat_store_byte v[0:1], v2
|
||||
|
||||
; GCN-NEXT: s_waitcnt
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define void @store_flat_hi_i8_shift(i8 addrspace(4)* %out, i32 %value) #0 {
|
||||
entry:
|
||||
%hi32 = lshr i32 %value, 16
|
||||
%hi = trunc i32 %hi32 to i8
|
||||
store i8 %hi, i8 addrspace(4)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}store_flat_hi_v2i16_max_offset:
|
||||
; GCN: s_waitcnt
|
||||
; GFX9-NEXT: flat_store_short_d16_hi v[0:1], v2 offset:4094{{$}}
|
||||
|
||||
; VI-DAG: v_add_i32_e32
|
||||
; VI-DAG: v_addc_u32_e32
|
||||
; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2
|
||||
; VI: flat_store_short v[0:1], v2{{$}}
|
||||
; GCN-NEXT: s_waitcnt
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define void @store_flat_hi_v2i16_max_offset(i16 addrspace(4)* %out, i32 %arg) #0 {
|
||||
entry:
|
||||
%value = bitcast i32 %arg to <2 x i16>
|
||||
%hi = extractelement <2 x i16> %value, i32 1
|
||||
%gep = getelementptr inbounds i16, i16 addrspace(4)* %out, i64 2047
|
||||
store i16 %hi, i16 addrspace(4)* %gep
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}store_flat_hi_v2i16_neg_offset:
|
||||
; GCN: s_waitcnt
|
||||
; GCN: v_add_i32_e32
|
||||
; GCN: v_addc_u32_e32
|
||||
|
||||
; GFX9-NEXT: flat_store_short_d16_hi v[0:1], v2{{$}}
|
||||
; VI: flat_store_short v[0:1], v2{{$}}
|
||||
; GCN-NEXT: s_waitcnt
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define void @store_flat_hi_v2i16_neg_offset(i16 addrspace(4)* %out, i32 %arg) #0 {
|
||||
entry:
|
||||
%value = bitcast i32 %arg to <2 x i16>
|
||||
%hi = extractelement <2 x i16> %value, i32 1
|
||||
%gep = getelementptr inbounds i16, i16 addrspace(4)* %out, i64 -1023
|
||||
store i16 %hi, i16 addrspace(4)* %gep
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}store_flat_hi_v2i16_i8_max_offset:
|
||||
; GCN: s_waitcnt
|
||||
; GFX9-NEXT: flat_store_byte_d16_hi v[0:1], v2 offset:4095{{$}}
|
||||
|
||||
; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2
|
||||
; VI-DAG: v_add_i32_e32
|
||||
; VI-DAG: v_addc_u32_e32
|
||||
; VI: flat_store_byte v[0:1], v2{{$}}
|
||||
; GCN-NEXT: s_waitcnt
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define void @store_flat_hi_v2i16_i8_max_offset(i8 addrspace(4)* %out, i32 %arg) #0 {
|
||||
entry:
|
||||
%value = bitcast i32 %arg to <2 x i16>
|
||||
%hi = extractelement <2 x i16> %value, i32 1
|
||||
%trunc = trunc i16 %hi to i8
|
||||
%gep = getelementptr inbounds i8, i8 addrspace(4)* %out, i64 4095
|
||||
store i8 %trunc, i8 addrspace(4)* %gep
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}store_flat_hi_v2i16_i8_neg_offset:
|
||||
; GCN: s_waitcnt
|
||||
; GCN-DAG: v_add_i32_e32
|
||||
; GCN-DAG: v_addc_u32_e32
|
||||
|
||||
; GFX9-NEXT: flat_store_byte_d16_hi v[0:1], v2{{$}}
|
||||
; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2
|
||||
; VI: flat_store_byte v[0:1], v2{{$}}
|
||||
; GCN-NEXT: s_waitcnt
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define void @store_flat_hi_v2i16_i8_neg_offset(i8 addrspace(4)* %out, i32 %arg) #0 {
|
||||
entry:
|
||||
%value = bitcast i32 %arg to <2 x i16>
|
||||
%hi = extractelement <2 x i16> %value, i32 1
|
||||
%trunc = trunc i16 %hi to i8
|
||||
%gep = getelementptr inbounds i8, i8 addrspace(4)* %out, i64 -4095
|
||||
store i8 %trunc, i8 addrspace(4)* %gep
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}store_private_hi_v2i16:
|
||||
; GCN: s_waitcnt
|
||||
|
||||
; GFX9-NEXT: buffer_store_short_d16_hi v1, v0, s[0:3], s4 offen{{$}}
|
||||
|
||||
; VI: v_lshrrev_b32_e32 v1, 16, v1
|
||||
; VI: buffer_store_short v1, v0, s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-NEXT: s_waitcnt
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define void @store_private_hi_v2i16(i16* %out, i32 %arg) #0 {
|
||||
entry:
|
||||
; FIXME: ABI for pre-gfx9
|
||||
%value = bitcast i32 %arg to <2 x i16>
|
||||
%hi = extractelement <2 x i16> %value, i32 1
|
||||
store i16 %hi, i16* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}store_private_hi_v2f16:
|
||||
; GCN: s_waitcnt
|
||||
|
||||
; GFX9-NEXT: buffer_store_short_d16_hi v1, v0, s[0:3], s4 offen{{$}}
|
||||
|
||||
; VI: v_lshrrev_b32_e32 v1, 16, v1
|
||||
; VI: buffer_store_short v1, v0, s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-NEXT: s_waitcnt
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define void @store_private_hi_v2f16(half* %out, i32 %arg) #0 {
|
||||
entry:
|
||||
; FIXME: ABI for pre-gfx9
|
||||
%value = bitcast i32 %arg to <2 x half>
|
||||
%hi = extractelement <2 x half> %value, i32 1
|
||||
store half %hi, half* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}store_private_hi_i32_shift:
|
||||
; GCN: s_waitcnt
|
||||
|
||||
; GFX9-NEXT: buffer_store_short_d16_hi v1, v0, s[0:3], s4 offen{{$}}
|
||||
|
||||
; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
|
||||
; VI-NEXT: buffer_store_short v1, v0, s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-NEXT: s_waitcnt
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define void @store_private_hi_i32_shift(i16* %out, i32 %value) #0 {
|
||||
entry:
|
||||
%hi32 = lshr i32 %value, 16
|
||||
%hi = trunc i32 %hi32 to i16
|
||||
store i16 %hi, i16* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}store_private_hi_v2i16_i8:
|
||||
; GCN: s_waitcnt
|
||||
|
||||
; GFX9-NEXT: buffer_store_byte_d16_hi v1, v0, s[0:3], s4 offen{{$}}
|
||||
|
||||
; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
|
||||
; VI-NEXT: buffer_store_byte v1, v0, s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-NEXT: s_waitcnt
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define void @store_private_hi_v2i16_i8(i8* %out, i32 %arg) #0 {
|
||||
entry:
|
||||
%value = bitcast i32 %arg to <2 x i16>
|
||||
%hi = extractelement <2 x i16> %value, i32 1
|
||||
%trunc = trunc i16 %hi to i8
|
||||
store i8 %trunc, i8* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}store_private_hi_i8_shift:
|
||||
; GCN: s_waitcnt
|
||||
|
||||
; GFX9-NEXT: buffer_store_byte_d16_hi v1, v0, s[0:3], s4 offen{{$}}
|
||||
|
||||
; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
|
||||
; VI-NEXT: buffer_store_byte v1, v0, s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-NEXT: s_waitcnt
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define void @store_private_hi_i8_shift(i8* %out, i32 %value) #0 {
|
||||
entry:
|
||||
%hi32 = lshr i32 %value, 16
|
||||
%hi = trunc i32 %hi32 to i8
|
||||
store i8 %hi, i8* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}store_private_hi_v2i16_max_offset:
|
||||
; GCN: s_waitcnt
|
||||
; GFX9-NEXT: buffer_store_short_d16_hi v1, v0, s[0:3], s4 offen offset:4094{{$}}
|
||||
|
||||
; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
|
||||
; VI-NEXT: buffer_store_short v1, v0, s[0:3], s4 offen offset:4094{{$}}
|
||||
|
||||
; GCN-NEXT: s_waitcnt
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define void @store_private_hi_v2i16_max_offset(i16* %out, i32 %arg) #0 {
|
||||
entry:
|
||||
%value = bitcast i32 %arg to <2 x i16>
|
||||
%hi = extractelement <2 x i16> %value, i32 1
|
||||
%gep = getelementptr inbounds i16, i16* %out, i64 2047
|
||||
store i16 %hi, i16* %gep
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
|
||||
; GCN-LABEL: {{^}}store_private_hi_v2i16_nooff:
|
||||
; GCN: s_waitcnt
|
||||
|
||||
; GFX9-NEXT: buffer_store_short_d16_hi v0, off, s[0:3], s4{{$}}
|
||||
|
||||
; VI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
||||
; VI-NEXT: buffer_store_short v0, off, s[0:3], s4{{$}}
|
||||
|
||||
; GCN-NEXT: s_waitcnt
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define void @store_private_hi_v2i16_nooff(i32 %arg) #0 {
|
||||
entry:
|
||||
; FIXME: ABI for pre-gfx9
|
||||
%value = bitcast i32 %arg to <2 x i16>
|
||||
%hi = extractelement <2 x i16> %value, i32 1
|
||||
store volatile i16 %hi, i16* null
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
; GCN-LABEL: {{^}}store_private_hi_v2i16_i8_nooff:
|
||||
; GCN: s_waitcnt
|
||||
|
||||
; GFX9-NEXT: buffer_store_byte_d16_hi v0, off, s[0:3], s4{{$}}
|
||||
|
||||
; VI: v_lshrrev_b32_e32 v0, 16, v0
|
||||
; VI: buffer_store_byte v0, off, s[0:3], s4{{$}}
|
||||
|
||||
; GCN-NEXT: s_waitcnt
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define void @store_private_hi_v2i16_i8_nooff(i32 %arg) #0 {
|
||||
entry:
|
||||
%value = bitcast i32 %arg to <2 x i16>
|
||||
%hi = extractelement <2 x i16> %value, i32 1
|
||||
%trunc = trunc i16 %hi to i8
|
||||
store volatile i8 %trunc, i8* null
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}store_local_hi_v2i16:
|
||||
; GCN: s_waitcnt
|
||||
|
||||
; GFX9-NEXT: ds_write_b16_d16_hi v0, v1{{$}}
|
||||
|
||||
; VI: v_lshrrev_b32_e32 v1, 16, v1
|
||||
; VI: ds_write_b16 v0, v1
|
||||
|
||||
; GCN-NEXT: s_waitcnt
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define void @store_local_hi_v2i16(i16 addrspace(3)* %out, i32 %arg) #0 {
|
||||
entry:
|
||||
; FIXME: ABI for pre-gfx9
|
||||
%value = bitcast i32 %arg to <2 x i16>
|
||||
%hi = extractelement <2 x i16> %value, i32 1
|
||||
store i16 %hi, i16 addrspace(3)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}store_local_hi_v2f16:
|
||||
; GCN: s_waitcnt
|
||||
|
||||
; GFX9-NEXT: ds_write_b16_d16_hi v0, v1{{$}}
|
||||
|
||||
; VI: v_lshrrev_b32_e32 v1, 16, v1
|
||||
; VI: ds_write_b16 v0, v1
|
||||
|
||||
; GCN-NEXT: s_waitcnt
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define void @store_local_hi_v2f16(half addrspace(3)* %out, i32 %arg) #0 {
|
||||
entry:
|
||||
; FIXME: ABI for pre-gfx9
|
||||
%value = bitcast i32 %arg to <2 x half>
|
||||
%hi = extractelement <2 x half> %value, i32 1
|
||||
store half %hi, half addrspace(3)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}store_local_hi_i32_shift:
|
||||
; GCN: s_waitcnt
|
||||
|
||||
; GFX9-NEXT: ds_write_b16_d16_hi v0, v1{{$}}
|
||||
|
||||
; VI: v_lshrrev_b32_e32 v1, 16, v1
|
||||
; VI: ds_write_b16 v0, v1
|
||||
|
||||
; GCN-NEXT: s_waitcnt
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define void @store_local_hi_i32_shift(i16 addrspace(3)* %out, i32 %value) #0 {
|
||||
entry:
|
||||
%hi32 = lshr i32 %value, 16
|
||||
%hi = trunc i32 %hi32 to i16
|
||||
store i16 %hi, i16 addrspace(3)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}store_local_hi_v2i16_i8:
|
||||
; GCN: s_waitcnt
|
||||
|
||||
; GFX9-NEXT: ds_write_b8_d16_hi v0, v1{{$}}
|
||||
|
||||
; VI: v_lshrrev_b32_e32 v1, 16, v1
|
||||
; VI: ds_write_b8 v0, v1
|
||||
|
||||
; GCN-NEXT: s_waitcnt
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define void @store_local_hi_v2i16_i8(i8 addrspace(3)* %out, i32 %arg) #0 {
|
||||
entry:
|
||||
%value = bitcast i32 %arg to <2 x i16>
|
||||
%hi = extractelement <2 x i16> %value, i32 1
|
||||
%trunc = trunc i16 %hi to i8
|
||||
store i8 %trunc, i8 addrspace(3)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}store_local_hi_v2i16_max_offset:
|
||||
; GCN: s_waitcnt
|
||||
; GFX9-NEXT: ds_write_b16_d16_hi v0, v1 offset:65534{{$}}
|
||||
|
||||
; VI: v_lshrrev_b32_e32 v1, 16, v1
|
||||
; VI: ds_write_b16 v0, v1 offset:65534{{$}}
|
||||
|
||||
; GCN-NEXT: s_waitcnt
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define void @store_local_hi_v2i16_max_offset(i16 addrspace(3)* %out, i32 %arg) #0 {
|
||||
entry:
|
||||
; FIXME: ABI for pre-gfx9
|
||||
%value = bitcast i32 %arg to <2 x i16>
|
||||
%hi = extractelement <2 x i16> %value, i32 1
|
||||
%gep = getelementptr inbounds i16, i16 addrspace(3)* %out, i64 32767
|
||||
store i16 %hi, i16 addrspace(3)* %gep
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
Loading…
x
Reference in New Issue
Block a user