mirror of
https://github.com/RPCSX/llvm.git
synced 2024-12-03 01:12:59 +00:00
AMDGPU: Custom lower v2i32 loads and stores
This will allow us to split up 64-bit private accesses when necessary.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@268296 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
451c792dbc
commit
6b3315d64b
@ -101,6 +101,8 @@ SITargetLowering::SITargetLowering(TargetMachine &TM,
|
||||
setOperationAction(ISD::LOAD, MVT::i64, Promote);
|
||||
AddPromotedToType(ISD::LOAD, MVT::i64, MVT::v2i32);
|
||||
|
||||
setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
|
||||
|
||||
setOperationAction(ISD::STORE, MVT::v8i32, Custom);
|
||||
setOperationAction(ISD::STORE, MVT::v16i32, Custom);
|
||||
|
||||
@ -113,6 +115,8 @@ SITargetLowering::SITargetLowering(TargetMachine &TM,
|
||||
setOperationAction(ISD::STORE, MVT::i64, Promote);
|
||||
AddPromotedToType(ISD::STORE, MVT::i64, MVT::v2i32);
|
||||
|
||||
setOperationAction(ISD::STORE, MVT::v2i32, Custom);
|
||||
|
||||
setOperationAction(ISD::SELECT, MVT::i64, Custom);
|
||||
setOperationAction(ISD::SELECT, MVT::f64, Promote);
|
||||
AddPromotedToType(ISD::SELECT, MVT::f64, MVT::i64);
|
||||
@ -1905,10 +1909,17 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
|
||||
|
||||
assert(Op.getValueType().getVectorElementType() == MVT::i32 &&
|
||||
"Custom lowering for non-i32 vectors hasn't been implemented.");
|
||||
unsigned NumElements = MemVT.getVectorNumElements();
|
||||
assert(NumElements != 2 && "v2 loads are supported for all address spaces.");
|
||||
|
||||
switch (Load->getAddressSpace()) {
|
||||
unsigned AS = Load->getAddressSpace();
|
||||
if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
|
||||
AS, Load->getAlignment())) {
|
||||
SDValue Ops[2];
|
||||
std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(Load, DAG);
|
||||
return DAG.getMergeValues(Ops, DL);
|
||||
}
|
||||
|
||||
unsigned NumElements = MemVT.getVectorNumElements();
|
||||
switch (AS) {
|
||||
case AMDGPUAS::CONSTANT_ADDRESS:
|
||||
if (isMemOpUniform(Load))
|
||||
return SDValue();
|
||||
@ -1943,9 +1954,16 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
|
||||
llvm_unreachable("unsupported private_element_size");
|
||||
}
|
||||
}
|
||||
case AMDGPUAS::LOCAL_ADDRESS:
|
||||
case AMDGPUAS::LOCAL_ADDRESS: {
|
||||
if (NumElements > 2)
|
||||
return SplitVectorLoad(Op, DAG);
|
||||
|
||||
if (NumElements == 2)
|
||||
return SDValue();
|
||||
|
||||
// If properly aligned, if we split we might be able to use ds_read_b64.
|
||||
return SplitVectorLoad(Op, DAG);
|
||||
}
|
||||
default:
|
||||
return SDValue();
|
||||
}
|
||||
@ -2150,10 +2168,17 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
|
||||
Store->getBasePtr(), MVT::i1, Store->getMemOperand());
|
||||
}
|
||||
|
||||
assert(Store->getValue().getValueType().getScalarType() == MVT::i32);
|
||||
assert(VT.isVector() &&
|
||||
Store->getValue().getValueType().getScalarType() == MVT::i32);
|
||||
|
||||
unsigned AS = Store->getAddressSpace();
|
||||
if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
|
||||
AS, Store->getAlignment())) {
|
||||
return expandUnalignedStore(Store, DAG);
|
||||
}
|
||||
|
||||
unsigned NumElements = VT.getVectorNumElements();
|
||||
switch (Store->getAddressSpace()) {
|
||||
switch (AS) {
|
||||
case AMDGPUAS::GLOBAL_ADDRESS:
|
||||
case AMDGPUAS::FLAT_ADDRESS:
|
||||
if (NumElements > 4)
|
||||
@ -2175,9 +2200,16 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
|
||||
llvm_unreachable("unsupported private_element_size");
|
||||
}
|
||||
}
|
||||
case AMDGPUAS::LOCAL_ADDRESS:
|
||||
case AMDGPUAS::LOCAL_ADDRESS: {
|
||||
if (NumElements > 2)
|
||||
return SplitVectorStore(Op, DAG);
|
||||
|
||||
if (NumElements == 2)
|
||||
return Op;
|
||||
|
||||
// If properly aligned, if we split we might be able to use ds_write_b64.
|
||||
return SplitVectorStore(Op, DAG);
|
||||
}
|
||||
default:
|
||||
llvm_unreachable("unhandled address space");
|
||||
}
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
|
||||
|
||||
; SI-LABEL: {{^}}unaligned_load_store_i16_local:
|
||||
@ -56,6 +56,29 @@ define void @unaligned_load_store_i32_global(i32 addrspace(1)* %p, i32 addrspace
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}align2_load_store_i32_global:
|
||||
; SI: buffer_load_ushort
|
||||
; SI: buffer_load_ushort
|
||||
; SI: buffer_store_short
|
||||
; SI: buffer_store_short
|
||||
define void @align2_load_store_i32_global(i32 addrspace(1)* %p, i32 addrspace(1)* %r) nounwind {
|
||||
%v = load i32, i32 addrspace(1)* %p, align 2
|
||||
store i32 %v, i32 addrspace(1)* %r, align 2
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}align2_load_store_i32_local:
|
||||
; SI: ds_read_u16
|
||||
; SI: ds_read_u16
|
||||
; SI: ds_write_b16
|
||||
; SI: ds_write_b16
|
||||
define void @align2_load_store_i32_local(i32 addrspace(3)* %p, i32 addrspace(3)* %r) nounwind {
|
||||
%v = load i32, i32 addrspace(3)* %p, align 2
|
||||
store i32 %v, i32 addrspace(3)* %r, align 2
|
||||
ret void
|
||||
}
|
||||
|
||||
; FIXME: Unnecessary packing and unpacking of bytes.
|
||||
; SI-LABEL: {{^}}unaligned_load_store_i64_local:
|
||||
; SI: ds_read_u8
|
||||
; SI: ds_read_u8
|
||||
@ -65,13 +88,36 @@ define void @unaligned_load_store_i32_global(i32 addrspace(1)* %p, i32 addrspace
|
||||
; SI: ds_read_u8
|
||||
; SI: ds_read_u8
|
||||
; SI: ds_read_u8
|
||||
|
||||
; XSI-NOT: v_or_b32
|
||||
; XSI-NOT: v_lshl
|
||||
; SI: ds_write_b8
|
||||
; XSI-NOT: v_or_b32
|
||||
; XSI-NOT: v_lshl
|
||||
|
||||
; SI: ds_write_b8
|
||||
; XSI-NOT: v_or_b32
|
||||
; XSI-NOT: v_lshl
|
||||
|
||||
; SI: ds_write_b8
|
||||
; XSI-NOT: v_or_b32
|
||||
; XSI-NOT: v_lshl
|
||||
|
||||
; SI: ds_write_b8
|
||||
; XSI-NOT: v_or_b32
|
||||
; XSI-NOT: v_lshl
|
||||
|
||||
; SI: ds_write_b8
|
||||
; XSI-NOT: v_or_b32
|
||||
; XSI-NOT: v_lshl
|
||||
|
||||
; SI: ds_write_b8
|
||||
; XSI-NOT: v_or_b32
|
||||
; XSI-NOT: v_lshl
|
||||
|
||||
; SI: ds_write_b8
|
||||
; XSI-NOT: v_or_b32
|
||||
; XSI-NOT: v_lshl
|
||||
; SI: ds_write_b8
|
||||
; SI: s_endpgm
|
||||
define void @unaligned_load_store_i64_local(i64 addrspace(3)* %p, i64 addrspace(3)* %r) {
|
||||
@ -80,6 +126,53 @@ define void @unaligned_load_store_i64_local(i64 addrspace(3)* %p, i64 addrspace(
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}unaligned_load_store_v2i32_local:
|
||||
; SI: ds_read_u8
|
||||
; SI: ds_read_u8
|
||||
; SI: ds_read_u8
|
||||
; SI: ds_read_u8
|
||||
; SI: ds_read_u8
|
||||
; SI: ds_read_u8
|
||||
; SI: ds_read_u8
|
||||
; SI: ds_read_u8
|
||||
|
||||
; XSI-NOT: v_or_b32
|
||||
; XSI-NOT: v_lshl
|
||||
; SI: ds_write_b8
|
||||
; XSI-NOT: v_or_b32
|
||||
; XSI-NOT: v_lshl
|
||||
|
||||
; SI: ds_write_b8
|
||||
; XSI-NOT: v_or_b32
|
||||
; XSI-NOT: v_lshl
|
||||
|
||||
; SI: ds_write_b8
|
||||
; XSI-NOT: v_or_b32
|
||||
; XSI-NOT: v_lshl
|
||||
|
||||
; SI: ds_write_b8
|
||||
; XSI-NOT: v_or_b32
|
||||
; XSI-NOT: v_lshl
|
||||
|
||||
; SI: ds_write_b8
|
||||
; XSI-NOT: v_or_b32
|
||||
; XSI-NOT: v_lshl
|
||||
|
||||
; SI: ds_write_b8
|
||||
; XSI-NOT: v_or_b32
|
||||
; XSI-NOT: v_lshl
|
||||
|
||||
; SI: ds_write_b8
|
||||
; XSI-NOT: v_or_b32
|
||||
; XSI-NOT: v_lshl
|
||||
; SI: ds_write_b8
|
||||
; SI: s_endpgm
|
||||
define void @unaligned_load_store_v2i32_local(<2 x i32> addrspace(3)* %p, <2 x i32> addrspace(3)* %r) {
|
||||
%v = load <2 x i32>, <2 x i32> addrspace(3)* %p, align 1
|
||||
store <2 x i32> %v, <2 x i32> addrspace(3)* %r, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}unaligned_load_store_i64_global:
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
@ -89,6 +182,10 @@ define void @unaligned_load_store_i64_local(i64 addrspace(3)* %p, i64 addrspace(
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
|
||||
; XSI-NOT: v_or_
|
||||
; XSI-NOT: v_lshl
|
||||
|
||||
; SI: buffer_store_byte
|
||||
; SI: buffer_store_byte
|
||||
; SI: buffer_store_byte
|
||||
|
Loading…
Reference in New Issue
Block a user