mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-20 17:03:09 +00:00
R600/SI: 64-bit and larger memory access must be at least 4-byte aligned
This alignment requirement is true for SI only. CI+ supports unaligned memory accesses, but this requires driver support, so for now we disallow unaligned accesses for all GCN targets.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@227822 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
80e70ee18e
commit
d73d1062fe
@ -315,9 +315,8 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
|
||||
if (!VT.isSimple() || VT == MVT::Other)
|
||||
return false;
|
||||
|
||||
// XXX - CI changes say "Support for unaligned memory accesses" but I don't
|
||||
// see what for specifically. The wording everywhere else seems to be the
|
||||
// same.
|
||||
// TODO - CI+ supports unaligned memory accesses, but this requires driver
|
||||
// support.
|
||||
|
||||
// XXX - The only mention I see of this in the ISA manual is for LDS direct
|
||||
// reads the "byte address and must be dword aligned". Is it also true for the
|
||||
@ -334,7 +333,8 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
|
||||
// This applies to private, global, and constant memory.
|
||||
if (IsFast)
|
||||
*IsFast = true;
|
||||
return VT.bitsGT(MVT::i32);
|
||||
|
||||
return VT.bitsGT(MVT::i32) && Align % 4 == 0;
|
||||
}
|
||||
|
||||
EVT SITargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
|
||||
|
@ -146,7 +146,7 @@ define void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias %out, <7 x i8>
|
||||
; SI: buffer_store_dword
|
||||
; SI: buffer_store_dword
|
||||
define void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias %out, <8 x i8> addrspace(1)* noalias %in) nounwind {
|
||||
%load = load <8 x i8> addrspace(1)* %in, align 1
|
||||
%load = load <8 x i8> addrspace(1)* %in, align 8
|
||||
%cvt = uitofp <8 x i8> %load to <8 x float>
|
||||
store <8 x float> %cvt, <8 x float> addrspace(1)* %out, align 16
|
||||
ret void
|
||||
|
@ -1,18 +1,65 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
|
||||
|
||||
; SI-LABEL: {{^}}unaligned_load_store_i32:
|
||||
; SI-LABEL: {{^}}unaligned_load_store_i32_local:
|
||||
; SI: ds_read_u8
|
||||
; SI: ds_read_u8
|
||||
; SI: ds_read_u8
|
||||
; SI: ds_read_u8
|
||||
; SI: ds_write_b32
|
||||
; SI: s_endpgm
|
||||
define void @unaligned_load_store_i32(i32 addrspace(3)* %p, i32 addrspace(3)* %r) nounwind {
|
||||
define void @unaligned_load_store_i32_local(i32 addrspace(3)* %p, i32 addrspace(3)* %r) nounwind {
|
||||
%v = load i32 addrspace(3)* %p, align 1
|
||||
store i32 %v, i32 addrspace(3)* %r, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}unaligned_load_store_v4i32:
|
||||
; SI-LABEL: {{^}}unaligned_load_store_i32_global:
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_store_dword
|
||||
define void @unaligned_load_store_i32_global(i32 addrspace(1)* %p, i32 addrspace(1)* %r) nounwind {
|
||||
%v = load i32 addrspace(1)* %p, align 1
|
||||
store i32 %v, i32 addrspace(1)* %r, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}unaligned_load_store_i64_local:
|
||||
; SI: ds_read_u8
|
||||
; SI: ds_read_u8
|
||||
; SI: ds_read_u8
|
||||
; SI: ds_read_u8
|
||||
; SI: ds_read_u8
|
||||
; SI: ds_read_u8
|
||||
; SI: ds_read_u8
|
||||
; SI: ds_read_u8
|
||||
; SI: ds_write2_b32
|
||||
; SI: s_endpgm
|
||||
define void @unaligned_load_store_i64_local(i64 addrspace(3)* %p, i64 addrspace(3)* %r) {
|
||||
%v = load i64 addrspace(3)* %p, align 1
|
||||
store i64 %v, i64 addrspace(3)* %r, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}unaligned_load_store_i64_global:
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_store_dwordx2
|
||||
define void @unaligned_load_store_i64_global(i64 addrspace(1)* %p, i64 addrspace(1)* %r) {
|
||||
%v = load i64 addrspace(1)* %p, align 1
|
||||
store i64 %v, i64 addrspace(1)* %r, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}unaligned_load_store_v4i32_local:
|
||||
; SI: ds_read_u8
|
||||
; SI: ds_read_u8
|
||||
; SI: ds_read_u8
|
||||
@ -38,12 +85,36 @@ define void @unaligned_load_store_i32(i32 addrspace(3)* %p, i32 addrspace(3)* %r
|
||||
; SI: ds_write_b32
|
||||
; SI: ds_write_b32
|
||||
; SI: s_endpgm
|
||||
define void @unaligned_load_store_v4i32(<4 x i32> addrspace(3)* %p, <4 x i32> addrspace(3)* %r) nounwind {
|
||||
define void @unaligned_load_store_v4i32_local(<4 x i32> addrspace(3)* %p, <4 x i32> addrspace(3)* %r) nounwind {
|
||||
%v = load <4 x i32> addrspace(3)* %p, align 1
|
||||
store <4 x i32> %v, <4 x i32> addrspace(3)* %r, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
; FIXME: We mark v4i32 as custom, so misaligned loads are never expanded.
|
||||
; FIXME-SI-LABEL: {{^}}unaligned_load_store_v4i32_global
|
||||
; FIXME-SI: buffer_load_ubyte
|
||||
; FIXME-SI: buffer_load_ubyte
|
||||
; FIXME-SI: buffer_load_ubyte
|
||||
; FIXME-SI: buffer_load_ubyte
|
||||
; FIXME-SI: buffer_load_ubyte
|
||||
; FIXME-SI: buffer_load_ubyte
|
||||
; FIXME-SI: buffer_load_ubyte
|
||||
; FIXME-SI: buffer_load_ubyte
|
||||
; FIXME-SI: buffer_load_ubyte
|
||||
; FIXME-SI: buffer_load_ubyte
|
||||
; FIXME-SI: buffer_load_ubyte
|
||||
; FIXME-SI: buffer_load_ubyte
|
||||
; FIXME-SI: buffer_load_ubyte
|
||||
; FIXME-SI: buffer_load_ubyte
|
||||
; FIXME-SI: buffer_load_ubyte
|
||||
; FIXME-SI: buffer_load_ubyte
|
||||
define void @unaligned_load_store_v4i32_global(<4 x i32> addrspace(1)* %p, <4 x i32> addrspace(1)* %r) nounwind {
|
||||
%v = load <4 x i32> addrspace(1)* %p, align 1
|
||||
store <4 x i32> %v, <4 x i32> addrspace(1)* %r, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}load_lds_i64_align_4:
|
||||
; SI: ds_read2_b32
|
||||
; SI: s_endpgm
|
||||
|
Loading…
x
Reference in New Issue
Block a user