mirror of
https://github.com/RPCS3/llvm.git
synced 2024-12-23 04:28:30 +00:00
R600/SI: Also try to use 0 base for misaligned 8-byte DS loads.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@219823 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
7fdd553b66
commit
8b3a9205b7
@ -823,6 +823,23 @@ bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
|
||||
}
|
||||
}
|
||||
|
||||
if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
|
||||
unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
|
||||
unsigned DWordOffset1 = DWordOffset0 + 1;
|
||||
assert(4 * DWordOffset0 == CAddr->getZExtValue());
|
||||
|
||||
if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
|
||||
SDValue Zero = CurDAG->getTargetConstant(0, MVT::i32);
|
||||
MachineSDNode *MovZero
|
||||
= CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
|
||||
SDLoc(Addr), MVT::i32, Zero);
|
||||
Base = SDValue(MovZero, 0);
|
||||
Offset0 = CurDAG->getTargetConstant(DWordOffset0, MVT::i8);
|
||||
Offset1 = CurDAG->getTargetConstant(DWordOffset1, MVT::i8);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// default case
|
||||
Base = Addr;
|
||||
Offset0 = CurDAG->getTargetConstant(0, MVT::i8);
|
||||
|
@ -406,6 +406,36 @@ define void @load_constant_disjoint_offsets(i32 addrspace(1)* %out) {
|
||||
ret void
|
||||
}
|
||||
|
||||
@bar = addrspace(3) global [4 x i64] zeroinitializer, align 4
|
||||
|
||||
; Two adjacent i64 loads from @bar (elements 0 and 1), each only 4-byte aligned.
; The checks expect one register holding 0 to serve as the base for both loads,
; with each load emitted as a single DS_READ2_B32 whose offset0/offset1 name the
; two dwords of the value: 0/1 for element 0 and 2/3 for element 1.
; SI-LABEL: @load_misaligned64_constant_offsets
|
||||
; SI: V_MOV_B32_e32 [[ZERO:v[0-9]+]], 0{{$}}
|
||||
; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:1
|
||||
; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:2 offset1:3
|
||||
define void @load_misaligned64_constant_offsets(i64 addrspace(1)* %out) {
|
||||
%val0 = load i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4
|
||||
%val1 = load i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 1), align 4
|
||||
%sum = add i64 %val0, %val1
|
||||
store i64 %sum, i64 addrspace(1)* %out, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
@bar.large = addrspace(3) global [4096 x i64] zeroinitializer, align 4
|
||||
|
||||
; Two 4-byte-aligned i64 loads from @bar.large at elements 2048 and 4095, i.e.
; byte offsets 0x4000 and 0x7ff8 from the start of the array. The checks expect
; each address to be moved into its own base register and read with
; offset0:0 offset1:1, rather than going through a shared zero base.
; Both immediates are anchored with {{$}} so that a longer constant which merely
; starts with the same digits cannot satisfy the match.
; SI-LABEL: @load_misaligned64_constant_large_offsets
|
||||
; SI-DAG: V_MOV_B32_e32 [[BASE0:v[0-9]+]], 0x7ff8{{$}}
|
||||
; SI-DAG: V_MOV_B32_e32 [[BASE1:v[0-9]+]], 0x4000{{$}}
|
||||
; SI-DAG: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE0]] offset0:0 offset1:1
|
||||
; SI-DAG: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE1]] offset0:0 offset1:1
|
||||
; SI: S_ENDPGM
|
||||
define void @load_misaligned64_constant_large_offsets(i64 addrspace(1)* %out) {
|
||||
%val0 = load i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 2048), align 4
|
||||
%val1 = load i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 4095), align 4
|
||||
%sum = add i64 %val0, %val1
|
||||
store i64 %sum, i64 addrspace(1)* %out, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
@sgemm.lA = internal unnamed_addr addrspace(3) global [264 x float] zeroinitializer, align 4
|
||||
@sgemm.lB = internal unnamed_addr addrspace(3) global [776 x float] zeroinitializer, align 4
|
||||
|
||||
|
@ -341,6 +341,32 @@ define void @store_constant_disjoint_offsets() {
|
||||
ret void
|
||||
}
|
||||
|
||||
@bar = addrspace(3) global [4 x i64] zeroinitializer, align 4
|
||||
|
||||
; Two adjacent i64 stores of the constant 123 to @bar (elements 0 and 1), each
; only 4-byte aligned. The checks expect one register holding 0 to serve as the
; base for both stores, with each store emitted as a single DS_WRITE2_B32 whose
; offset0/offset1 name the two dwords written: 0/1 for element 0 and 2/3 for
; element 1.
; SI-LABEL: @store_misaligned64_constant_offsets
|
||||
; SI: V_MOV_B32_e32 [[ZERO:v[0-9]+]], 0{{$}}
|
||||
; SI: DS_WRITE2_B32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
|
||||
; SI: DS_WRITE2_B32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:2 offset1:3
|
||||
define void @store_misaligned64_constant_offsets() {
|
||||
store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4
|
||||
store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 1), align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
@bar.large = addrspace(3) global [4096 x i64] zeroinitializer, align 4
|
||||
|
||||
; Two 4-byte-aligned i64 stores of the constant 123 to @bar.large at elements
; 2048 and 4095, i.e. byte offsets 0x4000 and 0x7ff8 from the start of the
; array. The checks expect each address to be moved into its own base register
; and written with offset0:0 offset1:1, rather than going through a shared zero
; base. SI-DAG lets the two moves and the two writes appear in either order.
; SI-LABEL: @store_misaligned64_constant_large_offsets
|
||||
; SI-DAG: V_MOV_B32_e32 [[BASE0:v[0-9]+]], 0x7ff8{{$}}
|
||||
; SI-DAG: V_MOV_B32_e32 [[BASE1:v[0-9]+]], 0x4000{{$}}
|
||||
; SI-DAG: DS_WRITE2_B32 [[BASE0]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
|
||||
; SI-DAG: DS_WRITE2_B32 [[BASE1]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
|
||||
; SI: S_ENDPGM
|
||||
define void @store_misaligned64_constant_large_offsets() {
|
||||
store i64 123, i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 2048), align 4
|
||||
store i64 123, i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 4095), align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
@sgemm.lA = internal unnamed_addr addrspace(3) global [264 x float] zeroinitializer, align 4
|
||||
@sgemm.lB = internal unnamed_addr addrspace(3) global [776 x float] zeroinitializer, align 4
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user