mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-13 16:03:58 +00:00
AMDGPU/SI: Fold operands through REG_SEQUENCE instructions
Summary:
This helps mostly when we use add instructions for address calculations that contain immediates.

Reviewers: arsenm
Subscribers: arsenm, llvm-commits
Differential Revision: http://reviews.llvm.org/D12256

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@247157 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
076967c806
commit
6680fc3579
@ -245,6 +245,27 @@ static void foldOperand(MachineOperand &OpToFold, MachineInstr *UseMI,
|
||||
}
|
||||
}
|
||||
|
||||
// Special case for REG_SEQUENCE: We can't fold literals into
|
||||
// REG_SEQUENCE instructions, so we have to fold them into the
|
||||
// uses of REG_SEQUENCE.
|
||||
if (UseMI->getOpcode() == AMDGPU::REG_SEQUENCE) {
|
||||
unsigned RegSeqDstReg = UseMI->getOperand(0).getReg();
|
||||
unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();
|
||||
|
||||
for (MachineRegisterInfo::use_iterator
|
||||
RSUse = MRI.use_begin(RegSeqDstReg),
|
||||
RSE = MRI.use_end(); RSUse != RSE; ++RSUse) {
|
||||
|
||||
MachineInstr *RSUseMI = RSUse->getParent();
|
||||
if (RSUse->getSubReg() != RegSeqDstSubReg)
|
||||
continue;
|
||||
|
||||
foldOperand(OpToFold, RSUseMI, RSUse.getOperandNo(), FoldList,
|
||||
TII, TRI, MRI);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
const MCInstrDesc &UseDesc = UseMI->getDesc();
|
||||
|
||||
// Don't fold into target independent nodes. Target independent opcodes
|
||||
|
@ -317,10 +317,8 @@ done:
|
||||
|
||||
; GCN-LABEL: {{^}}test_sink_constant_max_32_bit_offset_i32:
|
||||
; GCN: s_and_saveexec_b64
|
||||
; GCN-DAG: s_mov_b32 s{{[0-9]+}}, 3{{$}}
|
||||
; GCN-DAG: s_mov_b32 s{{[0-9]+}}, -4{{$}}
|
||||
; GCN: s_add_u32
|
||||
; GCN: s_addc_u32
|
||||
; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, -4{{$}}
|
||||
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, 3{{$}}
|
||||
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
|
||||
; GCN: s_or_b64 exec, exec
|
||||
define void @test_sink_constant_max_32_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) {
|
||||
|
@ -508,10 +508,8 @@ define void @merge_local_store_2_constants_i8(i8 addrspace(3)* %out) #0 {
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}merge_local_store_2_constants_i32:
|
||||
; GCN-DAG: s_movk_i32 [[SLO:s[0-9]+]], 0x1c8
|
||||
; GCN-DAG: s_movk_i32 [[SHI:s[0-9]+]], 0x7b
|
||||
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], [[SLO]]
|
||||
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[SHI]]
|
||||
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0x1c8
|
||||
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7b
|
||||
; GCN: ds_write2_b32 v{{[0-9]+}}, v[[LO]], v[[HI]] offset1:1{{$}}
|
||||
define void @merge_local_store_2_constants_i32(i32 addrspace(3)* %out) #0 {
|
||||
%out.gep.1 = getelementptr i32, i32 addrspace(3)* %out, i32 1
|
||||
|
@ -51,12 +51,8 @@ define void @v_select_trunc_i64_2(i32 addrspace(1)* %out, i32 %cond, i64 addrspa
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}v_select_i64_split_imm:
|
||||
; CHECK: s_mov_b32 [[SHI:s[0-9]+]], 63
|
||||
; CHECK: s_mov_b32 [[SLO:s[0-9]+]], 0
|
||||
; CHECK-DAG: v_mov_b32_e32 [[VHI:v[0-9]+]], [[SHI]]
|
||||
; CHECK-DAG: v_mov_b32_e32 [[VLO:v[0-9]+]], [[SLO]]
|
||||
; CHECK-DAG: v_cndmask_b32_e32 {{v[0-9]+}}, [[VLO]], {{v[0-9]+}}
|
||||
; CHECK-DAG: v_cndmask_b32_e32 {{v[0-9]+}}, [[VHI]], {{v[0-9]+}}
|
||||
; CHECK-DAG: v_cndmask_b32_e32 {{v[0-9]+}}, 0, {{v[0-9]+}}
|
||||
; CHECK-DAG: v_cndmask_b32_e32 {{v[0-9]+}}, 63, {{v[0-9]+}}
|
||||
; CHECK: s_endpgm
|
||||
define void @v_select_i64_split_imm(i64 addrspace(1)* %out, i32 %cond, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
|
||||
%cmp = icmp ugt i32 %cond, 5
|
||||
|
Loading…
x
Reference in New Issue
Block a user