mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-07 04:21:39 +00:00
AMDGPU: Reduce number of copies emitted
Instead of always inserting a copy in case the super register is itself a subregister, only extract to the super reg class if this is actually the case. This shouldn't really change codegen, but it makes the output of SIFixSGPRCopies easier to read. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@248467 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
71c9e7e6ce
commit
bb9c0afde5
@ -1556,17 +1556,21 @@ unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI,
|
||||
unsigned SubIdx,
|
||||
const TargetRegisterClass *SubRC)
|
||||
const {
|
||||
assert(SuperReg.isReg());
|
||||
|
||||
unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC);
|
||||
MachineBasicBlock *MBB = MI->getParent();
|
||||
DebugLoc DL = MI->getDebugLoc();
|
||||
unsigned SubReg = MRI.createVirtualRegister(SubRC);
|
||||
|
||||
if (SuperReg.getSubReg() == AMDGPU::NoSubRegister) {
|
||||
BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg)
|
||||
.addReg(SuperReg.getReg(), 0, SubIdx);
|
||||
return SubReg;
|
||||
}
|
||||
|
||||
// Just in case the super register is itself a sub-register, copy it to a new
|
||||
// value so we don't need to worry about merging its subreg index with the
|
||||
// SubIdx passed to this function. The register coalescer should be able to
|
||||
// eliminate this extra copy.
|
||||
MachineBasicBlock *MBB = MI->getParent();
|
||||
DebugLoc DL = MI->getDebugLoc();
|
||||
unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC);
|
||||
|
||||
BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), NewSuperReg)
|
||||
.addReg(SuperReg.getReg(), 0, SuperReg.getSubReg());
|
||||
|
@ -147,9 +147,10 @@ endif:
|
||||
ret void
|
||||
}
|
||||
|
||||
; FIXME: and 0 should be replaced with copy
|
||||
; FUNC-LABEL: {{^}}v_and_constant_i64:
|
||||
; SI: v_and_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
|
||||
; SI: v_and_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
|
||||
; SI: v_and_b32_e32 {{v[0-9]+}}, 0, {{v[0-9]+}}
|
||||
define void @v_and_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
|
||||
%a = load i64, i64 addrspace(1)* %aptr, align 8
|
||||
%and = and i64 %a, 1234567
|
||||
|
@ -36,15 +36,14 @@ define void @v_ctpop_i64(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noali
|
||||
ret void
|
||||
}
|
||||
|
||||
; FIXME: We shouldn't emit the v_mov_b32 0
|
||||
; FIXME: or 0 should be replaced with copy
|
||||
; FUNC-LABEL: {{^}}v_ctpop_i64_user:
|
||||
; GCN: buffer_load_dwordx2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}},
|
||||
; GCN: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], v[[LOVAL]], 0
|
||||
; SI-NEXT: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
|
||||
; VI-NEXT: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
|
||||
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
|
||||
; GCN-DAG: v_or_b32_e32 v[[RESULT_LO:[0-9]+]], s{{[0-9]+}}, [[RESULT]]
|
||||
; GCN-DAG: v_or_b32_e32 v[[RESULT_HI:[0-9]+]], s{{[0-9]+}}, v[[ZERO]]
|
||||
; GCN-DAG: v_or_b32_e64 v[[RESULT_HI:[0-9]+]], 0, s{{[0-9]+}}
|
||||
; GCN: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
|
||||
; GCN: s_endpgm
|
||||
define void @v_ctpop_i64_user(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 %s.val) nounwind {
|
||||
|
@ -3,10 +3,9 @@
|
||||
|
||||
; SI-LABEL: {{^}}s_movk_i32_k0:
|
||||
; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0xffff{{$}}
|
||||
; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 1{{$}}
|
||||
; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
|
||||
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
|
||||
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
|
||||
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 1, v[[HI_VREG]]
|
||||
; SI: s_endpgm
|
||||
define void @s_movk_i32_k0(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
|
||||
%loada = load i64, i64 addrspace(1)* %a, align 4
|
||||
@ -17,10 +16,9 @@ define void @s_movk_i32_k0(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
|
||||
|
||||
; SI-LABEL: {{^}}s_movk_i32_k1:
|
||||
; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x7fff{{$}}
|
||||
; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 1{{$}}
|
||||
; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
|
||||
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
|
||||
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
|
||||
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 1, v[[HI_VREG]]
|
||||
; SI: s_endpgm
|
||||
define void @s_movk_i32_k1(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
|
||||
%loada = load i64, i64 addrspace(1)* %a, align 4
|
||||
@ -31,10 +29,9 @@ define void @s_movk_i32_k1(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
|
||||
|
||||
; SI-LABEL: {{^}}s_movk_i32_k2:
|
||||
; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x7fff{{$}}
|
||||
; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 64{{$}}
|
||||
; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
|
||||
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
|
||||
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
|
||||
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 64, v[[HI_VREG]]
|
||||
; SI: s_endpgm
|
||||
define void @s_movk_i32_k2(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
|
||||
%loada = load i64, i64 addrspace(1)* %a, align 4
|
||||
@ -45,10 +42,9 @@ define void @s_movk_i32_k2(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
|
||||
|
||||
; SI-LABEL: {{^}}s_movk_i32_k3:
|
||||
; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0x8000{{$}}
|
||||
; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 1{{$}}
|
||||
; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
|
||||
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
|
||||
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
|
||||
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 1, v[[HI_VREG]]
|
||||
; SI: s_endpgm
|
||||
define void @s_movk_i32_k3(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
|
||||
%loada = load i64, i64 addrspace(1)* %a, align 4
|
||||
@ -59,10 +55,9 @@ define void @s_movk_i32_k3(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
|
||||
|
||||
; SI-LABEL: {{^}}s_movk_i32_k4:
|
||||
; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0x20000{{$}}
|
||||
; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 1{{$}}
|
||||
; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
|
||||
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
|
||||
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
|
||||
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 1, v[[HI_VREG]]
|
||||
; SI: s_endpgm
|
||||
define void @s_movk_i32_k4(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
|
||||
%loada = load i64, i64 addrspace(1)* %a, align 4
|
||||
@ -87,10 +82,9 @@ define void @s_movk_i32_k5(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
|
||||
|
||||
; SI-LABEL: {{^}}s_movk_i32_k6:
|
||||
; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x41{{$}}
|
||||
; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 63{{$}}
|
||||
; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
|
||||
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
|
||||
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
|
||||
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 63, v[[HI_VREG]]
|
||||
; SI: s_endpgm
|
||||
define void @s_movk_i32_k6(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
|
||||
%loada = load i64, i64 addrspace(1)* %a, align 4
|
||||
|
Loading…
Reference in New Issue
Block a user