AMDGPU: Make m0 unallocatable
m0 may need to be written for spill code, so we don't want general code uses relying on the value stored in it. This introduces a few code quality regressions where copies from m0 are not coalesced into copies of a copy of m0.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@287841 91177308-0d34-0410-b5e6-96231b3b80d8
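Concretely, the code-quality regression shows up in the updated tests below: reading m0 now goes through an intermediate SGPR copy before the value reaches a VGPR. A sketch of the before/after codegen, with illustrative register numbers (s0 and v0 are not fixed by the patch):

    ; before this patch: m0 read straight into a VGPR
    s_mov_b32 m0, -1
    v_mov_b32_e32 v0, m0

    ; after this patch: an allocatable SGPR copy is interposed
    s_mov_b32 m0, -1
    s_mov_b32 s0, m0
    v_mov_b32_e32 v0, s0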
parent 6c4098223f
commit 124ad83dae
@@ -253,7 +253,7 @@ SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
 static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
   switch (NumVectorElts) {
   case 1:
-    return AMDGPU::SReg_32RegClassID;
+    return AMDGPU::SReg_32_XM0RegClassID;
   case 2:
     return AMDGPU::SReg_64RegClassID;
   case 4:
@@ -59,7 +59,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
   addRegisterClass(MVT::i1, &AMDGPU::VReg_1RegClass);
   addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass);
 
-  addRegisterClass(MVT::i32, &AMDGPU::SReg_32RegClass);
+  addRegisterClass(MVT::i32, &AMDGPU::SReg_32_XM0RegClass);
   addRegisterClass(MVT::f32, &AMDGPU::VGPR_32RegClass);
 
   addRegisterClass(MVT::f64, &AMDGPU::VReg_64RegClass);
@@ -79,8 +79,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
   addRegisterClass(MVT::v16f32, &AMDGPU::VReg_512RegClass);
 
   if (Subtarget->has16BitInsts()) {
-    addRegisterClass(MVT::i16, &AMDGPU::SReg_32RegClass);
-    addRegisterClass(MVT::f16, &AMDGPU::SReg_32RegClass);
+    addRegisterClass(MVT::i16, &AMDGPU::SReg_32_XM0RegClass);
+    addRegisterClass(MVT::f16, &AMDGPU::SReg_32_XM0RegClass);
   }
 
   computeRegisterProperties(STI.getRegisterInfo());
@@ -941,25 +941,25 @@ SDValue SITargetLowering::LowerFormalArguments(
   // Start adding system SGPRs.
   if (Info->hasWorkGroupIDX()) {
     unsigned Reg = Info->addWorkGroupIDX();
-    MF.addLiveIn(Reg, &AMDGPU::SReg_32RegClass);
+    MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass);
     CCInfo.AllocateReg(Reg);
   }
 
   if (Info->hasWorkGroupIDY()) {
     unsigned Reg = Info->addWorkGroupIDY();
-    MF.addLiveIn(Reg, &AMDGPU::SReg_32RegClass);
+    MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass);
     CCInfo.AllocateReg(Reg);
   }
 
   if (Info->hasWorkGroupIDZ()) {
     unsigned Reg = Info->addWorkGroupIDZ();
-    MF.addLiveIn(Reg, &AMDGPU::SReg_32RegClass);
+    MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass);
     CCInfo.AllocateReg(Reg);
   }
 
   if (Info->hasWorkGroupInfo()) {
     unsigned Reg = Info->addWorkGroupInfo();
-    MF.addLiveIn(Reg, &AMDGPU::SReg_32RegClass);
+    MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass);
     CCInfo.AllocateReg(Reg);
   }
 
@@ -2414,15 +2414,15 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                     SI::KernelInputOffsets::LOCAL_SIZE_Z);
   case Intrinsic::amdgcn_workgroup_id_x:
   case Intrinsic::r600_read_tgid_x:
-    return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
+    return CreateLiveInRegister(DAG, &AMDGPU::SReg_32_XM0RegClass,
       TRI->getPreloadedValue(MF, SIRegisterInfo::WORKGROUP_ID_X), VT);
   case Intrinsic::amdgcn_workgroup_id_y:
   case Intrinsic::r600_read_tgid_y:
-    return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
+    return CreateLiveInRegister(DAG, &AMDGPU::SReg_32_XM0RegClass,
       TRI->getPreloadedValue(MF, SIRegisterInfo::WORKGROUP_ID_Y), VT);
   case Intrinsic::amdgcn_workgroup_id_z:
   case Intrinsic::r600_read_tgid_z:
-    return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
+    return CreateLiveInRegister(DAG, &AMDGPU::SReg_32_XM0RegClass,
       TRI->getPreloadedValue(MF, SIRegisterInfo::WORKGROUP_ID_Z), VT);
   case Intrinsic::amdgcn_workitem_id_x:
   case Intrinsic::r600_read_tidig_x:
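For reference, these intrinsic cases fire for IR such as the following sketch; the function name is illustrative, while the intrinsic is the documented llvm.amdgcn.workgroup.id.x:

    declare i32 @llvm.amdgcn.workgroup.id.x()

    define void @store_wgid_x(i32 addrspace(1)* %out) {
      %id = call i32 @llvm.amdgcn.workgroup.id.x()
      store i32 %id, i32 addrspace(1)* %out
      ret void
    }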
@@ -4182,7 +4182,7 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
     default:
       return std::make_pair(0U, nullptr);
     case 32:
-      return std::make_pair(0U, &AMDGPU::SReg_32RegClass);
+      return std::make_pair(0U, &AMDGPU::SReg_32_XM0RegClass);
     case 64:
       return std::make_pair(0U, &AMDGPU::SGPR_64RegClass);
     case 128:
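One consequence of this hunk is that the generic 32-bit "s" inline-asm constraint can no longer hand out m0. Pinning a value to m0 explicitly still works through the named-register constraint, as in this sketch modeled on the tests further below (function name illustrative):

    define void @use_m0_directly() {
      ; the named {M0} constraint still binds m0 explicitly
      %m0 = call i32 asm sideeffect "s_mov_b32 m0, -1", "={M0}"()
      call void asm sideeffect "; use $0", "s"(i32 %m0)
      ret void
    }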
@@ -364,7 +364,8 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     return;
   }
 
-  if (RC == &AMDGPU::SReg_32RegClass) {
+  if (RC == &AMDGPU::SReg_32_XM0RegClass ||
+      RC == &AMDGPU::SReg_32RegClass) {
     if (SrcReg == AMDGPU::SCC) {
       BuildMI(MBB, MI, DL, get(AMDGPU::S_CSELECT_B32), DestReg)
         .addImm(-1)
@@ -233,7 +233,7 @@ void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
 
   MachineRegisterInfo &MRI = MF->getRegInfo();
   unsigned UnusedCarry = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
-  unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+  unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
 
   BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
     .addImm(Offset);
@@ -120,6 +120,11 @@ def SCC_CLASS : RegisterClass<"AMDGPU", [i1], 1, (add SCC)> {
   let isAllocatable = 0;
 }
 
+def M0_CLASS : RegisterClass<"AMDGPU", [i32], 32, (add M0)> {
+  let CopyCost = 1;
+  let isAllocatable = 0;
+}
+
 // TODO: Do we need to set DwarfRegAlias on register tuples?
 
 // SGPR 32-bit registers
@@ -259,8 +264,9 @@ def SReg_32_XM0 : RegisterClass<"AMDGPU", [i32, f32, i16, f16], 32,
 
 // Register class for all scalar registers (SGPRs + Special Registers)
 def SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16], 32,
-  (add SReg_32_XM0, M0, VCC_LO, VCC_HI, EXEC_LO, EXEC_HI, FLAT_SCR_LO, FLAT_SCR_HI)> {
+  (add SReg_32_XM0, M0_CLASS)> {
   let AllocationPriority = 1;
+  let isAllocatable = 0;
 }
 
 def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64], 32, (add SGPR_64Regs)> {
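Taken together, these two TableGen hunks leave SReg_32 describing every 32-bit scalar register, m0 included, so copyPhysReg above can still classify such copies, while the register allocator only ever hands out SReg_32_XM0. A minimal C++ sketch of the distinction, assuming the usual LLVM backend context and not taken from this commit:

    // Illustrative fragment: how backend code can tell the two classes apart.
    static unsigned createScalarTemp(llvm::MachineRegisterInfo &MRI) {
      const llvm::TargetRegisterClass *Full  = &llvm::AMDGPU::SReg_32RegClass;     // includes M0, unallocatable
      const llvm::TargetRegisterClass *Alloc = &llvm::AMDGPU::SReg_32_XM0RegClass; // excludes M0, allocatable
      assert(!Full->isAllocatable() && Alloc->isAllocatable());
      // New virtual registers must therefore use the _XM0 class:
      return MRI.createVirtualRegister(Alloc);
    }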
@@ -437,7 +437,7 @@ bool SIWholeQuadMode::requiresCorrectState(const MachineInstr &MI) const {
 MachineBasicBlock::iterator
 SIWholeQuadMode::saveSCC(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator Before) {
-  unsigned SaveReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
+  unsigned SaveReg = MRI->createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
 
   MachineInstr *Save =
       BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::COPY), SaveReg)
@@ -27,9 +27,9 @@
 # CHECK: S_NOP 0, implicit undef %5.sub0
 name: test0
 registers:
-  - { id: 0, class: sreg_32 }
-  - { id: 1, class: sreg_32 }
-  - { id: 2, class: sreg_32 }
+  - { id: 0, class: sreg_32_xm0 }
+  - { id: 1, class: sreg_32_xm0 }
+  - { id: 2, class: sreg_32_xm0 }
   - { id: 3, class: sreg_128 }
   - { id: 4, class: sreg_64 }
   - { id: 5, class: sreg_64 }
@@ -87,13 +87,13 @@ registers:
   - { id: 0, class: sreg_128 }
   - { id: 1, class: sreg_128 }
   - { id: 2, class: sreg_64 }
-  - { id: 3, class: sreg_32 }
+  - { id: 3, class: sreg_32_xm0 }
   - { id: 4, class: sreg_128 }
   - { id: 5, class: sreg_64 }
-  - { id: 6, class: sreg_32 }
-  - { id: 7, class: sreg_32 }
+  - { id: 6, class: sreg_32_xm0 }
+  - { id: 7, class: sreg_32_xm0 }
   - { id: 8, class: sreg_64 }
-  - { id: 9, class: sreg_32 }
+  - { id: 9, class: sreg_32_xm0 }
   - { id: 10, class: sreg_128 }
 body: |
   bb.0:
@@ -162,12 +162,12 @@ body: |
 
 name: test2
 registers:
-  - { id: 0, class: sreg_32 }
-  - { id: 1, class: sreg_32 }
+  - { id: 0, class: sreg_32_xm0 }
+  - { id: 1, class: sreg_32_xm0 }
   - { id: 2, class: sreg_64 }
   - { id: 3, class: sreg_128 }
-  - { id: 4, class: sreg_32 }
-  - { id: 5, class: sreg_32 }
+  - { id: 4, class: sreg_32_xm0 }
+  - { id: 5, class: sreg_32_xm0 }
   - { id: 6, class: sreg_64 }
   - { id: 7, class: sreg_128 }
   - { id: 8, class: sreg_64 }
@@ -260,7 +260,7 @@ body: |
 name: test5
 tracksRegLiveness: true
 registers:
-  - { id: 0, class: sreg_32 }
+  - { id: 0, class: sreg_32_xm0 }
   - { id: 1, class: sreg_64 }
 body: |
   bb.0:
@@ -286,9 +286,9 @@ body: |
 name: loop0
 tracksRegLiveness: true
 registers:
-  - { id: 0, class: sreg_32 }
-  - { id: 1, class: sreg_32 }
-  - { id: 2, class: sreg_32 }
+  - { id: 0, class: sreg_32_xm0 }
+  - { id: 1, class: sreg_32_xm0 }
+  - { id: 2, class: sreg_32_xm0 }
   - { id: 3, class: sreg_128 }
   - { id: 4, class: sreg_128 }
   - { id: 5, class: sreg_128 }
@@ -339,10 +339,10 @@ body: |
 name: loop1
 tracksRegLiveness: true
 registers:
-  - { id: 0, class: sreg_32 }
-  - { id: 1, class: sreg_32 }
-  - { id: 2, class: sreg_32 }
-  - { id: 3, class: sreg_32 }
+  - { id: 0, class: sreg_32_xm0 }
+  - { id: 1, class: sreg_32_xm0 }
+  - { id: 2, class: sreg_32_xm0 }
+  - { id: 3, class: sreg_32_xm0 }
   - { id: 4, class: sreg_128 }
   - { id: 5, class: sreg_128 }
   - { id: 6, class: sreg_128 }
@@ -390,7 +390,7 @@ body: |
 name: loop2
 tracksRegLiveness: true
 registers:
-  - { id: 0, class: sreg_32 }
+  - { id: 0, class: sreg_32_xm0 }
   - { id: 1, class: sreg_128 }
   - { id: 2, class: sreg_128 }
   - { id: 3, class: sreg_128 }
@@ -22,10 +22,11 @@ entry:
   ret void
 }
 
+; FIXME: Should be able to avoid copy
 ; GCN-LABEL: {{^}}inline_sreg_constraint_m0:
 ; GCN: s_mov_b32 m0, -1
-; GCN-NOT: s_mov_b32 s{{[0-9]+}}, m0
-; GCN: ; use m0
+; GCN: s_mov_b32 [[COPY_M0:s[0-9]+]], m0
+; GCN: ; use [[COPY_M0]]
 define void @inline_sreg_constraint_m0() {
   %m0 = tail call i32 asm sideeffect "s_mov_b32 m0, -1", "={M0}"()
   tail call void asm sideeffect "; use $0", "s"(i32 %m0)
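A note on the FileCheck syntax introduced here and reused in the tests below: [[COPY_M0:s[0-9]+]] captures whichever SGPR the register allocator happens to pick, and later [[COPY_M0]] uses must match that same register. For example:

    ; CHECK: s_mov_b32 [[COPY_M0:s[0-9]+]], m0   ; captures the SGPR, e.g. "s2"
    ; CHECK: ; use [[COPY_M0]]                   ; must then match the same "s2"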
@@ -22,7 +22,8 @@ define void @test_readfirstlane_imm(i32 addrspace(1)* %out) #1 {
 ; TODO: m0 should be folded.
 ; CHECK-LABEL: {{^}}test_readfirstlane_m0:
 ; CHECK: s_mov_b32 m0, -1
-; CHECK: v_mov_b32_e32 [[VVAL:v[0-9]]], m0
+; CHECK: s_mov_b32 [[COPY_M0:s[0-9]+]], m0
+; CHECK: v_mov_b32_e32 [[VVAL:v[0-9]]], [[COPY_M0]]
 ; CHECK: v_readfirstlane_b32 s{{[0-9]+}}, [[VVAL]]
 define void @test_readfirstlane_m0(i32 addrspace(1)* %out) #1 {
   %m0 = call i32 asm "s_mov_b32 m0, -1", "={M0}"()
@@ -22,7 +22,8 @@ define void @test_readlane_imm_sreg(i32 addrspace(1)* %out, i32 %src1) #1 {
 ; TODO: m0 should be folded.
 ; CHECK-LABEL: {{^}}test_readlane_m0_sreg:
 ; CHECK: s_mov_b32 m0, -1
-; CHECK: v_mov_b32_e32 [[VVAL:v[0-9]]], m0
+; CHECK: s_mov_b32 [[COPY_M0:s[0-9]+]], m0
+; CHECK: v_mov_b32_e32 [[VVAL:v[0-9]]], [[COPY_M0]]
 ; CHECK: v_readlane_b32 s{{[0-9]+}}, [[VVAL]], s{{[0-9]+}}
 define void @test_readlane_m0_sreg(i32 addrspace(1)* %out, i32 %src1) #1 {
   %m0 = call i32 asm "s_mov_b32 m0, -1", "={M0}"()
@@ -3,9 +3,11 @@
 declare i32 @llvm.read_register.i32(metadata) #0
 declare i64 @llvm.read_register.i64(metadata) #0
 
+; FIXME: Should be able to eliminate copy
 ; CHECK-LABEL: {{^}}test_read_m0:
 ; CHECK: s_mov_b32 m0, -1
-; CHECK: v_mov_b32_e32 [[COPY:v[0-9]+]], m0
+; CHECK: s_mov_b32 [[COPY_M0:s[0-9]+]], m0
+; CHECK: v_mov_b32_e32 [[COPY:v[0-9]+]], [[COPY_M0]]
 ; CHECK: buffer_store_dword [[COPY]]
 define void @test_read_m0(i32 addrspace(1)* %out) #0 {
   store volatile i32 0, i32 addrspace(3)* undef
@@ -9,38 +9,39 @@
 ; GCN-LABEL: {{^}}spill_m0:
 ; TOSMEM: s_mov_b32 s84, SCRATCH_RSRC_DWORD0
 
-; GCN: s_cmp_lg_u32
+; GCN-DAG: s_cmp_lg_u32
 
-; TOVGPR: s_mov_b32 vcc_hi, m0
-; TOVGPR: v_writelane_b32 [[SPILL_VREG:v[0-9]+]], vcc_hi, 0
+; TOVGPR-DAG: s_mov_b32 [[M0_COPY:s[0-9]+]], m0
+; TOVGPR: v_writelane_b32 [[SPILL_VREG:v[0-9]+]], [[M0_COPY]], 0
 
-; TOVMEM: v_mov_b32_e32 [[SPILL_VREG:v[0-9]+]], m0
+; TOVMEM-DAG: s_mov_b32 [[M0_COPY:s[0-9]+]], m0
+; TOVMEM-DAG: v_mov_b32_e32 [[SPILL_VREG:v[0-9]+]], [[M0_COPY]]
 ; TOVMEM: buffer_store_dword [[SPILL_VREG]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} ; 4-byte Folded Spill
 ; TOVMEM: s_waitcnt vmcnt(0)
 
-; TOSMEM: s_mov_b32 vcc_hi, m0
+; TOSMEM-DAG: s_mov_b32 [[M0_COPY:s[0-9]+]], m0
 ; TOSMEM: s_mov_b32 m0, s3{{$}}
-; TOSMEM-NOT: vcc_hi
-; TOSMEM: s_buffer_store_dword vcc_hi, s[84:87], m0 ; 4-byte Folded Spill
+; TOSMEM-NOT: [[M0_COPY]]
+; TOSMEM: s_buffer_store_dword [[M0_COPY]], s[84:87], m0 ; 4-byte Folded Spill
 ; TOSMEM: s_waitcnt lgkmcnt(0)
 
 ; GCN: s_cbranch_scc1 [[ENDIF:BB[0-9]+_[0-9]+]]
 
 ; GCN: [[ENDIF]]:
-; TOVGPR: v_readlane_b32 vcc_hi, [[SPILL_VREG]], 0
-; TOVGPR: s_mov_b32 m0, vcc_hi
+; TOVGPR: v_readlane_b32 [[M0_RESTORE:s[0-9]+]], [[SPILL_VREG]], 0
+; TOVGPR: s_mov_b32 m0, [[M0_RESTORE]]
 
 ; TOVMEM: buffer_load_dword [[RELOAD_VREG:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} ; 4-byte Folded Reload
 ; TOVMEM: s_waitcnt vmcnt(0)
-; TOVMEM: v_readfirstlane_b32 vcc_hi, [[RELOAD_VREG]]
-; TOVMEM: s_mov_b32 m0, vcc_hi
+; TOVMEM: v_readfirstlane_b32 [[M0_RESTORE:s[0-9]+]], [[RELOAD_VREG]]
+; TOVMEM: s_mov_b32 m0, [[M0_RESTORE]]
 
 ; TOSMEM: s_mov_b32 m0, s3{{$}}
-; TOSMEM: s_buffer_load_dword vcc_hi, s[84:87], m0 ; 4-byte Folded Reload
-; TOSMEM-NOT: vcc_hi
-; TOSMEM: s_mov_b32 m0, vcc_hi
+; TOSMEM: s_buffer_load_dword [[M0_RESTORE:s[0-9]+]], s[84:87], m0 ; 4-byte Folded Reload
+; TOSMEM-NOT: [[M0_RESTORE]]
+; TOSMEM: s_mov_b32 m0, [[M0_RESTORE]]
 
-; GCN: s_add_i32 m0, m0, 1
+; GCN: s_add_i32 s{{[0-9]+}}, m0, 1
 define void @spill_m0(i32 %cond, i32 addrspace(1)* %out) #0 {
 entry:
   %m0 = call i32 asm sideeffect "s_mov_b32 m0, 0", "={M0}"() #0
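The TOSMEM checks above capture the motivation for the whole patch: the SMEM spill sequence itself writes m0 to hold the scratch offset, so a value left live in m0 would be clobbered by spilling. Schematically, with s0 standing in for whichever [[M0_COPY]] register the allocator picks:

    s_mov_b32 s0, m0                        ; copy the live value out of m0 first
    s_mov_b32 m0, s3                        ; spill code reuses m0 as the scratch offset
    s_buffer_store_dword s0, s[84:87], m0   ; 4-byte Folded Spill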
@@ -6,14 +6,14 @@
 name: phi_visit_order
 tracksRegLiveness: true
 registers:
-  - { id: 0, class: sreg_32 }
+  - { id: 0, class: sreg_32_xm0 }
   - { id: 1, class: sreg_64 }
-  - { id: 2, class: sreg_32 }
+  - { id: 2, class: sreg_32_xm0 }
   - { id: 7, class: vgpr_32 }
-  - { id: 8, class: sreg_32 }
+  - { id: 8, class: sreg_32_xm0 }
   - { id: 9, class: vgpr_32 }
   - { id: 10, class: sreg_64 }
-  - { id: 11, class: sreg_32 }
+  - { id: 11, class: sreg_32_xm0 }
 
 body: |
   ; GCN-LABEL: name: phi_visit_order