AMDGPU: Fix not accounting for instruction size in bundles

These were counted as 0. Fixes branch limit exceeded errors
in some large programs.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@314944 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Matt Arsenault 2017-10-04 22:59:12 +00:00
parent f1bd8070e1
commit b151df8f6d
3 changed files with 68 additions and 1 deletions

View File

@ -4371,6 +4371,18 @@ unsigned SIInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
return AMDGPU::NoRegister;
}
unsigned SIInstrInfo::getInstBundleSize(const MachineInstr &MI) const {
unsigned Size = 0;
MachineBasicBlock::const_instr_iterator I = MI.getIterator();
MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
while (++I != E && I->isInsideBundle()) {
assert(!I->isBundle() && "No nested bundle!");
Size += getInstSizeInBytes(*I);
}
return Size;
}
unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
unsigned Opc = MI.getOpcode();
const MCInstrDesc &Desc = getMCOpcodeFromPseudo(Opc);
@ -4414,9 +4426,10 @@ unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
case TargetOpcode::IMPLICIT_DEF:
case TargetOpcode::KILL:
case TargetOpcode::DBG_VALUE:
case TargetOpcode::BUNDLE:
case TargetOpcode::EH_LABEL:
return 0;
case TargetOpcode::BUNDLE:
return getInstBundleSize(MI);
case TargetOpcode::INLINEASM: {
const MachineFunction *MF = MI.getParent()->getParent();
const char *AsmStr = MI.getOperand(0).getSymbolName();

View File

@ -818,6 +818,7 @@ public:
unsigned isStoreToStackSlot(const MachineInstr &MI,
int &FrameIndex) const override;
unsigned getInstBundleSize(const MachineInstr &MI) const;
unsigned getInstSizeInBytes(const MachineInstr &MI) const override;
bool mayAccessFlatAddressSpace(const MachineInstr &MI) const;

View File

@ -0,0 +1,53 @@
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -amdgpu-s-branch-bits=5 < %s | FileCheck -check-prefix=GCN %s
; Restrict maximum branch to between +15 and -16 dwords
; Instructions inside a bundle were collectively counted as
; 0-bytes. Make sure this is accounted for when estimating branch
; distances
; Bundle used for address in call sequence: 20 bytes
; s_getpc_b64
; s_add_u32
; s_addc_u32
; plus additional overhead
; s_setpc_b64
; and some register copies
declare void @func() #0
; GCN-LABEL: {{^}}bundle_size:
; GCN: s_cbranch_scc0 [[BB_EXPANSION:BB[0-9]+_[0-9]+]]
; GCN: s_getpc_b64
; GCN-NEXT: s_add_u32
; GCN-NEXT: s_addc_u32
; GCN-NEXT: s_setpc_b64
; GCN: {{^}}[[BB_EXPANSION]]:
; GCN: s_getpc_b64
; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, func@
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, func@
; GCN: s_swappc_b64
define amdgpu_kernel void @bundle_size(i32 addrspace(1)* %arg, i32 %cnd) #0 {
bb:
%cmp = icmp eq i32 %cnd, 0
br i1 %cmp, label %bb3, label %bb2 ; +8 dword branch
bb2:
call void @func()
call void asm sideeffect
"v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64", ""() #0
br label %bb3
bb3:
store volatile i32 %cnd, i32 addrspace(1)* %arg
ret void
}
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }