mirror of
https://github.com/RPCSX/llvm.git
synced 2025-02-17 11:39:11 +00:00
AMDGPU: Insert wait at start of callee functions
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@300000 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
56db90276b
commit
8c86ad544b
@ -690,5 +690,19 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
|
||||
for (MachineInstr *I : RemoveMI)
|
||||
I->eraseFromParent();
|
||||
|
||||
if (!MFI->isEntryFunction()) {
|
||||
// Wait for any outstanding memory operations that the input registers may
|
||||
// depend on. We can't track them and it's better to do the wait after the
|
||||
// costly call sequence.
|
||||
|
||||
// TODO: Could insert earlier and schedule more liberally with operations
|
||||
// that only use caller preserved registers.
|
||||
MachineBasicBlock &EntryBB = MF.front();
|
||||
BuildMI(EntryBB, EntryBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
|
||||
.addImm(0);
|
||||
|
||||
Changes = true;
|
||||
}
|
||||
|
||||
return Changes;
|
||||
}
|
||||
|
@ -26,7 +26,7 @@
|
||||
|
||||
; ELF: Symbol {
|
||||
; ELF: Name: simple
|
||||
; ELF: Size: 288
|
||||
; ELF: Size: 292
|
||||
; ELF: Type: Function (0x2)
|
||||
; ELF: }
|
||||
|
||||
|
25
test/CodeGen/AMDGPU/insert-waits-callee.mir
Normal file
25
test/CodeGen/AMDGPU/insert-waits-callee.mir
Normal file
@ -0,0 +1,25 @@
|
||||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs -run-pass si-insert-waits -o - %s | FileCheck %s
|
||||
--- |
|
||||
define float @entry_callee_wait(float %arg) #0 {
|
||||
ret float %arg
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
...
|
||||
---
|
||||
# CHECK-LABEL: name: entry_callee_wait{{$}}
|
||||
# CHECK: bb.0:
|
||||
# CHECK-NEXT: S_WAITCNT 0{{$}}
|
||||
# CHECK-NEXT: V_ADD_F32
|
||||
# CHECK-NEXT: S_SETPC_B64
|
||||
liveins:
|
||||
- { reg: '%sgpr0_sgpr1' }
|
||||
- { reg: '%vgpr0' }
|
||||
|
||||
name: entry_callee_wait
|
||||
body: |
|
||||
bb.0:
|
||||
%vgpr0 = V_ADD_F32_e32 %vgpr0, %vgpr0, implicit %exec
|
||||
S_SETPC_B64 killed %sgpr0_sgpr1
|
||||
|
||||
...
|
Loading…
x
Reference in New Issue
Block a user