mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-04-01 07:11:45 +00:00
[AMDGPU][Waitcnt] Fix handling of flat instrs
On GFX9 and earlier, flat memory ops may decrement both VMCNT and LGKMCNT out of order. Differential Revision: https://reviews.llvm.org/D46616 llvm-svn: 333926
This commit is contained in:
parent
1ee706885e
commit
77bed3607d
@ -472,6 +472,10 @@ public:
|
||||
return FlatScratchInsts;
|
||||
}
|
||||
|
||||
// Returns true when the subtarget generation is newer than GFX9.
// Per the commit description, on GFX9 and earlier flat memory ops may
// decrement VMCNT and LGKMCNT out of order, so this returns false for those
// generations; the waitcnt bracket code queries it before forcing a
// "waitcnt 0" while a FLAT operation is pending.
bool hasFlatLgkmVMemCountInOrder() const {
|
||||
return getGeneration() > GFX9;
|
||||
}
|
||||
|
||||
// Returns true when the subtarget generation is GFX9 or higher.
// NOTE(review): presumably gates the D16 load/store instructions, as the
// name suggests -- confirm against the instruction definitions.
bool hasD16LoadStore() const {
|
||||
return getGeneration() >= GFX9;
|
||||
}
|
||||
|
@ -136,7 +136,7 @@ enum RegisterMapping {
|
||||
// "s_waitcnt 0" before use.
|
||||
class BlockWaitcntBrackets {
|
||||
public:
|
||||
BlockWaitcntBrackets() {
|
||||
BlockWaitcntBrackets(const SISubtarget *SubTarget) : ST(SubTarget) {
|
||||
for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
|
||||
T = (enum InstCounterType)(T + 1)) {
|
||||
memset(VgprScores[T], 0, sizeof(VgprScores[T]));
|
||||
@ -314,6 +314,7 @@ public:
|
||||
void dump() { print(dbgs()); }
|
||||
|
||||
private:
|
||||
const SISubtarget *ST = nullptr;
|
||||
bool WaitAtBeginning = false;
|
||||
bool RevisitLoop = false;
|
||||
bool MixedExpTypes = false;
|
||||
@ -735,9 +736,12 @@ unsigned int BlockWaitcntBrackets::updateByWait(InstCounterType T,
|
||||
const int32_t LB = getScoreLB(T);
|
||||
const int32_t UB = getScoreUB(T);
|
||||
if ((UB >= ScoreToWait) && (ScoreToWait > LB)) {
|
||||
if (T == VM_CNT && hasPendingFlat()) {
|
||||
// If there is a pending FLAT operation, and this is a VM waitcnt,
|
||||
// then we need to force a waitcnt 0 for VM.
|
||||
if ((T == VM_CNT || T == LGKM_CNT) &&
|
||||
hasPendingFlat() &&
|
||||
!ST->hasFlatLgkmVMemCountInOrder()) {
|
||||
// If there is a pending FLAT operation, and this is a VMem or LGKM
|
||||
// waitcnt and the target can report early completion, then we need
|
||||
// to force a waitcnt 0.
|
||||
NeedWait = CNT_MASK(T);
|
||||
setScoreLB(T, getScoreUB(T));
|
||||
} else if (counterOutOfOrder(T)) {
|
||||
@ -1200,7 +1204,7 @@ void SIInsertWaitcnts::generateWaitcntInstBefore(
|
||||
if (!ScoreBracket) {
|
||||
assert(!BlockVisitedSet.count(TBB));
|
||||
BlockWaitcntBracketsMap[TBB] =
|
||||
llvm::make_unique<BlockWaitcntBrackets>();
|
||||
llvm::make_unique<BlockWaitcntBrackets>(ST);
|
||||
ScoreBracket = BlockWaitcntBracketsMap[TBB].get();
|
||||
}
|
||||
ScoreBracket->setRevisitLoop(true);
|
||||
@ -1879,7 +1883,7 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
|
||||
|
||||
BlockWaitcntBrackets *ScoreBrackets = BlockWaitcntBracketsMap[&MBB].get();
|
||||
if (!ScoreBrackets) {
|
||||
BlockWaitcntBracketsMap[&MBB] = llvm::make_unique<BlockWaitcntBrackets>();
|
||||
BlockWaitcntBracketsMap[&MBB] = llvm::make_unique<BlockWaitcntBrackets>(ST);
|
||||
ScoreBrackets = BlockWaitcntBracketsMap[&MBB].get();
|
||||
}
|
||||
ScoreBrackets->setPostOrder(MBB.getNumber());
|
||||
|
@ -1,4 +1,5 @@
|
||||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass si-insert-waitcnts %s -o - | FileCheck %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx803 -run-pass si-insert-waitcnts %s -o - | FileCheck -check-prefix=GFX89 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass si-insert-waitcnts %s -o - | FileCheck -check-prefix=GFX89 %s
|
||||
|
||||
--- |
|
||||
define amdgpu_kernel void @flat_zero_waitcnt(i32 addrspace(1)* %global4,
|
||||
@ -30,22 +31,14 @@
|
||||
|
||||
# CHECK-LABEL: bb.1:
|
||||
# CHECK: FLAT_LOAD_DWORD
|
||||
# CHECK: S_WAITCNT 368
|
||||
# GFX89: S_WAITCNT 112
|
||||
# CHECK: FLAT_LOAD_DWORDX4
|
||||
# The first load has no mem operand, so we should assume it accesses the flat
|
||||
# address space.
|
||||
# s_waitcnt lgkmcnt(1)
|
||||
# CHECK-NEXT: S_WAITCNT 383
|
||||
|
||||
# CHECK-LABEL: bb.2:
|
||||
# CHECK: FLAT_LOAD_DWORD
|
||||
# CHECK: S_WAITCNT 368
|
||||
# GFX89: S_WAITCNT 112
|
||||
# CHECK: FLAT_LOAD_DWORDX4
|
||||
|
||||
# One outstanding load accesses the flat address space.
|
||||
# s_waitcnt lgkmcnt(1)
|
||||
# CHECK-NEXT: S_WAITCNT 383
|
||||
|
||||
name: flat_zero_waitcnt
|
||||
|
||||
body: |
|
||||
|
Loading…
x
Reference in New Issue
Block a user