r600/sfn: Use a heuristic to keep SSBO setup and store close

When SSBO instructions use constant address values, the address loads
are immediately ready. Scheduling these address loads early increases
the register pressure, so force a new instruction block to work around
this problem.

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/6975

Fixes: 79ca456b4837b3bc21cf9ef3c03c505c4b4909f6
   r600/sfn: rewrite NIR backend

v2: do handling in shader block to be thread safe (hinted at by Filip)

Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
Reviewed-by: Filip Gawin <filip@gawin.net> (v1)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18212>
(cherry picked from commit c81fe5b235c0204e9f77d13411004a3307aa6301)
This commit is contained in:
Gert Wollny 2022-08-23 15:46:36 +02:00 committed by Dylan Baker
parent b84c8968c7
commit 29415adbaa
5 changed files with 7 additions and 8 deletions

View File

@ -10291,7 +10291,7 @@
"description": "r600/sfn: Use a heuristic to keep SSBO setup and store close",
"nominated": true,
"nomination_type": 1,
"resolution": 0,
"resolution": 1,
"main_sha": null,
"because_sha": "79ca456b4837b3bc21cf9ef3c03c505c4b4909f6"
},

View File

@ -210,6 +210,8 @@ public:
bool kcache_reservation_failed() const { return m_kcache_alloc_failed;}
int inc_rat_emitted() { return ++m_emitted_rat_instr;}
static void set_chipclass(r600_chip_class chip_class);
private:
@ -234,6 +236,7 @@ private:
int m_lds_group_requirement{0};
AluInstr *m_lds_group_start{nullptr};
static unsigned s_max_kcache_banks;
int m_emitted_rat_instr{0};
};
class InstrWithVectorResult : public Instr {

View File

@ -559,12 +559,6 @@ bool RatInstr::emit_ssbo_load(nir_intrinsic_instr *intr, Shader& shader)
bool RatInstr::emit_ssbo_store(nir_intrinsic_instr *instr, Shader& shader)
{
/* Force the scheduler to not move the preparation too far away, by starting
* a new block (TODO: better priority handling in the scheduler)*/
if (nir_src_num_components(instr->src[0]) > 2)
shader.start_new_block(0);
auto &vf = shader.value_factory();
auto orig_addr = vf.src(instr->src[2], 0);

View File

@ -194,7 +194,6 @@ private:
int m_element_size{3};
bool m_need_ack{false};
bool m_need_mark{false};
};

View File

@ -1085,6 +1085,9 @@ void Shader::InstructionChain::visit(RatInstr *instr)
if (prepare_mem_barrier)
instr->set_ack();
if (this_shader->m_current_block->inc_rat_emitted() > 15)
this_shader->start_new_block(0);
}
void Shader::InstructionChain::apply(Instr *current, Instr **last) {