mirror of
https://gitee.com/openharmony/third_party_mesa3d
synced 2024-11-27 09:31:03 +00:00
radv: allocate shaders to 32-bit address to skip PGM_HI
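This reduces the number of emitted registers. Shader BOs are now created with RADEON_FLAG_32BIT, so the upper address bits are constant: the PGM_HI registers are written once from address32_hi in si_emit_compute()/si_emit_graphics(), and each pipeline only emits PGM_LO.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12466>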
This commit is contained in:
parent
2dc90ca8a4
commit
e0353296da
@ -4420,9 +4420,7 @@ radv_pipeline_generate_hw_ls(struct radeon_cmdbuf *cs, const struct radv_pipelin
|
||||
uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
|
||||
uint32_t rsrc2 = shader->config.rsrc2;
|
||||
|
||||
radeon_set_sh_reg_seq(cs, R_00B520_SPI_SHADER_PGM_LO_LS, 2);
|
||||
radeon_emit(cs, va >> 8);
|
||||
radeon_emit(cs, S_00B524_MEM_BASE(va >> 40));
|
||||
radeon_set_sh_reg(cs, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
|
||||
|
||||
rsrc2 |= S_00B52C_LDS_SIZE(num_lds_blocks);
|
||||
if (pipeline->device->physical_device->rad_info.chip_class == GFX7 &&
|
||||
@ -4447,9 +4445,8 @@ radv_pipeline_generate_hw_ngg(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf
|
||||
: pipeline->shaders[MESA_SHADER_VERTEX];
|
||||
const struct gfx10_ngg_info *ngg_state = &shader->info.ngg_info;
|
||||
|
||||
radeon_set_sh_reg_seq(cs, R_00B320_SPI_SHADER_PGM_LO_ES, 2);
|
||||
radeon_emit(cs, va >> 8);
|
||||
radeon_emit(cs, S_00B324_MEM_BASE(va >> 40));
|
||||
radeon_set_sh_reg(cs, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
|
||||
|
||||
radeon_set_sh_reg_seq(cs, R_00B228_SPI_SHADER_PGM_RSRC1_GS, 2);
|
||||
radeon_emit(cs, shader->config.rsrc1);
|
||||
radeon_emit(cs, shader->config.rsrc2);
|
||||
@ -4592,13 +4589,9 @@ radv_pipeline_generate_hw_hs(struct radeon_cmdbuf *cs, const struct radv_pipelin
|
||||
|
||||
if (pipeline->device->physical_device->rad_info.chip_class >= GFX9) {
|
||||
if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
|
||||
radeon_set_sh_reg_seq(cs, R_00B520_SPI_SHADER_PGM_LO_LS, 2);
|
||||
radeon_emit(cs, va >> 8);
|
||||
radeon_emit(cs, S_00B524_MEM_BASE(va >> 40));
|
||||
radeon_set_sh_reg(cs, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
|
||||
} else {
|
||||
radeon_set_sh_reg_seq(cs, R_00B410_SPI_SHADER_PGM_LO_LS, 2);
|
||||
radeon_emit(cs, va >> 8);
|
||||
radeon_emit(cs, S_00B414_MEM_BASE(va >> 40));
|
||||
radeon_set_sh_reg(cs, R_00B410_SPI_SHADER_PGM_LO_LS, va >> 8);
|
||||
}
|
||||
|
||||
radeon_set_sh_reg_seq(cs, R_00B428_SPI_SHADER_PGM_RSRC1_HS, 2);
|
||||
@ -4793,13 +4786,9 @@ radv_pipeline_generate_hw_gs(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf
|
||||
|
||||
if (pipeline->device->physical_device->rad_info.chip_class >= GFX9) {
|
||||
if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
|
||||
radeon_set_sh_reg_seq(cs, R_00B320_SPI_SHADER_PGM_LO_ES, 2);
|
||||
radeon_emit(cs, va >> 8);
|
||||
radeon_emit(cs, S_00B324_MEM_BASE(va >> 40));
|
||||
radeon_set_sh_reg(cs, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
|
||||
} else {
|
||||
radeon_set_sh_reg_seq(cs, R_00B210_SPI_SHADER_PGM_LO_ES, 2);
|
||||
radeon_emit(cs, va >> 8);
|
||||
radeon_emit(cs, S_00B214_MEM_BASE(va >> 40));
|
||||
radeon_set_sh_reg(cs, R_00B210_SPI_SHADER_PGM_LO_ES, va >> 8);
|
||||
}
|
||||
|
||||
radeon_set_sh_reg_seq(cs, R_00B228_SPI_SHADER_PGM_RSRC1_GS, 2);
|
||||
@ -5576,9 +5565,7 @@ radv_pipeline_generate_hw_cs(struct radeon_cmdbuf *cs, const struct radv_pipelin
|
||||
uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
|
||||
struct radv_device *device = pipeline->device;
|
||||
|
||||
radeon_set_sh_reg_seq(cs, R_00B830_COMPUTE_PGM_LO, 2);
|
||||
radeon_emit(cs, va >> 8);
|
||||
radeon_emit(cs, S_00B834_DATA(va >> 40));
|
||||
radeon_set_sh_reg(cs, R_00B830_COMPUTE_PGM_LO, va >> 8);
|
||||
|
||||
radeon_set_sh_reg_seq(cs, R_00B848_COMPUTE_PGM_RSRC1, 2);
|
||||
radeon_emit(cs, shader->config.rsrc1);
|
||||
|
@ -1059,7 +1059,7 @@ radv_alloc_shader_memory(struct radv_device *device, struct radv_shader_variant
|
||||
slab->size = MAX2(256 * 1024, shader->code_size);
|
||||
VkResult result = device->ws->buffer_create(
|
||||
device->ws, slab->size, 256, RADEON_DOMAIN_VRAM,
|
||||
RADEON_FLAG_NO_INTERPROCESS_SHARING |
|
||||
RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_32BIT |
|
||||
(device->physical_device->rad_info.cpdma_prefetch_writes_memory ? 0
|
||||
: RADEON_FLAG_READ_ONLY),
|
||||
RADV_BO_PRIORITY_SHADER, 0, &slab->bo);
|
||||
|
@ -79,6 +79,9 @@ si_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs)
|
||||
radeon_emit(cs, 0);
|
||||
radeon_emit(cs, 0);
|
||||
|
||||
radeon_set_sh_reg(cs, R_00B834_COMPUTE_PGM_HI,
|
||||
S_00B834_DATA(device->physical_device->rad_info.address32_hi >> 8));
|
||||
|
||||
radeon_set_sh_reg_seq(cs, R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0, 2);
|
||||
/* R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0 / SE1,
|
||||
* renamed COMPUTE_DESTINATION_EN_SEn on gfx10. */
|
||||
@ -291,6 +294,23 @@ si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
|
||||
radeon_set_context_reg(cs, R_028408_VGT_INDX_OFFSET, 0);
|
||||
}
|
||||
|
||||
if (device->physical_device->rad_info.chip_class >= GFX10) {
|
||||
radeon_set_sh_reg(cs, R_00B524_SPI_SHADER_PGM_HI_LS,
|
||||
S_00B524_MEM_BASE(device->physical_device->rad_info.address32_hi >> 8));
|
||||
radeon_set_sh_reg(cs, R_00B324_SPI_SHADER_PGM_HI_ES,
|
||||
S_00B324_MEM_BASE(device->physical_device->rad_info.address32_hi >> 8));
|
||||
} else if (device->physical_device->rad_info.chip_class == GFX9) {
|
||||
radeon_set_sh_reg(cs, R_00B414_SPI_SHADER_PGM_HI_LS,
|
||||
S_00B414_MEM_BASE(device->physical_device->rad_info.address32_hi >> 8));
|
||||
radeon_set_sh_reg(cs, R_00B214_SPI_SHADER_PGM_HI_ES,
|
||||
S_00B214_MEM_BASE(device->physical_device->rad_info.address32_hi >> 8));
|
||||
} else {
|
||||
radeon_set_sh_reg(cs, R_00B524_SPI_SHADER_PGM_HI_LS,
|
||||
S_00B524_MEM_BASE(device->physical_device->rad_info.address32_hi >> 8));
|
||||
radeon_set_sh_reg(cs, R_00B324_SPI_SHADER_PGM_HI_ES,
|
||||
S_00B324_MEM_BASE(device->physical_device->rad_info.address32_hi >> 8));
|
||||
}
|
||||
|
||||
unsigned cu_mask_ps = 0xffffffff;
|
||||
|
||||
/* It's wasteful to enable all CUs for PS if shader arrays have a
|
||||
|
Loading…
Reference in New Issue
Block a user