freedreno/a6xx: texture state obj

Unfortunately gallium doesn't match what the hw wants perfectly here, in
using a separate CSO for each texture/sampler.  So we have to use a hash
table to map the collection of textures/samplers to a hw state object.

We probably could use separate hw state objects for texture and sampler
state, but mesa/st tends to update the tex and samp state together.

Signed-off-by: Rob Clark <robdclark@gmail.com>
This commit is contained in:
Rob Clark 2018-10-10 15:59:29 -04:00
parent e8606b11dd
commit 1b9d69410c
6 changed files with 252 additions and 34 deletions

View File

@ -56,6 +56,8 @@ fd6_context_destroy(struct pipe_context *pctx)
fd_context_cleanup_common_vbos(&fd6_ctx->base);
fd6_texture_fini(pctx);
free(fd6_ctx);
}

View File

@ -105,6 +105,9 @@ struct fd6_context {
/*{*/
struct fd6_streamout_state tf;
/*}*/
uint16_t tex_seqno;
struct hash_table *tex_cache;
};
static inline struct fd6_context *

View File

@ -325,32 +325,32 @@ emit_border_color(struct fd_context *ctx, struct fd_ringbuffer *ring)
u_upload_unmap(fd6_ctx->border_color_uploader);
}
static bool
emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
enum a6xx_state_block sb, struct fd_texture_stateobj *tex)
bool
fd6_emit_textures(struct fd_pipe *pipe, struct fd_ringbuffer *ring,
enum a6xx_state_block sb, struct fd_texture_stateobj *tex,
unsigned bcolor_offset)
{
bool needs_border = false;
unsigned bcolor_offset;
unsigned opcode, tex_samp_reg, tex_const_reg;
unsigned opcode, tex_samp_reg, tex_const_reg, tex_count_reg;
switch (sb) {
case SB6_VS_TEX:
opcode = CP_LOAD_STATE6_GEOM;
bcolor_offset = 0;
tex_samp_reg = REG_A6XX_SP_VS_TEX_SAMP_LO;
tex_const_reg = REG_A6XX_SP_VS_TEX_CONST_LO;
tex_count_reg = REG_A6XX_SP_VS_TEX_COUNT;
break;
case SB6_FS_TEX:
opcode = CP_LOAD_STATE6_FRAG;
bcolor_offset = ctx->tex[PIPE_SHADER_VERTEX].num_samplers;
tex_samp_reg = REG_A6XX_SP_FS_TEX_SAMP_LO;
tex_const_reg = REG_A6XX_SP_FS_TEX_CONST_LO;
tex_count_reg = REG_A6XX_SP_FS_TEX_COUNT;
break;
case SB6_CS_TEX:
opcode = CP_LOAD_STATE6_FRAG;
bcolor_offset = 0;
tex_samp_reg = REG_A6XX_SP_CS_TEX_SAMP_LO;
tex_const_reg = REG_A6XX_SP_CS_TEX_CONST_LO;
tex_count_reg = 0; //REG_A6XX_SP_CS_TEX_COUNT;
break;
default:
unreachable("bad state block");
@ -359,8 +359,8 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
if (tex->num_samplers > 0) {
struct fd_ringbuffer *state =
fd_ringbuffer_new_flags(ctx->pipe, tex->num_samplers * 4 * 4,
FD_RINGBUFFER_OBJECT | FD_RINGBUFFER_STREAMING);
fd_ringbuffer_new_flags(pipe, tex->num_samplers * 4 * 4,
FD_RINGBUFFER_OBJECT);
for (unsigned i = 0; i < tex->num_samplers; i++) {
static const struct fd6_sampler_stateobj dummy_sampler = {};
const struct fd6_sampler_stateobj *sampler = tex->samplers[i] ?
@ -390,8 +390,8 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
if (tex->num_textures > 0) {
struct fd_ringbuffer *state =
fd_ringbuffer_new_flags(ctx->pipe, tex->num_textures * 16 * 4,
FD_RINGBUFFER_OBJECT | FD_RINGBUFFER_STREAMING);
fd_ringbuffer_new_flags(pipe, tex->num_textures * 16 * 4,
FD_RINGBUFFER_OBJECT);
for (unsigned i = 0; i < tex->num_textures; i++) {
static const struct fd6_pipe_sampler_view dummy_view = {};
const struct fd6_pipe_sampler_view *view = tex->textures[i] ?
@ -445,6 +445,11 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
fd_ringbuffer_del(state);
}
if (tex_count_reg) {
OUT_PKT4(ring, tex_count_reg, 1);
OUT_RING(ring, tex->num_textures);
}
return needs_border;
}
@ -931,29 +936,26 @@ fd6_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
OUT_RING(ring, A6XX_RB_BLEND_ALPHA_F32(bcolor->color[3]));
}
if (ctx->dirty_shader[PIPE_SHADER_VERTEX] & FD_DIRTY_SHADER_TEX) {
needs_border |= emit_textures(ctx, ring, SB6_VS_TEX,
&ctx->tex[PIPE_SHADER_VERTEX]);
OUT_PKT4(ring, REG_A6XX_SP_VS_TEX_COUNT, 1);
OUT_RING(ring, ctx->tex[PIPE_SHADER_VERTEX].num_textures);
if ((ctx->dirty_shader[PIPE_SHADER_VERTEX] & FD_DIRTY_SHADER_TEX) &&
ctx->tex[PIPE_SHADER_VERTEX].num_textures > 0) {
struct fd6_texture_state *tex = fd6_texture_state(ctx,
SB6_VS_TEX, &ctx->tex[PIPE_SHADER_VERTEX]);
needs_border |= tex->needs_border;
fd6_emit_add_group(emit, tex->stateobj, FD6_GROUP_VS_TEX, 0x7);
}
if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_TEX) {
needs_border |= emit_textures(ctx, ring, SB6_FS_TEX,
&ctx->tex[PIPE_SHADER_FRAGMENT]);
OUT_PKT4(ring, REG_A6XX_SP_FS_TEX_COUNT, 1);
OUT_RING(ring, ctx->tex[PIPE_SHADER_FRAGMENT].num_textures);
if ((ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_TEX) &&
ctx->tex[PIPE_SHADER_FRAGMENT].num_textures > 0) {
struct fd6_texture_state *tex = fd6_texture_state(ctx,
SB6_FS_TEX, &ctx->tex[PIPE_SHADER_FRAGMENT]);
needs_border |= tex->needs_border;
fd6_emit_add_group(emit, tex->stateobj, FD6_GROUP_FS_TEX, 0x7);
}
#if 0
OUT_PKT4(ring, REG_A6XX_TPL1_FS_TEX_COUNT, 1);
OUT_RING(ring, ctx->shaderimg[PIPE_SHADER_FRAGMENT].enabled_mask ?
~0 : ctx->tex[PIPE_SHADER_FRAGMENT].num_textures);
OUT_PKT4(ring, REG_A6XX_TPL1_CS_TEX_COUNT, 1);
OUT_RING(ring, 0);
#endif
if (needs_border)
emit_border_color(ctx, ring);
@ -988,8 +990,8 @@ fd6_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
if (dirty & FD_DIRTY_SHADER_TEX) {
bool needs_border = false;
needs_border |= emit_textures(ctx, ring, SB6_CS_TEX,
&ctx->tex[PIPE_SHADER_COMPUTE]);
needs_border |= fd6_emit_textures(ctx->pipe, ring, SB6_CS_TEX,
&ctx->tex[PIPE_SHADER_COMPUTE], 0);
if (needs_border)
emit_border_color(ctx, ring);

View File

@ -45,6 +45,8 @@ struct fd_ringbuffer;
/* Identifiers for the state groups registered through fd6_emit_add_group().
 * Enumerator order defines the numeric ids, so append rather than reorder.
 */
enum fd6_state_id {
/* NOTE(review): presumably the VS/FS shader-constant groups; their users
 * are not visible in this hunk -- confirm against fd6_emit.c.
 */
FD6_GROUP_VS_CONST,
FD6_GROUP_FS_CONST,
/* Texture + sampler stateobj for the vertex shader stage. */
FD6_GROUP_VS_TEX,
/* Texture + sampler stateobj for the fragment shader stage. */
FD6_GROUP_FS_TEX,
};
struct fd6_state_group {
@ -174,6 +176,10 @@ fd6_stage2shadersb(enum shader_t type)
}
}
bool fd6_emit_textures(struct fd_pipe *pipe, struct fd_ringbuffer *ring,
enum a6xx_state_block sb, struct fd_texture_stateobj *tex,
unsigned bcolor_offset);
void fd6_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd6_emit *emit);
void fd6_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,

View File

@ -30,9 +30,13 @@
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/u_format.h"
#include "util/hash_table.h"
#include "fd6_texture.h"
#include "fd6_format.h"
#include "fd6_emit.h"
static void fd6_texture_state_destroy(struct fd6_texture_state *state);
static enum a6xx_tex_clamp
tex_clamp(unsigned wrap, bool clamp_to_edge, bool *needs_border)
@ -94,6 +98,7 @@ fd6_sampler_state_create(struct pipe_context *pctx,
return NULL;
so->base = *cso;
so->seqno = ++fd6_context(fd_context(pctx))->tex_seqno;
if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR)
miplinear = true;
@ -140,6 +145,28 @@ fd6_sampler_state_create(struct pipe_context *pctx,
return so;
}
/**
 * Delete a sampler CSO.
 *
 * Before the object is freed, every cached texture state whose key
 * mentions this sampler's seqno is destroyed and dropped from the
 * context's tex_cache, so no cache entry keeps naming a sampler that
 * no longer exists.
 *
 * @param pctx   context owning the sampler and the texture-state cache
 * @param hwcso  the struct fd6_sampler_stateobj to delete
 */
static void
fd6_sampler_state_delete(struct pipe_context *pctx, void *hwcso)
{
	struct fd6_context *fd6_ctx = fd6_context(fd_context(pctx));
	struct fd6_sampler_stateobj *samp = hwcso;
	struct hash_entry *entry;

	hash_table_foreach(fd6_ctx->tex_cache, entry) {
		struct fd6_texture_state *state = entry->data;
		bool references_samp = false;

		/* Does any sampler slot of this entry's key name our seqno? */
		for (unsigned slot = 0;
				!references_samp && slot < ARRAY_SIZE(state->key.samp);
				slot++) {
			references_samp = (state->key.samp[slot].seqno == samp->seqno);
		}

		if (!references_samp)
			continue;

		/* Removing the entry currently being visited is done in place. */
		fd6_texture_state_destroy(state);
		_mesa_hash_table_remove(fd6_ctx->tex_cache, entry);
	}

	free(hwcso);
}
static void
fd6_sampler_states_bind(struct pipe_context *pctx,
enum pipe_shader_type shader, unsigned start,
@ -215,6 +242,7 @@ fd6_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
so->base.texture = prsc;
so->base.reference.count = 1;
so->base.context = pctx;
so->seqno = ++fd6_context(fd_context(pctx))->tex_seqno;
so->texconst0 =
A6XX_TEX_CONST_0_FMT(fd6_pipe2tex(format)) |
@ -309,6 +337,31 @@ fd6_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
return &so->base;
}
/**
 * Destroy a sampler view.
 *
 * Every cached texture state whose key mentions this view's seqno is
 * destroyed and removed from the context's tex_cache first; afterwards
 * the view's reference on its texture is dropped and the view is freed.
 *
 * @param pctx   context owning the view and the texture-state cache
 * @param _view  the sampler view to destroy
 */
static void
fd6_sampler_view_destroy(struct pipe_context *pctx,
		struct pipe_sampler_view *_view)
{
	struct fd6_context *fd6_ctx = fd6_context(fd_context(pctx));
	struct fd6_pipe_sampler_view *view = fd6_pipe_sampler_view(_view);
	struct hash_entry *entry;

	hash_table_foreach(fd6_ctx->tex_cache, entry) {
		struct fd6_texture_state *state = entry->data;
		bool references_view = false;

		/* Does any view slot of this entry's key name our seqno? */
		for (unsigned slot = 0;
				!references_view && slot < ARRAY_SIZE(state->key.view);
				slot++) {
			references_view = (state->key.view[slot].seqno == view->seqno);
		}

		if (!references_view)
			continue;

		/* Removing the entry currently being visited is done in place. */
		fd6_texture_state_destroy(state);
		_mesa_hash_table_remove(fd6_ctx->tex_cache, entry);
	}

	/* Release the texture reference held by the view, then the view. */
	pipe_resource_reference(&view->base.texture, NULL);
	free(view);
}
static void
fd6_set_sampler_views(struct pipe_context *pctx, enum pipe_shader_type shader,
unsigned start, unsigned nr,
@ -337,11 +390,127 @@ fd6_set_sampler_views(struct pipe_context *pctx, enum pipe_shader_type shader,
}
}
/**
 * Hash callback for the texture-state cache: FNV-1a (32 bit) over the raw
 * bytes of a struct fd6_texture_key.
 *
 * Because every byte of the struct is hashed, keys must be fully
 * zero-initialized before their fields are filled in.
 */
static uint32_t
key_hash(const void *_key)
{
	const struct fd6_texture_key *key = _key;

	return _mesa_fnv32_1a_accumulate_block(_mesa_fnv32_1a_offset_bias,
			key, sizeof(*key));
}
static bool
key_equals(const void *_a, const void *_b)
{
const struct fd6_texture_key *a = _a;
const struct fd6_texture_key *b = _b;
return memcmp(a, b, sizeof(struct fd6_texture_key)) == 0;
}
/**
 * Look up, or build and cache, the texture state object for the given
 * set of bound sampler views and samplers.
 *
 * A key is assembled from the seqnos of the bound views (and of their
 * underlying resources) and of the bound samplers.  If the context's
 * tex_cache already holds an entry for that key it is returned as-is;
 * otherwise a new stateobj is created, filled by fd6_emit_textures(),
 * inserted into the cache and returned.
 *
 * @param ctx  context whose cache is consulted
 * @param sb   state block (shader stage) the state is built for
 * @param tex  currently bound sampler views / samplers for that stage
 * @return the cached or newly created texture state
 */
struct fd6_texture_state *
fd6_texture_state(struct fd_context *ctx, enum a6xx_state_block sb,
		struct fd_texture_stateobj *tex)
{
	struct fd6_context *fd6_ctx = fd6_context(ctx);
	struct fd6_texture_key key;
	bool needs_border = false;

	/* The key is hashed/compared bytewise, so clear all of it (unbound
	 * slots then stay at seqno 0).
	 */
	memset(&key, 0, sizeof(key));

	for (unsigned slot = 0; slot < tex->num_textures; slot++) {
		if (tex->textures[slot]) {
			struct fd6_pipe_sampler_view *view =
				fd6_pipe_sampler_view(tex->textures[slot]);

			key.view[slot].seqno = view->seqno;
			key.view[slot].rsc_seqno =
				fd_resource(view->base.texture)->seqno;
		}
	}

	for (unsigned slot = 0; slot < tex->num_samplers; slot++) {
		if (tex->samplers[slot]) {
			struct fd6_sampler_stateobj *sampler =
				fd6_sampler_stateobj(tex->samplers[slot]);

			key.samp[slot].seqno = sampler->seqno;
			needs_border |= sampler->needs_border;
		}
	}

	/* This will need update for HS/DS/GS: */
	if (unlikely(needs_border && (sb == SB6_FS_TEX))) {
		/* TODO: fixed per-stage offsets would remove the need for the
		 * number of VS samplers to be part of the FS tex state, but
		 * the handling of BCOLOR_OFFSET is probably not correct yet,
		 * and a hard-coded offset of 16 breaks things.
		 *
		 * When this changes, emit_border_color() needs a matching
		 * change.
		 */
		key.bcolor_offset = ctx->tex[PIPE_SHADER_VERTEX].num_samplers;
	}

	uint32_t hash = key_hash(&key);
	struct hash_entry *hit =
		_mesa_hash_table_search_pre_hashed(fd6_ctx->tex_cache, hash, &key);

	if (hit)
		return hit->data;

	/* Cache miss: build a fresh state object for this key. */
	struct fd6_texture_state *so = CALLOC_STRUCT(fd6_texture_state);

	so->needs_border = needs_border;
	so->key = key;
	so->stateobj = fd_ringbuffer_new_object(ctx->pipe, 0x1000);

	fd6_emit_textures(ctx->pipe, so->stateobj, sb, tex, key.bcolor_offset);

	/* NOTE: the table must point at the key copy stored inside the state
	 * object; the local key lives on this function's stack.
	 */
	_mesa_hash_table_insert_pre_hashed(fd6_ctx->tex_cache, hash,
			&so->key, so);

	return so;
}
/**
 * Release a cached texture state: delete its stateobj ringbuffer via
 * fd_ringbuffer_del(), then free the state itself.  The caller is
 * responsible for removing the entry from the cache.
 */
static void
fd6_texture_state_destroy(struct fd6_texture_state *state)
{
	struct fd_ringbuffer *stateobj = state->stateobj;

	fd_ringbuffer_del(stateobj);
	free(state);
}
void
fd6_texture_init(struct pipe_context *pctx)
{
struct fd6_context *fd6_ctx = fd6_context(fd_context(pctx));
pctx->create_sampler_state = fd6_sampler_state_create;
pctx->delete_sampler_state = fd6_sampler_state_delete;
pctx->bind_sampler_states = fd6_sampler_states_bind;
pctx->create_sampler_view = fd6_sampler_view_create;
pctx->sampler_view_destroy = fd6_sampler_view_destroy;
pctx->set_sampler_views = fd6_set_sampler_views;
fd6_ctx->tex_cache = _mesa_hash_table_create(NULL, key_hash, key_equals);
}
/**
 * Tear down the context's texture-state cache: destroy every cached
 * fd6_texture_state, then the hash table itself.
 *
 * Safe to call when the cache was never created, and leaves
 * fd6_ctx->tex_cache set to NULL afterwards.
 */
void
fd6_texture_fini(struct pipe_context *pctx)
{
	struct fd6_context *fd6_ctx = fd6_context(fd_context(pctx));
	struct hash_entry *entry;

	/* The result of _mesa_hash_table_create() in fd6_texture_init() is not
	 * checked, so tolerate a missing table instead of handing NULL to the
	 * iterator.
	 */
	if (!fd6_ctx->tex_cache)
		return;

	hash_table_foreach(fd6_ctx->tex_cache, entry) {
		fd6_texture_state_destroy(entry->data);
	}

	/* Entries were released above, so no per-entry callback is needed.
	 * Use the table's own destructor rather than assuming how the table
	 * was allocated.
	 */
	_mesa_hash_table_destroy(fd6_ctx->tex_cache, NULL);
	fd6_ctx->tex_cache = NULL;
}

View File

@ -41,6 +41,7 @@ struct fd6_sampler_stateobj {
uint32_t texsamp0, texsamp1, texsamp2, texsamp3;
bool saturate_s, saturate_t, saturate_r;
bool needs_border;
uint16_t seqno;
};
static inline struct fd6_sampler_stateobj *
@ -55,6 +56,7 @@ struct fd6_pipe_sampler_view {
uint32_t texconst6, texconst7, texconst8, texconst9, texconst10, texconst11;
uint32_t offset;
bool astc_srgb;
uint16_t seqno;
};
static inline struct fd6_pipe_sampler_view *
@ -64,7 +66,7 @@ fd6_pipe_sampler_view(struct pipe_sampler_view *pview)
}
void fd6_texture_init(struct pipe_context *pctx);
void fd6_texture_fini(struct pipe_context *pctx);
static inline enum a6xx_tex_type
fd6_tex_type(unsigned target)
@ -88,4 +90,38 @@ fd6_tex_type(unsigned target)
}
}
/*
 * Texture stateobj:
 *
 * The sampler and sampler-view state is mapped to a single hardware
 * stateobj which can be emitted as a pointer in a CP_SET_DRAW_STATE
 * packet, to avoid the overhead of re-generating the entire cmdstream
 * when the application toggles through multiple different texture states.
 */
/* Key identifying one combination of bound sampler views and samplers.
 *
 * The key is hashed and compared over its raw bytes (see key_hash() and
 * key_equals()), so it must be completely zero-filled before any field is
 * set.  Slots that are not bound keep seqno 0.
 */
struct fd6_texture_key {
struct {
/* We need to track the seqno of the rsc as well as of the
 * sampler view, because resource shadowing/etc can result in
 * the underlying bo changing (which means the previous
 * state is no longer valid).
 */
uint16_t rsc_seqno;
uint16_t seqno;
} view[16];
struct {
/* seqno of the sampler state object bound in this slot */
uint16_t seqno;
} samp[16];
/* Border-color offset passed on to fd6_emit_textures(); only set for
 * fragment-stage state that needs a border color, otherwise zero.
 */
uint8_t bcolor_offset;
};
/* One cached texture state object, stored in fd6_context::tex_cache. */
struct fd6_texture_state {
/* Copy of the lookup key; the hash table's key pointer refers to this
 * member, so it lives exactly as long as the entry does.
 */
struct fd6_texture_key key;
/* Ringbuffer object holding the commands written by fd6_emit_textures(). */
struct fd_ringbuffer *stateobj;
/* True if any sampler that went into this state needs a border color. */
bool needs_border;
};
struct fd6_texture_state * fd6_texture_state(struct fd_context *ctx,
enum a6xx_state_block sb, struct fd_texture_stateobj *tex);
#endif /* FD6_TEXTURE_H_ */