From 080022833db0896b8af4482e726200acd51507a0 Mon Sep 17 00:00:00 2001 From: Antonio Abbatangelo Date: Tue, 8 Mar 2022 21:29:46 -0500 Subject: [PATCH] nv2a: Cache shaders to disk --- config_spec.yml | 6 +- hw/xbox/nv2a/lru.h | 15 + hw/xbox/nv2a/nv2a.c | 8 + hw/xbox/nv2a/nv2a_int.h | 11 +- hw/xbox/nv2a/pfifo.c | 6 +- hw/xbox/nv2a/pgraph.c | 41 ++- hw/xbox/nv2a/shaders.c | 644 +++++++++++++++++++++++++++++++++------- hw/xbox/nv2a/shaders.h | 22 ++ include/qemu/osdep.h | 3 + ui/xemu-settings.cc | 27 +- ui/xemu-settings.h | 3 + ui/xui/main-menu.cc | 4 +- util/osdep.c | 39 +++ 13 files changed, 681 insertions(+), 148 deletions(-) diff --git a/config_spec.yml b/config_spec.yml index 0cf5ebb8f1..5511363ca3 100644 --- a/config_spec.yml +++ b/config_spec.yml @@ -191,6 +191,6 @@ perf: hard_fpu: type: bool default: true - # cache_shaders: - # type: bool - # default: true + cache_shaders: + type: bool + default: true diff --git a/hw/xbox/nv2a/lru.h b/hw/xbox/nv2a/lru.h index a692eb998f..c0dca7ec5d 100644 --- a/hw/xbox/nv2a/lru.h +++ b/hw/xbox/nv2a/lru.h @@ -129,6 +129,21 @@ LruNode *lru_evict_one(Lru *lru) return found; } +static inline +bool lru_contains_hash(Lru *lru, uint64_t hash) +{ + unsigned int bin = lru_hash_to_bin(lru, hash); + LruNode *iter; + + QTAILQ_FOREACH(iter, &lru->bins[bin], next_bin) { + if (iter->hash == hash) { + return true; + } + } + + return false; +} + static inline LruNode *lru_lookup(Lru *lru, uint64_t hash, void *key) { diff --git a/hw/xbox/nv2a/nv2a.c b/hw/xbox/nv2a/nv2a.c index 989140b23c..e09fc8f890 100644 --- a/hw/xbox/nv2a/nv2a.c +++ b/hw/xbox/nv2a/nv2a.c @@ -380,6 +380,14 @@ static void nv2a_vm_state_change(void *opaque, bool running, RunState state) nv2a_lock_fifo(d); qatomic_set(&d->pfifo.halt, false); nv2a_unlock_fifo(d); + } else if (state == RUN_STATE_SHUTDOWN) { + nv2a_lock_fifo(d); + qatomic_set(&d->pgraph.shader_cache_writeback_pending, true); + qemu_event_reset(&d->pgraph.shader_cache_writeback_complete); + nv2a_unlock_fifo(d); + 
qemu_mutex_unlock_iothread(); + qemu_event_wait(&d->pgraph.shader_cache_writeback_complete); + qemu_mutex_lock_iothread(); } } diff --git a/hw/xbox/nv2a/nv2a_int.h b/hw/xbox/nv2a/nv2a_int.h index 795820beae..65233ff265 100644 --- a/hw/xbox/nv2a/nv2a_int.h +++ b/hw/xbox/nv2a/nv2a_int.h @@ -305,12 +305,15 @@ typedef struct PGRAPHState { hwaddr dma_a, dma_b; Lru texture_cache; - struct TextureLruNode *texture_cache_entries; + TextureLruNode *texture_cache_entries; bool texture_dirty[NV2A_MAX_TEXTURES]; TextureBinding *texture_binding[NV2A_MAX_TEXTURES]; - GHashTable *shader_cache; + Lru shader_cache; + ShaderLruNode *shader_cache_entries; ShaderBinding *shader_binding; + QemuMutex shader_cache_lock; + QemuThread shader_disk_thread; bool texture_matrix_enable[NV2A_MAX_TEXTURES]; @@ -370,7 +373,7 @@ typedef struct PGRAPHState { uint16_t compressed_attrs; Lru element_cache; - struct VertexLruNode *element_cache_entries; + VertexLruNode *element_cache_entries; unsigned int inline_array_length; uint32_t inline_array[NV2A_MAX_BATCH_LENGTH]; @@ -403,10 +406,12 @@ typedef struct PGRAPHState { bool download_dirty_surfaces_pending; bool flush_pending; bool gl_sync_pending; + bool shader_cache_writeback_pending; QemuEvent downloads_complete; QemuEvent dirty_surfaces_download_complete; QemuEvent flush_complete; QemuEvent gl_sync_complete; + QemuEvent shader_cache_writeback_complete; unsigned int surface_scale_factor; uint8_t *scale_buf; diff --git a/hw/xbox/nv2a/pfifo.c b/hw/xbox/nv2a/pfifo.c index 0f73a9092c..77dd175098 100644 --- a/hw/xbox/nv2a/pfifo.c +++ b/hw/xbox/nv2a/pfifo.c @@ -452,7 +452,8 @@ static void process_requests(NV2AState *d) if (qatomic_read(&d->pgraph.downloads_pending) || qatomic_read(&d->pgraph.download_dirty_surfaces_pending) || qatomic_read(&d->pgraph.gl_sync_pending) || - qatomic_read(&d->pgraph.flush_pending)) { + qatomic_read(&d->pgraph.flush_pending) || + qatomic_read(&d->pgraph.shader_cache_writeback_pending)) { qemu_mutex_unlock(&d->pfifo.lock); 
qemu_mutex_lock(&d->pgraph.lock); if (qatomic_read(&d->pgraph.downloads_pending)) { @@ -467,6 +468,9 @@ static void process_requests(NV2AState *d) if (qatomic_read(&d->pgraph.flush_pending)) { pgraph_flush(d); } + if (qatomic_read(&d->pgraph.shader_cache_writeback_pending)) { + shader_write_cache_reload_list(&d->pgraph); + } qemu_mutex_unlock(&d->pgraph.lock); qemu_mutex_lock(&d->pfifo.lock); } diff --git a/hw/xbox/nv2a/pgraph.c b/hw/xbox/nv2a/pgraph.c index 85bee8e803..0bb7b3f17e 100644 --- a/hw/xbox/nv2a/pgraph.c +++ b/hw/xbox/nv2a/pgraph.c @@ -451,8 +451,6 @@ static bool vertex_cache_entry_compare(Lru *lru, LruNode *node, void *key) static void pgraph_mark_textures_possibly_dirty(NV2AState *d, hwaddr addr, hwaddr size); static bool pgraph_check_texture_dirty(NV2AState *d, hwaddr addr, hwaddr size); -static guint shader_hash(gconstpointer key); -static gboolean shader_equal(gconstpointer a, gconstpointer b); static unsigned int kelvin_map_stencil_op(uint32_t parameter); static unsigned int kelvin_map_polygon_mode(uint32_t parameter); static unsigned int kelvin_map_texgen(uint32_t parameter, unsigned int channel); @@ -3958,10 +3956,12 @@ void pgraph_init(NV2AState *d) pg->downloads_pending = false; qemu_mutex_init(&pg->lock); + qemu_mutex_init(&pg->shader_cache_lock); qemu_event_init(&pg->gl_sync_complete, false); qemu_event_init(&pg->downloads_complete, false); qemu_event_init(&pg->dirty_surfaces_download_complete, false); qemu_event_init(&pg->flush_complete, false); + qemu_event_init(&pg->shader_cache_writeback_complete, false); /* fire up opengl */ glo_set_current(g_nv2a_context_render); @@ -4018,7 +4018,7 @@ void pgraph_init(NV2AState *d) pg->element_cache.init_node = vertex_cache_entry_init; pg->element_cache.compare_nodes = vertex_cache_entry_compare; - pg->shader_cache = g_hash_table_new(shader_hash, shader_equal); + shader_cache_init(pg); pg->material_alpha = 0.0f; SET_MASK(pg->regs[NV_PGRAPH_CONTROL_3], NV_PGRAPH_CONTROL_3_SHADEMODE, @@ -4053,6 +4053,7 @@ 
void pgraph_init(NV2AState *d) void pgraph_destroy(PGRAPHState *pg) { qemu_mutex_destroy(&pg->lock); + qemu_mutex_destroy(&pg->shader_cache_lock); glo_set_current(g_nv2a_context_render); @@ -4060,7 +4061,9 @@ void pgraph_destroy(PGRAPHState *pg) glDeleteFramebuffers(1, &pg->gl_framebuffer); - // TODO: clear out shader cached + // Clear out shader cache + shader_write_cache_reload_list(pg); + free(pg->shader_cache_entries); // Clear out texture cache lru_flush(&pg->texture_cache); @@ -4640,20 +4643,25 @@ static void pgraph_bind_shaders(PGRAPHState *pg) state.psh.conv_tex[i] = kernel; } - ShaderBinding* cached_shader = (ShaderBinding*)g_hash_table_lookup(pg->shader_cache, &state); - if (cached_shader) { - pg->shader_binding = cached_shader; + uint64_t shader_state_hash = fast_hash((uint8_t*) &state, sizeof(ShaderState)); + qemu_mutex_lock(&pg->shader_cache_lock); + LruNode *node = lru_lookup(&pg->shader_cache, shader_state_hash, &state); + ShaderLruNode *snode = container_of(node, ShaderLruNode, node); + if (snode->binding || shader_load_from_memory(snode)) { + pg->shader_binding = snode->binding; } else { pg->shader_binding = generate_shaders(&state); nv2a_profile_inc_counter(NV2A_PROF_SHADER_GEN); /* cache it */ - ShaderState *cache_state = (ShaderState *)g_malloc(sizeof(*cache_state)); - memcpy(cache_state, &state, sizeof(*cache_state)); - g_hash_table_insert(pg->shader_cache, cache_state, - (gpointer)pg->shader_binding); + snode->binding = pg->shader_binding; + if (g_config.perf.cache_shaders) { + shader_cache_to_disk(snode); + } } + qemu_mutex_unlock(&pg->shader_cache_lock); + binding_changed = (pg->shader_binding != old_binding); if (binding_changed) { nv2a_profile_inc_counter(NV2A_PROF_SHADER_BIND); @@ -7649,17 +7657,6 @@ static bool texture_cache_entry_compare(Lru *lru, LruNode *node, void *key) return memcmp(&tnode->key, key, sizeof(TextureKey)); } -/* hash and equality for shader cache hash table */ -static guint shader_hash(gconstpointer key) -{ - return 
fast_hash((const uint8_t *)key, sizeof(ShaderState)); -} -static gboolean shader_equal(gconstpointer a, gconstpointer b) -{ - const ShaderState *as = (const ShaderState *)a, *bs = (const ShaderState *)b; - return memcmp(as, bs, sizeof(ShaderState)) == 0; -} - static unsigned int kelvin_map_stencil_op(uint32_t parameter) { unsigned int op; diff --git a/hw/xbox/nv2a/shaders.c b/hw/xbox/nv2a/shaders.c index d285e6194c..409129e50d 100644 --- a/hw/xbox/nv2a/shaders.c +++ b/hw/xbox/nv2a/shaders.c @@ -25,6 +25,9 @@ #include "shaders_common.h" #include "shaders.h" +#include "nv2a_int.h" +#include "ui/xemu-settings.h" +#include "xemu-version.h" void mstring_append_fmt(MString *qstring, const char *fmt, ...) { @@ -71,6 +74,37 @@ void mstring_append_va(MString *qstring, const char *fmt, va_list va) g_free(buf); } +GLenum get_gl_primitive_mode(enum ShaderPolygonMode polygon_mode, enum ShaderPrimitiveMode primitive_mode) +{ + if (polygon_mode == POLY_MODE_POINT) { + return GL_POINTS; + } + + switch (primitive_mode) { + case PRIM_TYPE_POINTS: return GL_POINTS; + case PRIM_TYPE_LINES: return GL_LINES; + case PRIM_TYPE_LINE_LOOP: return GL_LINE_LOOP; + case PRIM_TYPE_LINE_STRIP: return GL_LINE_STRIP; + case PRIM_TYPE_TRIANGLES: return GL_TRIANGLES; + case PRIM_TYPE_TRIANGLE_STRIP: return GL_TRIANGLE_STRIP; + case PRIM_TYPE_TRIANGLE_FAN: return GL_TRIANGLE_FAN; + case PRIM_TYPE_QUADS: return GL_LINES_ADJACENCY; + case PRIM_TYPE_QUAD_STRIP: return GL_LINE_STRIP_ADJACENCY; + case PRIM_TYPE_POLYGON: + if (polygon_mode == POLY_MODE_LINE) { + return GL_LINE_LOOP; + } else if (polygon_mode == POLY_MODE_FILL) { + return GL_TRIANGLE_FAN; + } + + assert(!"PRIM_TYPE_POLYGON with invalid polygon_mode"); + return 0; + default: + assert(!"Invalid primitive_mode"); + return 0; + } +} + static MString* generate_geometry_shader( enum ShaderPolygonMode polygon_front_mode, enum ShaderPolygonMode polygon_back_mode, @@ -78,14 +112,14 @@ static MString* generate_geometry_shader( GLenum 
*gl_primitive_mode, bool smooth_shading) { - /* FIXME: Missing support for 2-sided-poly mode */ assert(polygon_front_mode == polygon_back_mode); enum ShaderPolygonMode polygon_mode = polygon_front_mode; + *gl_primitive_mode = get_gl_primitive_mode(polygon_mode, primitive_mode); + /* POINT mode shouldn't require any special work */ if (polygon_mode == POLY_MODE_POINT) { - *gl_primitive_mode = GL_POINTS; return NULL; } @@ -94,12 +128,11 @@ static MString* generate_geometry_shader( const char *layout_out = NULL; const char *body = NULL; switch (primitive_mode) { - case PRIM_TYPE_POINTS: *gl_primitive_mode = GL_POINTS; return NULL; - case PRIM_TYPE_LINES: *gl_primitive_mode = GL_LINES; return NULL; - case PRIM_TYPE_LINE_LOOP: *gl_primitive_mode = GL_LINE_LOOP; return NULL; - case PRIM_TYPE_LINE_STRIP: *gl_primitive_mode = GL_LINE_STRIP; return NULL; + case PRIM_TYPE_POINTS: return NULL; + case PRIM_TYPE_LINES: return NULL; + case PRIM_TYPE_LINE_LOOP: return NULL; + case PRIM_TYPE_LINE_STRIP: return NULL; case PRIM_TYPE_TRIANGLES: - *gl_primitive_mode = GL_TRIANGLES; if (polygon_mode == POLY_MODE_FILL) { return NULL; } assert(polygon_mode == POLY_MODE_LINE); layout_in = "layout(triangles) in;\n"; @@ -111,7 +144,6 @@ static MString* generate_geometry_shader( " EndPrimitive();\n"; break; case PRIM_TYPE_TRIANGLE_STRIP: - *gl_primitive_mode = GL_TRIANGLE_STRIP; if (polygon_mode == POLY_MODE_FILL) { return NULL; } assert(polygon_mode == POLY_MODE_LINE); layout_in = "layout(triangles) in;\n"; @@ -133,7 +165,6 @@ static MString* generate_geometry_shader( " EndPrimitive();\n"; break; case PRIM_TYPE_TRIANGLE_FAN: - *gl_primitive_mode = GL_TRIANGLE_FAN; if (polygon_mode == POLY_MODE_FILL) { return NULL; } assert(polygon_mode == POLY_MODE_LINE); layout_in = "layout(triangles) in;\n"; @@ -147,7 +178,6 @@ static MString* generate_geometry_shader( " EndPrimitive();\n"; break; case PRIM_TYPE_QUADS: - *gl_primitive_mode = GL_LINES_ADJACENCY; layout_in = "layout(lines_adjacency) in;\n"; 
if (polygon_mode == POLY_MODE_LINE) { layout_out = "layout(line_strip, max_vertices = 5) out;\n"; @@ -170,7 +200,6 @@ static MString* generate_geometry_shader( } break; case PRIM_TYPE_QUAD_STRIP: - *gl_primitive_mode = GL_LINE_STRIP_ADJACENCY; layout_in = "layout(lines_adjacency) in;\n"; if (polygon_mode == POLY_MODE_LINE) { layout_out = "layout(line_strip, max_vertices = 5) out;\n"; @@ -198,11 +227,9 @@ static MString* generate_geometry_shader( break; case PRIM_TYPE_POLYGON: if (polygon_mode == POLY_MODE_LINE) { - *gl_primitive_mode = GL_LINE_LOOP; return NULL; } if (polygon_mode == POLY_MODE_FILL) { - *gl_primitive_mode = GL_TRIANGLE_FAN; if (smooth_shading) { return NULL; } @@ -291,7 +318,6 @@ static void append_skinning_code(MString* str, bool mix, const char* output, const char* input, const char* matrix, const char* swizzle) { - if (count == 0) { mstring_append_fmt(str, "%s %s = (%s * %s0).%s;\n", type, output, input, matrix, swizzle); @@ -999,11 +1025,107 @@ static GLuint create_gl_shader(GLenum gl_shader_type, return shader; } -ShaderBinding *generate_shaders(const ShaderState *state) +void update_shader_constant_locations(ShaderBinding *binding, const ShaderState *state) { int i, j; char tmp[64]; + /* set texture samplers */ + for (i = 0; i < NV2A_MAX_TEXTURES; i++) { + char samplerName[16]; + snprintf(samplerName, sizeof(samplerName), "texSamp%d", i); + GLint texSampLoc = glGetUniformLocation(binding->gl_program, samplerName); + if (texSampLoc >= 0) { + glUniform1i(texSampLoc, i); + } + } + + /* validate the program */ + glValidateProgram(binding->gl_program); + GLint valid = 0; + glGetProgramiv(binding->gl_program, GL_VALIDATE_STATUS, &valid); + if (!valid) { + GLchar log[1024]; + glGetProgramInfoLog(binding->gl_program, 1024, NULL, log); + fprintf(stderr, "nv2a: shader validation failed: %s\n", log); + abort(); + } + + /* lookup fragment shader uniforms */ + for (i = 0; i < 9; i++) { + for (j = 0; j < 2; j++) { + snprintf(tmp, sizeof(tmp), "c%d_%d", j, 
i); + binding->psh_constant_loc[i][j] = glGetUniformLocation(binding->gl_program, tmp); + } + } + binding->alpha_ref_loc = glGetUniformLocation(binding->gl_program, "alphaRef"); + for (i = 1; i < NV2A_MAX_TEXTURES; i++) { + snprintf(tmp, sizeof(tmp), "bumpMat%d", i); + binding->bump_mat_loc[i] = glGetUniformLocation(binding->gl_program, tmp); + snprintf(tmp, sizeof(tmp), "bumpScale%d", i); + binding->bump_scale_loc[i] = glGetUniformLocation(binding->gl_program, tmp); + snprintf(tmp, sizeof(tmp), "bumpOffset%d", i); + binding->bump_offset_loc[i] = glGetUniformLocation(binding->gl_program, tmp); + } + + for (int i = 0; i < NV2A_MAX_TEXTURES; i++) { + snprintf(tmp, sizeof(tmp), "texScale%d", i); + binding->tex_scale_loc[i] = glGetUniformLocation(binding->gl_program, tmp); + } + + /* lookup vertex shader uniforms */ + for(i = 0; i < NV2A_VERTEXSHADER_CONSTANTS; i++) { + snprintf(tmp, sizeof(tmp), "c[%d]", i); + binding->vsh_constant_loc[i] = glGetUniformLocation(binding->gl_program, tmp); + } + binding->surface_size_loc = glGetUniformLocation(binding->gl_program, "surfaceSize"); + binding->clip_range_loc = glGetUniformLocation(binding->gl_program, "clipRange"); + binding->fog_color_loc = glGetUniformLocation(binding->gl_program, "fogColor"); + binding->fog_param_loc[0] = glGetUniformLocation(binding->gl_program, "fogParam[0]"); + binding->fog_param_loc[1] = glGetUniformLocation(binding->gl_program, "fogParam[1]"); + + binding->inv_viewport_loc = glGetUniformLocation(binding->gl_program, "invViewport"); + for (i = 0; i < NV2A_LTCTXA_COUNT; i++) { + snprintf(tmp, sizeof(tmp), "ltctxa[%d]", i); + binding->ltctxa_loc[i] = glGetUniformLocation(binding->gl_program, tmp); + } + for (i = 0; i < NV2A_LTCTXB_COUNT; i++) { + snprintf(tmp, sizeof(tmp), "ltctxb[%d]", i); + binding->ltctxb_loc[i] = glGetUniformLocation(binding->gl_program, tmp); + } + for (i = 0; i < NV2A_LTC1_COUNT; i++) { + snprintf(tmp, sizeof(tmp), "ltc1[%d]", i); + binding->ltc1_loc[i] = 
glGetUniformLocation(binding->gl_program, tmp); + } + for (i = 0; i < NV2A_MAX_LIGHTS; i++) { + snprintf(tmp, sizeof(tmp), "lightInfiniteHalfVector%d", i); + binding->light_infinite_half_vector_loc[i] = + glGetUniformLocation(binding->gl_program, tmp); + snprintf(tmp, sizeof(tmp), "lightInfiniteDirection%d", i); + binding->light_infinite_direction_loc[i] = + glGetUniformLocation(binding->gl_program, tmp); + + snprintf(tmp, sizeof(tmp), "lightLocalPosition%d", i); + binding->light_local_position_loc[i] = glGetUniformLocation(binding->gl_program, tmp); + snprintf(tmp, sizeof(tmp), "lightLocalAttenuation%d", i); + binding->light_local_attenuation_loc[i] = + glGetUniformLocation(binding->gl_program, tmp); + } + for (i = 0; i < 8; i++) { + snprintf(tmp, sizeof(tmp), "clipRegion[%d]", i); + binding->clip_region_loc[i] = glGetUniformLocation(binding->gl_program, tmp); + } + + if (state->fixed_function) { + binding->material_alpha_loc = + glGetUniformLocation(binding->gl_program, "material_alpha"); + } else { + binding->material_alpha_loc = -1; + } +} + +ShaderBinding *generate_shaders(const ShaderState *state) +{ char *previous_numeric_locale = setlocale(LC_NUMERIC, NULL); if (previous_numeric_locale) { previous_numeric_locale = g_strdup(previous_numeric_locale); @@ -1011,10 +1133,9 @@ ShaderBinding *generate_shaders(const ShaderState *state) /* Ensure numeric values are printed with '.' 
radix, no grouping */ setlocale(LC_NUMERIC, "C"); - GLuint program = glCreateProgram(); - /* Create an option geometry shader and find primitive type */ + /* Create an optional geometry shader and find primitive type */ GLenum gl_primitive_mode; MString* geometry_shader_code = generate_geometry_shader(state->polygon_front_mode, @@ -1064,102 +1185,11 @@ ShaderBinding *generate_shaders(const ShaderState *state) glUseProgram(program); - /* set texture samplers */ - for (i = 0; i < NV2A_MAX_TEXTURES; i++) { - char samplerName[16]; - snprintf(samplerName, sizeof(samplerName), "texSamp%d", i); - GLint texSampLoc = glGetUniformLocation(program, samplerName); - if (texSampLoc >= 0) { - glUniform1i(texSampLoc, i); - } - } - - /* validate the program */ - glValidateProgram(program); - GLint valid = 0; - glGetProgramiv(program, GL_VALIDATE_STATUS, &valid); - if (!valid) { - GLchar log[1024]; - glGetProgramInfoLog(program, 1024, NULL, log); - fprintf(stderr, "nv2a: shader validation failed: %s\n", log); - abort(); - } - ShaderBinding* ret = g_malloc0(sizeof(ShaderBinding)); ret->gl_program = program; ret->gl_primitive_mode = gl_primitive_mode; - /* lookup fragment shader uniforms */ - for (i = 0; i < 9; i++) { - for (j = 0; j < 2; j++) { - snprintf(tmp, sizeof(tmp), "c%d_%d", j, i); - ret->psh_constant_loc[i][j] = glGetUniformLocation(program, tmp); - } - } - ret->alpha_ref_loc = glGetUniformLocation(program, "alphaRef"); - for (i = 1; i < NV2A_MAX_TEXTURES; i++) { - snprintf(tmp, sizeof(tmp), "bumpMat%d", i); - ret->bump_mat_loc[i] = glGetUniformLocation(program, tmp); - snprintf(tmp, sizeof(tmp), "bumpScale%d", i); - ret->bump_scale_loc[i] = glGetUniformLocation(program, tmp); - snprintf(tmp, sizeof(tmp), "bumpOffset%d", i); - ret->bump_offset_loc[i] = glGetUniformLocation(program, tmp); - } - - for (int i = 0; i < NV2A_MAX_TEXTURES; i++) { - snprintf(tmp, sizeof(tmp), "texScale%d", i); - ret->tex_scale_loc[i] = glGetUniformLocation(program, tmp); - } - - /* lookup vertex 
shader uniforms */ - for(i = 0; i < NV2A_VERTEXSHADER_CONSTANTS; i++) { - snprintf(tmp, sizeof(tmp), "c[%d]", i); - ret->vsh_constant_loc[i] = glGetUniformLocation(program, tmp); - } - ret->surface_size_loc = glGetUniformLocation(program, "surfaceSize"); - ret->clip_range_loc = glGetUniformLocation(program, "clipRange"); - ret->fog_color_loc = glGetUniformLocation(program, "fogColor"); - ret->fog_param_loc[0] = glGetUniformLocation(program, "fogParam[0]"); - ret->fog_param_loc[1] = glGetUniformLocation(program, "fogParam[1]"); - - ret->inv_viewport_loc = glGetUniformLocation(program, "invViewport"); - for (i = 0; i < NV2A_LTCTXA_COUNT; i++) { - snprintf(tmp, sizeof(tmp), "ltctxa[%d]", i); - ret->ltctxa_loc[i] = glGetUniformLocation(program, tmp); - } - for (i = 0; i < NV2A_LTCTXB_COUNT; i++) { - snprintf(tmp, sizeof(tmp), "ltctxb[%d]", i); - ret->ltctxb_loc[i] = glGetUniformLocation(program, tmp); - } - for (i = 0; i < NV2A_LTC1_COUNT; i++) { - snprintf(tmp, sizeof(tmp), "ltc1[%d]", i); - ret->ltc1_loc[i] = glGetUniformLocation(program, tmp); - } - for (i = 0; i < NV2A_MAX_LIGHTS; i++) { - snprintf(tmp, sizeof(tmp), "lightInfiniteHalfVector%d", i); - ret->light_infinite_half_vector_loc[i] = - glGetUniformLocation(program, tmp); - snprintf(tmp, sizeof(tmp), "lightInfiniteDirection%d", i); - ret->light_infinite_direction_loc[i] = - glGetUniformLocation(program, tmp); - - snprintf(tmp, sizeof(tmp), "lightLocalPosition%d", i); - ret->light_local_position_loc[i] = glGetUniformLocation(program, tmp); - snprintf(tmp, sizeof(tmp), "lightLocalAttenuation%d", i); - ret->light_local_attenuation_loc[i] = - glGetUniformLocation(program, tmp); - } - for (i = 0; i < 8; i++) { - snprintf(tmp, sizeof(tmp), "clipRegion[%d]", i); - ret->clip_region_loc[i] = glGetUniformLocation(program, tmp); - } - - if (state->fixed_function) { - ret->material_alpha_loc = - glGetUniformLocation(program, "material_alpha"); - } else { - ret->material_alpha_loc = -1; - } + 
update_shader_constant_locations(ret, state); if (previous_numeric_locale) { setlocale(LC_NUMERIC, previous_numeric_locale); @@ -1168,3 +1198,399 @@ ShaderBinding *generate_shaders(const ShaderState *state) return ret; } + +static const char *shader_gl_vendor = NULL; + +static void shader_create_cache_folder(void) +{ + char *shader_path = g_strdup_printf("%sshaders", xemu_settings_get_base_path()); + qemu_mkdir(shader_path); + g_free(shader_path); +} + +static char *shader_get_lru_cache_path(void) +{ + return g_strdup_printf("%s/shader_cache_list", xemu_settings_get_base_path()); +} + +static void shader_write_lru_list_entry_to_disk(Lru *lru, LruNode *node, void *opaque) +{ + FILE *lru_list_file = (FILE*) opaque; + size_t written = fwrite(&node->hash, sizeof(uint64_t), 1, lru_list_file); + if (written != 1) { + fprintf(stderr, "nv2a: Failed to write shader list entry %llx to disk\n", + (unsigned long long) node->hash); + } +} + +void shader_write_cache_reload_list(PGRAPHState *pg) +{ + if (!g_config.perf.cache_shaders) { + qatomic_set(&pg->shader_cache_writeback_pending, false); + qemu_event_set(&pg->shader_cache_writeback_complete); + return; + } + + char *shader_lru_path = shader_get_lru_cache_path(); + qemu_thread_join(&pg->shader_disk_thread); + + FILE *lru_list = qemu_fopen(shader_lru_path, "wb"); + g_free(shader_lru_path); + if (!lru_list) { + fprintf(stderr, "nv2a: Failed to open shader LRU cache for writing\n"); + return; + } + + lru_visit_active(&pg->shader_cache, shader_write_lru_list_entry_to_disk, lru_list); + fclose(lru_list); + + lru_flush(&pg->shader_cache); + + qatomic_set(&pg->shader_cache_writeback_pending, false); + qemu_event_set(&pg->shader_cache_writeback_complete); +} + +bool shader_load_from_memory(ShaderLruNode *snode) +{ + assert(glGetError() == GL_NO_ERROR); + + if (!snode->program) { + return false; + } + + GLuint gl_program = glCreateProgram(); + glProgramBinary(gl_program, snode->program_format, snode->program, snode->program_size); + 
GLint gl_error = glGetError(); + if (gl_error != GL_NO_ERROR) { + NV2A_DPRINTF("failed to load shader binary from disk: GL error code %d\n", gl_error); + glDeleteProgram(gl_program); + return false; + } + + glValidateProgram(gl_program); + GLint valid = 0; + glGetProgramiv(gl_program, GL_VALIDATE_STATUS, &valid); + if (!valid) { + GLchar log[1024]; + glGetProgramInfoLog(gl_program, 1024, NULL, log); + NV2A_DPRINTF("failed to load shader binary from disk: %s\n", log); + glDeleteProgram(gl_program); + return false; + } + + glUseProgram(gl_program); + + ShaderBinding* binding = g_malloc0(sizeof(ShaderBinding)); + binding->gl_program = gl_program; + binding->gl_primitive_mode = get_gl_primitive_mode(snode->state.polygon_front_mode, + snode->state.primitive_mode); + snode->binding = binding; + + g_free(snode->program); + snode->program = NULL; + + update_shader_constant_locations(binding, &snode->state); + + return true; +} + +static char *shader_get_bin_directory(uint64_t hash) +{ + const char *cfg_dir = xemu_settings_get_base_path(); + uint64_t bin_mask = 0xffffUL << 48; + char *shader_bin_dir = g_strdup_printf("%s/shaders/%04lx", + cfg_dir, (hash & bin_mask) >> 48); + return shader_bin_dir; +} + +static char *shader_get_binary_path(const char *shader_bin_dir, uint64_t hash) +{ + uint64_t bin_mask = 0xffffUL << 48; + return g_strdup_printf("%s/%012lx", shader_bin_dir, + hash & (~bin_mask)); +} + +static void shader_load_from_disk(PGRAPHState *pg, uint64_t hash) +{ + char *shader_bin_dir = shader_get_bin_directory(hash); + char *shader_path = shader_get_binary_path(shader_bin_dir, hash); + char *cached_xemu_version = NULL; + char *cached_gl_vendor = NULL; + void *program_buffer = NULL; + + uint64_t cached_xemu_version_len; + uint64_t gl_vendor_len; + GLenum program_binary_format; + ShaderState state; + size_t shader_size; + + g_free(shader_bin_dir); + + qemu_mutex_lock(&pg->shader_cache_lock); + if (lru_contains_hash(&pg->shader_cache, hash)) { + 
qemu_mutex_unlock(&pg->shader_cache_lock); + return; + } + qemu_mutex_unlock(&pg->shader_cache_lock); + + FILE *shader_file = qemu_fopen(shader_path, "rb"); + if (!shader_file) { + goto error; + } + + size_t nread; + #define READ_OR_ERR(data, data_len) \ + do { \ + nread = fread(data, data_len, 1, shader_file); \ + if (nread != 1) { \ + fclose(shader_file); \ + goto error; \ + } \ + } while (0) + + READ_OR_ERR(&cached_xemu_version_len, sizeof(cached_xemu_version_len)); + + cached_xemu_version = g_malloc(cached_xemu_version_len +1); + READ_OR_ERR(cached_xemu_version, cached_xemu_version_len); + if (strcmp(cached_xemu_version, xemu_version) != 0) { + fclose(shader_file); + goto error; + } + + READ_OR_ERR(&gl_vendor_len, sizeof(gl_vendor_len)); + + cached_gl_vendor = g_malloc(gl_vendor_len); + READ_OR_ERR(cached_gl_vendor, gl_vendor_len); + if (strcmp(cached_gl_vendor, shader_gl_vendor) != 0) { + fclose(shader_file); + goto error; + } + + READ_OR_ERR(&program_binary_format, sizeof(program_binary_format)); + READ_OR_ERR(&state, sizeof(state)); + READ_OR_ERR(&shader_size, sizeof(shader_size)); + + program_buffer = g_malloc(shader_size); + READ_OR_ERR(program_buffer, shader_size); + + #undef READ_OR_ERR + + fclose(shader_file); + g_free(shader_path); + g_free(cached_xemu_version); + g_free(cached_gl_vendor); + + qemu_mutex_lock(&pg->shader_cache_lock); + LruNode *node = lru_lookup(&pg->shader_cache, hash, &state); + ShaderLruNode *snode = container_of(node, ShaderLruNode, node); + + /* If we happened to regenerate this shader already, then we may as well use the new one */ + if (snode->binding) { + qemu_mutex_unlock(&pg->shader_cache_lock); + return; + } + + snode->program_format = program_binary_format; + snode->program_size = shader_size; + snode->program = program_buffer; + snode->cached = true; + qemu_mutex_unlock(&pg->shader_cache_lock); + return; + +error: + /* Delete the shader so it won't be loaded again */ + qemu_unlink(shader_path); + g_free(shader_path); + 
g_free(program_buffer); + g_free(cached_xemu_version); + g_free(cached_gl_vendor); +} + +static void *shader_reload_lru_from_disk(void *arg) +{ + if (!g_config.perf.cache_shaders) { + return NULL; + } + + PGRAPHState *pg = (PGRAPHState*) arg; + char *shader_lru_path = shader_get_lru_cache_path(); + + FILE *lru_shaders_list = qemu_fopen(shader_lru_path, "rb"); + g_free(shader_lru_path); + if (!lru_shaders_list) { + return NULL; + } + + uint64_t hash; + while (fread(&hash, sizeof(uint64_t), 1, lru_shaders_list) == 1) { + shader_load_from_disk(pg, hash); + } + + return NULL; +} + +static void shader_cache_entry_init(Lru *lru, LruNode *node, void *state) +{ + ShaderLruNode *snode = container_of(node, ShaderLruNode, node); + memcpy(&snode->state, state, sizeof(ShaderState)); + snode->cached = false; + snode->binding = NULL; + snode->program = NULL; + snode->save_thread = NULL; +} + +static void shader_cache_entry_post_evict(Lru *lru, LruNode *node) +{ + ShaderLruNode *snode = container_of(node, ShaderLruNode, node); + + if (snode->save_thread) { + qemu_thread_join(snode->save_thread); + g_free(snode->save_thread); + } + + if (snode->binding) { + glDeleteProgram(snode->binding->gl_program); + g_free(snode->binding); + } + + if (snode->program) { + g_free(snode->program); + } + + snode->cached = false; + snode->save_thread = NULL; + snode->binding = NULL; + snode->program = NULL; + memset(&snode->state, 0, sizeof(ShaderState)); +} + +static bool shader_cache_entry_compare(Lru *lru, LruNode *node, void *key) +{ + ShaderLruNode *snode = container_of(node, ShaderLruNode, node); + return memcmp(&snode->state, key, sizeof(ShaderState)); +} + +void shader_cache_init(PGRAPHState *pg) +{ + if (!shader_gl_vendor) { + shader_gl_vendor = (const char *) glGetString(GL_VENDOR); + } + + shader_create_cache_folder(); + + /* FIXME: Make this configurable */ + const size_t shader_cache_size = 50*1024; + lru_init(&pg->shader_cache); + pg->shader_cache_entries = malloc(shader_cache_size * 
sizeof(ShaderLruNode)); + assert(pg->shader_cache_entries != NULL); + for (int i = 0; i < shader_cache_size; i++) { + lru_add_free(&pg->shader_cache, &pg->shader_cache_entries[i].node); + } + + pg->shader_cache.init_node = shader_cache_entry_init; + pg->shader_cache.compare_nodes = shader_cache_entry_compare; + pg->shader_cache.post_node_evict = shader_cache_entry_post_evict; + + qemu_thread_create(&pg->shader_disk_thread, "pgraph.shader_cache", + shader_reload_lru_from_disk, pg, QEMU_THREAD_JOINABLE); +} + +static void *shader_write_to_disk(void *arg) +{ + ShaderLruNode *snode = (ShaderLruNode*) arg; + + char *shader_bin = shader_get_bin_directory(snode->node.hash); + char *shader_path = shader_get_binary_path(shader_bin, snode->node.hash); + + static uint64_t gl_vendor_len; + if (gl_vendor_len == 0) { + gl_vendor_len = (uint64_t) (strlen(shader_gl_vendor) + 1); + } + + static uint64_t xemu_version_len = 0; + if (xemu_version_len == 0) { + xemu_version_len = (uint64_t) (strlen(xemu_version) + 1); + } + + qemu_mkdir(shader_bin); + g_free(shader_bin); + + FILE *shader_file = qemu_fopen(shader_path, "wb"); + if (!shader_file) { + goto error; + } + + size_t written; + #define WRITE_OR_ERR(data, data_size) \ + do { \ + written = fwrite(data, data_size, 1, shader_file); \ + if (written != 1) { \ + fclose(shader_file); \ + goto error; \ + } \ + } while (0) + + WRITE_OR_ERR(&xemu_version_len, sizeof(xemu_version_len)); + WRITE_OR_ERR(xemu_version, xemu_version_len); + + WRITE_OR_ERR(&gl_vendor_len, sizeof(gl_vendor_len)); + WRITE_OR_ERR(shader_gl_vendor, gl_vendor_len); + + WRITE_OR_ERR(&snode->program_format, sizeof(snode->program_format)); + WRITE_OR_ERR(&snode->state, sizeof(snode->state)); + + WRITE_OR_ERR(&snode->program_size, sizeof(snode->program_size)); + WRITE_OR_ERR(snode->program, snode->program_size); + + #undef WRITE_OR_ERR + + fclose(shader_file); + + g_free(shader_path); + g_free(snode->program); + snode->program = NULL; + + return NULL; + +error: + 
fprintf(stderr, "nv2a: Failed to write shader binary file to %s\n", shader_path); + qemu_unlink(shader_path); + g_free(shader_path); + g_free(snode->program); + snode->program = NULL; + return NULL; +} + +void shader_cache_to_disk(ShaderLruNode *snode) +{ + if (!snode->binding || snode->cached) { + return; + } + + GLint program_size; + glGetProgramiv(snode->binding->gl_program, GL_PROGRAM_BINARY_LENGTH, &program_size); + + if (snode->program) { + g_free(snode->program); + snode->program = NULL; + } + + /* program_size might be zero on some systems, if no binary formats are supported */ + if (program_size == 0) { + return; + } + + snode->program = g_malloc(program_size); + GLsizei program_size_copied; + glGetProgramBinary(snode->binding->gl_program, program_size, &program_size_copied, + &snode->program_format, snode->program); + assert(glGetError() == GL_NO_ERROR); + + snode->program_size = program_size_copied; + snode->cached = true; + + char name[24]; + snprintf(name, sizeof(name), "scache-%llx", (unsigned long long) snode->node.hash); + snode->save_thread = g_malloc0(sizeof(QemuThread)); + qemu_thread_create(snode->save_thread, name, shader_write_to_disk, snode, QEMU_THREAD_JOINABLE); +} diff --git a/hw/xbox/nv2a/shaders.h b/hw/xbox/nv2a/shaders.h index a1543eb69c..e58e37e2ea 100644 --- a/hw/xbox/nv2a/shaders.h +++ b/hw/xbox/nv2a/shaders.h @@ -21,12 +21,14 @@ #ifndef HW_NV2A_SHADERS_H #define HW_NV2A_SHADERS_H +#include "qemu/thread.h" #include "qapi/qmp/qstring.h" #include "gl/gloffscreen.h" #include "nv2a_regs.h" #include "vsh.h" #include "psh.h" +#include "lru.h" enum ShaderPrimitiveMode { PRIM_TYPE_INVALID, @@ -136,6 +138,26 @@ typedef struct ShaderBinding { GLint material_alpha_loc; } ShaderBinding; +typedef struct ShaderLruNode { + LruNode node; + bool cached; + void *program; + size_t program_size; + GLenum program_format; + ShaderState state; + ShaderBinding *binding; + QemuThread *save_thread; +} ShaderLruNode; + +typedef struct PGRAPHState PGRAPHState; 
+ +GLenum get_gl_primitive_mode(enum ShaderPolygonMode polygon_mode, enum ShaderPrimitiveMode primitive_mode); +void update_shader_constant_locations(ShaderBinding *binding, const ShaderState *state); ShaderBinding *generate_shaders(const ShaderState *state); +void shader_cache_init(PGRAPHState *pg); +void shader_write_cache_reload_list(PGRAPHState *pg); +bool shader_load_from_memory(ShaderLruNode *snode); +void shader_cache_to_disk(ShaderLruNode *snode); + #endif diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h index eae68bc7ee..54762dd6ef 100644 --- a/include/qemu/osdep.h +++ b/include/qemu/osdep.h @@ -584,6 +584,9 @@ int qemu_open(const char *name, int flags, Error **errp); int qemu_create(const char *name, int flags, mode_t mode, Error **errp); int qemu_close(int fd); int qemu_unlink(const char *name); +#ifdef XBOX +int qemu_mkdir(const char *path); +#endif #ifndef _WIN32 int qemu_dup_flags(int fd, int flags); int qemu_dup(int fd); diff --git a/ui/xemu-settings.cc b/ui/xemu-settings.cc index ae380fb883..5237d5682b 100644 --- a/ui/xemu-settings.cc +++ b/ui/xemu-settings.cc @@ -69,18 +69,32 @@ void xemu_settings_set_path(const char *path) fprintf(stderr, "%s: config path: %s\n", __func__, settings_path); } -const char *xemu_settings_get_path(void) +const char *xemu_settings_get_base_path(void) { - if (settings_path != NULL) { - return settings_path; + static const char *base_path = NULL; + if (base_path != NULL) { + return base_path; } char *base = xemu_settings_detect_portable_mode() ? 
SDL_GetBasePath() : SDL_GetPrefPath("xemu", "xemu"); assert(base != NULL); - settings_path = g_strdup_printf("%s%s", base, filename); + base_path = g_strdup(base); SDL_free(base); + fprintf(stderr, "%s: base path: %s\n", __func__, base_path); + return base_path; +} + +const char *xemu_settings_get_path(void) +{ + if (settings_path != NULL) { + return settings_path; + } + + const char *base = xemu_settings_get_base_path(); + assert(base != NULL); + settings_path = g_strdup_printf("%s%s", base, filename); fprintf(stderr, "%s: config path: %s\n", __func__, settings_path); return settings_path; } @@ -92,12 +106,9 @@ const char *xemu_settings_get_default_eeprom_path(void) return eeprom_path; } - char *base = xemu_settings_detect_portable_mode() - ? SDL_GetBasePath() - : SDL_GetPrefPath("xemu", "xemu"); + const char *base = xemu_settings_get_base_path(); assert(base != NULL); eeprom_path = g_strdup_printf("%s%s", base, "eeprom.bin"); - SDL_free(base); return eeprom_path; } diff --git a/ui/xemu-settings.h b/ui/xemu-settings.h index c6ba76f1ff..32cb6987dd 100644 --- a/ui/xemu-settings.h +++ b/ui/xemu-settings.h @@ -38,6 +38,9 @@ extern struct config g_config; // Override the default config file paths void xemu_settings_set_path(const char *path); +// Get the path of the base settings directory (computed on first call, then cached; callers must not free it) +const char *xemu_settings_get_base_path(void); + // Get path of the config file on disk const char *xemu_settings_get_path(void); diff --git a/ui/xui/main-menu.cc b/ui/xui/main-menu.cc index a9ffded98e..d659bff5c2 100644 --- a/ui/xui/main-menu.cc +++ b/ui/xui/main-menu.cc @@ -56,8 +56,8 @@ void MainMenuGeneralView::Draw() "Use hardware-accelerated floating point emulation (requires restart)"); #endif - // toggle("Cache shaders to disk", &g_config.perf.cache_shaders, - // "Reduce stutter in games by caching previously generated shaders"); + Toggle("Cache shaders to disk", &g_config.perf.cache_shaders, + "Reduce stutter in games by caching previously generated shaders"); 
SectionTitle("Miscellaneous"); Toggle("Skip startup animation", &g_config.general.skip_boot_anim, diff --git a/util/osdep.c b/util/osdep.c index a9ad4c4f89..dfe147674a 100644 --- a/util/osdep.c +++ b/util/osdep.c @@ -451,9 +451,49 @@ int qemu_unlink(const char *name) return 0; } +#ifdef _WIN32 + wchar_t *namew = g_utf8_to_utf16(name, -1, NULL, NULL, NULL); + if (!namew) { + return -1; + } + int ret = _wunlink(namew); + g_free(namew); + return ret; +#else return unlink(name); +#endif } +#ifdef XBOX + +/* + * Create a directory on the filesystem + * + * Returns: On success, zero is returned. On error, -1 is returned; + * errno is set only on the non-Windows (mkdir) path. + */ + +int qemu_mkdir(const char *path) +{ +#ifdef _WIN32 + wchar_t *wpath = g_utf8_to_utf16(path, -1, NULL, NULL, NULL); + if (!wpath) { + return -1; + } + + BOOL dirResult = CreateDirectoryW(wpath, 0); + g_free(wpath); + if (!dirResult) { + return -1; + } + return 0; +#else + return mkdir(path, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH); +#endif +} + +#endif + /* * A variant of write(2) which handles partial write. *