From f6fdd421c6a6b0ac771bd450299b65fbe38703c7 Mon Sep 17 00:00:00 2001 From: aliaspider Date: Sat, 30 Dec 2017 16:39:23 +0100 Subject: [PATCH 1/2] (WIIU) shaders: use bitfields for GPU register values. --- wiiu/include/wiiu/gx2/shaders.h | 218 +++++++++++++++++++++++++++++--- wiiu/tex_shader.c | 99 +++++++-------- 2 files changed, 248 insertions(+), 69 deletions(-) diff --git a/wiiu/include/wiiu/gx2/shaders.h b/wiiu/include/wiiu/gx2/shaders.h index 045befee66..2768feb1d8 100644 --- a/wiiu/include/wiiu/gx2/shaders.h +++ b/wiiu/include/wiiu/gx2/shaders.h @@ -71,18 +71,91 @@ typedef struct GX2VertexShader { struct { - uint32_t sq_pgm_resources_vs; - uint32_t vgt_primitiveid_en; - uint32_t spi_vs_out_config; + struct + { + unsigned :2; + bool prime_cache_on_const :1; + bool prime_cache_enable :1; + bool uncached_first_inst :1; + unsigned fetch_cache_lines :3; + bool prime_cache_on_draw :1; + bool prime_cache_pgm_en :1; + bool dx10_clamp :1; + unsigned :5; + unsigned stack_size :8; + unsigned num_gprs :8; + }sq_pgm_resources_vs; + + bool vgt_primitiveid_en; + + struct + { + unsigned :18; + unsigned vs_out_fog_vec_addr : 5; + bool vs_exports_fog : 1; + unsigned :2; + unsigned vs_export_count :5; + bool vs_per_component : 1; + }spi_vs_out_config; + uint32_t num_spi_vs_out_id; - uint32_t spi_vs_out_id[10]; - uint32_t pa_cl_vs_out_cntl; + struct + { + uint8_t semantic_3; + uint8_t semantic_2; + uint8_t semantic_1; + uint8_t semantic_0; + }spi_vs_out_id[10]; + struct + { + bool clip_dist_ena_7 :1; + bool clip_dist_ena_6 :1; + bool clip_dist_ena_5 :1; + bool clip_dist_ena_4 :1; + bool clip_dist_ena_3 :1; + bool clip_dist_ena_2 :1; + bool clip_dist_ena_1 :1; + bool clip_dist_ena_0 :1; + bool cull_dist_ena_7 :1; /* cull bits run 7..0, same order as the clip bits above */ + bool cull_dist_ena_6 :1; + bool cull_dist_ena_5 :1; + bool cull_dist_ena_4 :1; + bool cull_dist_ena_3 :1; + bool cull_dist_ena_2 :1; + bool cull_dist_ena_1 :1; + bool cull_dist_ena_0 :1; + bool vs_out_misc_side_bus_ena :1; + bool vs_out_ccdist1_vec_ena :1; + bool 
vs_out_ccdist0_vec_ena :1; + bool vs_out_misc_vec_ena :1; + bool use_vtx_kill_flag :1; + bool use_vtx_viewport_indx :1; + bool use_vtx_render_target_indx :1; + bool use_vtx_edge_flag :1; + unsigned :6; + bool use_vtx_point_size :1; + bool use_vtx_gs_cut_flag :1; + }pa_cl_vs_out_cntl; uint32_t sq_vtx_semantic_clear; uint32_t num_sq_vtx_semantic; - uint32_t sq_vtx_semantic[32]; - uint32_t vgt_strmout_buffer_en; - uint32_t vgt_vertex_reuse_block_cntl; - uint32_t vgt_hos_reuse_depth; + uint32_t sq_vtx_semantic[32]; /* 8 bit */ + struct + { + bool buffer_3_en :1; + bool buffer_2_en :1; + bool buffer_1_en :1; + bool buffer_0_en :1; + }vgt_strmout_buffer_en; + struct + { + unsigned :24; + uint8_t vtx_reuse_depth; + }vgt_vertex_reuse_block_cntl; + struct + { + unsigned :24; + uint8_t reuse_depth; + }vgt_hos_reuse_depth; } regs; uint32_t size; @@ -115,20 +188,129 @@ typedef struct GX2VertexShader GX2RBuffer gx2rBuffer; } GX2VertexShader; +typedef enum { + spi_baryc_cntl_centroids_only = 0, + spi_baryc_cntl_centers_only = 1, + spi_baryc_cntl_centroids_and_centers = 2, +}spi_baryc_cntl; + +typedef enum { + db_z_order_late_z = 0, + db_z_order_early_z_then_late_z = 1, + db_z_order_re_z = 2, + db_z_order_early_z_then_re_z = 3, +}db_z_order; + typedef struct GX2PixelShader { struct { - uint32_t sq_pgm_resources_ps; - uint32_t sq_pgm_exports_ps; - uint32_t spi_ps_in_control_0; - uint32_t spi_ps_in_control_1; + struct + { + unsigned :2; + bool prime_cache_on_const :1; + bool prime_cache_enable :1; + bool uncached_first_inst :1; + unsigned fetch_cache_lines :3; + bool prime_cache_on_draw :1; + bool prime_cache_pgm_en :1; + bool dx10_clamp :1; + unsigned :5; + unsigned stack_size :8; + unsigned num_gprs :8; + }sq_pgm_resources_ps; + + struct + { + unsigned :27; /* pad to a full 32-bit register: 27 + 5 */ + unsigned export_mode :5; + }sq_pgm_exports_ps; + + struct + { + bool baryc_at_sample_ena :1; + bool position_sample :1; + bool linear_gradient_ena :1; + bool persp_gradient_ena :1; + spi_baryc_cntl baryc_sample_cntl :2; + 
unsigned param_gen_addr : 7; + unsigned param_gen :4; + unsigned position_addr :5; + bool position_centroid :1; + bool position_ena :1; + unsigned :2; + unsigned num_interp :6; + }spi_ps_in_control_0; + + struct + { + unsigned :1; + bool position_ulc :1; + unsigned fixed_pt_position_addr :5; + bool fixed_pt_position_ena :1; + unsigned fog_addr :7; + unsigned front_face_addr :5; + bool front_face_all_bits :1; + unsigned front_face_chan :2; + bool front_face_ena :1; + unsigned gen_index_pix_addr :7; + bool gen_index_pix :1; + }spi_ps_in_control_1; + uint32_t num_spi_ps_input_cntl; - uint32_t spi_ps_input_cntls[32]; - uint32_t cb_shader_mask; - uint32_t cb_shader_control; - uint32_t db_shader_control; - uint32_t spi_input_z; + + struct + { + unsigned :13; + bool sel_sample :1; + bool pt_sprite_tex :1; + unsigned cyl_wrap :4; + bool sel_linear :1; + bool sel_centroid :1; + bool flat_shade :1; + unsigned default_val :2; + unsigned semantic :8; + }spi_ps_input_cntls[32]; + + struct + { + unsigned output7_enable :4; + unsigned output6_enable :4; + unsigned output5_enable :4; + unsigned output4_enable :4; + unsigned output3_enable :4; + unsigned output2_enable :4; + unsigned output1_enable :4; + unsigned output0_enable :4; + }cb_shader_mask; + struct { + unsigned :24; + bool rt7_enable :1; + bool rt6_enable :1; + bool rt5_enable :1; + bool rt4_enable :1; + bool rt3_enable :1; + bool rt2_enable :1; + bool rt1_enable :1; + bool rt0_enable :1; + }cb_shader_control; + struct + { + unsigned :19; + bool alpha_to_mask_disable :1; + bool exec_on_noop :1; + bool exec_on_hier_fail :1; + bool dual_export_enable :1; + bool mask_export_enable :1; + bool coverage_to_mask_enable :1; + bool kill_enable :1; + db_z_order z_order :2; + unsigned :2; + bool z_export_enable :1; + bool stencil_ref_export_enable :1; + } db_shader_control; + + bool spi_input_z; } regs; uint32_t size; diff --git a/wiiu/tex_shader.c b/wiiu/tex_shader.c index d97475d477..4c49c23b92 100644 --- a/wiiu/tex_shader.c +++ 
b/wiiu/tex_shader.c @@ -133,67 +133,64 @@ tex_shader_t tex_shader = { { { - 0x00000103, 0x00000000, 0x00000000, 0x00000001, /* sq_pgm_resources_vs, vgt_primitiveid_en, spi_vs_out_config, num_spi_vs_out_id */ - { 0xffffff00, _x9(0xffffffff) }, /* spi_vs_out_id @10 */ - 0x00000000, 0xfffffffc, 0x00000002, /* pa_cl_vs_out_cntl, sq_vtx_semantic_clear, num_sq_vtx_semantic */ + .sq_pgm_resources_vs.num_gprs = 3, + .sq_pgm_resources_vs.stack_size = 1, + .num_spi_vs_out_id = 1, { - 0x00000000, 0x00000001, _x30(0x000000ff) /* sq_vtx_semantic @32 */ + {.semantic_0 = 0x00, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF}, + {.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF}, + {.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF}, + {.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF}, + {.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF}, + {.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF}, + {.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF}, + {.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF}, + {.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF}, + {.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF}, }, - 0x00000000, 0x0000000e, 0x00000010 /* vgt_strmout_buffer_en, vgt_vertex_reuse_block_cntl, vgt_hos_reuse_depth */ - }, /* regs */ - sizeof(vs_program), /* size */ - (uint8_t*)&vs_program, /* program */ - GX2_SHADER_MODE_UNIFORM_REGISTER, /* mode */ - 0, /* uniformBlockCount */ - NULL, /* uniformBlocks */ - 0, /* uniformVarCount */ - NULL, /* uniformVars */ - 0, /* initialValueCount */ - NULL, /* initialValues */ - 0, /* loopVarCount */ - NULL, /* loopVars */ - 0, /* samplerVarCount */ - NULL, /* samplerVars */ - sizeof(tex_shader.attributes) / sizeof(GX2AttribVar), /* attribVarCount */ - 
(GX2AttribVar*) &tex_shader.attributes, /* attribVars */ - 0, /* ringItemsize */ - FALSE, /* hasStreamOut */ - {0}, /* streamOutStride @4 */ - {} /* gx2rBuffer */ + .sq_vtx_semantic_clear = ~0x3, + .num_sq_vtx_semantic = 2, + { + 0, 1, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + }, + .vgt_vertex_reuse_block_cntl.vtx_reuse_depth = 0xE, + .vgt_hos_reuse_depth.reuse_depth = 0x10, + }, /* regs */ + .size = sizeof(vs_program), + .program = (uint8_t*)&vs_program, + .mode = GX2_SHADER_MODE_UNIFORM_REGISTER, + .attribVarCount = sizeof(tex_shader.attributes) / sizeof(GX2AttribVar), (GX2AttribVar*) &tex_shader.attributes, }, { { - 0x00000001, 0x00000002, 0x14000001, 0x00000000, /* sq_pgm_resources_ps, sq_pgm_exports_ps, spi_ps_in_control_0, spi_ps_in_control_1 */ - 0x00000001, /* num_spi_ps_input_cntl */ - { 0x00000100, _x30(0x00000000)}, /* spi_ps_input_cntls @ 32*/ - 0x0000000f, 0x00000001, 0x00000010, 0x00000000 /* cb_shader_mask, cb_shader_control, db_shader_control, spi_input_z */ - }, /* regs */ - sizeof(ps_program), /* size */ - (uint8_t*)&ps_program, /* program */ - GX2_SHADER_MODE_UNIFORM_REGISTER, /* mode */ - 0, /* uniformBlockCount */ - NULL, /* uniformBlocks */ - 0, /* uniformVarCount */ - NULL, /* uniformVars */ - 0, /* initialValueCount */ - NULL, /* initialValues */ - 0, /* loopVarCount */ - NULL, /* loopVars */ - 1, /* samplerVarCount */ - (GX2SamplerVar*) &tex_shader.sampler, /* samplerVars */ - {} /* gx2rBuffer */ + .sq_pgm_resources_ps.num_gprs = 1, + .sq_pgm_exports_ps.export_mode = 0x2, + .spi_ps_in_control_0.num_interp = 1, + .spi_ps_in_control_0.persp_gradient_ena = 1, + .spi_ps_in_control_0.baryc_sample_cntl = spi_baryc_cntl_centers_only, + .num_spi_ps_input_cntl = 1, {{.default_val = 1},}, + .cb_shader_mask.output0_enable = 0xF, + .cb_shader_control.rt0_enable = TRUE, + .db_shader_control.z_order = 
db_z_order_early_z_then_late_z, + }, /* regs */ + .size = sizeof(ps_program), + .program = (uint8_t*)&ps_program, + .mode = GX2_SHADER_MODE_UNIFORM_REGISTER, + .samplerVarCount = 1, + .samplerVars = (GX2SamplerVar*) &tex_shader.sampler, }, - { "s", GX2_SAMPLER_VAR_TYPE_SAMPLER_2D, 0 }, - { - { "position", GX2_SHADER_VAR_TYPE_FLOAT2, 0, 0}, - { "tex_coord_in", GX2_SHADER_VAR_TYPE_FLOAT2, 0, 1} + .sampler = { "s", GX2_SAMPLER_VAR_TYPE_SAMPLER_2D, 0 }, + .attributes = { + .position = { "position", GX2_SHADER_VAR_TYPE_FLOAT2, 0, 0}, + .tex_coord = { "tex_coord_in", GX2_SHADER_VAR_TYPE_FLOAT2, 0, 1} }, - { - { + .attribute_stream = { + .position = { 0, 0, 0, GX2_ATTRIB_FORMAT_FLOAT_32_32, GX2_ATTRIB_INDEX_PER_VERTEX, 0, GX2_COMP_SEL(_X, _Y, _0, _1), GX2_ENDIAN_SWAP_DEFAULT }, - { + .tex_coord = { 1, 1, 0, GX2_ATTRIB_FORMAT_FLOAT_32_32, GX2_ATTRIB_INDEX_PER_VERTEX, 0, GX2_COMP_SEL(_X, _Y, _0, _1), GX2_ENDIAN_SWAP_DEFAULT } From b371f4bb32e6b3bb99e5b28097d3b47e6432f4d3 Mon Sep 17 00:00:00 2001 From: aliaspider Date: Sat, 30 Dec 2017 23:21:32 +0100 Subject: [PATCH 2/2] (WIIU) add a color attribute to tex_shader. + some fixes to overlay and menu display code. 
--- gfx/common/gx2_common.h | 8 +- gfx/drivers/wiiu_gfx.c | 53 +++++++- gfx/drivers_font/wiiu_font.c | 21 ++- menu/drivers_display/menu_display_wiiu.c | 67 +++++++--- wiiu/gx2_shader_inl.h | 9 ++ wiiu/include/wiiu/gx2/shaders.h | 4 +- wiiu/tex_shader.c | 163 ++++++++++++----------- wiiu/tex_shader.h | 2 + 8 files changed, 211 insertions(+), 116 deletions(-) diff --git a/gfx/common/gx2_common.h b/gfx/common/gx2_common.h index 5af9c4fd9b..78626270fe 100644 --- a/gfx/common/gx2_common.h +++ b/gfx/common/gx2_common.h @@ -17,7 +17,9 @@ #define _1 0x05 #define GX2_COMP_SEL(c0, c1, c2, c3) (((c0) << 24) | ((c1) << 16) | ((c2) << 8) | (c3)) -#define COLOR_ABGR(r, g, b, a) (((unsigned)(a) << 24) | ((b) << 16) | ((g) << 8) | ((r) << 0)) +#define COLOR_ABGR(r, g, b, a) (((u32)(a) << 24) | ((u32)(b) << 16) | ((u32)(g) << 8) | ((u32)(r) << 0)) +#define COLOR_ARGB(r, g, b, a) (((u32)(a) << 24) | ((u32)(r) << 16) | ((u32)(g) << 8) | ((u32)(b) << 0)) /* packs 0xAARRGGBB */ +#define COLOR_RGBA(r, g, b, a) (((u32)(r) << 24) | ((u32)(g) << 16) | ((u32)(b) << 8) | ((u32)(a) << 0)) /* packs 0xRRGGBBAA */ //#define GX2_CAN_ACCESS_DATA_SECTION @@ -45,6 +47,7 @@ struct gx2_overlay_data GX2Texture tex; float tex_coord[8]; float vertex_coord[8]; + u32 color[4]; float alpha_mod; }; @@ -59,6 +62,7 @@ typedef struct bool enable; position_t* position; tex_coord_t* tex_coord; + u32* color; } menu; #ifdef HAVE_OVERLAY @@ -73,6 +77,7 @@ typedef struct GX2Texture texture; position_t* position; tex_coord_t* tex_coord; + u32* color; int width; int height; @@ -80,6 +85,7 @@ typedef struct { position_t* positions; tex_coord_t* tex_coords; + u32* colors; int size; int current; } vertex_cache; diff --git a/gfx/drivers/wiiu_gfx.c b/gfx/drivers/wiiu_gfx.c index ff9d1cc9b0..bf10b391d4 100644 --- a/gfx/drivers/wiiu_gfx.c +++ b/gfx/drivers/wiiu_gfx.c @@ -304,11 +304,11 @@ static void* wiiu_gfx_init(const video_info_t* video, wiiu->shader->ps.samplerVarCount * sizeof(GX2SamplerVar)); #endif - wiiu->shader->fs.size = GX2CalcFetchShaderSizeEx(2, 
GX2_FETCH_SHADER_TESSELLATION_NONE, - GX2_TESSELLATION_MODE_DISCRETE); + wiiu->shader->fs.size = GX2CalcFetchShaderSizeEx(sizeof(wiiu->shader->attribute_stream) / sizeof(GX2AttribStream), + GX2_FETCH_SHADER_TESSELLATION_NONE, GX2_TESSELLATION_MODE_DISCRETE); wiiu->shader->fs.program = MEM2_alloc(wiiu->shader->fs.size, GX2_SHADER_ALIGNMENT); GX2InitFetchShaderEx(&wiiu->shader->fs, (uint8_t*)wiiu->shader->fs.program, - sizeof(wiiu->shader->attribute_stream) / sizeof(GX2AttribStream), + sizeof(wiiu->shader->attribute_stream) / sizeof(GX2AttribStream), (GX2AttribStream*)&wiiu->shader->attribute_stream, GX2_FETCH_SHADER_TESSELLATION_NONE, GX2_TESSELLATION_MODE_DISCRETE); GX2Invalidate(GX2_INVALIDATE_MODE_CPU_SHADER, wiiu->shader->fs.program, wiiu->shader->fs.size); @@ -324,8 +324,16 @@ static void* wiiu_gfx_init(const video_info_t* video, wiiu_set_tex_coords(wiiu->tex_coord, &wiiu->texture, 0, 0, wiiu->texture.surface.width, wiiu->texture.surface.height, wiiu->rotation); + wiiu->color = MEM2_alloc(4 * sizeof(*wiiu->color), GX2_VERTEX_BUFFER_ALIGNMENT); + wiiu->color[0] = 0xFFFFFFFF; + wiiu->color[1] = 0xFFFFFFFF; + wiiu->color[2] = 0xFFFFFFFF; + wiiu->color[3] = 0xFFFFFFFF; + GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, wiiu->color, 4 * sizeof(*wiiu->color)); + GX2SetAttribBuffer(0, 4 * sizeof(*wiiu->position), sizeof(*wiiu->position), wiiu->position); GX2SetAttribBuffer(1, 4 * sizeof(*wiiu->tex_coord), sizeof(*wiiu->tex_coord), wiiu->tex_coord); + GX2SetAttribBuffer(2, 4 * sizeof(*wiiu->color), sizeof(*wiiu->color), wiiu->color); wiiu->menu.position = MEM2_alloc(4 * sizeof(*wiiu->menu.position), GX2_VERTEX_BUFFER_ALIGNMENT); wiiu_set_position(wiiu->menu.position, &wiiu->color_buffer, 0, 0, @@ -335,6 +343,13 @@ static void* wiiu_gfx_init(const video_info_t* video, wiiu_set_tex_coords(wiiu->menu.tex_coord, &wiiu->menu.texture, 0, 0, wiiu->menu.texture.surface.width, wiiu->menu.texture.surface.height, 0); + wiiu->menu.color = MEM2_alloc(4 * 
sizeof(*wiiu->menu.color), GX2_VERTEX_BUFFER_ALIGNMENT); + wiiu->menu.color[0] = 0xFFFFFF80; + wiiu->menu.color[1] = 0xFFFFFF80; + wiiu->menu.color[2] = 0xFFFFFF80; + wiiu->menu.color[3] = 0xFFFFFF80; + GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, wiiu->menu.color, 4 * sizeof(*wiiu->menu.color)); + /* Initialize frame texture */ memset(&wiiu->texture, 0, sizeof(GX2Texture)); wiiu->texture.surface.width = video->input_scale * RARCH_SCALE_BASE; @@ -390,6 +405,8 @@ static void* wiiu_gfx_init(const video_info_t* video, * sizeof(position_t), GX2_VERTEX_BUFFER_ALIGNMENT); wiiu->vertex_cache.tex_coords = MEM2_alloc(wiiu->vertex_cache.size * sizeof(tex_coord_t), GX2_VERTEX_BUFFER_ALIGNMENT); + wiiu->vertex_cache.colors = MEM2_alloc(wiiu->vertex_cache.size + * sizeof(u32), GX2_VERTEX_BUFFER_ALIGNMENT); /* Initialize samplers */ GX2InitSampler(&wiiu->sampler_nearest, GX2_TEX_CLAMP_MODE_CLAMP, GX2_TEX_XY_FILTER_MODE_POINT); @@ -455,6 +472,7 @@ static void gx2_overlay_tex_geom(void *data, unsigned image, o->tex_coord[5] = y + h; o->tex_coord[6] = x ; o->tex_coord[7] = y + h; + GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, o->tex_coord, sizeof(o->tex_coord)); } static void gx2_overlay_vertex_geom(void *data, unsigned image, @@ -490,6 +508,8 @@ static void gx2_overlay_vertex_geom(void *data, unsigned image, o->vertex_coord[6] = x ; o->vertex_coord[7] = y + h; + + GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, o->vertex_coord,sizeof(o->vertex_coord)); } static void gx2_free_overlay(wiiu_video_t *gx2) @@ -549,10 +569,15 @@ static bool gx2_overlay_load(void *data, gx2_overlay_tex_geom(gx2, i, 0, 0, 1, 1); gx2_overlay_vertex_geom(gx2, i, 0, 0, 1, 1); gx2->overlay[i].alpha_mod = 1.0f; + gx2->overlay[i].color[0] = 0xFFFFFFFF; + gx2->overlay[i].color[1] = 0xFFFFFFFF; + gx2->overlay[i].color[2] = 0xFFFFFFFF; + gx2->overlay[i].color[3] = 0xFFFFFFFF; GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, o->vertex_coord,sizeof(o->vertex_coord)); - 
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, o->tex_coord, sizeof(o->vertex_coord)); + GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, o->tex_coord, sizeof(o->tex_coord)); + GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, o->color, sizeof(o->color)); } @@ -577,7 +602,14 @@ static void gx2_overlay_set_alpha(void *data, unsigned image, float mod) wiiu_video_t *gx2 = (wiiu_video_t*)data; if (gx2) + { gx2->overlay[image].alpha_mod = mod; + gx2->overlay[image].color[0] = COLOR_RGBA(0xFF, 0xFF, 0xFF, 0xFF * gx2->overlay[image].alpha_mod); + gx2->overlay[image].color[1] = gx2->overlay[image].color[0]; + gx2->overlay[image].color[2] = gx2->overlay[image].color[0]; + gx2->overlay[image].color[3] = gx2->overlay[image].color[0]; + GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, gx2->overlay[image].color, sizeof(gx2->overlay[image].color)); + } } static void gx2_render_overlay(void *data) @@ -592,6 +624,8 @@ static void gx2_render_overlay(void *data) 2*sizeof(float), gx2->overlay[i].vertex_coord); GX2SetAttribBuffer(1, 8 * sizeof(float), 2*sizeof(float), gx2->overlay[i].tex_coord); + GX2SetAttribBuffer(2, 4 * sizeof(u32), + sizeof(u32), gx2->overlay[i].color); GX2SetPixelTexture(&gx2->overlay[i].tex, gx2->shader->sampler.location); GX2SetPixelSampler(&gx2->sampler_linear, gx2->shader->sampler.location); @@ -650,6 +684,7 @@ static void wiiu_gfx_free(void* data) MEM2_free(wiiu->menu.texture.surface.image); MEM2_free(wiiu->vertex_cache.positions); MEM2_free(wiiu->vertex_cache.tex_coords); + MEM2_free(wiiu->vertex_cache.colors); MEM1_free(wiiu->color_buffer.surface.image); @@ -669,8 +704,10 @@ static void wiiu_gfx_free(void* data) #endif MEM2_free(wiiu->position); MEM2_free(wiiu->tex_coord); + MEM2_free(wiiu->color); MEM2_free(wiiu->menu.position); MEM2_free(wiiu->menu.tex_coord); + MEM2_free(wiiu->menu.color); free(wiiu); @@ -792,6 +829,8 @@ static bool wiiu_gfx_frame(void* data, const void* frame, sizeof(*wiiu->position), wiiu->position); 
GX2SetAttribBuffer(1, 4 * sizeof(*wiiu->tex_coord), sizeof(*wiiu->tex_coord), wiiu->tex_coord); + GX2SetAttribBuffer(2, 4 * sizeof(*wiiu->color), + sizeof(*wiiu->color), wiiu->color); GX2SetPixelTexture(&wiiu->texture, wiiu->shader->sampler.location); GX2SetPixelSampler(wiiu->smooth? &wiiu->sampler_linear : &wiiu->sampler_nearest, @@ -810,6 +849,8 @@ static bool wiiu_gfx_frame(void* data, const void* frame, sizeof(*wiiu->menu.position), wiiu->menu.position); GX2SetAttribBuffer(1, 4 * sizeof(*wiiu->menu.tex_coord), sizeof(*wiiu->menu.tex_coord), wiiu->menu.tex_coord); + GX2SetAttribBuffer(2, 4 * sizeof(*wiiu->menu.color), + sizeof(*wiiu->menu.color), wiiu->menu.color); GX2SetPixelTexture(&wiiu->menu.texture, wiiu->shader->sampler.location); GX2SetPixelSampler(&wiiu->sampler_linear, wiiu->shader->sampler.location); @@ -822,6 +863,8 @@ static bool wiiu_gfx_frame(void* data, const void* frame, sizeof(position_t), wiiu->vertex_cache.positions); GX2SetAttribBuffer(1, wiiu->vertex_cache.size * sizeof(tex_coord_t), sizeof(tex_coord_t), wiiu->vertex_cache.tex_coords); + GX2SetAttribBuffer(2, wiiu->vertex_cache.size * sizeof(u32), + sizeof(u32), wiiu->vertex_cache.colors); GX2SetPixelSampler(&wiiu->sampler_linear, wiiu->shader->sampler.location); wiiu->render_msg_enabled = true; @@ -838,6 +881,8 @@ static bool wiiu_gfx_frame(void* data, const void* frame, wiiu->vertex_cache.positions, wiiu->vertex_cache.current * sizeof(position_t)); GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, wiiu->vertex_cache.tex_coords, wiiu->vertex_cache.current * sizeof(tex_coord_t)); + GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, + wiiu->vertex_cache.colors, wiiu->vertex_cache.current * sizeof(u32)); if (wiiu->menu.enable) GX2DrawDone(); diff --git a/gfx/drivers_font/wiiu_font.c b/gfx/drivers_font/wiiu_font.c index 4efa3f7148..0317de1b50 100644 --- a/gfx/drivers_font/wiiu_font.c +++ b/gfx/drivers_font/wiiu_font.c @@ -162,6 +162,7 @@ static void wiiu_font_render_line( position_t* 
pos = wiiu->vertex_cache.positions + wiiu->vertex_cache.current; tex_coord_t* coord = wiiu->vertex_cache.tex_coords + wiiu->vertex_cache.current; + u32* col = wiiu->vertex_cache.colors + wiiu->vertex_cache.current; for (i = 0; i < msg_len; i++) { @@ -219,6 +220,12 @@ static void wiiu_font_render_line( coord[3].v = v0 / font->texture.surface.height; coord += 4; + col[0] = color; + col[1] = color; + col[2] = color; + col[3] = color; + col += 4; + delta_x += glyph->advance_x; delta_y += glyph->advance_y; } @@ -231,6 +238,7 @@ static void wiiu_font_render_line( GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, wiiu->vertex_cache.positions + wiiu->vertex_cache.current, count * sizeof(position_t)); GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, wiiu->vertex_cache.tex_coords + wiiu->vertex_cache.current, count * sizeof(tex_coord_t)); + GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, wiiu->vertex_cache.colors + wiiu->vertex_cache.current, count * sizeof(u32)); if(font->atlas->dirty) { @@ -251,17 +259,8 @@ static void wiiu_font_render_line( GX2SetPixelTexture(&font->texture, wiiu->shader->sampler.location); - GX2SetBlendConstantColor(((color >> 0) & 0xFF) / 255.0f, ((color >> 8) & 0xFF) / 255.0f, - ((color >> 16) & 0xFF) / 255.0f, ((color >> 24) & 0xFF) / 255.0f); - - GX2SetBlendControl(GX2_RENDER_TARGET_0, GX2_BLEND_MODE_BLEND_FACTOR, GX2_BLEND_MODE_INV_SRC_ALPHA, GX2_BLEND_COMBINE_MODE_ADD, - GX2_ENABLE, GX2_BLEND_MODE_SRC_ALPHA, GX2_BLEND_MODE_INV_SRC_ALPHA, GX2_BLEND_COMBINE_MODE_ADD); - GX2DrawEx(GX2_PRIMITIVE_MODE_QUADS, count, wiiu->vertex_cache.current, 1); - GX2SetBlendControl(GX2_RENDER_TARGET_0, GX2_BLEND_MODE_SRC_ALPHA, GX2_BLEND_MODE_INV_SRC_ALPHA, GX2_BLEND_COMBINE_MODE_ADD, - GX2_ENABLE, GX2_BLEND_MODE_SRC_ALPHA, GX2_BLEND_MODE_INV_SRC_ALPHA, GX2_BLEND_COMBINE_MODE_ADD); - wiiu->vertex_cache.current = pos - wiiu->vertex_cache.positions; } @@ -357,7 +356,7 @@ static void wiiu_font_render_msg( g = (video_info->font_msg_color_g * 255); b = 
(video_info->font_msg_color_b * 255); alpha = 255; - color = COLOR_ABGR(r, g, b, alpha); + color = COLOR_RGBA(r, g, b, alpha); drop_x = -2; drop_y = -2; @@ -376,7 +375,7 @@ static void wiiu_font_render_msg( g_dark = g * drop_mod; b_dark = b * drop_mod; alpha_dark = alpha * drop_alpha; - color_dark = COLOR_ABGR(r_dark, g_dark, b_dark, alpha_dark); + color_dark = COLOR_RGBA(r_dark, g_dark, b_dark, alpha_dark); wiiu_font_render_message(video_info, font, msg, scale, color_dark, x + scale * drop_x / width, y + diff --git a/menu/drivers_display/menu_display_wiiu.c b/menu/drivers_display/menu_display_wiiu.c index 03ddcd4b34..998361e495 100644 --- a/menu/drivers_display/menu_display_wiiu.c +++ b/menu/drivers_display/menu_display_wiiu.c @@ -79,37 +79,64 @@ static void menu_display_wiiu_draw(void *data) position_t* pos = wiiu->vertex_cache.positions + wiiu->vertex_cache.current; tex_coord_t* coord = wiiu->vertex_cache.tex_coords + wiiu->vertex_cache.current; + u32* col = wiiu->vertex_cache.colors + wiiu->vertex_cache.current; float x0 = draw->x; float y0 = draw->y; float x1 = x0 + draw->width; float y1 = y0 + draw->height; - pos[0].x = (2.0f * x0 / wiiu->color_buffer.surface.width) - 1.0f; - pos[0].y = (2.0f * y0 / wiiu->color_buffer.surface.height) - 1.0f; - pos[1].x = (2.0f * x1 / wiiu->color_buffer.surface.width) - 1.0f;; - pos[1].y = (2.0f * y0 / wiiu->color_buffer.surface.height) - 1.0f; - pos[2].x = (2.0f * x1 / wiiu->color_buffer.surface.width) - 1.0f;; - pos[2].y = (2.0f * y1 / wiiu->color_buffer.surface.height) - 1.0f; - pos[3].x = (2.0f * x0 / wiiu->color_buffer.surface.width) - 1.0f;; - pos[3].y = (2.0f * y1 / wiiu->color_buffer.surface.height) - 1.0f; + if(draw->coords->vertex && draw->coords->vertices == 4) + { + for(int i = 0; i < 4; i++) + { + pos[i].x = draw->coords->vertex[i << 1] * 2.0f - 1.0f; + pos[i].y = draw->coords->vertex[(i << 1) + 1] * 2.0f - 1.0f; + } + } + else + { + pos[0].x = (2.0f * x0 / wiiu->color_buffer.surface.width) - 1.0f; + pos[0].y = 
(2.0f * y0 / wiiu->color_buffer.surface.height) - 1.0f; + pos[1].x = (2.0f * x1 / wiiu->color_buffer.surface.width) - 1.0f;; + pos[1].y = (2.0f * y0 / wiiu->color_buffer.surface.height) - 1.0f; + pos[2].x = (2.0f * x1 / wiiu->color_buffer.surface.width) - 1.0f;; + pos[2].y = (2.0f * y1 / wiiu->color_buffer.surface.height) - 1.0f; + pos[3].x = (2.0f * x0 / wiiu->color_buffer.surface.width) - 1.0f;; + pos[3].y = (2.0f * y1 / wiiu->color_buffer.surface.height) - 1.0f; + } + if(draw->coords->tex_coord && draw->coords->vertices == 4) + { + memcpy(coord, draw->coords->tex_coord, 8 * sizeof(float)); + } + else + { + coord[0].u = 0.0f; + coord[0].v = 1.0f; + coord[1].u = 1.0f; + coord[1].v = 1.0f; + coord[2].u = 1.0f; + coord[2].v = 0.0f; + coord[3].u = 0.0f; + coord[3].v = 0.0f; + } - coord[0].u = 0.0f; - coord[0].v = 1.0f; - coord[1].u = 1.0f; - coord[1].v = 1.0f; - coord[2].u = 1.0f; - coord[2].v = 0.0f; - coord[3].u = 0.0f; - coord[3].v = 0.0f; + + col[0] = COLOR_RGBA(0xFF * draw->coords->color[0], 0xFF * draw->coords->color[1], + 0xFF * draw->coords->color[2], 0xFF * draw->coords->color[3]); + col[1] = col[0]; + col[2] = col[0]; + col[3] = col[0]; + +// printf("color : %f, %f, %f, %f --> 0x%08X\n", draw->coords->color[0], draw->coords->color[1], draw->coords->color[2], draw->coords->color[3], col[0]); GX2SetPixelTexture(texture, wiiu->shader->sampler.location); -// GX2SetBlendConstantColor(draw->coords->color[3], draw->coords->color[2], draw->coords->color[1], draw->coords->color[0]); -// GX2SetBlendControl(GX2_RENDER_TARGET_0, GX2_BLEND_MODE_SRC_ALPHA, GX2_BLEND_MODE_INV_SRC_ALPHA, GX2_BLEND_COMBINE_MODE_ADD, -// GX2_ENABLE, GX2_BLEND_MODE_SRC_ALPHA, GX2_BLEND_MODE_INV_SRC_ALPHA, GX2_BLEND_COMBINE_MODE_ADD); - GX2DrawEx(GX2_PRIMITIVE_MODE_QUADS, 4, wiiu->vertex_cache.current, 1); + if(draw->coords->vertex && draw->coords->vertices == 4) + GX2DrawEx(GX2_PRIMITIVE_MODE_TRIANGLE_STRIP, 4, wiiu->vertex_cache.current, 1); + else + GX2DrawEx(GX2_PRIMITIVE_MODE_QUADS, 4, 
wiiu->vertex_cache.current, 1); #if 0 printf("(%i,%i,%i,%i) , (%i,%i)\n", (int)draw->x, diff --git a/wiiu/gx2_shader_inl.h b/wiiu/gx2_shader_inl.h index bf35cfcaf2..aaf6056927 100644 --- a/wiiu/gx2_shader_inl.h +++ b/wiiu/gx2_shader_inl.h @@ -113,9 +113,11 @@ #define CF_INST_CALL_FS 0x13 /* ALU */ #define ALU_INST_ALU 0x8 +#define OP2_INST_MUL 0x1 #define OP2_INST_MOV 0x19 /* EXP */ +#define CF_INST_EXP 0x27 #define CF_INST_EXP_DONE 0x28 /* TEX */ @@ -136,6 +138,7 @@ #define PIX(id) EXPORT_ARRAY_BASE_PIX(id) | (EXPORT_TYPE_PIXEL << 13) #define POS0 POS(0) #define PARAM0 PARAM(0) +#define PARAM1 PARAM(1) #define PIX0 PIX(0) /* registers */ @@ -143,6 +146,7 @@ #define _R0 _R(0x0) #define _R1 _R(0x1) #define _R2 _R(0x2) +#define _R3 _R(0x3) /* texture */ #define _t(x) x @@ -161,9 +165,14 @@ #define EXP_DONE(dstReg_and_type, srcReg, srcSelX, srcSelY, srcSelZ, srcSelW) CF_EXP_WORD0(dstReg_and_type, srcReg, 0x0, 0x0, 0x0), \ CF_EXP_WORD1(srcSelX, srcSelY, srcSelZ, srcSelW, 0x0, CF_INST_EXP_DONE) +#define EXP(dstReg_and_type, srcReg, srcSelX, srcSelY, srcSelZ, srcSelW) CF_EXP_WORD0(dstReg_and_type, srcReg, 0x0, 0x0, 0x0), \ + CF_EXP_WORD1(srcSelX, srcSelY, srcSelZ, srcSelW, 0x0, CF_INST_EXP) + #define ALU_MOV(dstGpr, dstChan, src0Sel, src0Chan) ALU_WORD0(src0Sel, 0x0, src0Chan, 0x0, ALU_SRC_0, 0x0, 0x0, 0x0, 0x0, 0x0), \ ALU_WORD1_OP2(0x0, 0x0, 0x0, 0x0, 0x1, 0x0, OP2_INST_MOV, 0x0, 0x0, dstGpr, 0x0, dstChan, 0x0) +#define ALU_MUL(dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan) ALU_WORD0(src0Sel, 0x0, src0Chan, 0x0, src1Sel, 0x0, src1Chan, 0x0, 0x0, 0x0), \ + ALU_WORD1_OP2(0x0, 0x0, 0x0, 0x0, 0x1, 0x0, OP2_INST_MUL, 0x0, 0x0, dstGpr, 0x0, dstChan, 0x0) #define TEX_SAMPLE(dstReg, dstSelX, dstSelY, dstSelZ, dstSelW, srcReg, srcSelX, srcSelY, srcSelZ, srcSelW, resourceID, samplerID)\ TEX_WORD0(TEX_INST_SAMPLE, 0x0, 0x0, resourceID, srcReg, 0x0, 0x0), \ diff --git a/wiiu/include/wiiu/gx2/shaders.h b/wiiu/include/wiiu/gx2/shaders.h index 2768feb1d8..cfafc3ad30 100644 
--- a/wiiu/include/wiiu/gx2/shaders.h +++ b/wiiu/include/wiiu/gx2/shaders.h @@ -149,12 +149,12 @@ typedef struct GX2VertexShader struct { unsigned :24; - uint8_t vtx_reuse_depth; + unsigned vtx_reuse_depth :8; }vgt_vertex_reuse_block_cntl; struct { unsigned :24; - uint8_t reuse_depth; + unsigned reuse_depth :8; }vgt_hos_reuse_depth; } regs; diff --git a/wiiu/tex_shader.c b/wiiu/tex_shader.c index 4c49c23b92..9cf8259c74 100644 --- a/wiiu/tex_shader.c +++ b/wiiu/tex_shader.c @@ -20,41 +20,34 @@ #include "gx2_shader_inl.h" /******************************************************* - ******************************************************* - * * Vertex Shader GLSL source: - * ******************************************************* - ******************************************************* - * - * attribute vec2 position; - * attribute vec2 tex_coord_in; - * varying vec2 tex_coord; - * void main() - * { - * gl_Position = vec4(position, 0.0, 1.0); - * tex_coord = tex_coord_in; - * } - * + + attribute vec2 position; + attribute vec2 tex_coord_in; + attribute vec4 color_in; + varying vec2 tex_coord; + varying vec4 color; + void main() + { + gl_Position = vec4(position, 0.0, 1.0); + tex_coord = tex_coord_in; + color = color_in; + } + ****************************************************** + * assembly: ****************************************************** - * - * assembly output from AMD's GPU ShaderAnalyzer : - * - ****************************************************** - ****************************************************** - * - * 00 CALL_FS NO_BARRIER - * 01 ALU: ADDR(32) CNT(5) - * 0 x: MOV R2.x, R2.x - * y: MOV R2.y, R2.y - * z: MOV R1.z, 0.0f - * w: MOV R1.w, (0x3F800000, 1.0f).x - * 02 EXP_DONE: POS0, R1 - * 03 EXP_DONE: PARAM0, R2.xyzz NO_BARRIER - * END_OF_PROGRAM - * - ****************************************************** + 00 CALL_FS NO_BARRIER + 01 ALU: ADDR(32) CNT(5) + 0 x: MOV R3.x, R3.x + y: MOV R3.y, R3.y + z: MOV R2.z, 0.0f + w: MOV R2.w, (0x3F800000, 
1.0f).x + 02 EXP_DONE: POS0, R2 + 03 EXP: PARAM0, R1 NO_BARRIER + 04 EXP_DONE: PARAM1, R3.xyzz NO_BARRIER + END_OF_PROGRAM ****************************************************** */ @@ -62,70 +55,78 @@ __attribute__((aligned(GX2_SHADER_ALIGNMENT))) static struct { u32 cf[32 * 2]; /* first ADDR() * 2 */ - u32 alu[5 * 2]; /* CNT() sum * 2 */ + u32 alu[5 * 2]; /* alu CNT() * 2 */ } vs_program = { { CALL_FS NO_BARRIER, ALU(32, 5), - EXP_DONE(POS0, _R1, _X, _Y, _Z, _W), - EXP_DONE(PARAM0, _R2, _X, _Y, _Z, _Z) NO_BARRIER + EXP_DONE(POS0, _R2, _X, _Y, _Z, _W), + EXP(PARAM0, _R1, _X, _Y, _Z, _W) NO_BARRIER, + EXP_DONE(PARAM1, _R3, _X, _Y, _Z, _Z) NO_BARRIER END_OF_PROGRAM }, { - ALU_MOV(_R2, _X, _R2, _X), - ALU_MOV(_R2, _Y, _R2, _Y), - ALU_MOV(_R1, _Z, ALU_SRC_0, _X), - ALU_LAST ALU_MOV(_R1, _W, ALU_SRC_LITERAL, _X), ALU_LITERAL(0x3F800000) + ALU_MOV(_R3,_X, _R3,_X), + ALU_MOV(_R3,_Y, _R3,_Y), + ALU_MOV(_R2,_Z, ALU_SRC_0,_X), + ALU_LAST + ALU_MOV(_R2,_W, ALU_SRC_LITERAL,_X), ALU_LITERAL(0x3F800000) } }; /******************************************************* - ******************************************************* - * * Pixel Shader GLSL source: - * ******************************************************* - ******************************************************* - * - * varying vec2 tex_coord; - * uniform sampler2D s; - * void main() - * { - * gl_FragColor = texture2D(s, tex_coord); - * } - * + + varying vec2 tex_coord; + varying vec4 color; + uniform sampler2D s; + void main() + { + gl_FragColor = texture2D(s, tex_coord) * color; + } + ****************************************************** + * assembly: ****************************************************** - * - * assembly output from AMD's GPU ShaderAnalyzer : - * - ****************************************************** - ****************************************************** - * - * 00 TEX: ADDR(16) CNT(1) VALID_PIX - * 0 SAMPLE R0, R0.xy0x, t0, s0 - * 01 EXP_DONE: PIX0, R0 - * END_OF_PROGRAM - * - 
******************************************************* + + 00 TEX: ADDR(48) CNT(1) VALID_PIX + 0 SAMPLE R1, R1.xy0x, t0, s0 + 01 ALU: ADDR(32) CNT(4) + 1 x: MUL R0.x, R0.x, R1.x + y: MUL R0.y, R0.y, R1.y + z: MUL R0.z, R0.z, R1.z + w: MUL R0.w, R0.w, R1.w + 02 EXP_DONE: PIX0, R0 + END_OF_PROGRAM + ******************************************************* */ __attribute__((aligned(GX2_SHADER_ALIGNMENT))) static struct { - u32 cf[16 * 2]; /* first ADDR() * 2 */ - u32 tex[1 * 3]; /* CNT() sum * 3 */ + u32 cf[32 * 2]; /* first ADDR() * 2 */ + u32 alu[(48-32) * 2]; /* (tex ADDR() - alu ADDR()) * 2 */ + u32 tex[1 * 3]; /* tex CNT() * 3 */ } ps_program = { { - TEX(16, 1) VALID_PIX, + TEX(48, 1) VALID_PIX, + ALU(32, 4), EXP_DONE(PIX0, _R0, _X, _Y, _Z, _W) END_OF_PROGRAM }, { - TEX_SAMPLE(_R0, _X, _Y, _Z, _W, _R0, _X, _Y, _0, _X, _t0, _s0) + ALU_MUL(_R0,_X, _R0,_X, _R1,_X), + ALU_MUL(_R0,_Y, _R0,_Y, _R1,_Y), + ALU_MUL(_R0,_Z, _R0,_Z, _R1,_Z), + ALU_LAST + ALU_MUL(_R0,_W, _R0,_W, _R1,_W), + }, + { + TEX_SAMPLE(_R1,_X,_Y,_Z,_W, _R1,_X,_Y,_0,_X, _t0, _s0) } }; @@ -133,11 +134,12 @@ tex_shader_t tex_shader = { { { - .sq_pgm_resources_vs.num_gprs = 3, + .sq_pgm_resources_vs.num_gprs = 4, .sq_pgm_resources_vs.stack_size = 1, + .spi_vs_out_config.vs_export_count = 1, .num_spi_vs_out_id = 1, { - {.semantic_0 = 0x00, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF}, + {.semantic_0 = 0x00, .semantic_1 = 0x01, .semantic_2 = 0xFF, .semantic_3 = 0xFF}, {.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF}, {.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF}, {.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF}, @@ -148,10 +150,10 @@ tex_shader_t tex_shader = {.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF}, {.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF}, }, - .sq_vtx_semantic_clear = ~0x3, - .num_sq_vtx_semantic = 2, + 
.sq_vtx_semantic_clear = ~0x7, + .num_sq_vtx_semantic = 3, { - 0, 1, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0, 1, 2, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, }, .vgt_vertex_reuse_block_cntl.vtx_reuse_depth = 0xE, @@ -164,12 +166,12 @@ tex_shader_t tex_shader = }, { { - .sq_pgm_resources_ps.num_gprs = 1, + .sq_pgm_resources_ps.num_gprs = 2, .sq_pgm_exports_ps.export_mode = 0x2, - .spi_ps_in_control_0.num_interp = 1, + .spi_ps_in_control_0.num_interp = 2, .spi_ps_in_control_0.persp_gradient_ena = 1, .spi_ps_in_control_0.baryc_sample_cntl = spi_baryc_cntl_centers_only, - .num_spi_ps_input_cntl = 1, {{.default_val = 1},}, + .num_spi_ps_input_cntl = 2, {{.semantic = 0, .default_val = 1},{.semantic = 1, .default_val = 1}}, .cb_shader_mask.output0_enable = 0xF, .cb_shader_control.rt0_enable = TRUE, .db_shader_control.z_order = db_z_order_early_z_then_late_z, @@ -182,16 +184,21 @@ tex_shader_t tex_shader = }, .sampler = { "s", GX2_SAMPLER_VAR_TYPE_SAMPLER_2D, 0 }, .attributes = { - .position = { "position", GX2_SHADER_VAR_TYPE_FLOAT2, 0, 0}, - .tex_coord = { "tex_coord_in", GX2_SHADER_VAR_TYPE_FLOAT2, 0, 1} + .color = { "color_in", GX2_SHADER_VAR_TYPE_FLOAT4, 0, 0}, + .position = { "position", GX2_SHADER_VAR_TYPE_FLOAT2, 0, 1}, + .tex_coord = { "tex_coord_in", GX2_SHADER_VAR_TYPE_FLOAT2, 0, 2}, }, .attribute_stream = { + .color = { + 0, 2, 0, GX2_ATTRIB_FORMAT_UNORM_8_8_8_8, + GX2_ATTRIB_INDEX_PER_VERTEX, 0, GX2_COMP_SEL(_X, _Y, _Z, _W), GX2_ENDIAN_SWAP_DEFAULT + }, .position = { - 0, 0, 0, GX2_ATTRIB_FORMAT_FLOAT_32_32, + 1, 0, 0, GX2_ATTRIB_FORMAT_FLOAT_32_32, GX2_ATTRIB_INDEX_PER_VERTEX, 0, GX2_COMP_SEL(_X, _Y, _0, _1), GX2_ENDIAN_SWAP_DEFAULT }, .tex_coord = { - 1, 1, 0, GX2_ATTRIB_FORMAT_FLOAT_32_32, + 2, 1, 0, GX2_ATTRIB_FORMAT_FLOAT_32_32, GX2_ATTRIB_INDEX_PER_VERTEX, 0, GX2_COMP_SEL(_X, 
_Y, _0, _1), GX2_ENDIAN_SWAP_DEFAULT } }, diff --git a/wiiu/tex_shader.h b/wiiu/tex_shader.h index f907c4e4a8..7868707d13 100644 --- a/wiiu/tex_shader.h +++ b/wiiu/tex_shader.h @@ -28,11 +28,13 @@ typedef struct __attribute__((aligned(GX2_VERTEX_BUFFER_ALIGNMENT))) GX2SamplerVar sampler; struct { + GX2AttribVar color; GX2AttribVar position; GX2AttribVar tex_coord; } attributes; struct { + GX2AttribStream color; GX2AttribStream position; GX2AttribStream tex_coord; } attribute_stream;