diff --git a/src/nv2a_vsh_emulator.c b/src/nv2a_vsh_emulator.c
index 1af1171..2932532 100644
--- a/src/nv2a_vsh_emulator.c
+++ b/src/nv2a_vsh_emulator.c
@@ -37,8 +37,7 @@ static inline void set_register(float *out, const float *in,
   out[3] = mult * in[swizzle[3]];
 }
 
-static inline void fetch_value(float *out,
-                               const Nv2aVshExecutionState *state,
+static inline void fetch_value(float *out, const Nv2aVshExecutionState *state,
                                const Nv2aVshInput *input) {
   const float *in;
 
@@ -125,12 +124,13 @@ static inline void apply_operation(Nv2aVshExecutionState *state,
   }
 }
 
-static inline void apply(Nv2aVshExecutionState *state,
-                         const Nv2aVshStep *step) {
+// Fetches the inputs for the step's MAC and ILU operations into the given
+// buffers; callers pass 3 * 4 floats for mac_inputs and 4 for ilu_input.
+static inline void prepare_inputs(float *mac_inputs, float *ilu_input,
+                                  Nv2aVshExecutionState *state,
+                                  const Nv2aVshStep *step) {
   // Copy the inputs for both operations first to prevent introducing order
   // dependent behavior.
-  float mac_inputs[3 * 4];
-  float ilu_input[4];
   if (step->mac.opcode) {
     for (uint32_t i = 0; i < 3; ++i) {
       if (step->mac.inputs[i].type == NV2ART_NONE) {
@@ -142,6 +142,13 @@ static inline void apply(Nv2aVshExecutionState *state,
   if (step->ilu.opcode) {
     fetch_value(ilu_input, state, &step->ilu.inputs[0]);
   }
+}
+
+static inline void apply(Nv2aVshExecutionState *state,
+                         const Nv2aVshStep *step) {
+  float mac_inputs[3 * 4];
+  float ilu_input[4];
+  prepare_inputs(mac_inputs, ilu_input, state, step);
 
   if (step->mac.opcode) {
     apply_operation(state, &step->mac, mac_inputs);
@@ -151,6 +158,34 @@ static inline void apply(Nv2aVshExecutionState *state,
   }
 }
 
+// Applies the given step to the state, setting context_dirty[index] for each
+// context register that the MAC or ILU operation lists as an output.
+static inline void apply_track_context_writes(Nv2aVshExecutionState *state,
+                                              const Nv2aVshStep *step,
+                                              bool *context_dirty) {
+  float mac_inputs[3 * 4];
+  float ilu_input[4];
+  prepare_inputs(mac_inputs, ilu_input, state, step);
+
+  if (step->mac.opcode) {
+    apply_operation(state, &step->mac, mac_inputs);
+    // Check each output on its own so a context write is never skipped.
+    for (uint32_t i = 0; i < 2; ++i) {
+      if (step->mac.outputs[i].type == NV2ART_CONTEXT) {
+        context_dirty[step->mac.outputs[i].index] = true;
+      }
+    }
+  }
+  if (step->ilu.opcode) {
+    apply_operation(state, &step->ilu, ilu_input);
+    for (uint32_t i = 0; i < 2; ++i) {
+      if (step->ilu.outputs[i].type == NV2ART_CONTEXT) {
+        context_dirty[step->ilu.outputs[i].index] = true;
+      }
+    }
+  }
+}
+
 void nv2a_vsh_emu_execute(Nv2aVshExecutionState *state,
                           const Nv2aVshProgram *program) {
   assert(state);
@@ -166,6 +201,23 @@ void nv2a_vsh_emu_execute(Nv2aVshExecutionState *state,
   }
 }
 
+void nv2a_vsh_emu_execute_track_context_writes(Nv2aVshExecutionState *state,
+                                               const Nv2aVshProgram *program,
+                                               bool *context_dirty) {
+  assert(state);
+  assert(program && program->steps);
+  assert(context_dirty);
+
+  const Nv2aVshStep *step = program->steps;
+  while (true) {
+    apply_track_context_writes(state, step, context_dirty);
+    if (step->is_final) {
+      break;
+    }
+    ++step;
+  }
+}
+
 void nv2a_vsh_emu_apply(Nv2aVshExecutionState *state, const Nv2aVshStep *step) {
   apply(state, step);
 }
diff --git a/src/nv2a_vsh_emulator.h b/src/nv2a_vsh_emulator.h
index 5876988..653aa33 100644
--- a/src/nv2a_vsh_emulator.h
+++ b/src/nv2a_vsh_emulator.h
@@ -11,6 +11,14 @@ extern "C" {
 void nv2a_vsh_emu_execute(Nv2aVshExecutionState *state,
                           const Nv2aVshProgram *program);
 
+// Applies each step of the given program to the given state and sets
+// context_dirty[i] to true for every context register i that a step writes.
+// context_dirty must be non-NULL and hold 192 bools; entries are only ever
+// set, never cleared, so reset the array beforehand if that matters.
+void nv2a_vsh_emu_execute_track_context_writes(Nv2aVshExecutionState *state,
+                                               const Nv2aVshProgram *program,
+                                               bool *context_dirty);
+
 // Emulates the given step by applying it to the given state.
 void nv2a_vsh_emu_apply(Nv2aVshExecutionState *state, const Nv2aVshStep *step);
 
diff --git a/src/nv2a_vsh_emulator_execution_state.c b/src/nv2a_vsh_emulator_execution_state.c
index 0a51ba9..8149a5f 100644
--- a/src/nv2a_vsh_emulator_execution_state.c
+++ b/src/nv2a_vsh_emulator_execution_state.c
@@ -8,19 +8,17 @@ Nv2aVshExecutionState nv2a_vsh_emu_initialize_full_execution_state(
   Nv2aVshExecutionState ret = {
       (float *)state->input_regs, (float *)state->output_regs,
       (float *)state->temp_regs, (float *)state->context_regs,
-      (float *)&state->address_reg, NULL};
+      (float *)&state->address_reg};
   return ret;
 }
 
 Nv2aVshExecutionState nv2a_vsh_emu_initialize_xss_execution_state(
-    Nv2aVshCPUXVSSExecutionState *state, float *context_regs,
-    bool *context_dirty) {
+    Nv2aVshCPUXVSSExecutionState *state, float *context_regs) {
   memset(state, 0, sizeof(*state));
   state->context_regs = context_regs;
-  state->context_dirty = context_dirty;
   Nv2aVshExecutionState ret = {
       (float *)state->input_regs, (float *)state->output_regs,
       (float *)state->temp_regs, (float *)state->context_regs,
-      (float *)&state->address_reg, state->context_dirty};
+      (float *)&state->address_reg};
   return ret;
 }
diff --git a/src/nv2a_vsh_emulator_execution_state.h b/src/nv2a_vsh_emulator_execution_state.h
index f7cf042..f33d6e7 100644
--- a/src/nv2a_vsh_emulator_execution_state.h
+++ b/src/nv2a_vsh_emulator_execution_state.h
@@ -69,10 +69,6 @@ typedef struct Nv2aVshCPUXVSSExecutionState_ {
   // Context regs should be initialized to a flat array of 192 registers.
   float *context_regs;
 
-  // Optional array of 192 bools that will be set when writing to entries in
-  // context_regs.
-  bool *context_dirty;
-
   float address_reg[4];
 } Nv2aVshCPUXVSSExecutionState;
 
@@ -84,8 +80,7 @@ Nv2aVshExecutionState nv2a_vsh_emu_initialize_full_execution_state(
 // Initializes the given Nv2aVshCPUXVSSExecutionState and returns an
 // Nv2aVshExecutionState appropriate for use with nv2a_vsh_cpu_* functions.
 Nv2aVshExecutionState nv2a_vsh_emu_initialize_xss_execution_state(
-    Nv2aVshCPUXVSSExecutionState *state, float *context_regs,
-    bool *context_dirty);
+    Nv2aVshCPUXVSSExecutionState *state, float *context_regs);
 
 #ifdef __cplusplus
 }  // extern "C"
diff --git a/test/emulator/test_basic.cpp b/test/emulator/test_basic.cpp
index 3fc0fce..2183b72 100644
--- a/test/emulator/test_basic.cpp
+++ b/test/emulator/test_basic.cpp
@@ -2,9 +2,9 @@
 
 #include "nv2a_vsh_emulator.h"
 
-#define CHECK_REGISTER(bank, index, actual) \
-  do { \
-    float *expected = bank + index * 4; \
+#define CHECK_REGISTER(bank, index, actual)   \
+  do {                                        \
+    float *expected = (bank) + (index) * 4;   \
     BOOST_TEST((expected)[0] == (actual)[0]); \
     BOOST_TEST((expected)[1] == (actual)[1]); \
     BOOST_TEST((expected)[2] == (actual)[2]); \
@@ -84,4 +84,41 @@ BOOST_AUTO_TEST_CASE(step_trivial) {
   CHECK_REGISTER(state.output_regs, NV2AOR_TEX2, &full_state.input_regs[reg]);
 }
 
+BOOST_AUTO_TEST_CASE(program_context_tracked) {
+  Nv2aVshCPUFullExecutionState full_state;
+  Nv2aVshExecutionState state =
+      nv2a_vsh_emu_initialize_full_execution_state(&full_state);
+  uint32_t reg = 11 * 4;
+  full_state.input_regs[reg + 0] = 123.0f;
+  full_state.input_regs[reg + 1] = -456.0f;
+  full_state.input_regs[reg + 2] = 0.789f;
+  full_state.input_regs[reg + 3] = 32.64f;
+
+  // MOV c1.xyzw, v11
+  Nv2aVshStep steps[1];
+  Nv2aVshStep *step = &steps[0];
+  clear_step(step);
+  step->mac.opcode = NV2AOP_MOV;
+  step->mac.outputs[0].type = NV2ART_CONTEXT;
+  step->mac.outputs[0].index = 1;
+  step->mac.outputs[0].writemask = NV2AWM_XYZW;
+  step->mac.inputs[0].type = NV2ART_INPUT;
+  step->mac.inputs[0].index = 11;
+  step->is_final = true;
+
+  Nv2aVshProgram program;
+  program.steps = steps;
+
+  bool context_dirty[192] = {false};
+  nv2a_vsh_emu_execute_track_context_writes(&state, &program, context_dirty);
+
+  CHECK_REGISTER(state.context_regs, 1, &full_state.input_regs[reg]);
+  BOOST_TEST(!context_dirty[0]);
+  BOOST_TEST(context_dirty[1]);
+  for (uint32_t i = 2; i < 192; ++i) {
+    BOOST_TEST_INFO(i);
+    BOOST_TEST(!context_dirty[i]);
+  }
+}
+
 BOOST_AUTO_TEST_SUITE_END()