From 25a9fa68352bb52f6c1b6453c76c8a1ed83b5cd3 Mon Sep 17 00:00:00 2001 From: Erik Abair Date: Sun, 19 Jun 2022 08:06:41 -0700 Subject: [PATCH] Adds emulation. --- CMakeLists.txt | 50 ++++++- src/nv2a_vsh_emulator.c | 170 ++++++++++++++++++++++++ src/nv2a_vsh_emulator.h | 21 +++ src/nv2a_vsh_emulator_execution_state.c | 26 ++++ src/nv2a_vsh_emulator_execution_state.h | 90 +++++++++++++ test/emulator/test_basic.cpp | 86 ++++++++++++ test/emulator/test_main.cpp | 2 + 7 files changed, 441 insertions(+), 4 deletions(-) create mode 100644 src/nv2a_vsh_emulator.c create mode 100644 src/nv2a_vsh_emulator.h create mode 100644 src/nv2a_vsh_emulator_execution_state.c create mode 100644 src/nv2a_vsh_emulator_execution_state.h create mode 100644 test/emulator/test_basic.cpp create mode 100644 test/emulator/test_main.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index c331526..10ff7d5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,6 +25,18 @@ include_directories("${Boost_INCLUDE_DIR}" "${GENERATED_FILES_DIR}") set(CMAKE_CXX_FLAGS_DEBUG "-ggdb -O0") set(CMAKE_CXX_FLAGS_RELEASE "-O3") +add_library( + nv2a_vsh_disassembler + src/nv2a_vsh_disassembler.c + src/nv2a_vsh_disassembler.h +) + +target_include_directories( + nv2a_vsh_disassembler + PRIVATE + src +) + add_library( nv2a_vsh_cpu src/nv2a_vsh_cpu.c @@ -38,17 +50,27 @@ target_include_directories( ) add_library( - nv2a_vsh_disassembler - src/nv2a_vsh_disassembler.c - src/nv2a_vsh_disassembler.h + nv2a_vsh_emulator + src/nv2a_vsh_emulator.c + src/nv2a_vsh_emulator.h + src/nv2a_vsh_emulator_execution_state.c + src/nv2a_vsh_emulator_execution_state.h ) target_include_directories( - nv2a_vsh_disassembler + nv2a_vsh_emulator PRIVATE src ) +target_link_libraries( + nv2a_vsh_emulator + PRIVATE nv2a_vsh_cpu + PRIVATE nv2a_vsh_disassembler +) +add_dependencies(nv2a_vsh_emulator nv2a_vsh_cpu nv2a_vsh_disassembler) + + # Tests ---------------------------------------------- add_executable( operations_tests @@ -88,3 +110,23 @@ 
target_link_libraries(
 )
 add_test(NAME disassembler_tests COMMAND disassembler_tests)
 add_dependencies(disassembler_tests nv2a_vsh_disassembler)
+
+
+add_executable(
+    emulator_tests
+    test/emulator/test_main.cpp
+    test/emulator/test_basic.cpp
+)
+target_include_directories(
+    emulator_tests
+    PRIVATE src
+    PRIVATE test
+)
+target_link_libraries(
+    emulator_tests
+    LINK_PRIVATE
+    nv2a_vsh_emulator
+    ${Boost_LIBRARIES}
+)
+add_test(NAME emulator_tests COMMAND emulator_tests)
+add_dependencies(emulator_tests nv2a_vsh_emulator)
diff --git a/src/nv2a_vsh_emulator.c b/src/nv2a_vsh_emulator.c
new file mode 100644
index 0000000..2394fa2
--- /dev/null
+++ b/src/nv2a_vsh_emulator.c
@@ -0,0 +1,200 @@
+#include "nv2a_vsh_emulator.h"
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+// Copies |in| into |out|, picking each component through |swizzle| and
+// multiplying it by -1 when |negate| is set.
+static inline void set_register(Nv2aVshRegister *out, const Nv2aVshRegister *in,
+                                const uint8_t *swizzle, bool negate) {
+  const float mult = negate ? -1.0f : 1.0f;
+  out->reg.x = mult * in->raw[swizzle[0]];
+  out->reg.y = mult * in->raw[swizzle[1]];
+  out->reg.z = mult * in->raw[swizzle[2]];
+  out->reg.w = mult * in->raw[swizzle[3]];
+}
+
+// Looks up the register referenced by |input| in |state| and stores its
+// swizzled, optionally negated value in |out|. |out| is left untouched when
+// the input type is not readable.
+static inline void fetch_value(Nv2aVshRegister *out,
+                               const Nv2aVshExecutionState *state,
+                               const Nv2aVshInput *input) {
+  const Nv2aVshRegister *in;
+
+  switch (input->type) {
+    case NV2ART_TEMPORARY:
+      // Temporary index 12 is served from the start of the output bank.
+      if (input->index == 12) {
+        in = (const Nv2aVshRegister *)state->output_regs;
+      } else {
+        in = (const Nv2aVshRegister *)(state->temp_regs + input->index * 4);
+      }
+      break;
+
+    case NV2ART_INPUT:
+      in = (const Nv2aVshRegister *)(state->input_regs + input->index * 4);
+      break;
+
+    case NV2ART_CONTEXT: {
+      uint32_t offset = input->index;
+      if (input->is_relative) {
+        // Relative reads are offset by the first component of a0.
+        offset += (int)state->address_reg[0];
+      }
+      in = (const Nv2aVshRegister *)(state->context_regs + offset * 4);
+    } break;
+
+    default:
+      // Never fall through into a register read when asserts are compiled
+      // out; the type gives no valid bank to read from.
+      assert(!"Invalid input type");
+      return;
+  }
+
+  set_register(out, in, input->swizzle, input->is_negated);
+}
+
+// Runs |op| on the already fetched |inputs| and stores the result in each of
+// its output targets, restricted to the components named by the writemask.
+static inline void apply_operation(Nv2aVshExecutionState *state,
+                                   const Nv2aVshOperation *op,
+                                   const Nv2aVshRegister *inputs) {
+  Nv2aVshRegister output = {0};
+
+  switch (op->opcode) {
+    case NV2AOP_NOP:
+      return;
+
+    case NV2AOP_MOV:
+      nv2a_vsh_cpu_mov(&output, inputs);
+      break;
+
+    case NV2AOP_MUL:
+    case NV2AOP_ADD:
+    case NV2AOP_MAD:
+    case NV2AOP_DP3:
+    case NV2AOP_DPH:
+    case NV2AOP_DP4:
+    case NV2AOP_DST:
+    case NV2AOP_MIN:
+    case NV2AOP_MAX:
+    case NV2AOP_SLT:
+    case NV2AOP_SGE:
+    case NV2AOP_ARL:
+    case NV2AOP_RCP:
+    case NV2AOP_RCC:
+    case NV2AOP_RSQ:
+    case NV2AOP_EXP:
+    case NV2AOP_LOG:
+    case NV2AOP_LIT:
+    default:
+      // Not emulated yet: stop here instead of storing a result that was
+      // never computed.
+      assert(!"Opcode not implemented");
+      return;
+  }
+
+  const Nv2aVshOutput *out = op->outputs;
+  for (uint32_t i = 0; i < 2; ++i, ++out) {
+    Nv2aVshRegister *outreg;
+    switch (out->type) {
+      case NV2ART_NONE:
+        continue;
+
+      case NV2ART_OUTPUT:
+        assert(out->index < 13 && "Invalid output register target.");
+        outreg = (Nv2aVshRegister *)(state->output_regs + out->index * 4);
+        break;
+
+      case NV2ART_TEMPORARY:
+        assert(out->index < 12 && "Invalid temp register target.");
+        outreg = (Nv2aVshRegister *)(state->temp_regs + out->index * 4);
+        break;
+
+      case NV2ART_CONTEXT:
+        assert(out->index < 192 && "Invalid context register target.");
+        outreg = (Nv2aVshRegister *)(state->context_regs + out->index * 4);
+        break;
+
+      case NV2ART_ADDRESS:
+        // address_reg already points at the a0 storage. Taking the address of
+        // the member would overwrite the pointer held in |state| instead.
+        assert(state->address_reg && "Address register is not backed.");
+        if (!state->address_reg) {
+          continue;
+        }
+        outreg = (Nv2aVshRegister *)state->address_reg;
+        break;
+
+      case NV2ART_INPUT:
+        assert(!"Attempt to write to input register.");
+        continue;
+
+      default:
+        // Unknown target type: there is no register to write.
+        assert(!"Invalid output register type.");
+        continue;
+    }
+
+    if (out->writemask & NV2AWM_X) {
+      outreg->reg.x = output.reg.x;
+    }
+    if (out->writemask & NV2AWM_Y) {
+      outreg->reg.y = output.reg.y;
+    }
+    if (out->writemask & NV2AWM_Z) {
+      outreg->reg.z = output.reg.z;
+    }
+    if (out->writemask & NV2AWM_W) {
+      outreg->reg.w = output.reg.w;
+    }
+  }
+}
+
+// Applies the MAC and ILU halves of |step| to |state|.
+static inline void apply(Nv2aVshExecutionState *state,
+                         const Nv2aVshStep *step) {
+  // Copy the inputs for both operations first to prevent introducing order
+  // dependent behavior. Slots that are never fetched read as zero.
+  Nv2aVshRegister mac_inputs[3] = {0};
+  Nv2aVshRegister ilu_input = {0};
+  if (step->mac.opcode) {
+    for (uint32_t i = 0; i < 3; ++i) {
+      if (step->mac.inputs[i].type == NV2ART_NONE) {
+        break;
+      }
+      fetch_value(&mac_inputs[i], state, &step->mac.inputs[i]);
+    }
+  }
+  if (step->ilu.opcode) {
+    fetch_value(&ilu_input, state, &step->ilu.inputs[0]);
+  }
+
+  if (step->mac.opcode) {
+    apply_operation(state, &step->mac, mac_inputs);
+  }
+  if (step->ilu.opcode) {
+    apply_operation(state, &step->ilu, &ilu_input);
+  }
+}
+
+void nv2a_vsh_emu_execute(Nv2aVshExecutionState *state,
+                          const Nv2aVshProgram *program) {
+  assert(state);
+  assert(program && program->steps);
+
+  // The step flagged is_final is executed before the loop stops.
+  const Nv2aVshStep *step = program->steps;
+  for (;; ++step) {
+    apply(state, step);
+    if (step->is_final) {
+      break;
+    }
+  }
+}
+
+void nv2a_vsh_emu_apply(Nv2aVshExecutionState *state, const Nv2aVshStep *step) {
+  apply(state, step);
+}
diff --git a/src/nv2a_vsh_emulator.h b/src/nv2a_vsh_emulator.h
new file mode 100644
index 0000000..5876988
--- /dev/null
+++ b/src/nv2a_vsh_emulator.h
@@ -0,0 +1,21 @@
+#ifndef NV2A_VSH_CPU_SRC_NV2A_VSH_EMULATOR_H_
+#define NV2A_VSH_CPU_SRC_NV2A_VSH_EMULATOR_H_
+
+#include "nv2a_vsh_emulator_execution_state.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Emulates the given program by applying each step to the given state.
+void nv2a_vsh_emu_execute(Nv2aVshExecutionState *state,
+                          const Nv2aVshProgram *program);
+
+// Emulates the given step by applying it to the given state.
+void nv2a_vsh_emu_apply(Nv2aVshExecutionState *state, const Nv2aVshStep *step);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // NV2A_VSH_CPU_SRC_NV2A_VSH_EMULATOR_H_
diff --git a/src/nv2a_vsh_emulator_execution_state.c b/src/nv2a_vsh_emulator_execution_state.c
new file mode 100644
index 0000000..b84ce32
--- /dev/null
+++ b/src/nv2a_vsh_emulator_execution_state.c
@@ -0,0 +1,40 @@
+#include "nv2a_vsh_emulator_execution_state.h"
+
+#include <string.h>
+
+// Zeroes |state| and returns a view whose register banks all point into it.
+Nv2aVshExecutionState nv2a_vsh_emu_initialize_full_execution_state(
+    Nv2aVshCPUFullExecutionState *state) {
+  memset(state, 0, sizeof(*state));
+
+  // Designated initializers keep every bank, including a0, inside the
+  // initializer so that no pointer is silently left NULL.
+  Nv2aVshExecutionState ret = {
+      .input_regs = (float *)state->input_regs,
+      .output_regs = (float *)state->output_regs,
+      .temp_regs = (float *)state->temp_regs,
+      .context_regs = (float *)state->context_regs,
+      .address_reg = (float *)&state->address_reg,
+  };
+  return ret;
+}
+
+// Zeroes |state|, attaches the caller owned |context_regs| and |context_dirty|
+// arrays to it and returns a view over the result. The output bank of the
+// returned view is whatever the zeroed |state| holds, i.e. a null pointer.
+Nv2aVshExecutionState nv2a_vsh_emu_initialize_xss_execution_state(
+    Nv2aVshCPUXVSSExecutionState *state, float *context_regs,
+    bool *context_dirty) {
+  memset(state, 0, sizeof(*state));
+  state->context_regs = context_regs;
+  state->context_dirty = context_dirty;
+
+  Nv2aVshExecutionState ret = {
+      .input_regs = (float *)state->input_regs,
+      .output_regs = state->output_regs,
+      .temp_regs = (float *)state->temp_regs,
+      .context_regs = state->context_regs,
+      .address_reg = (float *)&state->address_reg,
+  };
+  return ret;
+}
diff --git a/src/nv2a_vsh_emulator_execution_state.h b/src/nv2a_vsh_emulator_execution_state.h
new file mode 100644
index 0000000..03bb8cf
--- /dev/null
+++ b/src/nv2a_vsh_emulator_execution_state.h
@@ -0,0 +1,90 @@
+#ifndef NV2A_VSH_CPU_SRC_NV2A_VSH_EMU_EXECUTION_STATE_H_
+#define NV2A_VSH_CPU_SRC_NV2A_VSH_EMU_EXECUTION_STATE_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "nv2a_vsh_cpu.h"
+#include "nv2a_vsh_disassembler.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef enum Nv2aVshOutputRegisterName_ {
+  NV2AOR_POS = 0,
+  NV2AOR_DIFFUSE = 3,
+  NV2AOR_SPECULAR = 4,
+  NV2AOR_FOG_COORD = 5,
+  NV2AOR_POINT_SIZE = 6,
+  NV2AOR_BACK_DIFFUSE = 7,
+  NV2AOR_BACK_SPECULAR = 8,
+  NV2AOR_TEX0 = 9,
+  NV2AOR_TEX1 = 10,
+  NV2AOR_TEX2 = 11,
+  NV2AOR_TEX3 = 12,
+} Nv2aVshOutputRegisterName;
+
+// Models the full execution context of the nv2a. Each entry is a 4-component
+// float.
+typedef struct Nv2aVshExecutionState_ {
+  // v0-v15
+  float *input_regs;
+  // o0 - o12, 1 and 2 will never be written to.
+  float *output_regs;
+  // r0-r11
+  float *temp_regs;
+  // c0-c191
+  float *context_regs;
+  // a0
+  float *address_reg;
+} Nv2aVshExecutionState;
+
+// Stores the entire execution state for full software-based nv2a vertex shader
+// emulation.
+typedef struct Nv2aVshCPUFullExecutionState_ {
+  Nv2aVshRegister input_regs[16];
+  Nv2aVshRegister output_regs[13];
+  Nv2aVshRegister temp_regs[12];
+  Nv2aVshRegister context_regs[192];
+  Nv2aVshRegister address_reg;
+} Nv2aVshCPUFullExecutionState;
+
+// Models a partial execution context where the context registers are held
+// externally. Intended for use in vertex state shaders that just write to the
+// context registers.
+typedef struct Nv2aVshCPUXVSSExecutionState_ {
+  // Only v0 is used.
+  Nv2aVshRegister input_regs[1];
+
+  // No output registers are used.
+  float *output_regs;
+
+  Nv2aVshRegister temp_regs[11];  // NOTE(review): r0-r11 is 12 regs - confirm.
+
+  // Context regs should be initialized to a flat array of 192 registers.
+  float *context_regs;
+
+  // Optional array of 192 bools that will be set when writing to entries in
+  // context_regs.
+  bool *context_dirty;
+
+  Nv2aVshRegister address_reg;
+} Nv2aVshCPUXVSSExecutionState;
+
+// Initializes the given Nv2aVshCPUFullExecutionState and returns an
+// Nv2aVshExecutionState appropriate for use with nv2a_vsh_cpu_* functions.
+Nv2aVshExecutionState nv2a_vsh_emu_initialize_full_execution_state(
+    Nv2aVshCPUFullExecutionState *state);
+
+// Initializes the given Nv2aVshCPUXVSSExecutionState and returns an
+// Nv2aVshExecutionState appropriate for use with nv2a_vsh_cpu_* functions.
+Nv2aVshExecutionState nv2a_vsh_emu_initialize_xss_execution_state(
+    Nv2aVshCPUXVSSExecutionState *state, float *context_regs,
+    bool *context_dirty);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // NV2A_VSH_CPU_SRC_NV2A_VSH_EMU_EXECUTION_STATE_H_
diff --git a/test/emulator/test_basic.cpp b/test/emulator/test_basic.cpp
new file mode 100644
index 0000000..68c7cdc
--- /dev/null
+++ b/test/emulator/test_basic.cpp
@@ -0,0 +1,72 @@
+#include <boost/test/unit_test.hpp>
+
+#include <cstring>
+
+#include "nv2a_vsh_emulator.h"
+
+// Checks that the four floats of register |index| in |bank| match |expected|.
+#define CHECK_REGISTER(bank, index, expected)          \
+  do {                                                 \
+    const float *actual_reg_ = (bank) + (index) * 4;   \
+    BOOST_TEST(actual_reg_[0] == (expected)[0]);       \
+    BOOST_TEST(actual_reg_[1] == (expected)[1]);       \
+    BOOST_TEST(actual_reg_[2] == (expected)[2]);       \
+    BOOST_TEST(actual_reg_[3] == (expected)[3]);       \
+  } while (0)
+
+// Resets |input| to an unused slot with the identity (xyzw) swizzle.
+static void clear_input(Nv2aVshInput *input) {
+  memset(input, 0, sizeof(*input));
+  input->type = NV2ART_NONE;
+  input->swizzle[0] = NV2ASW_X;
+  input->swizzle[1] = NV2ASW_Y;
+  input->swizzle[2] = NV2ASW_Z;
+  input->swizzle[3] = NV2ASW_W;
+}
+
+// Resets |op| to a NOP with no inputs and no outputs.
+static void clear_operation(Nv2aVshOperation *op) {
+  memset(op->outputs, 0, sizeof(op->outputs));
+  op->opcode = NV2AOP_NOP;
+  for (Nv2aVshInput &input : op->inputs) {
+    clear_input(&input);
+  }
+  for (Nv2aVshOutput &output : op->outputs) {
+    output.type = NV2ART_NONE;
+  }
+}
+
+// Resets |out| to a non-final step whose MAC and ILU halves are both NOPs.
+static void clear_step(Nv2aVshStep *out) {
+  out->is_final = false;
+  clear_operation(&out->mac);
+  clear_operation(&out->ilu);
+}
+
+BOOST_AUTO_TEST_SUITE(basic_operation_suite)
+
+BOOST_AUTO_TEST_CASE(step_trivial) {
+  Nv2aVshCPUFullExecutionState full_state;
+  Nv2aVshExecutionState state =
+      nv2a_vsh_emu_initialize_full_execution_state(&full_state);
+  full_state.input_regs[11].reg.x = 123.0f;
+  full_state.input_regs[11].reg.y = -456.0f;
+  full_state.input_regs[11].reg.z = 0.789f;
+  full_state.input_regs[11].reg.w = 32.64f;
+
+  // MOV oT2.xyzw, v11
+  Nv2aVshStep step;
+  clear_step(&step);
+  step.mac.opcode = NV2AOP_MOV;
+  step.mac.outputs[0].type = NV2ART_OUTPUT;
+  step.mac.outputs[0].index = NV2AOR_TEX2;
+  step.mac.outputs[0].writemask = NV2AWM_XYZW;
+  step.mac.inputs[0].type = NV2ART_INPUT;
+  step.mac.inputs[0].index = 11;
+
+  nv2a_vsh_emu_apply(&state, &step);
+
+  CHECK_REGISTER(state.output_regs, NV2AOR_TEX2, full_state.input_regs[11].raw);
+}
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/test/emulator/test_main.cpp b/test/emulator/test_main.cpp
new file mode 100644
index 0000000..ea2b541
--- /dev/null
+++ b/test/emulator/test_main.cpp
@@ -0,0 +1,2 @@
+#define BOOST_TEST_MODULE EmulatorTests
+#include <boost/test/unit_test.hpp>