Adds emulation.

This commit is contained in:
Erik Abair 2022-06-19 08:06:41 -07:00
parent a5debc73de
commit 25a9fa6835
7 changed files with 441 additions and 4 deletions

View File

@ -25,6 +25,18 @@ include_directories("${Boost_INCLUDE_DIR}" "${GENERATED_FILES_DIR}")
set(CMAKE_CXX_FLAGS_DEBUG "-ggdb -O0")
set(CMAKE_CXX_FLAGS_RELEASE "-O3")
# Library target built from the disassembler source and header in src/.
add_library(
nv2a_vsh_disassembler
src/nv2a_vsh_disassembler.c
src/nv2a_vsh_disassembler.h
)
# PRIVATE: src/ is on the include path only while compiling this target; it is
# not propagated to targets that link against it.
target_include_directories(
nv2a_vsh_disassembler
PRIVATE
src
)
add_library(
nv2a_vsh_cpu
src/nv2a_vsh_cpu.c
@ -38,17 +50,27 @@ target_include_directories(
)
add_library(
nv2a_vsh_disassembler
src/nv2a_vsh_disassembler.c
src/nv2a_vsh_disassembler.h
nv2a_vsh_emulator
src/nv2a_vsh_emulator.c
src/nv2a_vsh_emulator.h
src/nv2a_vsh_emulator_execution_state.c
src/nv2a_vsh_emulator_execution_state.h
)
target_include_directories(
nv2a_vsh_disassembler
nv2a_vsh_emulator
PRIVATE
src
)
# The emulator links against both the CPU and the disassembler libraries.
target_link_libraries(
nv2a_vsh_emulator
PRIVATE nv2a_vsh_cpu
PRIVATE nv2a_vsh_disassembler
)
# Explicit build-order dependency on the two libraries linked above.
add_dependencies(nv2a_vsh_emulator nv2a_vsh_cpu nv2a_vsh_disassembler)
# Tests ----------------------------------------------
add_executable(
operations_tests
@ -88,3 +110,23 @@ target_link_libraries(
)
add_test(NAME disassembler_tests COMMAND disassembler_tests)
add_dependencies(disassembler_tests nv2a_vsh_disassembler)
# Boost.Test executable exercising the emulator library.
add_executable(
emulator_tests
test/emulator/test_main.cpp
test/emulator/test_basic.cpp
)
# The tests include headers from both the library sources and the test tree.
target_include_directories(
emulator_tests
PRIVATE src
PRIVATE test
)
target_link_libraries(
emulator_tests
LINK_PRIVATE
nv2a_vsh_emulator
${Boost_LIBRARIES}
)
# Register the executable with CTest and make sure the emulator library is
# built before it.
add_test(NAME emulator_tests COMMAND emulator_tests)
add_dependencies(emulator_tests nv2a_vsh_emulator)

170
src/nv2a_vsh_emulator.c Normal file
View File

@ -0,0 +1,170 @@
#include "nv2a_vsh_emulator.h"
#include <assert.h>
#include <string.h>
// Writes a swizzled, optionally sign-flipped copy of `in` into `out`.
//
// `swizzle` must point to four component indices; entry N selects which
// component of `in` is stored into component N (x, y, z, w) of `out`.
// When `negate` is set every stored component is multiplied by -1.
static inline void set_register(Nv2aVshRegister *out, const Nv2aVshRegister *in,
                                const uint8_t *swizzle, bool negate) {
  // Negation is expressed as a multiplication so that both the negated and
  // the plain path go through the same arithmetic.
  const float sign = negate ? -1.0f : 1.0f;

  const uint8_t sel_x = swizzle[0];
  out->reg.x = sign * in->raw[sel_x];

  const uint8_t sel_y = swizzle[1];
  out->reg.y = sign * in->raw[sel_y];

  const uint8_t sel_z = swizzle[2];
  out->reg.z = sign * in->raw[sel_z];

  const uint8_t sel_w = swizzle[3];
  out->reg.w = sign * in->raw[sel_w];
}
// Loads the register referenced by `input` from `state` into `out`, applying
// the input's swizzle and negation.
//
// - Temporary index 12 is not backed by the temp bank; it reads the first
//   entry of the output bank instead.
// - Context reads may be relative: when `input->is_relative` is set, the x
//   component of the address register (truncated to an integer) is added to
//   the context index.
//
// An invalid input type or a context index outside of c0-c191 trips an assert
// in debug builds; in release builds an all-zero register is used as the
// source so that no out-of-bounds memory is read.
// NOTE(review): zero is a defined stand-in only; what the hardware returns for
// out-of-range context reads is not established by this file - confirm.
static inline void fetch_value(Nv2aVshRegister *out,
                               const Nv2aVshExecutionState *state,
                               const Nv2aVshInput *input) {
  Nv2aVshRegister zero;
  memset(&zero, 0, sizeof(zero));

  // Defaults to the zero register so `in` is never indeterminate.
  const Nv2aVshRegister *in = &zero;

  switch (input->type) {
    default:
      assert(!"Invalid input type");
      break;

    case NV2ART_TEMPORARY:
      if (input->index == 12) {
        in = (const Nv2aVshRegister *)state->output_regs;
      } else {
        assert(input->index < 12 && "Invalid temp register source.");
        in = (const Nv2aVshRegister *)(state->temp_regs + input->index * 4);
      }
      break;

    case NV2ART_INPUT:
      in = (const Nv2aVshRegister *)(state->input_regs + input->index * 4);
      break;

    case NV2ART_CONTEXT: {
      // Signed arithmetic: the address register may hold a negative offset,
      // which must not wrap around to a huge unsigned index.
      int32_t offset = (int32_t)input->index;
      if (input->is_relative) {
        offset += (int32_t)state->address_reg[0];
      }
      const bool in_range = offset >= 0 && offset < 192;
      assert(in_range && "Context register read out of range.");
      if (in_range) {
        in = (const Nv2aVshRegister *)(state->context_regs + offset * 4);
      }
    } break;
  }

  set_register(out, in, input->swizzle, input->is_negated);
}
// Executes a single operation and stores its result into the operation's
// output targets, honoring each target's writemask.
//
// `inputs` points to the already fetched (swizzled/negated) input registers
// for the operation. NV2AOP_NOP returns immediately. Only NV2AOP_MOV is
// emulated at present; any other opcode trips an assert in debug builds and
// is skipped in release builds so that an indeterminate result is never
// written to the register banks.
static inline void apply_operation(Nv2aVshExecutionState *state,
                                   const Nv2aVshOperation *op,
                                   const Nv2aVshRegister *inputs) {
  Nv2aVshRegister output;
  switch (op->opcode) {
    case NV2AOP_NOP:
      return;

    case NV2AOP_MOV:
      nv2a_vsh_cpu_mov(&output, inputs);
      break;

    case NV2AOP_MUL:
    case NV2AOP_ADD:
    case NV2AOP_MAD:
    case NV2AOP_DP3:
    case NV2AOP_DPH:
    case NV2AOP_DP4:
    case NV2AOP_DST:
    case NV2AOP_MIN:
    case NV2AOP_MAX:
    case NV2AOP_SLT:
    case NV2AOP_SGE:
    case NV2AOP_ARL:
    case NV2AOP_RCP:
    case NV2AOP_RCC:
    case NV2AOP_RSQ:
    case NV2AOP_EXP:
    case NV2AOP_LOG:
    case NV2AOP_LIT:
    default:
      // `output` was never computed; storing it would read uninitialized
      // memory.
      assert(!"Opcode is not implemented.");
      return;
  }

  // Each operation has two output slots.
  const Nv2aVshOutput *out = op->outputs;
  for (uint32_t i = 0; i < 2; ++i, ++out) {
    Nv2aVshRegister *outreg;
    switch (out->type) {
      case NV2ART_INPUT:
        assert(!"Attempt to write to input register.");
        continue;

      case NV2ART_NONE:
        continue;

      case NV2ART_OUTPUT:
        assert(out->index < 13 && "Invalid output register target.");
        outreg = (Nv2aVshRegister *)(state->output_regs + out->index * 4);
        break;

      case NV2ART_TEMPORARY:
        assert(out->index < 12 && "Invalid temp register target.");
        outreg = (Nv2aVshRegister *)(state->temp_regs + out->index * 4);
        break;

      case NV2ART_CONTEXT:
        assert(out->index < 192 && "Invalid context register target.");
        outreg = (Nv2aVshRegister *)(state->context_regs + out->index * 4);
        break;

      case NV2ART_ADDRESS:
        // `address_reg` is itself a pointer to the register storage; write
        // through it rather than over the pointer member.
        if (!state->address_reg) {
          assert(!"Address register storage is missing.");
          continue;
        }
        outreg = (Nv2aVshRegister *)state->address_reg;
        break;

      default:
        assert(!"Invalid output type");
        continue;
    }

    if (out->writemask & NV2AWM_X) {
      outreg->reg.x = output.reg.x;
    }
    if (out->writemask & NV2AWM_Y) {
      outreg->reg.y = output.reg.y;
    }
    if (out->writemask & NV2AWM_Z) {
      outreg->reg.z = output.reg.z;
    }
    if (out->writemask & NV2AWM_W) {
      outreg->reg.w = output.reg.w;
    }
  }
}
// Applies one step (a MAC operation plus an ILU operation) to `state`.
//
// The inputs of both operations are fetched before either operation writes
// its result, so the outcome does not depend on which operation runs first.
// An operation whose opcode is zero is skipped entirely.
static inline void apply(Nv2aVshExecutionState *state,
                         const Nv2aVshStep *step) {
  const Nv2aVshOperation *mac = &step->mac;
  const Nv2aVshOperation *ilu = &step->ilu;
  const bool run_mac = mac->opcode != 0;
  const bool run_ilu = ilu->opcode != 0;

  Nv2aVshRegister mac_args[3];
  Nv2aVshRegister ilu_arg;

  // Phase 1: snapshot every input.
  if (run_mac) {
    // MAC inputs are packed from the front; the first unused slot ends them.
    for (uint32_t slot = 0;
         slot < 3 && mac->inputs[slot].type != NV2ART_NONE; ++slot) {
      fetch_value(&mac_args[slot], state, &mac->inputs[slot]);
    }
  }
  if (run_ilu) {
    fetch_value(&ilu_arg, state, &ilu->inputs[0]);
  }

  // Phase 2: execute and store, MAC first.
  if (run_mac) {
    apply_operation(state, mac, mac_args);
  }
  if (run_ilu) {
    apply_operation(state, ilu, &ilu_arg);
  }
}
// Runs `program` against `state`, one step at a time.
//
// Steps are applied in order starting at `program->steps`; execution stops
// after the first step whose `is_final` flag is set has been applied. Both
// `state` and `program` (including its step array) must be non-NULL.
void nv2a_vsh_emu_execute(Nv2aVshExecutionState *state,
                          const Nv2aVshProgram *program) {
  assert(state);
  assert(program && program->steps);

  for (const Nv2aVshStep *current = program->steps;; ++current) {
    apply(state, current);
    if (current->is_final) {
      return;
    }
  }
}
// Applies a single `step` to `state`.
//
// Both pointers must be non-NULL; this is checked with assert, matching
// nv2a_vsh_emu_execute.
void nv2a_vsh_emu_apply(Nv2aVshExecutionState *state, const Nv2aVshStep *step) {
  assert(state);
  assert(step);
  apply(state, step);
}

21
src/nv2a_vsh_emulator.h Normal file
View File

@ -0,0 +1,21 @@
#ifndef NV2A_VSH_CPU_SRC_NV2A_VSH_EMULATOR_H_
#define NV2A_VSH_CPU_SRC_NV2A_VSH_EMULATOR_H_
#include "nv2a_vsh_emulator_execution_state.h"
#ifdef __cplusplus
extern "C" {
#endif
// Emulates the given program by applying each step to the given state.
//
// Steps are applied in order starting at program->steps, and execution stops
// after the first step whose is_final flag is set. `state`, `program` and
// program->steps must be non-NULL (checked with assert).
void nv2a_vsh_emu_execute(Nv2aVshExecutionState *state,
const Nv2aVshProgram *program);
// Emulates the given step by applying it to the given state.
//
// The inputs of the step's MAC and ILU operations are both read before either
// result is written back to `state`.
void nv2a_vsh_emu_apply(Nv2aVshExecutionState *state, const Nv2aVshStep *step);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // NV2A_VSH_CPU_SRC_NV2A_VSH_EMULATOR_H_

View File

@ -0,0 +1,26 @@
#include "nv2a_vsh_emulator_execution_state.h"
#include <string.h>
// Zeroes `state` and returns an Nv2aVshExecutionState whose pointers refer to
// the register storage inside `state`.
//
// The returned value does not own any memory; it is only valid for as long as
// `state` is.
Nv2aVshExecutionState nv2a_vsh_emu_initialize_full_execution_state(
    Nv2aVshCPUFullExecutionState *state) {
  memset(state, 0, sizeof(*state));

  // Every member is named explicitly so that none (notably address_reg) can
  // be silently left NULL.
  Nv2aVshExecutionState ret = {
      .input_regs = (float *)state->input_regs,
      .output_regs = (float *)state->output_regs,
      .temp_regs = (float *)state->temp_regs,
      .context_regs = (float *)state->context_regs,
      .address_reg = (float *)&state->address_reg,
  };
  return ret;
}
// Zeroes `state`, attaches the externally held `context_regs` and
// `context_dirty` arrays to it, and returns an Nv2aVshExecutionState whose
// pointers refer to the storage reachable from `state`.
//
// `state->output_regs` is left NULL by the zeroing, so the returned
// `output_regs` pointer is NULL as well. The returned value does not own any
// memory; it is only valid for as long as `state` and `context_regs` are.
Nv2aVshExecutionState nv2a_vsh_emu_initialize_xss_execution_state(
    Nv2aVshCPUXVSSExecutionState *state, float *context_regs,
    bool *context_dirty) {
  memset(state, 0, sizeof(*state));
  state->context_regs = context_regs;
  state->context_dirty = context_dirty;

  // Every member is named explicitly so that none (notably address_reg) can
  // be silently left NULL.
  Nv2aVshExecutionState ret = {
      .input_regs = (float *)state->input_regs,
      .output_regs = (float *)state->output_regs,
      .temp_regs = (float *)state->temp_regs,
      .context_regs = (float *)state->context_regs,
      .address_reg = (float *)&state->address_reg,
  };
  return ret;
}

View File

@ -0,0 +1,90 @@
#ifndef NV2A_VSH_CPU_SRC_NV2A_VSH_EMU_EXECUTION_STATE_H_
#define NV2A_VSH_CPU_SRC_NV2A_VSH_EMU_EXECUTION_STATE_H_
#include <stdbool.h>
#include <stdint.h>
#include "nv2a_vsh_cpu.h"
#include "nv2a_vsh_disassembler.h"
#ifdef __cplusplus
extern "C" {
#endif
// Symbolic names for entries of the output register bank. Each value is a
// register index (not a float offset) into Nv2aVshExecutionState::output_regs.
// Indices 1 and 2 have no name here.
typedef enum Nv2aVshOutputRegisterName_ {
NV2AOR_POS = 0,
NV2AOR_DIFFUSE = 3,
NV2AOR_SPECULAR = 4,
NV2AOR_FOG_COORD = 5,
NV2AOR_POINT_SIZE = 6,
NV2AOR_BACK_DIFFUSE = 7,
NV2AOR_BACK_SPECULAR = 8,
NV2AOR_TEX0 = 9,
NV2AOR_TEX1 = 10,
NV2AOR_TEX2 = 11,
NV2AOR_TEX3 = 12,
} Nv2aVshOutputRegisterName;
// Models the full execution context of the nv2a. Each entry is a 4-component
// float.
//
// Every member points at a flat float array in which register N occupies the
// four floats starting at offset N * 4. The struct only references storage; it
// is filled in by the nv2a_vsh_emu_initialize_*_execution_state functions.
typedef struct Nv2aVshExecutionState_ {
// v0-v15
float *input_regs;
// o0 - o12, 1 and 2 will never be written to.
float *output_regs;
// r0-r11
float *temp_regs;
// c0-c191
float *context_regs;
// a0
float *address_reg;
} Nv2aVshExecutionState;
// Stores the entire execution state for full software-based nv2a vertex shader
// emulation.
//
// All register banks are held inline; use
// nv2a_vsh_emu_initialize_full_execution_state to zero them and obtain an
// Nv2aVshExecutionState that points into this struct.
typedef struct Nv2aVshCPUFullExecutionState_ {
// v0-v15
Nv2aVshRegister input_regs[16];
// o0-o12
Nv2aVshRegister output_regs[13];
// r0-r11
Nv2aVshRegister temp_regs[12];
// c0-c191
Nv2aVshRegister context_regs[192];
// a0
Nv2aVshRegister address_reg;
} Nv2aVshCPUFullExecutionState;
// Models a partial execution context where the context registers are held
// externally. Intended for use in vertex state shaders that just write to the
// context registers.
//
// Use nv2a_vsh_emu_initialize_xss_execution_state to zero this struct, attach
// the external context arrays and obtain an Nv2aVshExecutionState for it.
typedef struct Nv2aVshCPUXVSSExecutionState_ {
  // Only v0 is used.
  Nv2aVshRegister input_regs[1];
  // No output registers are used.
  float *output_regs;
  // r0-r11. Sized for all 12 temporaries: the emulator accepts temp register
  // indices up to and including 11, so a smaller bank would be overrun.
  Nv2aVshRegister temp_regs[12];
  // Context regs should be initialized to a flat array of 192 registers.
  float *context_regs;
  // Optional array of 192 bools that will be set when writing to entries in
  // context_regs.
  bool *context_dirty;
  // a0
  Nv2aVshRegister address_reg;
} Nv2aVshCPUXVSSExecutionState;
// Initializes the given Nv2aVshCPUFullExecutionState (all registers are
// zeroed) and returns an Nv2aVshExecutionState appropriate for use with the
// nv2a_vsh_emu_* functions. The returned register bank pointers refer to
// storage inside `state`, so `state` must outlive the returned value.
Nv2aVshExecutionState nv2a_vsh_emu_initialize_full_execution_state(
Nv2aVshCPUFullExecutionState *state);
// Initializes the given Nv2aVshCPUXVSSExecutionState (zeroed, then
// `context_regs` and `context_dirty` are stored in it) and returns an
// Nv2aVshExecutionState appropriate for use with the nv2a_vsh_emu_* functions.
// `state` and `context_regs` must outlive the returned value.
Nv2aVshExecutionState nv2a_vsh_emu_initialize_xss_execution_state(
Nv2aVshCPUXVSSExecutionState *state, float *context_regs,
bool *context_dirty);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // NV2A_VSH_CPU_SRC_NV2A_VSH_EMU_EXECUTION_STATE_H_

View File

@ -0,0 +1,86 @@
#include <boost/test/unit_test.hpp>
#include "nv2a_vsh_emulator.h"
// Checks that register `index` of the flat float array `bank` (4 floats per
// register) matches the four floats in `actual`, one BOOST_TEST per component.
//
// Arguments are parenthesized and evaluated exactly once, and the locals carry
// a macro-specific suffix so they cannot capture an argument that happens to
// be named `expected` or `actual` at the call site.
#define CHECK_REGISTER(bank, index, actual)                           \
  do {                                                                \
    const float *check_register_bank_ = (bank) + (index) * 4;         \
    const float *check_register_want_ = (actual);                     \
    BOOST_TEST(check_register_bank_[0] == check_register_want_[0]);   \
    BOOST_TEST(check_register_bank_[1] == check_register_want_[1]);   \
    BOOST_TEST(check_register_bank_[2] == check_register_want_[2]);   \
    BOOST_TEST(check_register_bank_[3] == check_register_want_[3]);   \
  } while (0)
static void clear_step(Nv2aVshStep *out) {
out->is_final = false;
memset(out->mac.outputs, 0, sizeof(out->mac.outputs));
memset(out->mac.inputs, 0, sizeof(out->mac.inputs));
memset(out->ilu.outputs, 0, sizeof(out->ilu.outputs));
memset(out->ilu.inputs, 0, sizeof(out->ilu.inputs));
out->mac.opcode = NV2AOP_NOP;
out->mac.inputs[0].type = NV2ART_NONE;
out->mac.inputs[0].swizzle[0] = NV2ASW_X;
out->mac.inputs[0].swizzle[1] = NV2ASW_Y;
out->mac.inputs[0].swizzle[2] = NV2ASW_Z;
out->mac.inputs[0].swizzle[3] = NV2ASW_W;
out->mac.inputs[1].type = NV2ART_NONE;
out->mac.inputs[1].swizzle[0] = NV2ASW_X;
out->mac.inputs[1].swizzle[1] = NV2ASW_Y;
out->mac.inputs[1].swizzle[2] = NV2ASW_Z;
out->mac.inputs[1].swizzle[3] = NV2ASW_W;
out->mac.inputs[2].type = NV2ART_NONE;
out->mac.inputs[2].swizzle[0] = NV2ASW_X;
out->mac.inputs[2].swizzle[1] = NV2ASW_Y;
out->mac.inputs[2].swizzle[2] = NV2ASW_Z;
out->mac.inputs[2].swizzle[3] = NV2ASW_W;
out->mac.outputs[0].type = NV2ART_NONE;
out->mac.outputs[1].type = NV2ART_NONE;
out->ilu.opcode = NV2AOP_NOP;
out->ilu.inputs[0].type = NV2ART_NONE;
out->ilu.inputs[0].swizzle[0] = NV2ASW_X;
out->ilu.inputs[0].swizzle[1] = NV2ASW_Y;
out->ilu.inputs[0].swizzle[2] = NV2ASW_Z;
out->ilu.inputs[0].swizzle[3] = NV2ASW_W;
out->ilu.inputs[1].type = NV2ART_NONE;
out->ilu.inputs[1].swizzle[0] = NV2ASW_X;
out->ilu.inputs[1].swizzle[1] = NV2ASW_Y;
out->ilu.inputs[1].swizzle[2] = NV2ASW_Z;
out->ilu.inputs[1].swizzle[3] = NV2ASW_W;
out->ilu.inputs[2].type = NV2ART_NONE;
out->ilu.inputs[2].swizzle[0] = NV2ASW_X;
out->ilu.inputs[2].swizzle[1] = NV2ASW_Y;
out->ilu.inputs[2].swizzle[2] = NV2ASW_Z;
out->ilu.inputs[2].swizzle[3] = NV2ASW_W;
out->ilu.outputs[0].type = NV2ART_NONE;
out->ilu.outputs[1].type = NV2ART_NONE;
}
BOOST_AUTO_TEST_SUITE(basic_operation_suite)
// A single MAC MOV from an input register to an output register must copy all
// four components unchanged.
BOOST_AUTO_TEST_CASE(step_trivial) {
  Nv2aVshCPUFullExecutionState full_state;
  Nv2aVshExecutionState state =
      nv2a_vsh_emu_initialize_full_execution_state(&full_state);

  // Four distinct values so a swizzle or sign mistake would be visible.
  Nv2aVshRegister &v11 = full_state.input_regs[11];
  v11.reg.x = 123.0f;
  v11.reg.y = -456.0f;
  v11.reg.z = 0.789f;
  v11.reg.w = 32.64f;

  // MOV oT2.xyzw, v11
  Nv2aVshStep step;
  clear_step(&step);

  Nv2aVshOperation &mov = step.mac;
  mov.opcode = NV2AOP_MOV;

  Nv2aVshOutput &dst = mov.outputs[0];
  dst.type = NV2ART_OUTPUT;
  dst.index = NV2AOR_TEX2;
  dst.writemask = NV2AWM_XYZW;

  Nv2aVshInput &src = mov.inputs[0];
  src.type = NV2ART_INPUT;
  src.index = 11;

  nv2a_vsh_emu_apply(&state, &step);

  CHECK_REGISTER(state.output_regs, NV2AOR_TEX2, v11.raw);
}
BOOST_AUTO_TEST_SUITE_END()

View File

@ -0,0 +1,2 @@
#define BOOST_TEST_MODULE OperationTests
#include <boost/test/unit_test.hpp>