mirror of
https://github.com/xemu-project/nv2a_vsh_cpu.git
synced 2024-11-23 01:39:38 +00:00
Adds emulation.
This commit is contained in:
parent
a5debc73de
commit
25a9fa6835
@ -25,6 +25,18 @@ include_directories("${Boost_INCLUDE_DIR}" "${GENERATED_FILES_DIR}")
|
||||
set(CMAKE_CXX_FLAGS_DEBUG "-ggdb -O0")
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "-O3")
|
||||
|
||||
add_library(
|
||||
nv2a_vsh_disassembler
|
||||
src/nv2a_vsh_disassembler.c
|
||||
src/nv2a_vsh_disassembler.h
|
||||
)
|
||||
|
||||
target_include_directories(
|
||||
nv2a_vsh_disassembler
|
||||
PRIVATE
|
||||
src
|
||||
)
|
||||
|
||||
add_library(
|
||||
nv2a_vsh_cpu
|
||||
src/nv2a_vsh_cpu.c
|
||||
@ -38,17 +50,27 @@ target_include_directories(
|
||||
)
|
||||
|
||||
add_library(
|
||||
nv2a_vsh_disassembler
|
||||
src/nv2a_vsh_disassembler.c
|
||||
src/nv2a_vsh_disassembler.h
|
||||
nv2a_vsh_emulator
|
||||
src/nv2a_vsh_emulator.c
|
||||
src/nv2a_vsh_emulator.h
|
||||
src/nv2a_vsh_emulator_execution_state.c
|
||||
src/nv2a_vsh_emulator_execution_state.h
|
||||
)
|
||||
|
||||
target_include_directories(
|
||||
nv2a_vsh_disassembler
|
||||
nv2a_vsh_emulator
|
||||
PRIVATE
|
||||
src
|
||||
)
|
||||
|
||||
target_link_libraries(
|
||||
nv2a_vsh_emulator
|
||||
PRIVATE nv2a_vsh_cpu
|
||||
PRIVATE nv2a_vsh_disassembler
|
||||
)
|
||||
add_dependencies(nv2a_vsh_emulator nv2a_vsh_cpu nv2a_vsh_disassembler)
|
||||
|
||||
|
||||
# Tests ----------------------------------------------
|
||||
add_executable(
|
||||
operations_tests
|
||||
@ -88,3 +110,23 @@ target_link_libraries(
|
||||
)
|
||||
add_test(NAME disassembler_tests COMMAND disassembler_tests)
|
||||
add_dependencies(disassembler_tests nv2a_vsh_disassembler)
|
||||
|
||||
|
||||
# Unit tests for the nv2a_vsh_emulator library.
add_executable(
        emulator_tests
        test/emulator/test_main.cpp
        test/emulator/test_basic.cpp
)
target_include_directories(
        emulator_tests
        PRIVATE src
        PRIVATE test
)
# Use the PRIVATE keyword (as the nv2a_vsh_emulator target does) rather than
# the legacy LINK_PRIVATE spelling, which CMake keeps for compatibility only.
target_link_libraries(
        emulator_tests
        PRIVATE
        nv2a_vsh_emulator
        ${Boost_LIBRARIES}
)
add_test(NAME emulator_tests COMMAND emulator_tests)
add_dependencies(emulator_tests nv2a_vsh_emulator)
|
||||
|
170
src/nv2a_vsh_emulator.c
Normal file
170
src/nv2a_vsh_emulator.c
Normal file
@ -0,0 +1,170 @@
|
||||
#include "nv2a_vsh_emulator.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
|
||||
// Fills |out| with the components of |in| selected by |swizzle|.
//
// |swizzle| holds four indices into in->raw, one per destination component in
// x, y, z, w order. When |negate| is set every selected component is
// multiplied by -1.
static inline void set_register(Nv2aVshRegister *out, const Nv2aVshRegister *in,
                                const uint8_t *swizzle, bool negate) {
  float sign;
  if (negate) {
    sign = -1.0f;
  } else {
    sign = 1.0f;
  }

  const float *source = in->raw;
  out->reg.x = sign * source[swizzle[0]];
  out->reg.y = sign * source[swizzle[1]];
  out->reg.z = sign * source[swizzle[2]];
  out->reg.w = sign * source[swizzle[3]];
}
|
||||
|
||||
static inline void fetch_value(Nv2aVshRegister *out,
|
||||
const Nv2aVshExecutionState *state,
|
||||
const Nv2aVshInput *input) {
|
||||
const Nv2aVshRegister *in;
|
||||
|
||||
switch (input->type) {
|
||||
default:
|
||||
assert(!"Invalid input type");
|
||||
|
||||
case NV2ART_TEMPORARY:
|
||||
if (input->index == 12) {
|
||||
in = (const Nv2aVshRegister *)state->output_regs;
|
||||
} else {
|
||||
in = (const Nv2aVshRegister *)(state->temp_regs + input->index * 4);
|
||||
}
|
||||
break;
|
||||
|
||||
case NV2ART_INPUT:
|
||||
in = (const Nv2aVshRegister *)(state->input_regs + input->index * 4);
|
||||
break;
|
||||
|
||||
case NV2ART_CONTEXT: {
|
||||
uint32_t offset = input->index;
|
||||
if (input->is_relative) {
|
||||
offset += (int)state->address_reg[0];
|
||||
}
|
||||
in = (const Nv2aVshRegister *)(state->context_regs + offset * 4);
|
||||
} break;
|
||||
}
|
||||
|
||||
set_register(out, in, input->swizzle, input->is_negated);
|
||||
}
|
||||
|
||||
static inline void apply_operation(Nv2aVshExecutionState *state,
|
||||
const Nv2aVshOperation *op,
|
||||
const Nv2aVshRegister *inputs) {
|
||||
Nv2aVshRegister output;
|
||||
|
||||
switch (op->opcode) {
|
||||
case NV2AOP_NOP:
|
||||
return;
|
||||
|
||||
case NV2AOP_MOV:
|
||||
nv2a_vsh_cpu_mov(&output, inputs);
|
||||
break;
|
||||
|
||||
case NV2AOP_MUL:
|
||||
case NV2AOP_ADD:
|
||||
case NV2AOP_MAD:
|
||||
case NV2AOP_DP3:
|
||||
case NV2AOP_DPH:
|
||||
case NV2AOP_DP4:
|
||||
case NV2AOP_DST:
|
||||
case NV2AOP_MIN:
|
||||
case NV2AOP_MAX:
|
||||
case NV2AOP_SLT:
|
||||
case NV2AOP_SGE:
|
||||
case NV2AOP_ARL:
|
||||
case NV2AOP_RCP:
|
||||
case NV2AOP_RCC:
|
||||
case NV2AOP_RSQ:
|
||||
case NV2AOP_EXP:
|
||||
case NV2AOP_LOG:
|
||||
case NV2AOP_LIT:
|
||||
break;
|
||||
}
|
||||
|
||||
const Nv2aVshOutput *out = op->outputs;
|
||||
for (uint32_t i = 0; i < 2; ++i, ++out) {
|
||||
Nv2aVshRegister *outreg;
|
||||
switch (out->type) {
|
||||
case NV2ART_INPUT:
|
||||
assert(!"Attempt to write to input register.");
|
||||
|
||||
case NV2ART_NONE:
|
||||
continue;
|
||||
|
||||
case NV2ART_OUTPUT:
|
||||
assert(out->index < 13 && "Invalid output register target.");
|
||||
outreg = (Nv2aVshRegister *)(state->output_regs + out->index * 4);
|
||||
break;
|
||||
|
||||
case NV2ART_TEMPORARY:
|
||||
assert(out->index < 12 && "Invalid temp register target.");
|
||||
outreg = (Nv2aVshRegister *)(state->temp_regs + out->index * 4);
|
||||
break;
|
||||
|
||||
case NV2ART_CONTEXT:
|
||||
assert(out->index < 192 && "Invalid context register target.");
|
||||
outreg = (Nv2aVshRegister *)(state->context_regs + out->index * 4);
|
||||
break;
|
||||
|
||||
case NV2ART_ADDRESS:
|
||||
outreg = (Nv2aVshRegister *)&state->address_reg;
|
||||
break;
|
||||
}
|
||||
|
||||
if (out->writemask & NV2AWM_X) {
|
||||
outreg->reg.x = output.reg.x;
|
||||
}
|
||||
if (out->writemask & NV2AWM_Y) {
|
||||
outreg->reg.y = output.reg.y;
|
||||
}
|
||||
if (out->writemask & NV2AWM_Z) {
|
||||
outreg->reg.z = output.reg.z;
|
||||
}
|
||||
if (out->writemask & NV2AWM_W) {
|
||||
outreg->reg.w = output.reg.w;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static inline void apply(Nv2aVshExecutionState *state,
|
||||
const Nv2aVshStep *step) {
|
||||
// Copy the inputs for both operations first to prevent introducing order
|
||||
// dependent behavior.
|
||||
Nv2aVshRegister mac_inputs[3];
|
||||
Nv2aVshRegister ilu_input;
|
||||
if (step->mac.opcode) {
|
||||
for (uint32_t i = 0; i < 3; ++i) {
|
||||
if (step->mac.inputs[i].type == NV2ART_NONE) {
|
||||
break;
|
||||
}
|
||||
fetch_value(&mac_inputs[i], state, &step->mac.inputs[i]);
|
||||
}
|
||||
}
|
||||
if (step->ilu.opcode) {
|
||||
fetch_value(&ilu_input, state, &step->ilu.inputs[0]);
|
||||
}
|
||||
|
||||
if (step->mac.opcode) {
|
||||
apply_operation(state, &step->mac, mac_inputs);
|
||||
}
|
||||
if (step->ilu.opcode) {
|
||||
apply_operation(state, &step->ilu, &ilu_input);
|
||||
}
|
||||
}
|
||||
|
||||
void nv2a_vsh_emu_execute(Nv2aVshExecutionState *state,
|
||||
const Nv2aVshProgram *program) {
|
||||
assert(state);
|
||||
assert(program && program->steps);
|
||||
|
||||
Nv2aVshStep *step = program->steps;
|
||||
while (true) {
|
||||
apply(state, step);
|
||||
if (step->is_final) {
|
||||
break;
|
||||
}
|
||||
++step;
|
||||
}
|
||||
}
|
||||
|
||||
// Applies the single |step| to |state|. The step's is_final flag is not
// consulted.
//
// Both arguments must be non-NULL (checked with assert, matching
// nv2a_vsh_emu_execute).
void nv2a_vsh_emu_apply(Nv2aVshExecutionState *state, const Nv2aVshStep *step) {
  assert(state);
  assert(step);
  apply(state, step);
}
|
21
src/nv2a_vsh_emulator.h
Normal file
21
src/nv2a_vsh_emulator.h
Normal file
@ -0,0 +1,21 @@
|
||||
#ifndef NV2A_VSH_CPU_SRC_NV2A_VSH_EMULATOR_H_
|
||||
#define NV2A_VSH_CPU_SRC_NV2A_VSH_EMULATOR_H_
|
||||
|
||||
#include "nv2a_vsh_emulator_execution_state.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Emulates the given program by applying each step to the given state.
|
||||
void nv2a_vsh_emu_execute(Nv2aVshExecutionState *state,
|
||||
const Nv2aVshProgram *program);
|
||||
|
||||
// Emulates the given step by applying it to the given state.
|
||||
void nv2a_vsh_emu_apply(Nv2aVshExecutionState *state, const Nv2aVshStep *step);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // NV2A_VSH_CPU_SRC_NV2A_VSH_EMULATOR_H_
|
26
src/nv2a_vsh_emulator_execution_state.c
Normal file
26
src/nv2a_vsh_emulator_execution_state.c
Normal file
@ -0,0 +1,26 @@
|
||||
#include "nv2a_vsh_emulator_execution_state.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
Nv2aVshExecutionState nv2a_vsh_emu_initialize_full_execution_state(
|
||||
Nv2aVshCPUFullExecutionState *state) {
|
||||
memset(state, 0, sizeof(*state));
|
||||
Nv2aVshExecutionState ret = {
|
||||
(float *)state->input_regs, (float *)state->output_regs,
|
||||
(float *)state->temp_regs, (float *)state->context_regs};
|
||||
(float *)&state->address_reg;
|
||||
return ret;
|
||||
}
|
||||
|
||||
Nv2aVshExecutionState nv2a_vsh_emu_initialize_xss_execution_state(
|
||||
Nv2aVshCPUXVSSExecutionState *state, float *context_regs,
|
||||
bool *context_dirty) {
|
||||
memset(state, 0, sizeof(*state));
|
||||
state->context_regs = context_regs;
|
||||
state->context_dirty = context_dirty;
|
||||
Nv2aVshExecutionState ret = {
|
||||
(float *)state->input_regs, (float *)state->output_regs,
|
||||
(float *)state->temp_regs, (float *)state->context_regs};
|
||||
(float *)&state->address_reg;
|
||||
return ret;
|
||||
}
|
90
src/nv2a_vsh_emulator_execution_state.h
Normal file
90
src/nv2a_vsh_emulator_execution_state.h
Normal file
@ -0,0 +1,90 @@
|
||||
#ifndef NV2A_VSH_CPU_SRC_NV2A_VSH_EMU_EXECUTION_STATE_H_
|
||||
#define NV2A_VSH_CPU_SRC_NV2A_VSH_EMU_EXECUTION_STATE_H_
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "nv2a_vsh_cpu.h"
|
||||
#include "nv2a_vsh_disassembler.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Named indices into the output register bank. Indices 1 and 2 have no name.
typedef enum Nv2aVshOutputRegisterName_ {
  NV2AOR_POS = 0,

  NV2AOR_DIFFUSE = 3,
  NV2AOR_SPECULAR = 4,
  NV2AOR_FOG_COORD = 5,
  NV2AOR_POINT_SIZE = 6,
  NV2AOR_BACK_DIFFUSE = 7,
  NV2AOR_BACK_SPECULAR = 8,

  NV2AOR_TEX0 = 9,
  NV2AOR_TEX1 = 10,
  NV2AOR_TEX2 = 11,
  NV2AOR_TEX3 = 12,
} Nv2aVshOutputRegisterName;
|
||||
|
||||
// View of the complete nv2a vertex shader register file used by the emulator.
//
// Every member points at a flat float array in which each register occupies
// four consecutive floats (register N starts at element N * 4).
typedef struct Nv2aVshExecutionState_ {
  float *input_regs;    // v0-v15
  float *output_regs;   // o0 - o12, 1 and 2 will never be written to.
  float *temp_regs;     // r0-r11
  float *context_regs;  // c0-c191
  float *address_reg;   // a0
} Nv2aVshExecutionState;
|
||||
|
||||
// Stores the entire execution state for full software-based nv2a vertex shader
|
||||
// emulation.
|
||||
typedef struct Nv2aVshCPUFullExecutionState_ {
|
||||
Nv2aVshRegister input_regs[16];
|
||||
Nv2aVshRegister output_regs[13];
|
||||
Nv2aVshRegister temp_regs[12];
|
||||
Nv2aVshRegister context_regs[192];
|
||||
Nv2aVshRegister address_reg;
|
||||
} Nv2aVshCPUFullExecutionState;
|
||||
|
||||
// Models a partial execution context where the context registers are held
|
||||
// externally. Intended for use in vertex state shaders that just write to the
|
||||
// context registers.
|
||||
typedef struct Nv2aVshCPUXVSSExecutionState_ {
|
||||
// Only v0 is used.
|
||||
Nv2aVshRegister input_regs[1];
|
||||
|
||||
// No output registers are used.
|
||||
float *output_regs;
|
||||
|
||||
Nv2aVshRegister temp_regs[11];
|
||||
|
||||
// Context regs should be initialized to a flat array of 192 registers.
|
||||
float *context_regs;
|
||||
|
||||
// Optional array of 192 bools that will be set when writing to entries in
|
||||
// context_regs.
|
||||
bool *context_dirty;
|
||||
|
||||
Nv2aVshRegister address_reg;
|
||||
} Nv2aVshCPUXVSSExecutionState;
|
||||
|
||||
// Initializes the given Nv2aVshCPUFullExecutionState and returns an
|
||||
// Nv2aVshExecutionState appropriate for use with nv2a_vsh_emu_* functions.
|
||||
Nv2aVshExecutionState nv2a_vsh_emu_initialize_full_execution_state(
|
||||
Nv2aVshCPUFullExecutionState *state);
|
||||
|
||||
// Initializes the given Nv2aVshCPUXVSSExecutionState and returns an
|
||||
// Nv2aVshExecutionState appropriate for use with nv2a_vsh_emu_* functions.
|
||||
Nv2aVshExecutionState nv2a_vsh_emu_initialize_xss_execution_state(
|
||||
Nv2aVshCPUXVSSExecutionState *state, float *context_regs,
|
||||
bool *context_dirty);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // NV2A_VSH_CPU_SRC_NV2A_VSH_EMU_EXECUTION_STATE_H_
|
86
test/emulator/test_basic.cpp
Normal file
86
test/emulator/test_basic.cpp
Normal file
@ -0,0 +1,86 @@
|
||||
#include <boost/test/unit_test.hpp>
|
||||
|
||||
#include "nv2a_vsh_emulator.h"
|
||||
|
||||
// Checks, one BOOST_TEST per component, that the four floats of register
// |index| inside the flat float array |bank| equal the four floats in
// |actual|.
//
// Arguments are parenthesized and evaluated once, so expressions such as
// `base + 1` may be passed as |index|. The locals carry a macro-specific
// prefix so they cannot shadow a caller variable used as an argument.
#define CHECK_REGISTER(bank, index, actual)                              \
  do {                                                                   \
    const float *check_register_bank_ = (bank) + (index) * 4;            \
    const float *check_register_want_ = (actual);                        \
    BOOST_TEST(check_register_bank_[0] == check_register_want_[0]);      \
    BOOST_TEST(check_register_bank_[1] == check_register_want_[1]);      \
    BOOST_TEST(check_register_bank_[2] == check_register_want_[2]);      \
    BOOST_TEST(check_register_bank_[3] == check_register_want_[3]);      \
  } while (0)
|
||||
|
||||
// Resets |out| to a non-final step whose MAC and ILU operations are both
// NV2AOP_NOP. The inputs and outputs arrays of each operation are zeroed,
// then input slots 0-2 get type NV2ART_NONE with an x, y, z, w swizzle and
// output slots 0-1 get type NV2ART_NONE.
static void clear_step(Nv2aVshStep *out) {
  out->is_final = false;

  Nv2aVshOperation *const operations[2] = {&out->mac, &out->ilu};
  for (int op_index = 0; op_index < 2; ++op_index) {
    Nv2aVshOperation *op = operations[op_index];

    memset(op->outputs, 0, sizeof(op->outputs));
    memset(op->inputs, 0, sizeof(op->inputs));

    op->opcode = NV2AOP_NOP;

    for (int slot = 0; slot < 3; ++slot) {
      Nv2aVshInput *input = &op->inputs[slot];
      input->type = NV2ART_NONE;
      input->swizzle[0] = NV2ASW_X;
      input->swizzle[1] = NV2ASW_Y;
      input->swizzle[2] = NV2ASW_Z;
      input->swizzle[3] = NV2ASW_W;
    }

    for (int slot = 0; slot < 2; ++slot) {
      op->outputs[slot].type = NV2ART_NONE;
    }
  }
}
|
||||
|
||||
BOOST_AUTO_TEST_SUITE(basic_operation_suite)
|
||||
|
||||
// Applies a single `MOV oT2.xyzw, v11` step and checks that output register
// NV2AOR_TEX2 ends up holding the value that was placed in v11.
BOOST_AUTO_TEST_CASE(step_trivial) {
  Nv2aVshCPUFullExecutionState full_state;
  Nv2aVshExecutionState state =
      nv2a_vsh_emu_initialize_full_execution_state(&full_state);

  // Seed v11 with four distinct component values.
  Nv2aVshRegister &v11 = full_state.input_regs[11];
  v11.reg.x = 123.0f;
  v11.reg.y = -456.0f;
  v11.reg.z = 0.789f;
  v11.reg.w = 32.64f;

  // MOV oT2.xyzw, v11
  Nv2aVshStep step;
  clear_step(&step);

  Nv2aVshOperation &mov = step.mac;
  mov.opcode = NV2AOP_MOV;

  mov.inputs[0].type = NV2ART_INPUT;
  mov.inputs[0].index = 11;

  mov.outputs[0].type = NV2ART_OUTPUT;
  mov.outputs[0].index = 11;
  mov.outputs[0].writemask = NV2AWM_XYZW;

  nv2a_vsh_emu_apply(&state, &step);

  CHECK_REGISTER(state.output_regs, NV2AOR_TEX2, full_state.input_regs[11].raw);
}
|
||||
|
||||
BOOST_AUTO_TEST_SUITE_END()
|
2
test/emulator/test_main.cpp
Normal file
2
test/emulator/test_main.cpp
Normal file
@ -0,0 +1,2 @@
|
||||
// Boost.Test entry point for the emulator_tests executable. The module name
// identifies this binary's master test suite in test output.
#define BOOST_TEST_MODULE EmulatorTests
#include <boost/test/unit_test.hpp>
|
Loading…
Reference in New Issue
Block a user