mirror of
https://github.com/xemu-project/nv2a_vsh_cpu.git
synced 2024-11-26 19:20:22 +00:00
Implements context write tracking.
This commit is contained in:
parent
71b2265efc
commit
79361aec5f
@ -37,8 +37,7 @@ static inline void set_register(float *out, const float *in,
|
|||||||
out[3] = mult * in[swizzle[3]];
|
out[3] = mult * in[swizzle[3]];
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void fetch_value(float *out,
|
static inline void fetch_value(float *out, const Nv2aVshExecutionState *state,
|
||||||
const Nv2aVshExecutionState *state,
|
|
||||||
const Nv2aVshInput *input) {
|
const Nv2aVshInput *input) {
|
||||||
const float *in;
|
const float *in;
|
||||||
|
|
||||||
@ -125,12 +124,11 @@ static inline void apply_operation(Nv2aVshExecutionState *state,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void apply(Nv2aVshExecutionState *state,
|
static inline void prepare_inputs(float *mac_inputs, float *ilu_input,
|
||||||
const Nv2aVshStep *step) {
|
Nv2aVshExecutionState *state,
|
||||||
|
const Nv2aVshStep *step) {
|
||||||
// Copy the inputs for both operations first to prevent introducing order
|
// Copy the inputs for both operations first to prevent introducing order
|
||||||
// dependent behavior.
|
// dependent behavior.
|
||||||
float mac_inputs[3 * 4];
|
|
||||||
float ilu_input[4];
|
|
||||||
if (step->mac.opcode) {
|
if (step->mac.opcode) {
|
||||||
for (uint32_t i = 0; i < 3; ++i) {
|
for (uint32_t i = 0; i < 3; ++i) {
|
||||||
if (step->mac.inputs[i].type == NV2ART_NONE) {
|
if (step->mac.inputs[i].type == NV2ART_NONE) {
|
||||||
@ -142,6 +140,13 @@ static inline void apply(Nv2aVshExecutionState *state,
|
|||||||
if (step->ilu.opcode) {
|
if (step->ilu.opcode) {
|
||||||
fetch_value(ilu_input, state, &step->ilu.inputs[0]);
|
fetch_value(ilu_input, state, &step->ilu.inputs[0]);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void apply(Nv2aVshExecutionState *state,
|
||||||
|
const Nv2aVshStep *step) {
|
||||||
|
float mac_inputs[3 * 4];
|
||||||
|
float ilu_input[4];
|
||||||
|
prepare_inputs(mac_inputs, &ilu_input, state, step);
|
||||||
|
|
||||||
if (step->mac.opcode) {
|
if (step->mac.opcode) {
|
||||||
apply_operation(state, &step->mac, mac_inputs);
|
apply_operation(state, &step->mac, mac_inputs);
|
||||||
@ -151,6 +156,31 @@ static inline void apply(Nv2aVshExecutionState *state,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline void apply_track_context_writes(Nv2aVshExecutionState *state,
|
||||||
|
const Nv2aVshStep *step,
|
||||||
|
bool *context_dirty) {
|
||||||
|
float mac_inputs[3 * 4];
|
||||||
|
float ilu_input[4];
|
||||||
|
prepare_inputs(mac_inputs, &ilu_input, state, step);
|
||||||
|
|
||||||
|
if (step->mac.opcode) {
|
||||||
|
apply_operation(state, &step->mac, mac_inputs);
|
||||||
|
if (step->mac.outputs[0].type == NV2ART_CONTEXT) {
|
||||||
|
context_dirty[step->mac.outputs[0].index] = true;
|
||||||
|
} else if (step->mac.outputs[1].type == NV2ART_CONTEXT) {
|
||||||
|
context_dirty[step->mac.outputs[1].index] = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (step->ilu.opcode) {
|
||||||
|
apply_operation(state, &step->ilu, &ilu_input);
|
||||||
|
if (step->ilu.outputs[0].type == NV2ART_CONTEXT) {
|
||||||
|
context_dirty[step->ilu.outputs[0].index] = true;
|
||||||
|
} else if (step->ilu.outputs[1].type == NV2ART_CONTEXT) {
|
||||||
|
context_dirty[step->ilu.outputs[1].index] = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void nv2a_vsh_emu_execute(Nv2aVshExecutionState *state,
|
void nv2a_vsh_emu_execute(Nv2aVshExecutionState *state,
|
||||||
const Nv2aVshProgram *program) {
|
const Nv2aVshProgram *program) {
|
||||||
assert(state);
|
assert(state);
|
||||||
@ -166,6 +196,23 @@ void nv2a_vsh_emu_execute(Nv2aVshExecutionState *state,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void nv2a_vsh_emu_execute_track_context_writes(Nv2aVshExecutionState *state,
|
||||||
|
const Nv2aVshProgram *program,
|
||||||
|
bool *context_dirty) {
|
||||||
|
assert(state);
|
||||||
|
assert(program && program->steps);
|
||||||
|
assert(context_dirty);
|
||||||
|
|
||||||
|
Nv2aVshStep *step = program->steps;
|
||||||
|
while (true) {
|
||||||
|
apply_track_context_writes(state, step, context_dirty);
|
||||||
|
if (step->is_final) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
++step;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void nv2a_vsh_emu_apply(Nv2aVshExecutionState *state, const Nv2aVshStep *step) {
|
void nv2a_vsh_emu_apply(Nv2aVshExecutionState *state, const Nv2aVshStep *step) {
|
||||||
apply(state, step);
|
apply(state, step);
|
||||||
}
|
}
|
||||||
|
@ -11,6 +11,12 @@ extern "C" {
|
|||||||
void nv2a_vsh_emu_execute(Nv2aVshExecutionState *state,
|
void nv2a_vsh_emu_execute(Nv2aVshExecutionState *state,
|
||||||
const Nv2aVshProgram *program);
|
const Nv2aVshProgram *program);
|
||||||
|
|
||||||
|
// context_dirty must point to an array of 192 bools; the entry for a context
|
||||||
|
// register is set to true whenever the program writes to that register.
|
||||||
|
void nv2a_vsh_emu_execute_track_context_writes(Nv2aVshExecutionState *state,
|
||||||
|
const Nv2aVshProgram *program,
|
||||||
|
bool *context_dirty);
|
||||||
|
|
||||||
// Emulates the given step by applying it to the given state.
|
// Emulates the given step by applying it to the given state.
|
||||||
void nv2a_vsh_emu_apply(Nv2aVshExecutionState *state, const Nv2aVshStep *step);
|
void nv2a_vsh_emu_apply(Nv2aVshExecutionState *state, const Nv2aVshStep *step);
|
||||||
|
|
||||||
|
@ -8,19 +8,17 @@ Nv2aVshExecutionState nv2a_vsh_emu_initialize_full_execution_state(
|
|||||||
Nv2aVshExecutionState ret = {
|
Nv2aVshExecutionState ret = {
|
||||||
(float *)state->input_regs, (float *)state->output_regs,
|
(float *)state->input_regs, (float *)state->output_regs,
|
||||||
(float *)state->temp_regs, (float *)state->context_regs,
|
(float *)state->temp_regs, (float *)state->context_regs,
|
||||||
(float *)&state->address_reg, NULL};
|
(float *)&state->address_reg};
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
Nv2aVshExecutionState nv2a_vsh_emu_initialize_xss_execution_state(
|
Nv2aVshExecutionState nv2a_vsh_emu_initialize_xss_execution_state(
|
||||||
Nv2aVshCPUXVSSExecutionState *state, float *context_regs,
|
Nv2aVshCPUXVSSExecutionState *state, float *context_regs) {
|
||||||
bool *context_dirty) {
|
|
||||||
memset(state, 0, sizeof(*state));
|
memset(state, 0, sizeof(*state));
|
||||||
state->context_regs = context_regs;
|
state->context_regs = context_regs;
|
||||||
state->context_dirty = context_dirty;
|
|
||||||
Nv2aVshExecutionState ret = {
|
Nv2aVshExecutionState ret = {
|
||||||
(float *)state->input_regs, (float *)state->output_regs,
|
(float *)state->input_regs, (float *)state->output_regs,
|
||||||
(float *)state->temp_regs, (float *)state->context_regs,
|
(float *)state->temp_regs, (float *)state->context_regs,
|
||||||
(float *)&state->address_reg, state->context_dirty};
|
(float *)&state->address_reg};
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
@ -69,10 +69,6 @@ typedef struct Nv2aVshCPUXVSSExecutionState_ {
|
|||||||
// Context regs should be initialized to a flat array of 192 registers.
|
// Context regs should be initialized to a flat array of 192 registers.
|
||||||
float *context_regs;
|
float *context_regs;
|
||||||
|
|
||||||
// Optional array of 192 bools that will be set when writing to entries in
|
|
||||||
// context_regs.
|
|
||||||
bool *context_dirty;
|
|
||||||
|
|
||||||
float address_reg[4];
|
float address_reg[4];
|
||||||
} Nv2aVshCPUXVSSExecutionState;
|
} Nv2aVshCPUXVSSExecutionState;
|
||||||
|
|
||||||
@ -84,8 +80,7 @@ Nv2aVshExecutionState nv2a_vsh_emu_initialize_full_execution_state(
|
|||||||
// Initializes the given Nv2aVshCPUXVSSExecutionState and returns an
|
// Initializes the given Nv2aVshCPUXVSSExecutionState and returns an
|
||||||
// Nv2aVshExecutionState appropriate for use with nv2a_vsh_cpu_* functions.
|
// Nv2aVshExecutionState appropriate for use with nv2a_vsh_cpu_* functions.
|
||||||
Nv2aVshExecutionState nv2a_vsh_emu_initialize_xss_execution_state(
|
Nv2aVshExecutionState nv2a_vsh_emu_initialize_xss_execution_state(
|
||||||
Nv2aVshCPUXVSSExecutionState *state, float *context_regs,
|
Nv2aVshCPUXVSSExecutionState *state, float *context_regs);
|
||||||
bool *context_dirty);
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
} // extern "C"
|
} // extern "C"
|
||||||
|
@ -2,9 +2,9 @@
|
|||||||
|
|
||||||
#include "nv2a_vsh_emulator.h"
|
#include "nv2a_vsh_emulator.h"
|
||||||
|
|
||||||
#define CHECK_REGISTER(bank, index, actual) \
|
#define CHECK_REGISTER(bank, index, actual) \
|
||||||
do { \
|
do { \
|
||||||
float *expected = bank + index * 4; \
|
float *expected = bank + index * 4; \
|
||||||
BOOST_TEST((expected)[0] == (actual)[0]); \
|
BOOST_TEST((expected)[0] == (actual)[0]); \
|
||||||
BOOST_TEST((expected)[1] == (actual)[1]); \
|
BOOST_TEST((expected)[1] == (actual)[1]); \
|
||||||
BOOST_TEST((expected)[2] == (actual)[2]); \
|
BOOST_TEST((expected)[2] == (actual)[2]); \
|
||||||
@ -84,4 +84,41 @@ BOOST_AUTO_TEST_CASE(step_trivial) {
|
|||||||
CHECK_REGISTER(state.output_regs, NV2AOR_TEX2, &full_state.input_regs[reg]);
|
CHECK_REGISTER(state.output_regs, NV2AOR_TEX2, &full_state.input_regs[reg]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Executes a single-step program (MOV c1.xyzw, v11) with context write
// tracking enabled and checks that c1 receives the value of v11 and that c1 is
// the only context register flagged as dirty.
BOOST_AUTO_TEST_CASE(program_context_tracked) {
  Nv2aVshCPUFullExecutionState full_state;
  Nv2aVshExecutionState state =
      nv2a_vsh_emu_initialize_full_execution_state(&full_state);

  // Seed input register v11 (4 floats per register) with distinctive values.
  uint32_t reg = 11 * 4;
  float *v11 = &full_state.input_regs[reg];
  v11[0] = 123.0f;
  v11[1] = -456.0f;
  v11[2] = 0.789f;
  v11[3] = 32.64f;

  // MOV c1.xyzw, v11
  Nv2aVshStep steps[1];
  Nv2aVshStep &mov = steps[0];
  clear_step(&mov);
  mov.mac.opcode = NV2AOP_MOV;
  mov.mac.inputs[0].type = NV2ART_INPUT;
  mov.mac.inputs[0].index = 11;
  mov.mac.outputs[0].type = NV2ART_CONTEXT;
  mov.mac.outputs[0].index = 1;
  mov.mac.outputs[0].writemask = NV2AWM_XYZW;
  mov.is_final = true;

  Nv2aVshProgram program;
  program.steps = steps;

  bool context_dirty[192] = {false};
  nv2a_vsh_emu_execute_track_context_writes(&state, &program, context_dirty);

  // The write itself must have happened.
  CHECK_REGISTER(state.context_regs, 1, &full_state.input_regs[reg]);

  // Only entry 1 may be flagged.
  BOOST_TEST(!context_dirty[0]);
  BOOST_TEST(context_dirty[1]);
  for (uint32_t i = 2; i < 192; ++i) {
    BOOST_TEST_INFO(i);
    BOOST_TEST(!context_dirty[i]);
  }
}
|
||||||
|
|
||||||
BOOST_AUTO_TEST_SUITE_END()
|
BOOST_AUTO_TEST_SUITE_END()
|
||||||
|
Loading…
Reference in New Issue
Block a user