mirror of
https://github.com/xemu-project/nv2a_vsh_cpu.git
synced 2024-11-23 01:39:38 +00:00
Merge pull request #3 from abaire/adds_context_tracking
Implements context write tracking.
This commit is contained in:
commit
3a57d8d635
@ -37,8 +37,7 @@ static inline void set_register(float *out, const float *in,
|
||||
out[3] = mult * in[swizzle[3]];
|
||||
}
|
||||
|
||||
static inline void fetch_value(float *out,
|
||||
const Nv2aVshExecutionState *state,
|
||||
static inline void fetch_value(float *out, const Nv2aVshExecutionState *state,
|
||||
const Nv2aVshInput *input) {
|
||||
const float *in;
|
||||
|
||||
@ -125,12 +124,11 @@ static inline void apply_operation(Nv2aVshExecutionState *state,
|
||||
}
|
||||
}
|
||||
|
||||
static inline void apply(Nv2aVshExecutionState *state,
|
||||
const Nv2aVshStep *step) {
|
||||
static inline void prepare_inputs(float *mac_inputs, float *ilu_input,
|
||||
Nv2aVshExecutionState *state,
|
||||
const Nv2aVshStep *step) {
|
||||
// Copy the inputs for both operations first to prevent introducing order
|
||||
// dependent behavior.
|
||||
float mac_inputs[3 * 4];
|
||||
float ilu_input[4];
|
||||
if (step->mac.opcode) {
|
||||
for (uint32_t i = 0; i < 3; ++i) {
|
||||
if (step->mac.inputs[i].type == NV2ART_NONE) {
|
||||
@ -142,6 +140,13 @@ static inline void apply(Nv2aVshExecutionState *state,
|
||||
if (step->ilu.opcode) {
|
||||
fetch_value(ilu_input, state, &step->ilu.inputs[0]);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void apply(Nv2aVshExecutionState *state,
|
||||
const Nv2aVshStep *step) {
|
||||
float mac_inputs[3 * 4];
|
||||
float ilu_input[4];
|
||||
prepare_inputs(mac_inputs, &ilu_input, state, step);
|
||||
|
||||
if (step->mac.opcode) {
|
||||
apply_operation(state, &step->mac, mac_inputs);
|
||||
@ -151,6 +156,31 @@ static inline void apply(Nv2aVshExecutionState *state,
|
||||
}
|
||||
}
|
||||
|
||||
static inline void apply_track_context_writes(Nv2aVshExecutionState *state,
|
||||
const Nv2aVshStep *step,
|
||||
bool *context_dirty) {
|
||||
float mac_inputs[3 * 4];
|
||||
float ilu_input[4];
|
||||
prepare_inputs(mac_inputs, &ilu_input, state, step);
|
||||
|
||||
if (step->mac.opcode) {
|
||||
apply_operation(state, &step->mac, mac_inputs);
|
||||
if (step->mac.outputs[0].type == NV2ART_CONTEXT) {
|
||||
context_dirty[step->mac.outputs[0].index] = true;
|
||||
} else if (step->mac.outputs[1].type == NV2ART_CONTEXT) {
|
||||
context_dirty[step->mac.outputs[1].index] = true;
|
||||
}
|
||||
}
|
||||
if (step->ilu.opcode) {
|
||||
apply_operation(state, &step->ilu, &ilu_input);
|
||||
if (step->ilu.outputs[0].type == NV2ART_CONTEXT) {
|
||||
context_dirty[step->ilu.outputs[0].index] = true;
|
||||
} else if (step->ilu.outputs[1].type == NV2ART_CONTEXT) {
|
||||
context_dirty[step->ilu.outputs[1].index] = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void nv2a_vsh_emu_execute(Nv2aVshExecutionState *state,
|
||||
const Nv2aVshProgram *program) {
|
||||
assert(state);
|
||||
@ -166,6 +196,23 @@ void nv2a_vsh_emu_execute(Nv2aVshExecutionState *state,
|
||||
}
|
||||
}
|
||||
|
||||
void nv2a_vsh_emu_execute_track_context_writes(Nv2aVshExecutionState *state,
|
||||
const Nv2aVshProgram *program,
|
||||
bool *context_dirty) {
|
||||
assert(state);
|
||||
assert(program && program->steps);
|
||||
assert(context_dirty);
|
||||
|
||||
Nv2aVshStep *step = program->steps;
|
||||
while (true) {
|
||||
apply_track_context_writes(state, step, context_dirty);
|
||||
if (step->is_final) {
|
||||
break;
|
||||
}
|
||||
++step;
|
||||
}
|
||||
}
|
||||
|
||||
// Public entry point for emulating one step: forwards to the file-local
// apply() so that the given step is applied to the given state.
void nv2a_vsh_emu_apply(Nv2aVshExecutionState *state, const Nv2aVshStep *step) {
  apply(state, step);
}
|
||||
|
@ -11,6 +11,12 @@ extern "C" {
|
||||
void nv2a_vsh_emu_execute(Nv2aVshExecutionState *state,
|
||||
const Nv2aVshProgram *program);
|
||||
|
||||
// context_dirty must point to an array of 192 bools; the entry at the index of
|
||||
// each context register targeted by an executed step is set to true.
|
||||
void nv2a_vsh_emu_execute_track_context_writes(Nv2aVshExecutionState *state,
|
||||
const Nv2aVshProgram *program,
|
||||
bool *context_dirty);
|
||||
|
||||
// Emulates the given step by applying it to the given state.
|
||||
void nv2a_vsh_emu_apply(Nv2aVshExecutionState *state, const Nv2aVshStep *step);
|
||||
|
||||
|
@ -8,19 +8,17 @@ Nv2aVshExecutionState nv2a_vsh_emu_initialize_full_execution_state(
|
||||
Nv2aVshExecutionState ret = {
|
||||
(float *)state->input_regs, (float *)state->output_regs,
|
||||
(float *)state->temp_regs, (float *)state->context_regs,
|
||||
(float *)&state->address_reg, NULL};
|
||||
(float *)&state->address_reg};
|
||||
return ret;
|
||||
}
|
||||
|
||||
Nv2aVshExecutionState nv2a_vsh_emu_initialize_xss_execution_state(
|
||||
Nv2aVshCPUXVSSExecutionState *state, float *context_regs,
|
||||
bool *context_dirty) {
|
||||
Nv2aVshCPUXVSSExecutionState *state, float *context_regs) {
|
||||
memset(state, 0, sizeof(*state));
|
||||
state->context_regs = context_regs;
|
||||
state->context_dirty = context_dirty;
|
||||
Nv2aVshExecutionState ret = {
|
||||
(float *)state->input_regs, (float *)state->output_regs,
|
||||
(float *)state->temp_regs, (float *)state->context_regs,
|
||||
(float *)&state->address_reg, state->context_dirty};
|
||||
(float *)&state->address_reg};
|
||||
return ret;
|
||||
}
|
||||
|
@ -69,10 +69,6 @@ typedef struct Nv2aVshCPUXVSSExecutionState_ {
|
||||
// Context regs should be initialized to a flat array of 192 registers.
|
||||
float *context_regs;
|
||||
|
||||
// Optional array of 192 bools that will be set when writing to entries in
|
||||
// context_regs.
|
||||
bool *context_dirty;
|
||||
|
||||
float address_reg[4];
|
||||
} Nv2aVshCPUXVSSExecutionState;
|
||||
|
||||
@ -84,8 +80,7 @@ Nv2aVshExecutionState nv2a_vsh_emu_initialize_full_execution_state(
|
||||
// Initializes the given Nv2aVshCPUXVSSExecutionState and returns an
|
||||
// Nv2aVshExecutionState appropriate for use with nv2a_vsh_cpu_* functions.
|
||||
Nv2aVshExecutionState nv2a_vsh_emu_initialize_xss_execution_state(
|
||||
Nv2aVshCPUXVSSExecutionState *state, float *context_regs,
|
||||
bool *context_dirty);
|
||||
Nv2aVshCPUXVSSExecutionState *state, float *context_regs);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
|
@ -2,9 +2,9 @@
|
||||
|
||||
#include "nv2a_vsh_emulator.h"
|
||||
|
||||
#define CHECK_REGISTER(bank, index, actual) \
|
||||
do { \
|
||||
float *expected = bank + index * 4; \
|
||||
#define CHECK_REGISTER(bank, index, actual) \
|
||||
do { \
|
||||
float *expected = bank + index * 4; \
|
||||
BOOST_TEST((expected)[0] == (actual)[0]); \
|
||||
BOOST_TEST((expected)[1] == (actual)[1]); \
|
||||
BOOST_TEST((expected)[2] == (actual)[2]); \
|
||||
@ -84,4 +84,41 @@ BOOST_AUTO_TEST_CASE(step_trivial) {
|
||||
CHECK_REGISTER(state.output_regs, NV2AOR_TEX2, &full_state.input_regs[reg]);
|
||||
}
|
||||
|
||||
// Executes a one-step program (MOV c1.xyzw, v11) through the context-tracking
// entry point and verifies that c1 receives the value of v11 and that only
// context_dirty[1] is set.
BOOST_AUTO_TEST_CASE(program_context_tracked) {
  Nv2aVshCPUFullExecutionState full_state;
  Nv2aVshExecutionState state =
      nv2a_vsh_emu_initialize_full_execution_state(&full_state);

  // Seed input register v11 (four floats starting at offset 11 * 4).
  uint32_t v11_offset = 11 * 4;
  full_state.input_regs[v11_offset + 0] = 123.0f;
  full_state.input_regs[v11_offset + 1] = -456.0f;
  full_state.input_regs[v11_offset + 2] = 0.789f;
  full_state.input_regs[v11_offset + 3] = 32.64f;

  // MOV c1.xyzw, v11
  Nv2aVshStep steps[1];
  clear_step(&steps[0]);
  steps[0].mac.opcode = NV2AOP_MOV;
  steps[0].mac.outputs[0].type = NV2ART_CONTEXT;
  steps[0].mac.outputs[0].index = 1;
  steps[0].mac.outputs[0].writemask = NV2AWM_XYZW;
  steps[0].mac.inputs[0].type = NV2ART_INPUT;
  steps[0].mac.inputs[0].index = 11;
  steps[0].is_final = true;

  Nv2aVshProgram program;
  program.steps = steps;

  bool context_dirty[192] = {false};
  nv2a_vsh_emu_execute_track_context_writes(&state, &program, context_dirty);

  // The context register must hold the copied value...
  CHECK_REGISTER(state.context_regs, 1, &full_state.input_regs[v11_offset]);

  // ...and only its dirty flag may be set.
  BOOST_TEST(!context_dirty[0]);
  BOOST_TEST(context_dirty[1]);
  for (uint32_t i = 2; i < 192; ++i) {
    BOOST_TEST_INFO(i);
    BOOST_TEST(!context_dirty[i]);
  }
}
|
||||
|
||||
BOOST_AUTO_TEST_SUITE_END()
|
||||
|
Loading…
Reference in New Issue
Block a user