Merge pull request #3 from abaire/adds_context_tracking

Implements context write tracking.
This commit is contained in:
Erik Abair 2022-06-24 20:02:51 -07:00 committed by GitHub
commit 3a57d8d635
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 103 additions and 20 deletions

View File

@ -37,8 +37,7 @@ static inline void set_register(float *out, const float *in,
out[3] = mult * in[swizzle[3]];
}
static inline void fetch_value(float *out,
const Nv2aVshExecutionState *state,
static inline void fetch_value(float *out, const Nv2aVshExecutionState *state,
const Nv2aVshInput *input) {
const float *in;
@ -125,12 +124,11 @@ static inline void apply_operation(Nv2aVshExecutionState *state,
}
}
static inline void apply(Nv2aVshExecutionState *state,
const Nv2aVshStep *step) {
static inline void prepare_inputs(float *mac_inputs, float *ilu_input,
Nv2aVshExecutionState *state,
const Nv2aVshStep *step) {
// Copy the inputs for both operations first to prevent introducing order
// dependent behavior.
float mac_inputs[3 * 4];
float ilu_input[4];
if (step->mac.opcode) {
for (uint32_t i = 0; i < 3; ++i) {
if (step->mac.inputs[i].type == NV2ART_NONE) {
@ -142,6 +140,13 @@ static inline void apply(Nv2aVshExecutionState *state,
if (step->ilu.opcode) {
fetch_value(ilu_input, state, &step->ilu.inputs[0]);
}
}
static inline void apply(Nv2aVshExecutionState *state,
const Nv2aVshStep *step) {
float mac_inputs[3 * 4];
float ilu_input[4];
prepare_inputs(mac_inputs, &ilu_input, state, step);
if (step->mac.opcode) {
apply_operation(state, &step->mac, mac_inputs);
@ -151,6 +156,31 @@ static inline void apply(Nv2aVshExecutionState *state,
}
}
// Applies a single step to the given state and records which context
// registers its operations name as outputs.
//
// state: execution state the step's MAC and ILU operations are applied to.
// step: the step to execute.
// context_dirty: array indexed by context register number. For every MAC or
//     ILU output whose type is NV2ART_CONTEXT, the entry at that output's
//     index is set to true. Entries are never reset to false here.
static inline void apply_track_context_writes(Nv2aVshExecutionState *state,
                                              const Nv2aVshStep *step,
                                              bool *context_dirty) {
  float mac_inputs[3 * 4];
  float ilu_input[4];

  // Inputs for both operations are gathered before either one is applied so
  // that the result does not depend on the order in which MAC and ILU run.
  // The arrays are passed directly (not by address) so they decay to float *.
  prepare_inputs(mac_inputs, ilu_input, state, step);

  if (step->mac.opcode) {
    apply_operation(state, &step->mac, mac_inputs);
    // Each output is checked independently: a context write in outputs[1]
    // must be flagged even when outputs[0] is also a context register.
    for (uint32_t i = 0; i < 2; ++i) {
      if (step->mac.outputs[i].type == NV2ART_CONTEXT) {
        context_dirty[step->mac.outputs[i].index] = true;
      }
    }
  }

  if (step->ilu.opcode) {
    apply_operation(state, &step->ilu, ilu_input);
    for (uint32_t i = 0; i < 2; ++i) {
      if (step->ilu.outputs[i].type == NV2ART_CONTEXT) {
        context_dirty[step->ilu.outputs[i].index] = true;
      }
    }
  }
}
void nv2a_vsh_emu_execute(Nv2aVshExecutionState *state,
const Nv2aVshProgram *program) {
assert(state);
@ -166,6 +196,23 @@ void nv2a_vsh_emu_execute(Nv2aVshExecutionState *state,
}
}
// Runs the program's steps in order against the given state, starting at
// program->steps and stopping after the first step whose is_final flag is set.
// Each step is applied via apply_track_context_writes, which flags context
// register writes in context_dirty.
//
// state, program, program->steps and context_dirty must all be non-NULL; this
// is enforced with assert.
void nv2a_vsh_emu_execute_track_context_writes(Nv2aVshExecutionState *state,
                                               const Nv2aVshProgram *program,
                                               bool *context_dirty) {
  assert(state);
  assert(program && program->steps);
  assert(context_dirty);

  // The final step is itself executed before the walk ends.
  for (const Nv2aVshStep *current = program->steps;; ++current) {
    apply_track_context_writes(state, current, context_dirty);
    if (current->is_final) {
      return;
    }
  }
}
// Public entry point for executing a single step: forwards the given state and
// step unchanged to the file-local apply() helper.
void nv2a_vsh_emu_apply(Nv2aVshExecutionState *state, const Nv2aVshStep *step) {
apply(state, step);
}

View File

@ -11,6 +11,12 @@ extern "C" {
void nv2a_vsh_emu_execute(Nv2aVshExecutionState *state,
const Nv2aVshProgram *program);
// Runs the steps of the given program against the given state, starting at
// program->steps and stopping after the first step whose is_final flag is set,
// while recording writes to context registers.
//
// context_dirty is an array of 192 bools, indexed by context register number.
// An entry is set to true when an executed operation has the matching entry in
// context_regs as an output. Entries are never reset to false by this call.
//
// state, program, program->steps and context_dirty must be non-NULL (asserted).
void nv2a_vsh_emu_execute_track_context_writes(Nv2aVshExecutionState *state,
const Nv2aVshProgram *program,
bool *context_dirty);
// Emulates the given step by applying it to the given state.
void nv2a_vsh_emu_apply(Nv2aVshExecutionState *state, const Nv2aVshStep *step);

View File

@ -8,19 +8,17 @@ Nv2aVshExecutionState nv2a_vsh_emu_initialize_full_execution_state(
Nv2aVshExecutionState ret = {
(float *)state->input_regs, (float *)state->output_regs,
(float *)state->temp_regs, (float *)state->context_regs,
(float *)&state->address_reg, NULL};
(float *)&state->address_reg};
return ret;
}
Nv2aVshExecutionState nv2a_vsh_emu_initialize_xss_execution_state(
Nv2aVshCPUXVSSExecutionState *state, float *context_regs,
bool *context_dirty) {
Nv2aVshCPUXVSSExecutionState *state, float *context_regs) {
memset(state, 0, sizeof(*state));
state->context_regs = context_regs;
state->context_dirty = context_dirty;
Nv2aVshExecutionState ret = {
(float *)state->input_regs, (float *)state->output_regs,
(float *)state->temp_regs, (float *)state->context_regs,
(float *)&state->address_reg, state->context_dirty};
(float *)&state->address_reg};
return ret;
}

View File

@ -69,10 +69,6 @@ typedef struct Nv2aVshCPUXVSSExecutionState_ {
// Context regs should be initialized to a flat array of 192 registers.
float *context_regs;
// Optional array of 192 bools that will be set when writing to entries in
// context_regs.
bool *context_dirty;
float address_reg[4];
} Nv2aVshCPUXVSSExecutionState;
@ -84,8 +80,7 @@ Nv2aVshExecutionState nv2a_vsh_emu_initialize_full_execution_state(
// Initializes the given Nv2aVshCPUXVSSExecutionState and returns an
// Nv2aVshExecutionState appropriate for use with nv2a_vsh_cpu_* functions.
Nv2aVshExecutionState nv2a_vsh_emu_initialize_xss_execution_state(
Nv2aVshCPUXVSSExecutionState *state, float *context_regs,
bool *context_dirty);
Nv2aVshCPUXVSSExecutionState *state, float *context_regs);
#ifdef __cplusplus
} // extern "C"

View File

@ -2,9 +2,9 @@
#include "nv2a_vsh_emulator.h"
#define CHECK_REGISTER(bank, index, actual) \
do { \
float *expected = bank + index * 4; \
#define CHECK_REGISTER(bank, index, actual) \
do { \
float *expected = bank + index * 4; \
BOOST_TEST((expected)[0] == (actual)[0]); \
BOOST_TEST((expected)[1] == (actual)[1]); \
BOOST_TEST((expected)[2] == (actual)[2]); \
@ -84,4 +84,41 @@ BOOST_AUTO_TEST_CASE(step_trivial) {
CHECK_REGISTER(state.output_regs, NV2AOR_TEX2, &full_state.input_regs[reg]);
}
// Verifies that executing a one-step program which writes context register 1
// copies the input values into that register and flags only entry 1 of the
// context_dirty array.
BOOST_AUTO_TEST_CASE(program_context_tracked) {
Nv2aVshCPUFullExecutionState full_state;
Nv2aVshExecutionState state =
nv2a_vsh_emu_initialize_full_execution_state(&full_state);
// Offset of input register 11 in the flat input array (4 floats per register).
uint32_t reg = 11 * 4;
full_state.input_regs[reg + 0] = 123.0f;
full_state.input_regs[reg + 1] = -456.0f;
full_state.input_regs[reg + 2] = 0.789f;
full_state.input_regs[reg + 3] = 32.64f;
// MOV c1.xyzw, v11
Nv2aVshStep steps[1];
Nv2aVshStep *step = &steps[0];
clear_step(step);
step->mac.opcode = NV2AOP_MOV;
step->mac.outputs[0].type = NV2ART_CONTEXT;
step->mac.outputs[0].index = 1;
step->mac.outputs[0].writemask = NV2AWM_XYZW;
step->mac.inputs[0].type = NV2ART_INPUT;
step->mac.inputs[0].index = 11;
// Mark the only step as the last one so execution stops after it.
step->is_final = true;
Nv2aVshProgram program;
program.steps = steps;
// Start with every flag cleared so any set entry comes from this run.
bool context_dirty[192] = {false};
nv2a_vsh_emu_execute_track_context_writes(&state, &program, context_dirty);
// Context register 1 must now hold the values seeded into input register 11.
CHECK_REGISTER(state.context_regs, 1, &full_state.input_regs[reg]);
// Only the flag for context register 1 may be set.
BOOST_TEST(!context_dirty[0]);
BOOST_TEST(context_dirty[1]);
for (uint32_t i = 2; i < 192; ++i) {
// Attach the index to the next assertion so a failure identifies the entry.
BOOST_TEST_INFO(i);
BOOST_TEST(!context_dirty[i]);
}
}
BOOST_AUTO_TEST_SUITE_END()