Adds disassembler.

This commit is contained in:
Erik Abair 2022-06-18 18:37:02 -07:00
parent 1eae45413f
commit a5debc73de
8 changed files with 1236 additions and 54 deletions

View File

@ -13,7 +13,7 @@ include (ExternalProject)
include(FindPkgConfig)
find_package(
Boost 1.70
Boost 1.74
COMPONENTS
unit_test_framework
REQUIRED
@ -25,8 +25,6 @@ include_directories("${Boost_INCLUDE_DIR}" "${GENERATED_FILES_DIR}")
set(CMAKE_CXX_FLAGS_DEBUG "-ggdb -O0")
set(CMAKE_CXX_FLAGS_RELEASE "-O3")
# xbdm executable ------------------------------------
add_library(
nv2a_vsh_cpu
src/nv2a_vsh_cpu.c
@ -39,6 +37,18 @@ target_include_directories(
src
)
# Library that decodes nv2a vertex shader machine code into Nv2aVshStep
# structures.
add_library(
    nv2a_vsh_disassembler
    src/nv2a_vsh_disassembler.c
    src/nv2a_vsh_disassembler.h
)

# The public header lives in src/, so export that directory as a usage
# requirement: targets that link this library inherit the include path
# instead of having to re-add it by hand.
target_include_directories(
    nv2a_vsh_disassembler
    PUBLIC
    src
)
# Tests ----------------------------------------------
add_executable(
operations_tests
@ -58,3 +68,23 @@ target_link_libraries(
)
add_test(NAME operations_tests COMMAND operations_tests)
add_dependencies(operations_tests nv2a_vsh_cpu)
# Boost.Test executable covering the disassembler library.
add_executable(
    disassembler_tests
    test/disassembler/test_main.cpp
    test/disassembler/test_basic.cpp
)

target_include_directories(
    disassembler_tests
    PRIVATE
    src
    test
)

# Use the modern PRIVATE keyword rather than the legacy LINK_PRIVATE spelling.
# Linking against the nv2a_vsh_disassembler target already makes CMake build
# the library first, so no separate add_dependencies() call is required.
target_link_libraries(
    disassembler_tests
    PRIVATE
    nv2a_vsh_disassembler
    ${Boost_LIBRARIES}
)

add_test(NAME disassembler_tests COMMAND disassembler_tests)

View File

@ -3,11 +3,11 @@
#include <math.h>
#include <string.h>
void nv2a_vsh_cpu_mov(nv2a_vsh_register *out, const nv2a_vsh_register *a) {
void nv2a_vsh_cpu_mov(Nv2aVshRegister *out, const Nv2aVshRegister *a) {
memcpy(out, a, sizeof(*out));
}
void nv2a_vsh_cpu_arl(nv2a_vsh_register *out, const nv2a_vsh_register *a) {
void nv2a_vsh_cpu_arl(Nv2aVshRegister *out, const Nv2aVshRegister *a) {
float val = floorf(a->reg.x + 0.001f);
out->reg.x = val;
out->reg.y = val;
@ -15,32 +15,32 @@ void nv2a_vsh_cpu_arl(nv2a_vsh_register *out, const nv2a_vsh_register *a) {
out->reg.w = val;
}
void nv2a_vsh_cpu_mul(nv2a_vsh_register *out, const nv2a_vsh_register *a,
const nv2a_vsh_register *b) {
void nv2a_vsh_cpu_mul(Nv2aVshRegister *out, const Nv2aVshRegister *a,
const Nv2aVshRegister *b) {
out->reg.x = a->reg.x * b->reg.x;
out->reg.y = a->reg.y * b->reg.y;
out->reg.z = a->reg.z * b->reg.z;
out->reg.w = a->reg.w * b->reg.w;
}
void nv2a_vsh_cpu_add(nv2a_vsh_register *out, const nv2a_vsh_register *a,
const nv2a_vsh_register *b) {
void nv2a_vsh_cpu_add(Nv2aVshRegister *out, const Nv2aVshRegister *a,
const Nv2aVshRegister *b) {
out->reg.x = a->reg.x + b->reg.x;
out->reg.y = a->reg.y + b->reg.y;
out->reg.z = a->reg.z + b->reg.z;
out->reg.w = a->reg.w + b->reg.w;
}
void nv2a_vsh_cpu_mad(nv2a_vsh_register *out, const nv2a_vsh_register *a,
const nv2a_vsh_register *b, const nv2a_vsh_register *c) {
void nv2a_vsh_cpu_mad(Nv2aVshRegister *out, const Nv2aVshRegister *a,
const Nv2aVshRegister *b, const Nv2aVshRegister *c) {
out->reg.x = a->reg.x * b->reg.x + c->reg.x;
out->reg.y = a->reg.y * b->reg.y + c->reg.y;
out->reg.z = a->reg.z * b->reg.z + c->reg.z;
out->reg.w = a->reg.w * b->reg.w + c->reg.w;
}
void nv2a_vsh_cpu_dp3(nv2a_vsh_register *out, const nv2a_vsh_register *a,
const nv2a_vsh_register *b) {
void nv2a_vsh_cpu_dp3(Nv2aVshRegister *out, const Nv2aVshRegister *a,
const Nv2aVshRegister *b) {
float result =
a->reg.x * b->reg.x + a->reg.y * b->reg.y + a->reg.z * b->reg.z;
out->reg.x = result;
@ -49,8 +49,8 @@ void nv2a_vsh_cpu_dp3(nv2a_vsh_register *out, const nv2a_vsh_register *a,
out->reg.w = result;
}
void nv2a_vsh_cpu_dph(nv2a_vsh_register *out, const nv2a_vsh_register *a,
const nv2a_vsh_register *b) {
void nv2a_vsh_cpu_dph(Nv2aVshRegister *out, const Nv2aVshRegister *a,
const Nv2aVshRegister *b) {
float result = a->reg.x * b->reg.x + a->reg.y * b->reg.y +
a->reg.z * b->reg.z + b->reg.w;
out->reg.x = result;
@ -59,8 +59,8 @@ void nv2a_vsh_cpu_dph(nv2a_vsh_register *out, const nv2a_vsh_register *a,
out->reg.w = result;
}
void nv2a_vsh_cpu_dp4(nv2a_vsh_register *out, const nv2a_vsh_register *a,
const nv2a_vsh_register *b) {
void nv2a_vsh_cpu_dp4(Nv2aVshRegister *out, const Nv2aVshRegister *a,
const Nv2aVshRegister *b) {
float result = a->reg.x * b->reg.x + a->reg.y * b->reg.y +
a->reg.z * b->reg.z + a->reg.w * b->reg.w;
out->reg.x = result;
@ -69,47 +69,47 @@ void nv2a_vsh_cpu_dp4(nv2a_vsh_register *out, const nv2a_vsh_register *a,
out->reg.w = result;
}
void nv2a_vsh_cpu_dst(nv2a_vsh_register *out, const nv2a_vsh_register *a,
const nv2a_vsh_register *b) {
void nv2a_vsh_cpu_dst(Nv2aVshRegister *out, const Nv2aVshRegister *a,
const Nv2aVshRegister *b) {
out->reg.x = 1.0f;
out->reg.y = a->reg.y * b->reg.y;
out->reg.z = a->reg.z;
out->reg.w = b->reg.w;
}
void nv2a_vsh_cpu_min(nv2a_vsh_register *out, const nv2a_vsh_register *a,
const nv2a_vsh_register *b) {
void nv2a_vsh_cpu_min(Nv2aVshRegister *out, const Nv2aVshRegister *a,
const Nv2aVshRegister *b) {
out->reg.x = a->reg.x < b->reg.x ? a->reg.x : b->reg.x;
out->reg.y = a->reg.y < b->reg.y ? a->reg.y : b->reg.y;
out->reg.z = a->reg.z < b->reg.z ? a->reg.z : b->reg.z;
out->reg.w = a->reg.w < b->reg.w ? a->reg.w : b->reg.w;
}
void nv2a_vsh_cpu_max(nv2a_vsh_register *out, const nv2a_vsh_register *a,
const nv2a_vsh_register *b) {
void nv2a_vsh_cpu_max(Nv2aVshRegister *out, const Nv2aVshRegister *a,
const Nv2aVshRegister *b) {
out->reg.x = a->reg.x > b->reg.x ? a->reg.x : b->reg.x;
out->reg.y = a->reg.y > b->reg.y ? a->reg.y : b->reg.y;
out->reg.z = a->reg.z > b->reg.z ? a->reg.z : b->reg.z;
out->reg.w = a->reg.w > b->reg.w ? a->reg.w : b->reg.w;
}
void nv2a_vsh_cpu_slt(nv2a_vsh_register *out, const nv2a_vsh_register *a,
const nv2a_vsh_register *b) {
void nv2a_vsh_cpu_slt(Nv2aVshRegister *out, const Nv2aVshRegister *a,
const Nv2aVshRegister *b) {
out->reg.x = a->reg.x < b->reg.x ? 1.0f : 0.0f;
out->reg.y = a->reg.y < b->reg.y ? 1.0f : 0.0f;
out->reg.z = a->reg.z < b->reg.z ? 1.0f : 0.0f;
out->reg.w = a->reg.w < b->reg.w ? 1.0f : 0.0f;
}
void nv2a_vsh_cpu_sge(nv2a_vsh_register *out, const nv2a_vsh_register *a,
const nv2a_vsh_register *b) {
void nv2a_vsh_cpu_sge(Nv2aVshRegister *out, const Nv2aVshRegister *a,
const Nv2aVshRegister *b) {
out->reg.x = a->reg.x >= b->reg.x ? 1.0f : 0.0f;
out->reg.y = a->reg.y >= b->reg.y ? 1.0f : 0.0f;
out->reg.z = a->reg.z >= b->reg.z ? 1.0f : 0.0f;
out->reg.w = a->reg.w >= b->reg.w ? 1.0f : 0.0f;
}
void nv2a_vsh_cpu_rcp(nv2a_vsh_register *out, const nv2a_vsh_register *a) {
void nv2a_vsh_cpu_rcp(Nv2aVshRegister *out, const Nv2aVshRegister *a) {
float result =
(a->reg.x == 1.0f ? 1.0f
: (a->reg.x == 0.0f ? INFINITY : 1.0f / a->reg.x));
@ -119,7 +119,7 @@ void nv2a_vsh_cpu_rcp(nv2a_vsh_register *out, const nv2a_vsh_register *a) {
out->reg.w = result;
}
void nv2a_vsh_cpu_rcc(nv2a_vsh_register *out, const nv2a_vsh_register *a) {
void nv2a_vsh_cpu_rcc(Nv2aVshRegister *out, const Nv2aVshRegister *a) {
// TODO: Validate this on HW.
float result;
if (a->reg.x == 1.0f) {
@ -143,7 +143,7 @@ void nv2a_vsh_cpu_rcc(nv2a_vsh_register *out, const nv2a_vsh_register *a) {
out->reg.w = result;
}
void nv2a_vsh_cpu_rsq(nv2a_vsh_register *out, const nv2a_vsh_register *a) {
void nv2a_vsh_cpu_rsq(Nv2aVshRegister *out, const Nv2aVshRegister *a) {
float result = (a->reg.x == 1.0f
? 1.0f
: (a->reg.x == 0.0f ? INFINITY : 1.0f / sqrtf(a->reg.x)));
@ -153,7 +153,7 @@ void nv2a_vsh_cpu_rsq(nv2a_vsh_register *out, const nv2a_vsh_register *a) {
out->reg.w = result;
}
void nv2a_vsh_cpu_exp(nv2a_vsh_register *out, const nv2a_vsh_register *a) {
void nv2a_vsh_cpu_exp(Nv2aVshRegister *out, const Nv2aVshRegister *a) {
float tmp = floorf(a->reg.x);
out->reg.x = powf(2.0f, tmp);
out->reg.y = a->reg.x - tmp;
@ -161,7 +161,7 @@ void nv2a_vsh_cpu_exp(nv2a_vsh_register *out, const nv2a_vsh_register *a) {
out->reg.w = 1.0f;
}
void nv2a_vsh_cpu_log(nv2a_vsh_register *out, const nv2a_vsh_register *a) {
void nv2a_vsh_cpu_log(Nv2aVshRegister *out, const Nv2aVshRegister *a) {
// TODO: Validate this on HW.
float tmp = fabsf(a->reg.x);
if (tmp == 0.0f) {
@ -182,7 +182,7 @@ void nv2a_vsh_cpu_log(nv2a_vsh_register *out, const nv2a_vsh_register *a) {
out->reg.w = 1.0f;
}
void nv2a_vsh_cpu_lit(nv2a_vsh_register *out, const nv2a_vsh_register *a) {
void nv2a_vsh_cpu_lit(Nv2aVshRegister *out, const Nv2aVshRegister *a) {
static const float kMax = 127.9961f;
out->reg.x = 1.0f;

View File

@ -5,27 +5,26 @@
extern "C" {
#endif
struct nv2a_vsh_register_components {
struct Nv2aVshRegisterComponents {
float x;
float y;
float z;
float w;
};
typedef union nv2a_vsh_register_ {
struct nv2a_vsh_register_components reg;
typedef union Nv2aVshRegister_ {
struct Nv2aVshRegisterComponents reg;
float raw[4];
} nv2a_vsh_register;
} Nv2aVshRegister;
#define OP_1(name) \
void nv2a_vsh_cpu_##name(nv2a_vsh_register *out, const nv2a_vsh_register *a)
#define OP_2(name) \
void nv2a_vsh_cpu_##name(nv2a_vsh_register *out, const nv2a_vsh_register *a, \
const nv2a_vsh_register *b)
#define OP_3(name) \
void nv2a_vsh_cpu_##name(nv2a_vsh_register *out, const nv2a_vsh_register *a, \
const nv2a_vsh_register *b, \
const nv2a_vsh_register *c)
void nv2a_vsh_cpu_##name(Nv2aVshRegister *out, const Nv2aVshRegister *a)
#define OP_2(name) \
void nv2a_vsh_cpu_##name(Nv2aVshRegister *out, const Nv2aVshRegister *a, \
const Nv2aVshRegister *b)
#define OP_3(name) \
void nv2a_vsh_cpu_##name(Nv2aVshRegister *out, const Nv2aVshRegister *a, \
const Nv2aVshRegister *b, const Nv2aVshRegister *c)
OP_1(mov);
OP_1(arl);
@ -52,7 +51,7 @@ OP_1(lit);
#undef OP_3
#ifdef __cplusplus
};
}; // extern "C"
#endif
#endif // NV2A_VSH_CPU_SRC_NV2A_VSH_CPU_H_

445
src/nv2a_vsh_disassembler.c Normal file
View File

@ -0,0 +1,445 @@
#include "nv2a_vsh_disassembler.h"
#include <stdlib.h>
// Extracts the `size`-bit wide field that starts at bit `start` of
// `token[index]`. `size` must be smaller than 32.
//
// Every argument and the whole expansion are parenthesized so the macro can be
// embedded in a larger expression (comparison, arithmetic, cast) without
// operator precedence silently changing the result.
#define EXTRACT(token, index, start, size) \
  (((token)[(index)] >> (start)) & ~(0xFFFFFFFFu << (size)))
// Raw 2-bit source selector values as returned by parse_a_type(),
// parse_b_type() and parse_c_type().
typedef enum Nv2aVshInputType_ {
  NV2AIT_NONE = 0x0,
  NV2AIT_TEMP = 0x1,
  NV2AIT_INPUT = 0x2,
  NV2AIT_CONTEXT = 0x3,
} Nv2aVshInputType;
static const Nv2aVshRegisterType kInputTypeToGeneric[] = {
NV2ART_NONE,
NV2ART_TEMPORARY,
NV2ART_INPUT,
NV2ART_CONTEXT,
};
// Raw 3-bit ILU opcode values as returned by parse_ilu_opcode().
typedef enum Nv2aVshILUOpcode_ {
  NV2AILU_NOP = 0x0,
  NV2AILU_MOV = 0x1,
  NV2AILU_RCP = 0x2,
  NV2AILU_RCC = 0x3,
  NV2AILU_RSQ = 0x4,
  NV2AILU_EXP = 0x5,
  NV2AILU_LOG = 0x6,
  NV2AILU_LIT = 0x7
} Nv2aVshILUOpcode;
static const Nv2aVshOpcode kILUOpcodeToGeneric[] = {
NV2AOP_NOP, NV2AOP_MOV, NV2AOP_RCP, NV2AOP_RCC,
NV2AOP_RSQ, NV2AOP_EXP, NV2AOP_LOG, NV2AOP_LIT};
// Raw MAC opcode values as returned by parse_mac_opcode(). The field is four
// bits wide, but only the fourteen values below are defined.
typedef enum Nv2aVshMACOpcode_ {
  NV2AMAC_NOP = 0x0,
  NV2AMAC_MOV = 0x1,
  NV2AMAC_MUL = 0x2,
  NV2AMAC_ADD = 0x3,
  NV2AMAC_MAD = 0x4,
  NV2AMAC_DP3 = 0x5,
  NV2AMAC_DPH = 0x6,
  NV2AMAC_DP4 = 0x7,
  NV2AMAC_DST = 0x8,
  NV2AMAC_MIN = 0x9,
  NV2AMAC_MAX = 0xA,
  NV2AMAC_SLT = 0xB,
  NV2AMAC_SGE = 0xC,
  NV2AMAC_ARL = 0xD
} Nv2aVshMACOpcode;
static const Nv2aVshOpcode kMACOpcodeToGeneric[] = {
NV2AOP_NOP, NV2AOP_MOV, NV2AOP_MUL, NV2AOP_ADD, NV2AOP_MAD,
NV2AOP_DP3, NV2AOP_DPH, NV2AOP_DP4, NV2AOP_DST, NV2AOP_MIN,
NV2AOP_MAX, NV2AOP_SLT, NV2AOP_SGE, NV2AOP_ARL,
};
// Releases the step array owned by `program` and resets the pointer to NULL.
// A NULL `program` is ignored. The Nv2aVshProgram object itself is not freed.
void nv2a_vsh_program_destroy(Nv2aVshProgram *program) {
  if (!program) {
    return;
  }

  // free(NULL) is a no-op, so no separate check of `steps` is needed.
  free(program->steps);
  program->steps = NULL;
}
static inline uint32_t parse_a_swizzle_w(const uint32_t *token) {
return EXTRACT(token, 1, 0, 2);
}
static inline uint32_t parse_a_swizzle_z(const uint32_t *token) {
return EXTRACT(token, 1, 2, 2);
}
static inline uint32_t parse_a_swizzle_y(const uint32_t *token) {
return EXTRACT(token, 1, 4, 2);
}
static inline uint32_t parse_a_swizzle_x(const uint32_t *token) {
return EXTRACT(token, 1, 6, 2);
}
static inline bool parse_a_negate(const uint32_t *token) {
return EXTRACT(token, 1, 8, 1);
}
static inline uint32_t parse_input_reg(const uint32_t *token) {
return EXTRACT(token, 1, 9, 4);
}
static inline uint32_t parse_context_reg(const uint32_t *token) {
return EXTRACT(token, 1, 13, 8);
}
static inline Nv2aVshMACOpcode parse_mac_opcode(const uint32_t *token) {
return EXTRACT(token, 1, 21, 4);
}
static inline Nv2aVshILUOpcode parse_ilu_opcode(const uint32_t *token) {
return EXTRACT(token, 1, 25, 3);
}
static inline uint32_t parse_c_swizzle_w(const uint32_t *token) {
return EXTRACT(token, 2, 2, 2);
}
static inline uint32_t parse_c_swizzle_z(const uint32_t *token) {
return EXTRACT(token, 2, 4, 2);
}
static inline uint32_t parse_c_swizzle_y(const uint32_t *token) {
return EXTRACT(token, 2, 6, 2);
}
static inline uint32_t parse_c_swizzle_x(const uint32_t *token) {
return EXTRACT(token, 2, 8, 2);
}
static inline bool parse_c_negate(const uint32_t *token) {
return EXTRACT(token, 2, 10, 1);
}
static inline Nv2aVshInputType parse_b_type(const uint32_t *token) {
return EXTRACT(token, 2, 11, 2);
}
static inline uint32_t parse_b_temp_reg(const uint32_t *token) {
return EXTRACT(token, 2, 13, 4);
}
static inline uint32_t parse_b_swizzle_w(const uint32_t *token) {
return EXTRACT(token, 2, 17, 2);
}
static inline uint32_t parse_b_swizzle_z(const uint32_t *token) {
return EXTRACT(token, 2, 19, 2);
}
static inline uint32_t parse_b_swizzle_y(const uint32_t *token) {
return EXTRACT(token, 2, 21, 2);
}
static inline uint32_t parse_b_swizzle_x(const uint32_t *token) {
return EXTRACT(token, 2, 23, 2);
}
static inline bool parse_b_negate(const uint32_t *token) {
return EXTRACT(token, 2, 25, 1);
}
static inline Nv2aVshInputType parse_a_type(const uint32_t *token) {
return EXTRACT(token, 2, 26, 2);
}
static inline uint32_t parse_a_temp_reg(const uint32_t *token) {
return EXTRACT(token, 2, 28, 4);
}
static inline bool parse_final(const uint32_t *token) {
return EXTRACT(token, 3, 0, 1);
}
static inline bool parse_a0(const uint32_t *token) {
return EXTRACT(token, 3, 1, 1);
}
static inline bool parse_output_is_ilu(const uint32_t *token) {
return EXTRACT(token, 3, 2, 1);
}
static inline uint32_t parse_output_index(const uint32_t *token) {
return EXTRACT(token, 3, 3, 8);
}
static inline bool parse_out_is_output(const uint32_t *token) {
return EXTRACT(token, 3, 11, 1);
}
static inline uint32_t parse_output_writemask(const uint32_t *token) {
return EXTRACT(token, 3, 12, 4);
}
static inline uint32_t parse_temp_writemask_ilu(const uint32_t *token) {
return EXTRACT(token, 3, 16, 4);
}
static inline uint32_t parse_out_temp_reg(const uint32_t *token) {
return EXTRACT(token, 3, 20, 4);
}
static inline uint32_t parse_temp_writemask_mac(const uint32_t *token) {
return EXTRACT(token, 3, 24, 4);
}
static inline Nv2aVshInputType parse_c_type(const uint32_t *token) {
return EXTRACT(token, 3, 28, 2);
}
// Returns the 4-bit temporary register index of input C.
//
// The field straddles two words: its upper two bits are bits 0-1 of token[2]
// and its lower two bits are bits 30-31 of token[3] (directly above the input
// C type selector at bits 28-29). Bits 20-21 of token[3] belong to the output
// temporary register field and must not be used here.
//
// The shifts are spelled out instead of using a field-extraction helper so the
// cross-word assembly is visible in one place.
static inline uint32_t parse_c_temp_reg(const uint32_t *token) {
  const uint32_t low = (token[3] >> 30) & 0x03u;
  const uint32_t high = token[2] & 0x03u;
  return (high << 2) | low;
}
// Fills in the remaining fields of `out`, whose `type` must already be set.
//
// out      - Input descriptor to populate; `out->type` selects where the
//            register index comes from.
// token    - The 4-word instruction being decoded.
// negate   - Negate flag for this operand.
// temp_reg - Temporary register index to use when the type is
//            NV2ART_TEMPORARY.
// x,y,z,w  - Swizzle selectors for the four components.
//
// For any type other than temporary, input or context the function returns
// without touching `out`.
static inline void process_input(Nv2aVshInput *out, const uint32_t *token,
                                 bool negate, uint32_t temp_reg, uint32_t x,
                                 uint32_t y, uint32_t z, uint32_t w) {
  uint32_t index;
  // Only context register operands pick up the A0-relative flag. Every other
  // type gets an explicit `false` so the field is never left indeterminate.
  bool is_relative = false;

  switch (out->type) {
    case NV2ART_TEMPORARY:
      index = temp_reg;
      break;

    case NV2ART_INPUT:
      index = parse_input_reg(token);
      break;

    case NV2ART_CONTEXT:
      index = parse_context_reg(token);
      is_relative = parse_a0(token);
      break;

    default:
      return;
  }

  out->index = index;
  out->is_relative = is_relative;
  out->is_negated = negate;
  out->swizzle[0] = x & 0xFF;
  out->swizzle[1] = y & 0xFF;
  out->swizzle[2] = z & 0xFF;
  out->swizzle[3] = w & 0xFF;
}
// Decodes the three operand slots (A, B, C) of `token` into inputs[0..2].
//
// Each slot is zero-initialized before its type is set, so a slot whose type
// is NV2ART_NONE has well-defined (zero) index, swizzle and flags. Callers
// copy these structs by value, so nothing may be left indeterminate.
//
// Always returns NV2AVPR_SUCCESS.
static Nv2aVshParseResult parse_inputs(Nv2aVshInput *inputs,
                                       const uint32_t *token) {
  // Operand A.
  inputs[0] = (Nv2aVshInput){.type = kInputTypeToGeneric[parse_a_type(token)]};
  if (inputs[0].type) {
    process_input(&inputs[0], token, parse_a_negate(token),
                  parse_a_temp_reg(token), parse_a_swizzle_x(token),
                  parse_a_swizzle_y(token), parse_a_swizzle_z(token),
                  parse_a_swizzle_w(token));
  }

  // Operand B.
  inputs[1] = (Nv2aVshInput){.type = kInputTypeToGeneric[parse_b_type(token)]};
  if (inputs[1].type) {
    process_input(&inputs[1], token, parse_b_negate(token),
                  parse_b_temp_reg(token), parse_b_swizzle_x(token),
                  parse_b_swizzle_y(token), parse_b_swizzle_z(token),
                  parse_b_swizzle_w(token));
  }

  // Operand C.
  inputs[2] = (Nv2aVshInput){.type = kInputTypeToGeneric[parse_c_type(token)]};
  if (inputs[2].type) {
    process_input(&inputs[2], token, parse_c_negate(token),
                  parse_c_temp_reg(token), parse_c_swizzle_x(token),
                  parse_c_swizzle_y(token), parse_c_swizzle_z(token),
                  parse_c_swizzle_w(token));
  }

  return NV2AVPR_SUCCESS;
}
// Decodes the output fields of `token` into the MAC and ILU output slots of
// `out`. `out->mac.opcode` must already be set because it influences both the
// ILU temporary register choice and the ARL handling.
//
// Returns NV2AVPR_ARL_CONFLICT when the MAC opcode is ARL but the instruction
// also encodes another MAC output; NV2AVPR_SUCCESS otherwise.
static Nv2aVshParseResult parse_outputs(Nv2aVshStep *out,
                                        const uint32_t *token) {
  Nv2aVshOperation *mac = &out->mac;
  Nv2aVshOperation *ilu = &out->ilu;

  mac->outputs[0].type = NV2ART_NONE;
  mac->outputs[1].type = NV2ART_NONE;
  ilu->outputs[0].type = NV2ART_NONE;
  ilu->outputs[1].type = NV2ART_NONE;

  const uint32_t temp_index = parse_out_temp_reg(token);
  const uint32_t mac_temp_mask = parse_temp_writemask_mac(token);
  const uint32_t ilu_temp_mask = parse_temp_writemask_ilu(token);

  // A non-zero temporary writemask means the unit writes a temporary register.
  if (mac_temp_mask != 0) {
    mac->outputs[0].type = NV2ART_TEMPORARY;
    mac->outputs[0].index = temp_index;
    mac->outputs[0].writemask = mac_temp_mask;
  }

  if (ilu_temp_mask != 0) {
    ilu->outputs[0].type = NV2ART_TEMPORARY;
    // Paired ILU instructions that write to temporary registers may only
    // write to R1.
    ilu->outputs[0].index = (mac->opcode != NV2AOP_NOP) ? 1 : temp_index;
    ilu->outputs[0].writemask = ilu_temp_mask;
  }

  const uint32_t out_mask = parse_output_writemask(token);
  if (out_mask != 0) {
    // The non-temporary output belongs to exactly one unit and takes that
    // unit's first free output slot.
    Nv2aVshOperation *owner = parse_output_is_ilu(token) ? ilu : mac;
    Nv2aVshOutput *slot = (owner->outputs[0].type == NV2ART_NONE)
                              ? &owner->outputs[0]
                              : &owner->outputs[1];

    slot->type = parse_out_is_output(token) ? NV2ART_OUTPUT : NV2ART_CONTEXT;
    slot->index = parse_output_index(token);
    slot->writemask = out_mask;
  }

  if (mac->opcode != NV2AOP_ARL) {
    return NV2AVPR_SUCCESS;
  }

  // ARL writes the address register; it cannot also have a regular output.
  if (mac->outputs[0].type != NV2ART_NONE) {
    return NV2AVPR_ARL_CONFLICT;
  }
  mac->outputs[0].type = NV2ART_ADDRESS;
  mac->outputs[0].index = 0;
  mac->outputs[0].writemask = 0;
  return NV2AVPR_SUCCESS;
}
// Disassembles one instruction (four consecutive uint32_t words) into `out`.
//
// out   - Receives the decoded MAC and ILU operations. Unused input and output
//         slots have type NV2ART_NONE.
// token - Pointer to the four words of the instruction.
//
// Returns:
//   NV2AVPR_SUCCESS        - `out` was fully populated.
//   NV2AVPR_BAD_OUTPUT     - `out` is NULL.
//   NV2AVPR_BAD_PROGRAM    - `token` is NULL.
//   NV2AVPR_BAD_MAC_OPCODE - the MAC opcode field holds an undefined value.
//   NV2AVPR_BAD_ILU_OPCODE - the ILU opcode field holds an undefined value.
//   NV2AVPR_ARL_CONFLICT   - ARL is combined with another MAC output.
// On any non-success result the contents of `out` are unspecified.
Nv2aVshParseResult nv2a_vsh_parse_step(Nv2aVshStep *out,
                                       const uint32_t *token) {
  if (!out) {
    return NV2AVPR_BAD_OUTPUT;
  }
  if (!token) {
    return NV2AVPR_BAD_PROGRAM;
  }

  // The MAC opcode field is 4 bits wide (0..15) but the lookup table only has
  // entries for the defined opcodes, so the raw value must be range checked
  // before it is used as an index.
  const uint32_t mac_index = (uint32_t)parse_mac_opcode(token);
  if (mac_index >=
      sizeof(kMACOpcodeToGeneric) / sizeof(kMACOpcodeToGeneric[0])) {
    return NV2AVPR_BAD_MAC_OPCODE;
  }
  const uint32_t ilu_index = (uint32_t)parse_ilu_opcode(token);
  if (ilu_index >=
      sizeof(kILUOpcodeToGeneric) / sizeof(kILUOpcodeToGeneric[0])) {
    return NV2AVPR_BAD_ILU_OPCODE;
  }

  out->mac.opcode = kMACOpcodeToGeneric[mac_index];
  out->ilu.opcode = kILUOpcodeToGeneric[ilu_index];

  for (int i = 0; i < 3; ++i) {
    out->mac.inputs[i].type = NV2ART_NONE;
    out->ilu.inputs[i].type = NV2ART_NONE;
  }
  // Mark the outputs unused up front so a NOP/NOP step (which returns before
  // the outputs are parsed) never exposes uninitialized output types.
  for (int i = 0; i < 2; ++i) {
    out->mac.outputs[i].type = NV2ART_NONE;
    out->ilu.outputs[i].type = NV2ART_NONE;
  }

  out->is_final = parse_final(token);

  if (!(out->mac.opcode || out->ilu.opcode)) {
    return NV2AVPR_SUCCESS;
  }

  Nv2aVshParseResult result = parse_outputs(out, token);
  if (result != NV2AVPR_SUCCESS) {
    return result;
  }

  Nv2aVshInput inputs[3];
  result = parse_inputs(inputs, token);
  if (result != NV2AVPR_SUCCESS) {
    return result;
  }

  // Distribute the decoded operands (A = inputs[0], B = inputs[1],
  // C = inputs[2]) to the MAC unit according to its opcode. A NOP MAC unit
  // keeps all of its input slots at NV2ART_NONE.
  switch (out->mac.opcode) {
    case NV2AOP_NOP:
      break;

    case NV2AOP_MOV:
    case NV2AOP_ARL:
      // Single operand instructions: A only.
      out->mac.inputs[0] = inputs[0];
      break;

    case NV2AOP_MUL:
    case NV2AOP_DP3:
    case NV2AOP_DP4:
    case NV2AOP_DPH:
    case NV2AOP_DST:
    case NV2AOP_MIN:
    case NV2AOP_MAX:
    case NV2AOP_SGE:
    case NV2AOP_SLT:
      // Two operand instructions: A and B.
      out->mac.inputs[0] = inputs[0];
      out->mac.inputs[1] = inputs[1];
      break;

    case NV2AOP_MAD:
      // Three operand instruction: A, B and C.
      out->mac.inputs[0] = inputs[0];
      out->mac.inputs[1] = inputs[1];
      out->mac.inputs[2] = inputs[2];
      break;

    case NV2AOP_ADD:
      // ADD takes its second operand from slot C rather than slot B.
      out->mac.inputs[0] = inputs[0];
      out->mac.inputs[1] = inputs[2];
      break;

    default:
      return NV2AVPR_BAD_MAC_OPCODE;
  }

  // The ILU always reads its single operand from slot C.
  switch (out->ilu.opcode) {
    case NV2AOP_NOP:
      break;

    case NV2AOP_MOV:
    case NV2AOP_LIT:
      out->ilu.inputs[0] = inputs[2];
      break;

    // These commands operate on the "x" component only, so the x selector is
    // replicated across the whole swizzle.
    case NV2AOP_RCP:
    case NV2AOP_RCC:
    case NV2AOP_RSQ:
    case NV2AOP_EXP:
    case NV2AOP_LOG:
      out->ilu.inputs[0] = inputs[2];
      out->ilu.inputs[0].swizzle[1] = out->ilu.inputs[0].swizzle[0];
      out->ilu.inputs[0].swizzle[2] = out->ilu.inputs[0].swizzle[0];
      out->ilu.inputs[0].swizzle[3] = out->ilu.inputs[0].swizzle[0];
      break;

    default:
      return NV2AVPR_BAD_ILU_OPCODE;
  }

  return NV2AVPR_SUCCESS;
}
// Disassembles `program_size` words of machine code into a newly allocated
// array of steps stored in `out->steps` (one step per four words).
//
// Returns:
//   NV2AVPR_SUCCESS          - `out->steps` holds program_size / 4 steps and
//                              must be released with nv2a_vsh_program_destroy.
//   NV2AVPR_BAD_OUTPUT       - `out` is NULL, or the step array could not be
//                              allocated (there is no dedicated out-of-memory
//                              result code).
//   NV2AVPR_BAD_PROGRAM_SIZE - `program_size` is zero or not a multiple of 4.
//   NV2AVPR_BAD_PROGRAM      - `program` is NULL.
//   any nv2a_vsh_parse_step error - decoding of a step failed.
// Whenever `out` is non-NULL and the call fails, `out->steps` is NULL, so it
// is always safe to pass `out` to nv2a_vsh_program_destroy afterwards.
Nv2aVshParseResult nv2a_vsh_parse_program(Nv2aVshProgram *out,
                                          const uint32_t *program,
                                          uint32_t program_size) {
  if (!out) {
    return NV2AVPR_BAD_OUTPUT;
  }
  out->steps = NULL;

  if (!program_size || (program_size & 0x03)) {
    return NV2AVPR_BAD_PROGRAM_SIZE;
  }

  if (!program) {
    return NV2AVPR_BAD_PROGRAM;
  }

  const uint32_t num_slots = program_size / 4;
  // calloc checks the count * size multiplication for overflow and hands back
  // zeroed storage.
  Nv2aVshStep *steps = (Nv2aVshStep *)calloc(num_slots, sizeof(*steps));
  if (!steps) {
    return NV2AVPR_BAD_OUTPUT;
  }
  out->steps = steps;

  const uint32_t *opcodes = program;
  for (uint32_t i = 0; i < num_slots; ++i, opcodes += 4) {
    Nv2aVshParseResult result = nv2a_vsh_parse_step(&steps[i], opcodes);
    if (result != NV2AVPR_SUCCESS) {
      nv2a_vsh_program_destroy(out);
      return result;
    }
  }

  return NV2AVPR_SUCCESS;
}

159
src/nv2a_vsh_disassembler.h Normal file
View File

@ -0,0 +1,159 @@
// * Based on https://github.com/abaire/nv2a_vsh_asm which is
// * based on
// https://github.com/XboxDev/nxdk/blob/c4b69e7a82452c21aa2c62701fd3836755950f58/tools/vp20compiler/prog_instruction.c#L1
// * Mesa 3-D graphics library
// * Version: 7.3
// *
// * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
// * Copyright (C) 1999-2009 VMware, Inc. All Rights Reserved.
// *
// * Permission is hereby granted, free of charge, to any person obtaining a
// * copy of this software and associated documentation files (the "Software"),
// * to deal in the Software without restriction, including without limitation
// * the rights to use, copy, modify, merge, publish, distribute, sublicense,
// * and/or sell copies of the Software, and to permit persons to whom the
// * Software is furnished to do so, subject to the following conditions:
// *
// * The above copyright notice and this permission notice shall be included
// * in all copies or substantial portions of the Software.
// *
// * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
// * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
// * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
// * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#ifndef NV2A_VSH_CPU_SRC_NV2A_VSH_DISASSEMBLER_H_
#define NV2A_VSH_CPU_SRC_NV2A_VSH_DISASSEMBLER_H_
#include <stdbool.h>
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
// Unified opcode enumeration covering both execution units of a step.
typedef enum Nv2aVshOpcode_ {
  NV2AOP_NOP = 0,

  // Opcodes produced by the MAC unit (MOV may also come from the ILU).
  NV2AOP_MOV = 1,
  NV2AOP_MUL = 2,
  NV2AOP_ADD = 3,
  NV2AOP_MAD = 4,
  NV2AOP_DP3 = 5,
  NV2AOP_DPH = 6,
  NV2AOP_DP4 = 7,
  NV2AOP_DST = 8,
  NV2AOP_MIN = 9,
  NV2AOP_MAX = 10,
  NV2AOP_SLT = 11,
  NV2AOP_SGE = 12,
  NV2AOP_ARL = 13,

  // Opcodes produced only by the ILU unit.
  NV2AOP_RCP = 14,
  NV2AOP_RCC = 15,
  NV2AOP_RSQ = 16,
  NV2AOP_EXP = 17,
  NV2AOP_LOG = 18,
  NV2AOP_LIT = 19
} Nv2aVshOpcode;
// Component selector stored in each element of Nv2aVshInput::swizzle.
typedef enum Nv2aVshSwizzle_ {
  NV2ASW_X = 0,
  NV2ASW_Y = 1,
  NV2ASW_Z = 2,
  NV2ASW_W = 3,
} Nv2aVshSwizzle;
// Output writemask. Each component owns one bit (X = 8, Y = 4, Z = 2, W = 1);
// the multi-component names are the bitwise OR of their parts.
typedef enum Nv2aVshWritemask_ {
  NV2AWM_W = 0x1,
  NV2AWM_Z = 0x2,
  NV2AWM_ZW = NV2AWM_Z | NV2AWM_W,
  NV2AWM_Y = 0x4,
  NV2AWM_YW = NV2AWM_Y | NV2AWM_W,
  NV2AWM_YZ = NV2AWM_Y | NV2AWM_Z,
  NV2AWM_YZW = NV2AWM_Y | NV2AWM_Z | NV2AWM_W,
  NV2AWM_X = 0x8,
  NV2AWM_XW = NV2AWM_X | NV2AWM_W,
  NV2AWM_XZ = NV2AWM_X | NV2AWM_Z,
  NV2AWM_XZW = NV2AWM_X | NV2AWM_Z | NV2AWM_W,
  NV2AWM_XY = NV2AWM_X | NV2AWM_Y,
  NV2AWM_XYW = NV2AWM_X | NV2AWM_Y | NV2AWM_W,
  NV2AWM_XYZ = NV2AWM_X | NV2AWM_Y | NV2AWM_Z,
  NV2AWM_XYZW = NV2AWM_X | NV2AWM_Y | NV2AWM_Z | NV2AWM_W,
} Nv2aVshWritemask;
// Kind of register referenced by an input or output slot.
typedef enum Nv2aVshRegisterType_ {
  // This input/output slot is unused.
  NV2ART_NONE = 0,
  NV2ART_TEMPORARY = 1,
  NV2ART_INPUT = 2,
  NV2ART_OUTPUT = 3,
  NV2ART_CONTEXT = 4,
  // A0
  NV2ART_ADDRESS = 5,
} Nv2aVshRegisterType;
// Describes one destination written by an operation.
typedef struct Nv2aVshOutput_ {
// Kind of register written; NV2ART_NONE marks the slot as unused, in which
// case the remaining fields carry no meaning.
Nv2aVshRegisterType type;
// Register index within the bank selected by `type`.
uint32_t index;
// Components that are written. The parser stores 0 here for the address
// register output of ARL.
Nv2aVshWritemask writemask;
} Nv2aVshOutput;
// Describes one source operand read by an operation.
typedef struct Nv2aVshInput_ {
// Kind of register read; NV2ART_NONE marks the slot as unused.
Nv2aVshRegisterType type;
// Register index within the bank selected by `type`.
uint32_t index;
// Source component (an Nv2aVshSwizzle value) selected for the x, y, z and w
// lanes, in that order.
uint8_t swizzle[4];
// True when the operand's negate flag is set in the instruction.
bool is_negated;
// Taken from the instruction's A0 flag; the parser only assigns it for
// NV2ART_CONTEXT operands.
bool is_relative;
} Nv2aVshInput;
// Represents a single operation: an opcode together with the registers it
// writes and reads. Slots that are not used have type NV2ART_NONE.
typedef struct Nv2aVshOperation_ {
Nv2aVshOpcode opcode;
// Up to two destinations per operation.
Nv2aVshOutput outputs[2];
// Up to three source operands per operation.
Nv2aVshInput inputs[3];
} Nv2aVshOperation;
// One decoded instruction. A single instruction carries a MAC operation and an
// ILU operation; either may be NV2AOP_NOP.
typedef struct Nv2aVshStep_ {
Nv2aVshOperation mac;
Nv2aVshOperation ilu;
// True when the instruction has its "final" flag set.
bool is_final;
} Nv2aVshStep;
// A disassembled program as produced by nv2a_vsh_parse_program.
typedef struct Nv2aVshProgram_ {
// Heap-allocated array of decoded steps; released by
// nv2a_vsh_program_destroy. The element count is not stored here.
Nv2aVshStep *steps;
} Nv2aVshProgram;
// Result codes returned by the nv2a_vsh_parse_* functions.
typedef enum Nv2aVshParseResult_ {
  NV2AVPR_SUCCESS = 0,
  // The output pointer passed by the caller was NULL.
  NV2AVPR_BAD_OUTPUT = 1,
  // The program pointer passed by the caller was NULL.
  NV2AVPR_BAD_PROGRAM = 2,
  // The program size was zero or not a multiple of four words.
  NV2AVPR_BAD_PROGRAM_SIZE = 3,
  // An ARL instruction also encodes another MAC output.
  NV2AVPR_ARL_CONFLICT = 4,
  NV2AVPR_BAD_MAC_OPCODE = 5,
  NV2AVPR_BAD_ILU_OPCODE = 6,
} Nv2aVshParseResult;
// Frees the step array owned by `program` (if any) and resets
// `program->steps` to NULL. A NULL `program` is ignored. The Nv2aVshProgram
// object itself is not freed.
void nv2a_vsh_program_destroy(Nv2aVshProgram *program);
// Disassemble the given token (which must be 4 uint32_t's) into an Nv2aVshStep.
//
// Returns NV2AVPR_SUCCESS when `out` was populated, or one of
// NV2AVPR_ARL_CONFLICT, NV2AVPR_BAD_MAC_OPCODE or NV2AVPR_BAD_ILU_OPCODE when
// the instruction could not be decoded.
Nv2aVshParseResult nv2a_vsh_parse_step(Nv2aVshStep *out, const uint32_t *token);
// Disassemble the given array of nv2a transform opcodes into an
// Nv2aVshProgram representation.
//
// out - Nv2aVshProgram which will be updated to contain the parsed
// steps.
// program - Flat array of integers containing the nv2a transform opcodes to be
// processed.
// program_size - Number of integers in `program`.
//
// Returns NV2AVPR_BAD_OUTPUT when `out` is NULL, NV2AVPR_BAD_PROGRAM_SIZE when
// `program_size` is zero or not a multiple of 4, NV2AVPR_BAD_PROGRAM when
// `program` is NULL, and otherwise the first non-success result reported while
// parsing a step (in which case the allocated steps are released again).
//
// Note: On success, the caller is responsible for calling
// `nv2a_vsh_program_destroy` to clean up the allocated program.
Nv2aVshParseResult nv2a_vsh_parse_program(Nv2aVshProgram *out,
const uint32_t *program,
uint32_t program_size);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // NV2A_VSH_CPU_SRC_NV2A_VSH_DISASSEMBLER_H_

View File

@ -0,0 +1,547 @@
#include <boost/test/unit_test.hpp>
#include "nv2a_vsh_disassembler.h"
BOOST_AUTO_TEST_SUITE(basic_disassembler_suite)
// Resets `out` to a baseline "expected" step: not final, both units NOP, all
// outputs and inputs unused, and every input carrying the identity swizzle
// (x, y, z, w). Tests then override only the fields they care about.
static void clear_step(Nv2aVshStep *out) {
  out->is_final = false;

  Nv2aVshOperation *const units[] = {&out->mac, &out->ilu};
  for (Nv2aVshOperation *unit : units) {
    memset(unit->outputs, 0, sizeof(unit->outputs));
    memset(unit->inputs, 0, sizeof(unit->inputs));

    unit->opcode = NV2AOP_NOP;

    for (Nv2aVshInput &input : unit->inputs) {
      input.type = NV2ART_NONE;
      input.swizzle[0] = NV2ASW_X;
      input.swizzle[1] = NV2ASW_Y;
      input.swizzle[2] = NV2ASW_Z;
      input.swizzle[3] = NV2ASW_W;
    }

    for (Nv2aVshOutput &output : unit->outputs) {
      output.type = NV2ART_NONE;
    }
  }
}
// Compares the outputs and inputs of two operations field by field.
// The opcode itself is not compared here. Slots that the expectation marks as
// NV2ART_NONE are only checked for their type; their other fields are skipped.
static void check_opcode(const Nv2aVshOperation &expected,
const Nv2aVshOperation &actual) {
for (int i = 0; i < 2; ++i) {
BOOST_TEST_INFO_SCOPE("Output " << i);
BOOST_TEST(expected.outputs[i].type == actual.outputs[i].type);
if (expected.outputs[i].type == NV2ART_NONE) {
continue;
}
BOOST_TEST(expected.outputs[i].index == actual.outputs[i].index);
BOOST_TEST(expected.outputs[i].writemask == actual.outputs[i].writemask);
}
for (int i = 0; i < 3; ++i) {
BOOST_TEST_INFO_SCOPE("Input " << i);
BOOST_TEST(expected.inputs[i].type == actual.inputs[i].type);
if (expected.inputs[i].type == NV2ART_NONE) {
continue;
}
BOOST_TEST(expected.inputs[i].index == actual.inputs[i].index);
BOOST_TEST(expected.inputs[i].is_negated == actual.inputs[i].is_negated);
// is_relative is only compared for context registers.
if (expected.inputs[i].type == NV2ART_CONTEXT) {
BOOST_TEST(expected.inputs[i].is_relative ==
actual.inputs[i].is_relative);
}
BOOST_TEST(expected.inputs[i].swizzle[0] == actual.inputs[i].swizzle[0]);
BOOST_TEST(expected.inputs[i].swizzle[1] == actual.inputs[i].swizzle[1]);
BOOST_TEST(expected.inputs[i].swizzle[2] == actual.inputs[i].swizzle[2]);
BOOST_TEST(expected.inputs[i].swizzle[3] == actual.inputs[i].swizzle[3]);
}
}
// Compares a parsed step against the expectation: both opcodes, the final
// flag, and (only for units whose expected opcode is not NOP) the unit's
// inputs and outputs.
static void check_result(const Nv2aVshStep &expected,
const Nv2aVshStep &actual) {
BOOST_TEST(expected.mac.opcode == actual.mac.opcode);
if (expected.mac.opcode != NV2AOP_NOP) {
BOOST_TEST_INFO("MAC");
check_opcode(expected.mac, actual.mac);
}
BOOST_TEST(expected.ilu.opcode == actual.ilu.opcode);
if (expected.ilu.opcode != NV2AOP_NOP) {
BOOST_TEST_INFO("ILU");
check_opcode(expected.ilu, actual.ilu);
}
BOOST_TEST(expected.is_final == actual.is_final);
}
// A MAC-only MOV from input register 11 to output register 11 with a full
// xyzw writemask and the default swizzle.
BOOST_AUTO_TEST_CASE(step_mac_mov) {
// MOV oT2.xyzw, v11
static constexpr uint32_t kTest[][4] = {
{0x00000000, 0x0020161B, 0x0836106C, 0x2070F858},
};
Nv2aVshStep expected;
clear_step(&expected);
expected.mac.opcode = NV2AOP_MOV;
expected.mac.outputs[0].type = NV2ART_OUTPUT;
expected.mac.outputs[0].index = 11;
expected.mac.outputs[0].writemask = NV2AWM_XYZW;
expected.mac.inputs[0].type = NV2ART_INPUT;
expected.mac.inputs[0].index = 11;
Nv2aVshStep actual;
auto result = nv2a_vsh_parse_step(&actual, kTest[0]);
BOOST_TEST(result == NV2AVPR_SUCCESS);
check_result(expected, actual);
}
// Same instruction as step_mac_mov but with the lowest bit of the last word
// set, which must be reported through is_final.
BOOST_AUTO_TEST_CASE(step_mac_mov_final) {
// MOV oT2.xyzw, v11
static constexpr uint32_t kTest[][4] = {
{0x00000000, 0x0020161B, 0x0836106C, 0x2070F859},
};
Nv2aVshStep expected;
clear_step(&expected);
expected.is_final = true;
expected.mac.opcode = NV2AOP_MOV;
expected.mac.outputs[0].type = NV2ART_OUTPUT;
expected.mac.outputs[0].index = 11;
expected.mac.outputs[0].writemask = NV2AWM_XYZW;
expected.mac.inputs[0].type = NV2ART_INPUT;
expected.mac.inputs[0].index = 11;
Nv2aVshStep actual;
auto result = nv2a_vsh_parse_step(&actual, kTest[0]);
BOOST_TEST(result == NV2AVPR_SUCCESS);
check_result(expected, actual);
}
// A three-operand MAD: two temporary register inputs (the second one with an
// all-x swizzle) and a context register input, written to output register 0
// with an xyz writemask. The instruction is also flagged as final.
BOOST_AUTO_TEST_CASE(step_mac_mad) {
// MAD oPos.xyz, R12, R1.x, c[59]
static constexpr uint32_t kTest[][4] = {
{0x00000000, 0x0087601B, 0xC400286C, 0x3070E801},
};
Nv2aVshStep expected;
clear_step(&expected);
expected.is_final = true;
expected.mac.opcode = NV2AOP_MAD;
expected.mac.outputs[0].type = NV2ART_OUTPUT;
expected.mac.outputs[0].index = 0;
expected.mac.outputs[0].writemask = NV2AWM_XYZ;
expected.mac.inputs[0].type = NV2ART_TEMPORARY;
expected.mac.inputs[0].index = 12;
expected.mac.inputs[1].type = NV2ART_TEMPORARY;
expected.mac.inputs[1].index = 1;
expected.mac.inputs[1].swizzle[1] = NV2ASW_X;
expected.mac.inputs[1].swizzle[2] = NV2ASW_X;
expected.mac.inputs[1].swizzle[3] = NV2ASW_X;
expected.mac.inputs[2].type = NV2ART_CONTEXT;
expected.mac.inputs[2].index = 59;
Nv2aVshStep actual;
auto result = nv2a_vsh_parse_step(&actual, kTest[0]);
BOOST_TEST(result == NV2AVPR_SUCCESS);
check_result(expected, actual);
}
// A two-operand DP4 reading an input register and a context register and
// writing only the z component of output register 0.
BOOST_AUTO_TEST_CASE(step_mac_dp4) {
// DP4 oPos.z, v0, c[100]
static constexpr uint32_t kTest[][4] = {
{0x00000000, 0x00EC801B, 0x0836186C, 0x20702800},
};
Nv2aVshStep expected;
clear_step(&expected);
expected.mac.opcode = NV2AOP_DP4;
expected.mac.outputs[0].type = NV2ART_OUTPUT;
expected.mac.outputs[0].index = 0;
expected.mac.outputs[0].writemask = NV2AWM_Z;
expected.mac.inputs[0].type = NV2ART_INPUT;
expected.mac.inputs[0].index = 0;
expected.mac.inputs[1].type = NV2ART_CONTEXT;
expected.mac.inputs[1].index = 100;
Nv2aVshStep actual;
auto result = nv2a_vsh_parse_step(&actual, kTest[0]);
BOOST_TEST(result == NV2AVPR_SUCCESS);
check_result(expected, actual);
}
// Two different encodings that must both decode to the same MAD step.
// NOTE(review): the encodings appear to differ only in fields that this
// instruction does not use (e.g. temporary register indices of operands that
// are context registers, and the unused output index) - confirm.
BOOST_AUTO_TEST_CASE(step_mac_mad_ambiguous) {
// MAD R0.z, R0.z, c[117].z, -c[117].w
static constexpr uint32_t kTest[][4] = {
{0x00000000, 0x008EA0AA, 0x05541FFC, 0x32000FF8},
{0x00000000, 0x008EA0AA, 0x0554BFFD, 0x72000000},
};
Nv2aVshStep expected;
clear_step(&expected);
expected.mac.opcode = NV2AOP_MAD;
expected.mac.outputs[0].type = NV2ART_TEMPORARY;
expected.mac.outputs[0].index = 0;
expected.mac.outputs[0].writemask = NV2AWM_Z;
expected.mac.inputs[0].type = NV2ART_TEMPORARY;
expected.mac.inputs[0].index = 0;
expected.mac.inputs[0].swizzle[0] = NV2ASW_Z;
expected.mac.inputs[0].swizzle[1] = NV2ASW_Z;
expected.mac.inputs[0].swizzle[2] = NV2ASW_Z;
expected.mac.inputs[0].swizzle[3] = NV2ASW_Z;
expected.mac.inputs[1].type = NV2ART_CONTEXT;
expected.mac.inputs[1].index = 117;
expected.mac.inputs[1].swizzle[0] = NV2ASW_Z;
expected.mac.inputs[1].swizzle[1] = NV2ASW_Z;
expected.mac.inputs[1].swizzle[2] = NV2ASW_Z;
expected.mac.inputs[1].swizzle[3] = NV2ASW_Z;
expected.mac.inputs[2].type = NV2ART_CONTEXT;
expected.mac.inputs[2].index = 117;
expected.mac.inputs[2].is_negated = true;
expected.mac.inputs[2].swizzle[0] = NV2ASW_W;
expected.mac.inputs[2].swizzle[1] = NV2ASW_W;
expected.mac.inputs[2].swizzle[2] = NV2ASW_W;
expected.mac.inputs[2].swizzle[3] = NV2ASW_W;
Nv2aVshStep actual;
// First encoding.
auto result = nv2a_vsh_parse_step(&actual, kTest[0]);
BOOST_TEST(result == NV2AVPR_SUCCESS);
check_result(expected, actual);
// Second encoding must yield the identical step.
result = nv2a_vsh_parse_step(&actual, kTest[1]);
BOOST_TEST(result == NV2AVPR_SUCCESS);
check_result(expected, actual);
}
// Parses an ARL instruction and checks that its destination decodes as the
// address register.
BOOST_AUTO_TEST_CASE(step_mac_arl) {
  // ARL A0, R0.x
  static constexpr uint32_t kTest[][4] = {
      {0x00000000, 0x01A00000, 0x0436106C, 0x20700FF8},
  };

  // Any field not assigned below keeps the value clear_step() gave it.
  Nv2aVshStep expected;
  clear_step(&expected);
  expected.mac.opcode = NV2AOP_ARL;

  // Destination: A0. No writemask is assigned for this output, so it stays at
  // the clear_step() value.
  expected.mac.outputs[0].type = NV2ART_ADDRESS;
  expected.mac.outputs[0].index = 0;

  // Source: R0.x (x in every swizzle slot)
  expected.mac.inputs[0].type = NV2ART_TEMPORARY;
  expected.mac.inputs[0].index = 0;
  expected.mac.inputs[0].swizzle[0] = NV2ASW_X;
  expected.mac.inputs[0].swizzle[1] = NV2ASW_X;
  expected.mac.inputs[0].swizzle[2] = NV2ASW_X;
  expected.mac.inputs[0].swizzle[3] = NV2ASW_X;

  Nv2aVshStep actual;
  auto result = nv2a_vsh_parse_step(&actual, kTest[0]);
  // Stop the case here if parsing failed: `actual` has no initializer, so
  // nothing in this test guarantees it holds comparable data after a failure.
  BOOST_TEST_REQUIRE(result == NV2AVPR_SUCCESS);
  check_result(expected, actual);
}
// Parses an ADD whose two sources are both A0-relative constant reads and
// checks that the relative flag is set on each decoded input.
BOOST_AUTO_TEST_CASE(step_address_relative) {
  // ADD R0.xy, c[A0+121].zw, -c[A0+121].xy
  static constexpr uint32_t kTest[][4] = {
      {0x00000000, 0x006F20BF, 0x9C001456, 0x7C000002},
  };

  // Any field not assigned below keeps the value clear_step() gave it.
  Nv2aVshStep expected;
  clear_step(&expected);
  expected.mac.opcode = NV2AOP_ADD;

  // Destination: R0.xy
  expected.mac.outputs[0].type = NV2ART_TEMPORARY;
  expected.mac.outputs[0].index = 0;
  expected.mac.outputs[0].writemask = NV2AWM_XY;

  // First source: c[A0+121].zw. The expected swizzle repeats the last named
  // component (w) into the remaining slots.
  expected.mac.inputs[0].type = NV2ART_CONTEXT;
  expected.mac.inputs[0].index = 121;
  expected.mac.inputs[0].is_relative = true;
  expected.mac.inputs[0].swizzle[0] = NV2ASW_Z;
  expected.mac.inputs[0].swizzle[1] = NV2ASW_W;
  expected.mac.inputs[0].swizzle[2] = NV2ASW_W;
  expected.mac.inputs[0].swizzle[3] = NV2ASW_W;

  // Second source: -c[A0+121].xy, with y repeated into the remaining slots.
  expected.mac.inputs[1].type = NV2ART_CONTEXT;
  expected.mac.inputs[1].index = 121;
  expected.mac.inputs[1].is_negated = true;
  expected.mac.inputs[1].is_relative = true;
  expected.mac.inputs[1].swizzle[0] = NV2ASW_X;
  expected.mac.inputs[1].swizzle[1] = NV2ASW_Y;
  expected.mac.inputs[1].swizzle[2] = NV2ASW_Y;
  expected.mac.inputs[1].swizzle[3] = NV2ASW_Y;

  Nv2aVshStep actual;
  auto result = nv2a_vsh_parse_step(&actual, kTest[0]);
  // Stop the case here if parsing failed: `actual` has no initializer, so
  // nothing in this test guarantees it holds comparable data after a failure.
  BOOST_TEST_REQUIRE(result == NV2AVPR_SUCCESS);
  check_result(expected, actual);
}
// Parses an ILU-only RCP instruction; only the `ilu` half of the expected
// step is populated.
BOOST_AUTO_TEST_CASE(step_ilu_rcp) {
  // RCP oFog.xyzw, v0.w
  static constexpr uint32_t kTest[][4] = {
      {0x00000000, 0x0400001B, 0x083613FC, 0x2070F82C},
  };

  // Any field not assigned below (including all of `mac`) keeps the value
  // clear_step() gave it.
  Nv2aVshStep expected;
  clear_step(&expected);
  expected.ilu.opcode = NV2AOP_RCP;

  // Destination: oFog.xyzw, expected at output index 5.
  expected.ilu.outputs[0].type = NV2ART_OUTPUT;
  expected.ilu.outputs[0].index = 5;
  expected.ilu.outputs[0].writemask = NV2AWM_XYZW;

  // Source: v0.w (w in every swizzle slot)
  expected.ilu.inputs[0].type = NV2ART_INPUT;
  expected.ilu.inputs[0].index = 0;
  expected.ilu.inputs[0].swizzle[0] = NV2ASW_W;
  expected.ilu.inputs[0].swizzle[1] = NV2ASW_W;
  expected.ilu.inputs[0].swizzle[2] = NV2ASW_W;
  expected.ilu.inputs[0].swizzle[3] = NV2ASW_W;

  Nv2aVshStep actual;
  auto result = nv2a_vsh_parse_step(&actual, kTest[0]);
  // Stop the case here if parsing failed: `actual` has no initializer, so
  // nothing in this test guarantees it holds comparable data after a failure.
  BOOST_TEST_REQUIRE(result == NV2AVPR_SUCCESS);
  check_result(expected, actual);
}
// Parses a MAC-only MUL whose sources use a three-component (.xyz) swizzle.
BOOST_AUTO_TEST_CASE(step_mac_mul) {
  // MUL oPos.xyz, R12.xyz, c[58].xyz
  static constexpr uint32_t kTest[][4] = {
      {0x00000000, 0x0047401A, 0xC434186C, 0x2070E800},
  };

  // Any field not assigned below keeps the value clear_step() gave it.
  Nv2aVshStep expected;
  clear_step(&expected);
  expected.mac.opcode = NV2AOP_MUL;

  // Destination: oPos.xyz
  expected.mac.outputs[0].type = NV2ART_OUTPUT;
  expected.mac.outputs[0].index = 0;
  expected.mac.outputs[0].writemask = NV2AWM_XYZ;

  // Sources: R12.xyz and c[58].xyz. Only the fourth swizzle slot is
  // overridden (to z); slots 0-2 keep the clear_step() values, presumably the
  // identity x/y/z (confirm against clear_step()).
  expected.mac.inputs[0].type = NV2ART_TEMPORARY;
  expected.mac.inputs[0].index = 12;
  expected.mac.inputs[0].swizzle[3] = NV2ASW_Z;
  expected.mac.inputs[1].type = NV2ART_CONTEXT;
  expected.mac.inputs[1].index = 58;
  expected.mac.inputs[1].swizzle[3] = NV2ASW_Z;

  Nv2aVshStep actual;
  auto result = nv2a_vsh_parse_step(&actual, kTest[0]);
  // Stop the case here if parsing failed: `actual` has no initializer, so
  // nothing in this test guarantees it holds comparable data after a failure.
  BOOST_TEST_REQUIRE(result == NV2AVPR_SUCCESS);
  check_result(expected, actual);
}
// Parses one token that is expected to decode into a paired step: a MUL in
// the `mac` half and a MOV in the `ilu` half.
BOOST_AUTO_TEST_CASE(step_paired_mul_mov) {
  // MUL R2.xyzw, R1, c[0] + MOV oD1.xyzw, v4
  static constexpr uint32_t kTest[][4] = {
      {0x00000000, 0x0240081B, 0x1436186C, 0x2F20F824},
  };

  // Any field not assigned below keeps the value clear_step() gave it.
  Nv2aVshStep expected;
  clear_step(&expected);

  // MAC half: MUL R2.xyzw, R1, c[0]
  expected.mac.opcode = NV2AOP_MUL;
  expected.mac.outputs[0].type = NV2ART_TEMPORARY;
  expected.mac.outputs[0].index = 2;
  expected.mac.outputs[0].writemask = NV2AWM_XYZW;
  expected.mac.inputs[0].type = NV2ART_TEMPORARY;
  expected.mac.inputs[0].index = 1;
  expected.mac.inputs[1].type = NV2ART_CONTEXT;
  expected.mac.inputs[1].index = 0;

  // ILU half: MOV oD1.xyzw, v4 (oD1 is expected at output index 4)
  expected.ilu.opcode = NV2AOP_MOV;
  expected.ilu.outputs[0].type = NV2ART_OUTPUT;
  expected.ilu.outputs[0].index = 4;
  expected.ilu.outputs[0].writemask = NV2AWM_XYZW;
  expected.ilu.inputs[0].type = NV2ART_INPUT;
  expected.ilu.inputs[0].index = 4;

  Nv2aVshStep actual;
  auto result = nv2a_vsh_parse_step(&actual, kTest[0]);
  // Stop the case here if parsing failed: `actual` has no initializer, so
  // nothing in this test guarantees it holds comparable data after a failure.
  BOOST_TEST_REQUIRE(result == NV2AVPR_SUCCESS);
  check_result(expected, actual);
}
// Parses one token that is expected to decode into a paired step: a MOV in
// the `mac` half and an RCP in the `ilu` half.
BOOST_AUTO_TEST_CASE(step_paired_mov_rcp) {
  // MOV oD0.xyzw, v3 + RCP R1.w, R1.w
  static constexpr uint32_t kTest[][4] = {
      {0x00000000, 0x0420061B, 0x083613FC, 0x5011F818},
  };

  // Any field not assigned below keeps the value clear_step() gave it.
  Nv2aVshStep expected;
  clear_step(&expected);

  // MAC half: MOV oD0.xyzw, v3 (oD0 is expected at output index 3)
  expected.mac.opcode = NV2AOP_MOV;
  expected.mac.outputs[0].type = NV2ART_OUTPUT;
  expected.mac.outputs[0].index = 3;
  expected.mac.outputs[0].writemask = NV2AWM_XYZW;
  expected.mac.inputs[0].type = NV2ART_INPUT;
  expected.mac.inputs[0].index = 3;

  // ILU half: RCP R1.w, R1.w (w in every swizzle slot of the source)
  expected.ilu.opcode = NV2AOP_RCP;
  expected.ilu.outputs[0].type = NV2ART_TEMPORARY;
  expected.ilu.outputs[0].index = 1;
  expected.ilu.outputs[0].writemask = NV2AWM_W;
  expected.ilu.inputs[0].type = NV2ART_TEMPORARY;
  expected.ilu.inputs[0].index = 1;
  expected.ilu.inputs[0].swizzle[0] = NV2ASW_W;
  expected.ilu.inputs[0].swizzle[1] = NV2ASW_W;
  expected.ilu.inputs[0].swizzle[2] = NV2ASW_W;
  expected.ilu.inputs[0].swizzle[3] = NV2ASW_W;

  Nv2aVshStep actual;
  auto result = nv2a_vsh_parse_step(&actual, kTest[0]);
  // Stop the case here if parsing failed: `actual` has no initializer, so
  // nothing in this test guarantees it holds comparable data after a failure.
  BOOST_TEST_REQUIRE(result == NV2AVPR_SUCCESS);
  check_result(expected, actual);
}
// Parses one token that is expected to decode into a paired step: a DP4 in
// the `mac` half and an RSQ in the `ilu` half.
BOOST_AUTO_TEST_CASE(step_paired_dp4_rsq) {
  // DP4 oPos.x, R6, c[96] + RSQ R1.x, R2.x
  static constexpr uint32_t kTest[][4] = {
      {0x00000000, 0x08EC001B, 0x64361800, 0x90A88800},
  };

  // Any field not assigned below keeps the value clear_step() gave it.
  Nv2aVshStep expected;
  clear_step(&expected);

  // MAC half: DP4 oPos.x, R6, c[96]
  expected.mac.opcode = NV2AOP_DP4;
  expected.mac.outputs[0].type = NV2ART_OUTPUT;
  expected.mac.outputs[0].index = 0;
  expected.mac.outputs[0].writemask = NV2AWM_X;
  expected.mac.inputs[0].type = NV2ART_TEMPORARY;
  expected.mac.inputs[0].index = 6;
  expected.mac.inputs[1].type = NV2ART_CONTEXT;
  expected.mac.inputs[1].index = 96;

  // ILU half: RSQ R1.x, R2.x (x in every swizzle slot of the source)
  expected.ilu.opcode = NV2AOP_RSQ;
  expected.ilu.outputs[0].type = NV2ART_TEMPORARY;
  expected.ilu.outputs[0].index = 1;
  expected.ilu.outputs[0].writemask = NV2AWM_X;
  expected.ilu.inputs[0].type = NV2ART_TEMPORARY;
  expected.ilu.inputs[0].index = 2;
  expected.ilu.inputs[0].swizzle[0] = NV2ASW_X;
  expected.ilu.inputs[0].swizzle[1] = NV2ASW_X;
  expected.ilu.inputs[0].swizzle[2] = NV2ASW_X;
  expected.ilu.inputs[0].swizzle[3] = NV2ASW_X;

  Nv2aVshStep actual;
  auto result = nv2a_vsh_parse_step(&actual, kTest[0]);
  // Stop the case here if parsing failed: `actual` has no initializer, so
  // nothing in this test guarantees it holds comparable data after a failure.
  BOOST_TEST_REQUIRE(result == NV2AVPR_SUCCESS);
  check_result(expected, actual);
}
// Parses a single DP4 that writes two destinations and checks that both are
// reported as outputs of the same MAC operation.
BOOST_AUTO_TEST_CASE(step_multi_output) {
  // DP4 oPos.z, R6, c[98] + DP4 R0.x, R6, c[98]
  static constexpr uint32_t kTest[][4] = {
      {0x00000000, 0x00EC401B, 0x64365800, 0x28002800},
  };

  // Any field not assigned below keeps the value clear_step() gave it.
  Nv2aVshStep expected;
  clear_step(&expected);
  expected.mac.opcode = NV2AOP_DP4;

  // The temporary destination (R0.x) is expected in slot 0 and the output
  // register destination (oPos.z) in slot 1.
  expected.mac.outputs[0].type = NV2ART_TEMPORARY;
  expected.mac.outputs[0].index = 0;
  expected.mac.outputs[0].writemask = NV2AWM_X;
  expected.mac.outputs[1].type = NV2ART_OUTPUT;
  expected.mac.outputs[1].index = 0;
  expected.mac.outputs[1].writemask = NV2AWM_Z;

  // Sources shared by both writes: R6 and c[98]
  expected.mac.inputs[0].type = NV2ART_TEMPORARY;
  expected.mac.inputs[0].index = 6;
  expected.mac.inputs[1].type = NV2ART_CONTEXT;
  expected.mac.inputs[1].index = 98;

  Nv2aVshStep actual;
  auto result = nv2a_vsh_parse_step(&actual, kTest[0]);
  // Stop the case here if parsing failed: `actual` has no initializer, so
  // nothing in this test guarantees it holds comparable data after a failure.
  BOOST_TEST_REQUIRE(result == NV2AVPR_SUCCESS);
  check_result(expected, actual);
}
// Parses a DPH whose destination is a context (constant) register rather
// than a temporary or output register.
BOOST_AUTO_TEST_CASE(step_context_write) {
  // DPH c[15].xy, v4, c[10]
  static constexpr uint32_t kTest[][4] = {
      {0x00000000, 0x00C1481B, 0x0836186C, 0x2070C078},
  };

  // Any field not assigned below keeps the value clear_step() gave it.
  Nv2aVshStep expected;
  clear_step(&expected);
  expected.mac.opcode = NV2AOP_DPH;

  // Destination: c[15].xy
  expected.mac.outputs[0].type = NV2ART_CONTEXT;
  expected.mac.outputs[0].index = 15;
  expected.mac.outputs[0].writemask = NV2AWM_XY;

  // Sources: v4 and c[10]
  expected.mac.inputs[0].type = NV2ART_INPUT;
  expected.mac.inputs[0].index = 4;
  expected.mac.inputs[1].type = NV2ART_CONTEXT;
  expected.mac.inputs[1].index = 10;

  Nv2aVshStep actual;
  auto result = nv2a_vsh_parse_step(&actual, kTest[0]);
  // Stop the case here if parsing failed: `actual` has no initializer, so
  // nothing in this test guarantees it holds comparable data after a failure.
  BOOST_TEST_REQUIRE(result == NV2AVPR_SUCCESS);
  check_result(expected, actual);
}
BOOST_AUTO_TEST_SUITE_END()

View File

@ -0,0 +1,2 @@
// Entry translation unit for the disassembler test executable.
// BOOST_TEST_MODULE must be defined before the Boost.Test header is included;
// it names the master test suite shown in test reports. The name identifies
// this module as the disassembler tests so its reports are not confused with
// those of the operations tests.
#define BOOST_TEST_MODULE DisassemblerTests
#include <boost/test/unit_test.hpp>

View File

@ -5,9 +5,9 @@
BOOST_AUTO_TEST_SUITE(basic_operation_suite)
BOOST_AUTO_TEST_CASE(mov) {
nv2a_vsh_register a = {0.0f, -1000.0f, 1000.0f, 64.123456f};
Nv2aVshRegister a = {0.0f, -1000.0f, 1000.0f, 64.123456f};
nv2a_vsh_register out;
Nv2aVshRegister out;
nv2a_vsh_cpu_mov(&out, &a);
BOOST_TEST(out.reg.x == a.reg.x);
@ -17,9 +17,9 @@ BOOST_AUTO_TEST_CASE(mov) {
}
BOOST_AUTO_TEST_CASE(arl_trivial) {
nv2a_vsh_register a = {10.0f, -1000.0f, 1000.0f, 64.123456f};
Nv2aVshRegister a = {10.0f, -1000.0f, 1000.0f, 64.123456f};
nv2a_vsh_register out;
Nv2aVshRegister out;
nv2a_vsh_cpu_arl(&out, &a);
BOOST_TEST(out.reg.x == a.reg.x);
@ -29,9 +29,9 @@ BOOST_AUTO_TEST_CASE(arl_trivial) {
}
BOOST_AUTO_TEST_CASE(arl_truncate) {
nv2a_vsh_register a = {10.12345f, -1000.0f, 1000.0f, 64.123456f};
Nv2aVshRegister a = {10.12345f, -1000.0f, 1000.0f, 64.123456f};
nv2a_vsh_register out;
Nv2aVshRegister out;
nv2a_vsh_cpu_arl(&out, &a);
BOOST_TEST(out.reg.x == 10.0f);
@ -41,9 +41,9 @@ BOOST_AUTO_TEST_CASE(arl_truncate) {
}
BOOST_AUTO_TEST_CASE(arl_biased) {
nv2a_vsh_register a = {9.9999999f, -1000.0f, 1000.0f, 64.123456f};
Nv2aVshRegister a = {9.9999999f, -1000.0f, 1000.0f, 64.123456f};
nv2a_vsh_register out;
Nv2aVshRegister out;
nv2a_vsh_cpu_arl(&out, &a);
BOOST_TEST(out.reg.x == 10.0f);