commit 5b91733d6ff71a4c0efa9ce2ab955bf1822ccdcb Author: Erik Abair Date: Fri Jun 17 20:29:34 2022 -0700 Initial import. diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..0ca0998 --- /dev/null +++ b/.clang-format @@ -0,0 +1,4 @@ +--- +Language: Cpp +BasedOnStyle: Google + diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..460435e --- /dev/null +++ b/.gitignore @@ -0,0 +1,16 @@ +*.cpp.d +*.obj +*.iso +*.lib +*.exe +*.xbe +*.pdb + +.DS_Store +.vscode/ +.vs/ +.idea/ +bin/ +__pycache__/ +build/ +cmake-build*/ diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..6bc3038 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,61 @@ +cmake_minimum_required(VERSION 3.18) + +project(nv2a_vsh_cpu) + +set(CMAKE_VERBOSE_MAKEFILE TRUE) + +enable_testing() +set(CMAKE_CXX_STANDARD 17) +include(CMakePushCheckState) +set(_CMAKE_PROCESSING_LANGUAGE "C") +include(CheckSymbolExists) +include(ExternalProject) +include(FindPkgConfig) + +# Boost.Test is only needed by the unit test executable below. +find_package( + Boost 1.70 + COMPONENTS + unit_test_framework + REQUIRED +) + +set(GENERATED_FILES_DIR "${CMAKE_BINARY_DIR}/generated") +include_directories("${Boost_INCLUDE_DIR}" "${GENERATED_FILES_DIR}") + +set(CMAKE_CXX_FLAGS_DEBUG "-ggdb -O0") +set(CMAKE_CXX_FLAGS_RELEASE "-O3") + +# nv2a_vsh_cpu library ------------------------------- + +add_library( + nv2a_vsh_cpu + src/nv2a_vsh_cpu.c + src/nv2a_vsh_cpu.h +) + +target_include_directories( + nv2a_vsh_cpu + PRIVATE + src +) + +# Tests ---------------------------------------------- +add_executable( + operations_tests + test/operations/test_main.cpp + test/operations/test_basic.cpp +) +target_include_directories( + operations_tests + PRIVATE src + PRIVATE test +) +# Linking the library target already orders its build before the tests. +target_link_libraries( + operations_tests + PRIVATE + nv2a_vsh_cpu + Boost::unit_test_framework +) +add_test(NAME operations_tests COMMAND operations_tests) diff --git a/LICENSE b/LICENSE new file mode 100644 index 
0000000..fdddb29 --- /dev/null +++ b/LICENSE @@ -0,0 +1,24 @@ +This is free and unencumbered software released into the public domain. + +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. + +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. We make this dedication for the benefit +of the public at large and to the detriment of our heirs and +successors. We intend this dedication to be an overt act of +relinquishment in perpetuity of all present and future rights to this +software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. + +For more information, please refer to <https://unlicense.org> diff --git a/githooks/pre-commit b/githooks/pre-commit new file mode 100755 index 0000000..7d507af --- /dev/null +++ b/githooks/pre-commit @@ -0,0 +1,102 @@ +#!/bin/bash +# +# To enable this hook, rename this file to "pre-commit" and copy into the +# ../.git/hooks directory. + + +# Cross platform projects tend to avoid non-ASCII filenames; prevent +# them from being added to the repository. We exploit the fact that the +# printable range starts at the space character and ends with tilde. 
+function check_no_nonascii_characters { + if [ "${allownonascii}" == "true" ]; then + return + fi + + # Note that the use of brackets around a tr range is ok here, (it's + # even required, for portability to Solaris 10's /usr/bin/tr), since + # the square bracket bytes happen to fall in the designated range. + if test $(git diff --cached --name-only --diff-filter=A -z "${against}" | + LC_ALL=C tr -d '[ -~]\0' | wc -c) != 0 + then + cat <<\EOF +Error: Attempt to add a non-ASCII file name. + +This can cause problems if you want to work with people on other platforms. + +To be portable it is advisable to rename the file. + +If you know what you are doing you can disable this check using: + + git config hooks.allownonascii true +EOF + exit 1 + fi +} + + +function check_no_diffmarkers_or_whitespace_errors { + # If there are whitespace errors, print the offending file names and fail. + set -e + git diff-index --check --cached "${against}" -- + set +e +} + + +function run_clang_format { + files_to_format="$(echo "${changed_c_filenames}" | grep -v '3rdparty')" + if [[ "${files_to_format}" == "" ]]; then + return + fi + # Run clang-format against any changed C++ files. + if ! command -v clang-format > /dev/null; then + cat <<\EOF +Warning: clang-format is not installed or is not in the PATH. + +Please install and amend this commit. + +Debian: + sudo apt install clang-format +EOF + return + fi + + # Reformat the files in-place and re-add any that were changed. + # + # Note that this has the side effect of incorporating changes to staged files + # that were not themselves staged. E.g., if you edit a file, `git add`, then + # edit some more, then commit, all of the changes will be committed, not just + # the staged ones. Depending on typical workflows it might be better to do + # something more complicated here, or to just have the hook fail instead of + # perform an in-place fix. 
+ echo "${files_to_format}" + echo "${files_to_format}" | xargs clang-format -i + echo "${files_to_format}" | xargs git add +} + + +# If you want to allow non-ASCII filenames set this variable to true. +allownonascii=$(git config --bool hooks.allownonascii) + +if git rev-parse --verify HEAD >/dev/null 2>&1; then + against=HEAD +else + # Initial commit: diff against an empty tree object + against=$(git hash-object -t tree /dev/null) +fi + +# Redirect output to stderr. +exec 1>&2 + + +added_and_modified_filenames="$(git diff --cached --name-only --diff-filter=d)" +changed_c_filenames="$(echo "${added_and_modified_filenames}" | \ + grep -E '.*\.(c|cpp|h|hpp)$')" + + +# Allow blank line at EOF. +git config --local core.whitespace -blank-at-eof + +check_no_nonascii_characters +check_no_diffmarkers_or_whitespace_errors +run_clang_format + diff --git a/src/nv2a_vsh_cpu.c b/src/nv2a_vsh_cpu.c new file mode 100644 index 0000000..dca0cb9 --- /dev/null +++ b/src/nv2a_vsh_cpu.c @@ -0,0 +1,268 @@ +#include "nv2a_vsh_cpu.h" + +#include <math.h> +#include <string.h> + +// Copies all four components of |a| into |out|. +void nv2a_vsh_cpu_mov(nv2a_vsh_register *out, const nv2a_vsh_register *a) { + memcpy(out, a, sizeof(*out)); +} + +// Writes floor(a.x + 0.001) to every component of |out|. +void nv2a_vsh_cpu_arl(nv2a_vsh_register *out, const nv2a_vsh_register *a) { + float val = floorf(a->reg.x + 0.001f); + out->reg.x = val; + out->reg.y = val; + out->reg.z = val; + out->reg.w = val; +} + +/* + +def _arl(inst: dict, input: Context, output: Context): + # TODO: Validate this behavior on HW. 
+ val = input.get(inst["inputs"][0])[0] + val = int(math.floor(val + 0.001)) + output.set(inst["output"], (val, val, val, val)) + + +def _mov(inst: dict, input: Context, output: Context): + for reg in inst["outputs"]: + output.set(reg, input.get(inst["inputs"][0])) + + +def _mac_mul(inst: dict, input: Context, output: Context): + a = input.get(inst["inputs"][0]) + b = input.get(inst["inputs"][1]) + result = [a_val * b_val for a_val, b_val in zip(a, b)] + for reg in inst["outputs"]: + output.set(reg, tuple(result)) + + +def _mac_add(inst: dict, input: Context, output: Context): + a = input.get(inst["inputs"][0]) + b = input.get(inst["inputs"][1]) + result = [a_val + b_val for a_val, b_val in zip(a, b)] + for reg in inst["outputs"]: + output.set(reg, tuple(result)) + + +def _mac_mad(inst: dict, input: Context, output: Context): + a = input.get(inst["inputs"][0]) + b = input.get(inst["inputs"][1]) + result = [a_val * b_val for a_val, b_val in zip(a, b)] + c = input.get(inst["inputs"][2]) + result = [a_val + b_val for a_val, b_val in zip(result, c)] + for reg in inst["outputs"]: + output.set(reg, tuple(result)) + + +def _mac_dp3(inst: dict, input: Context, output: Context): + a = input.get(inst["inputs"][0]) + b = input.get(inst["inputs"][1]) + result = [a_val * b_val for a_val, b_val in zip(a[:3], b[:3])] + + val = functools.reduce(lambda x, y: x + y, result) + result = [val] * 4 + for reg in inst["outputs"]: + output.set(reg, tuple(result)) + + +def _mac_dph(inst: dict, input: Context, output: Context): + a = input.get(inst["inputs"][0]) + b = input.get(inst["inputs"][1]) + result = [a_val * b_val for a_val, b_val in zip(a[:3], b[:3])] + + val = functools.reduce(lambda x, y: x + y, result) + val += b[3] + result = [val] * 4 + for reg in inst["outputs"]: + output.set(reg, tuple(result)) + + +def _mac_dp4(inst: dict, input: Context, output: Context): + a = input.get(inst["inputs"][0]) + b = input.get(inst["inputs"][1]) + result = [a_val * b_val for a_val, b_val in 
zip(a[:4], b[:4])] + + val = functools.reduce(lambda x, y: x + y, result) + result = [val] * 4 + for reg in inst["outputs"]: + output.set(reg, tuple(result)) + + +def _mac_dst(inst: dict, input: Context, output: Context): + a = input.get(inst["inputs"][0]) + b = input.get(inst["inputs"][1]) + result = (1.0, a[1] * b[1], a[2], b[3]) + for reg in inst["outputs"]: + output.set(reg, tuple(result)) + + +def _mac_min(inst: dict, input: Context, output: Context): + a = input.get(inst["inputs"][0]) + b = input.get(inst["inputs"][1]) + result = [a_val if a_val < b_val else b_val for a_val, b_val in zip(a[:4], b[:4])] + for reg in inst["outputs"]: output.set(reg, tuple(result)) + + +def _mac_max(inst: dict, input: Context, output: Context): + a = input.get(inst["inputs"][0]) + b = input.get(inst["inputs"][1]) + result = [a_val if a_val >= b_val else b_val for a_val, b_val in zip(a[:4], b[:4])] + for reg in inst["outputs"]: output.set(reg, tuple(result)) + + +def _mac_slt(inst: dict, input: Context, output: Context): + a = input.get(inst["inputs"][0]) + b = input.get(inst["inputs"][1]) + result = [1.0 if a_val < b_val else 0.0 for a_val, b_val in zip(a[:4], b[:4])] + for reg in inst["outputs"]: output.set(reg, tuple(result)) + + +def _mac_sge(inst: dict, input: Context, output: Context): + a = input.get(inst["inputs"][0]) + b = input.get(inst["inputs"][1]) + result = [1.0 if a_val >= b_val else 0.0 for a_val, b_val in zip(a[:4], b[:4])] + for reg in inst["outputs"]: output.set(reg, tuple(result)) + + +_MAC_HANDLERS = { + nv2avsh.vsh_instruction.MAC.MAC_MOV: _mov, + nv2avsh.vsh_instruction.MAC.MAC_MUL: _mac_mul, + nv2avsh.vsh_instruction.MAC.MAC_ADD: _mac_add, + nv2avsh.vsh_instruction.MAC.MAC_MAD: _mac_mad, + nv2avsh.vsh_instruction.MAC.MAC_DP3: _mac_dp3, + nv2avsh.vsh_instruction.MAC.MAC_DPH: _mac_dph, + nv2avsh.vsh_instruction.MAC.MAC_DP4: _mac_dp4, + nv2avsh.vsh_instruction.MAC.MAC_DST: _mac_dst, + nv2avsh.vsh_instruction.MAC.MAC_MIN: _mac_min, + 
nv2avsh.vsh_instruction.MAC.MAC_MAX: _mac_max, + nv2avsh.vsh_instruction.MAC.MAC_SLT: _mac_slt, + nv2avsh.vsh_instruction.MAC.MAC_SGE: _mac_sge, + nv2avsh.vsh_instruction.MAC.MAC_ARL: _arl, +} + + +def _ilu_rcp(inst: dict, input: Context, output: Context): + a = input.get(inst["inputs"][0]) + + def compute(val): + if val == 1.0: + return 1.0 + + if val == 0.0: + return math.inf + + return 1.0 / val + + result = [compute(val) for val in a[:4]] + for reg in inst["outputs"]: + output.set(reg, (result[0], result[1], result[2], result[3])) + + +def _ilu_rcc(inst: dict, input: Context, output: Context): + a = input.get(inst["inputs"][0]) + + def compute(input): + if input < -1.84467e19: + input = -1.84467e19 + elif input > -5.42101e-20 and input < 0: + input = -5.42101e-020 + elif input >= 0 and input < 5.42101e-20: + input = 5.42101e-20 + elif input > 1.84467e19: + input = 1.84467e19 + + if input == 1.0: + return 1.0 + + return 1.0 / input + + result = [compute(val) for val in a[:4]] + for reg in inst["outputs"]: + output.set(reg, (result[0], result[1], result[2], result[3])) + + +def _ilu_rsq(inst: dict, input: Context, output: Context): + a = input.get(inst["inputs"][0]) + + def compute(input): + if input == 1.0: + return 1.0 + + if input == 0: + return math.inf + + return 1.0 / math.sqrt(input) + + result = [compute(abs(val)) for val in a[:4]] + for reg in inst["outputs"]: + output.set(reg, (result[0], result[1], result[2], result[3])) + + +def _ilu_exp(inst: dict, input: Context, output: Context): + a = input.get(inst["inputs"][0]) + + tmp = math.floor(a[0]) + x = math.pow(2, tmp) + y = a[0] - tmp + z = math.pow(2, a[0]) + w = 1.0 + + for reg in inst["outputs"]: + output.set(reg, (x, y, z, w)) + + +def _ilu_log(inst: dict, input: Context, output: Context): + a = input.get(inst["inputs"][0]) + + tmp = math.floor(a[0]) + if tmp == 0.0: + x = -math.inf + y = 1.0 + z = -math.inf + w = 1.0 + else: + x = math.floor(math.log2(tmp)) + y = tmp / math.pow(2, 
math.floor(math.log2(tmp))) + z = math.log2(tmp) + w = 1.0 + + for reg in inst["outputs"]: + output.set(reg, (x, y, z, w)) + + +def _clamp(val, min_val, max_val): + return max(min(val, max_val), min_val) + + +def _ilu_lit(inst: dict, input: Context, output: Context): + a = input.get(inst["inputs"][0]) + epsilon = 1.0 / 256.0 + + sx = max(a[0], 0.0) + sy = max(a[1], 0.0) + sw = _clamp(a[3], -(128 - epsilon), 128 - epsilon) + + x = 1.0 + y = sx + z = 0.0 + if sx > 0: + z = math.pow(2, sw * math.log2(sy)) + w = 1.0 + + output.set(inst["output"], (x, y, z, w)) + + +_ILU_HANDLERS = { + nv2avsh.vsh_instruction.ILU.ILU_MOV: _mov, + nv2avsh.vsh_instruction.ILU.ILU_RCP: _ilu_rcp, + nv2avsh.vsh_instruction.ILU.ILU_RCC: _ilu_rcc, + nv2avsh.vsh_instruction.ILU.ILU_RSQ: _ilu_rsq, + nv2avsh.vsh_instruction.ILU.ILU_EXP: _ilu_exp, + nv2avsh.vsh_instruction.ILU.ILU_LOG: _ilu_log, + nv2avsh.vsh_instruction.ILU.ILU_LIT: _ilu_lit, +} + + */ diff --git a/src/nv2a_vsh_cpu.h b/src/nv2a_vsh_cpu.h new file mode 100644 index 0000000..171b0f7 --- /dev/null +++ b/src/nv2a_vsh_cpu.h @@ -0,0 +1,61 @@ +#ifndef NV2A_VSH_CPU_SRC_NV2A_VSH_CPU_H_ +#define NV2A_VSH_CPU_SRC_NV2A_VSH_CPU_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +// Named x/y/z/w view of a four-component float register. +struct nv2a_vsh_register_components { + float x; + float y; + float z; + float w; +}; + +// A register, addressable by component name (reg) or by index (raw). +typedef union nv2a_vsh_register_ { + struct nv2a_vsh_register_components reg; + float raw[4]; +} nv2a_vsh_register; + +// OP_n(name) declares nv2a_vsh_cpu_<name> taking an output and n inputs. +#define OP_1(name) \ + void nv2a_vsh_cpu_##name(nv2a_vsh_register *out, const nv2a_vsh_register *a) +#define OP_2(name) \ + void nv2a_vsh_cpu_##name(nv2a_vsh_register *out, const nv2a_vsh_register *a, \ + const nv2a_vsh_register *b) +#define OP_3(name) \ + void nv2a_vsh_cpu_##name(nv2a_vsh_register *out, const nv2a_vsh_register *a, \ + const nv2a_vsh_register *b, \ + const nv2a_vsh_register *c) + +OP_1(mov); +OP_1(arl); +OP_2(mul); +OP_2(add); +OP_3(mad); +OP_2(dp3); +OP_2(dph); +OP_2(dp4); +OP_2(dst); +OP_2(min); +OP_2(max); +OP_2(slt); +OP_2(sge); +OP_1(rcp); 
+OP_1(rcc); +OP_1(rsq); +OP_1(exp); +OP_1(log); +OP_1(lit); + +#undef OP_1 +#undef OP_2 +#undef OP_3 + +#ifdef __cplusplus +} +#endif + +#endif // NV2A_VSH_CPU_SRC_NV2A_VSH_CPU_H_ diff --git a/test/operations/test_basic.cpp b/test/operations/test_basic.cpp new file mode 100644 index 0000000..6bdabca --- /dev/null +++ b/test/operations/test_basic.cpp @@ -0,0 +1,56 @@ +#include <boost/test/unit_test.hpp> + +#include "nv2a_vsh_cpu.h" + +BOOST_AUTO_TEST_SUITE(basic_operation_suite) + +BOOST_AUTO_TEST_CASE(mov) { + nv2a_vsh_register a = {0.0f, -1000.0f, 1000.0f, 64.123456f}; + + nv2a_vsh_register out; + nv2a_vsh_cpu_mov(&out, &a); + + BOOST_TEST(out.reg.x == a.reg.x); + BOOST_TEST(out.reg.y == a.reg.y); + BOOST_TEST(out.reg.z == a.reg.z); + BOOST_TEST(out.reg.w == a.reg.w); +} + +BOOST_AUTO_TEST_CASE(arl_trivial) { + nv2a_vsh_register a = {10.0f, -1000.0f, 1000.0f, 64.123456f}; + + nv2a_vsh_register out; + nv2a_vsh_cpu_arl(&out, &a); + + BOOST_TEST(out.reg.x == a.reg.x); + BOOST_TEST(out.reg.y == a.reg.x); + BOOST_TEST(out.reg.z == a.reg.x); + BOOST_TEST(out.reg.w == a.reg.x); +} + +BOOST_AUTO_TEST_CASE(arl_truncate) { + nv2a_vsh_register a = {10.12345f, -1000.0f, 1000.0f, 64.123456f}; + + nv2a_vsh_register out; + nv2a_vsh_cpu_arl(&out, &a); + + BOOST_TEST(out.reg.x == 10.0f); + BOOST_TEST(out.reg.y == 10.0f); + BOOST_TEST(out.reg.z == 10.0f); + BOOST_TEST(out.reg.w == 10.0f); +} + +BOOST_AUTO_TEST_CASE(arl_biased) { + // 9.9995 is below 10.0 as a float; only the 0.001 bias lifts it to 10. + nv2a_vsh_register a = {9.9995f, -1000.0f, 1000.0f, 64.123456f}; + + nv2a_vsh_register out; + nv2a_vsh_cpu_arl(&out, &a); + + BOOST_TEST(out.reg.x == 10.0f); + BOOST_TEST(out.reg.y == 10.0f); + BOOST_TEST(out.reg.z == 10.0f); + BOOST_TEST(out.reg.w == 10.0f); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/test/operations/test_main.cpp b/test/operations/test_main.cpp new file mode 100644 index 0000000..ea2b541 --- /dev/null +++ b/test/operations/test_main.cpp @@ -0,0 +1,2 @@ +#define BOOST_TEST_MODULE OperationTests +#include <boost/test/unit_test.hpp>