Initial import.

This commit is contained in:
Erik Abair 2022-06-17 20:29:34 -07:00
commit 5b91733d6f
9 changed files with 587 additions and 0 deletions

4
.clang-format Normal file
View File

@ -0,0 +1,4 @@
---
# clang-format configuration for this repository.
# Language that this configuration section applies to.
Language: Cpp
# Base every formatting rule on the predefined Google style; no individual
# options are overridden in this file.
BasedOnStyle: Google

16
.gitignore vendored Normal file
View File

@ -0,0 +1,16 @@
# Per-file build products, matched by extension
# (presumably compiler/linker/packaging outputs - confirm before pruning).
*.cpp.d
*.obj
*.iso
*.lib
*.exe
*.xbe
*.pdb
# Operating system and editor/IDE metadata.
.DS_Store
.vscode/
.vs/
.idea/
# Output, cache, and build directories.
bin/
__pycache__/
build/
cmake-build*/

60
CMakeLists.txt Normal file
View File

@ -0,0 +1,60 @@
cmake_minimum_required(VERSION 3.18)

# The library itself is C; the unit tests are C++.
project(nv2a_vsh_cpu LANGUAGES C CXX)

# Tests are built by default. Configure with -DNV2A_VSH_CPU_BUILD_TESTS=OFF to
# build only the library, which also removes the Boost requirement.
option(NV2A_VSH_CPU_BUILD_TESTS "Build the nv2a_vsh_cpu unit tests" ON)

# Directory reserved for generated sources/headers. It is added to the include
# path of the targets below (and nowhere else).
set(GENERATED_FILES_DIR "${CMAKE_CURRENT_BINARY_DIR}/generated")

# nv2a_vsh_cpu library -------------------------------

add_library(
  nv2a_vsh_cpu
  src/nv2a_vsh_cpu.c
  src/nv2a_vsh_cpu.h
)

# `src` holds the public header, so it is a PUBLIC usage requirement: anything
# linking against nv2a_vsh_cpu picks up the include path automatically.
target_include_directories(
  nv2a_vsh_cpu
  PUBLIC
  "${CMAKE_CURRENT_SOURCE_DIR}/src"
  PRIVATE
  "${GENERATED_FILES_DIR}"
)

# The implementation calls <math.h> functions (e.g. floorf). On platforms that
# ship the math routines as a separate library, link it explicitly so that
# consumers written in C link correctly as well.
find_library(NV2A_VSH_CPU_MATH_LIBRARY m)
mark_as_advanced(NV2A_VSH_CPU_MATH_LIBRARY)
if(NV2A_VSH_CPU_MATH_LIBRARY)
  target_link_libraries(nv2a_vsh_cpu PUBLIC "${NV2A_VSH_CPU_MATH_LIBRARY}")
endif()

# Tests ----------------------------------------------

if(NV2A_VSH_CPU_BUILD_TESTS)
  enable_testing()

  find_package(
    Boost 1.70
    REQUIRED
    COMPONENTS
    unit_test_framework
  )

  add_executable(
    operations_tests
    test/operations/test_main.cpp
    test/operations/test_basic.cpp
  )

  # The tests require C++17.
  target_compile_features(operations_tests PRIVATE cxx_std_17)

  # Debugger-friendly flags for Debug builds, limited to the compilers that
  # understand them. Other configurations keep the toolchain/user defaults
  # instead of having CMAKE_CXX_FLAGS_<CONFIG> overwritten globally.
  target_compile_options(
    operations_tests
    PRIVATE
    "$<$<AND:$<CONFIG:Debug>,$<CXX_COMPILER_ID:GNU,Clang,AppleClang>>:-ggdb;-O0>"
  )

  # The library's `src` include path arrives through the nv2a_vsh_cpu link
  # below; only the test-specific directories are listed here.
  target_include_directories(
    operations_tests
    PRIVATE
    "${CMAKE_CURRENT_SOURCE_DIR}/test"
    "${GENERATED_FILES_DIR}"
  )

  # Linking against the targets also orders the build, so no explicit
  # add_dependencies() is needed. The imported Boost target carries the Boost
  # include directories and link settings.
  target_link_libraries(
    operations_tests
    PRIVATE
    nv2a_vsh_cpu
    Boost::unit_test_framework
  )

  add_test(NAME operations_tests COMMAND operations_tests)
endif()

24
LICENSE Normal file
View File

@ -0,0 +1,24 @@
This is free and unencumbered software released into the public domain.
Anyone is free to copy, modify, publish, use, compile, sell, or
distribute this software, either in source code form or as a compiled
binary, for any purpose, commercial or non-commercial, and by any
means.
In jurisdictions that recognize copyright laws, the author or authors
of this software dedicate any and all copyright interest in the
software to the public domain. We make this dedication for the benefit
of the public at large and to the detriment of our heirs and
successors. We intend this dedication to be an overt act of
relinquishment in perpetuity of all present and future rights to this
software under copyright law.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.
For more information, please refer to <https://unlicense.org>

102
githooks/pre-commit Executable file
View File

@ -0,0 +1,102 @@
#!/bin/bash
#
# To enable this hook, copy this file (keeping the name "pre-commit") into the
# ../.git/hooks directory.
# Cross platform projects tend to avoid non-ASCII filenames; prevent
# them from being added to the repository. We exploit the fact that the
# printable range starts at the space character and ends with tilde.
function check_no_nonascii_characters {
  # The check is skipped entirely when hooks.allownonascii is set to true.
  if [[ "${allownonascii}" == "true" ]]; then
    return 0
  fi

  # Take the NUL-separated names of files added by this commit, delete every
  # printable ASCII byte and every NUL, and count what is left over. The
  # brackets around the tr range are harmless (and needed for portability to
  # Solaris 10's /usr/bin/tr) because '[' and ']' lie inside the range anyway.
  local leftover_bytes
  leftover_bytes="$(git diff --cached --name-only --diff-filter=A -z "${against}" |
    LC_ALL=C tr -d '[ -~]\0' | wc -c)"

  # Any remaining byte means at least one added name is not plain ASCII.
  if (( leftover_bytes != 0 )); then
    cat <<\EOF
Error: Attempt to add a non-ASCII file name.
This can cause problems if you want to work with people on other platforms.
To be portable it is advisable to rename the file.
If you know what you are doing you can disable this check using:
git config hooks.allownonascii true
EOF
    exit 1
  fi
}
function check_no_diffmarkers_or_whitespace_errors {
  # `git diff-index --check` prints the offending locations itself; when it
  # reports a problem, abort the hook with the same exit status.
  git diff-index --check --cached "${against}" -- || exit "$?"
}
# Runs clang-format in place on the staged C/C++ files listed (one per line)
# in ${changed_c_filenames}, skipping any path containing "3rdparty", and
# re-stages each formatted file. Does nothing when no file qualifies; prints a
# warning and returns without formatting when clang-format is unavailable.
function run_clang_format {
  # Nothing to do when the commit touches no C/C++ sources.
  if [[ -z "${changed_c_filenames}" ]]; then
    return
  fi

  # Third-party code is never reformatted. If every changed file is third
  # party the list is empty and there is nothing to run; without this guard
  # clang-format and git add would be invoked with no file arguments.
  local files_to_format
  files_to_format="$(echo "${changed_c_filenames}" | grep -v '3rdparty')"
  if [[ -z "${files_to_format}" ]]; then
    return
  fi

  # Show which files are about to be formatted.
  echo "${files_to_format}"

  if ! command -v clang-format > /dev/null; then
    cat <<\EOF
Warning: clang-format is not installed or is not in the PATH.
Please install and amend this commit.
Debian:
sudo apt install clang-format
EOF
    return
  fi

  # Reformat the files in-place and re-add any that were changed.
  #
  # Note that this has the side effect of incorporating changes to staged files
  # that were not themselves staged. E.g., if you edit a file, `git add`, then
  # edit some more, then commit, all of the changes will be committed, not just
  # the staged ones. Depending on typical workflows it might be better to do
  # something more complicated here, or to just have the hook fail instead of
  # perform an in-place fix.
  #
  # Names are read one per line so that paths containing spaces stay intact.
  local filename
  while IFS= read -r filename; do
    clang-format -i "${filename}"
    git add -- "${filename}"
  done <<< "${files_to_format}"
}
# hooks.allownonascii = true disables the non-ASCII file name check.
allownonascii="$(git config --bool hooks.allownonascii)"

# Staged changes are compared against HEAD, or against an empty tree object
# when the repository has no commit yet (initial commit).
against=HEAD
if ! git rev-parse --verify HEAD >/dev/null 2>&1; then
  against="$(git hash-object -t tree /dev/null)"
fi

# From here on, everything written to stdout goes to stderr.
exec 1>&2

# Staged paths excluding deletions, then the subset with a C/C++ extension.
added_and_modified_filenames="$(git diff --cached --name-only --diff-filter=d)"
changed_c_filenames="$(grep -E '.*\.(c|cpp|h|hpp)$' \
  <<< "${added_and_modified_filenames}")"

# Allow blank line at EOF. Note that this writes core.whitespace into the
# repository-local git configuration every time the hook runs.
git config --local core.whitespace -blank-at-eof

# Run the checks in order; the first two abort the commit on failure.
check_no_nonascii_characters
check_no_diffmarkers_or_whitespace_errors
run_clang_format

266
src/nv2a_vsh_cpu.c Normal file
View File

@ -0,0 +1,266 @@
#include "nv2a_vsh_cpu.h"
#include <math.h>
#include <string.h>
// MOV: copies the whole register `a` into `out` unchanged.
void nv2a_vsh_cpu_mov(nv2a_vsh_register *out, const nv2a_vsh_register *a) {
  *out = *a;
}
// ARL: takes the x component of `a`, adds a bias of 0.001, rounds the sum
// down to a whole number and writes that value to all four components of
// `out`. Only a->reg.x is read; y, z and w of `a` are ignored.
void nv2a_vsh_cpu_arl(nv2a_vsh_register *out, const nv2a_vsh_register *a) {
  const float biased = a->reg.x + 0.001f;
  const float address = floorf(biased);
  out->reg.x = out->reg.y = out->reg.z = out->reg.w = address;
}
/*
def _arl(inst: dict, input: Context, output: Context):
# TODO: Validate this behavior on HW.
val = input.get(inst["inputs"][0])[0]
val = int(math.floor(val + 0.001))
output.set(inst["output"], (val, val, val, val))
def _mov(inst: dict, input: Context, output: Context):
for reg in inst["outputs"]:
output.set(reg, input.get(inst["inputs"][0]))
def _mac_mul(inst: dict, input: Context, output: Context):
a = input.get(inst["inputs"][0])
b = input.get(inst["inputs"][1])
result = [a_val * b_val for a_val, b_val in zip(a, b)]
for reg in inst["outputs"]:
output.set(reg, tuple(result))
def _mac_add(inst: dict, input: Context, output: Context):
a = input.get(inst["inputs"][0])
b = input.get(inst["inputs"][1])
result = [a_val + b_val for a_val, b_val in zip(a, b)]
for reg in inst["outputs"]:
output.set(reg, tuple(result))
def _mac_mad(inst: dict, input: Context, output: Context):
a = input.get(inst["inputs"][0])
b = input.get(inst["inputs"][1])
result = [a_val * b_val for a_val, b_val in zip(a, b)]
c = input.get(inst["inputs"][2])
result = [a_val + b_val for a_val, b_val in zip(result, c)]
for reg in inst["outputs"]:
output.set(reg, tuple(result))
def _mac_dp3(inst: dict, input: Context, output: Context):
a = input.get(inst["inputs"][0])
b = input.get(inst["inputs"][1])
result = [a_val * b_val for a_val, b_val in zip(a[:3], b[:3])]
val = functools.reduce(lambda x, y: x + y, result)
result = [val] * 4
for reg in inst["outputs"]:
output.set(reg, tuple(result))
def _mac_dph(inst: dict, input: Context, output: Context):
a = input.get(inst["inputs"][0])
b = input.get(inst["inputs"][1])
result = [a_val * b_val for a_val, b_val in zip(a[:3], b[:3])]
val = functools.reduce(lambda x, y: x + y, result)
val += b[3]
result = [val] * 4
for reg in inst["outputs"]:
output.set(reg, tuple(result))
def _mac_dp4(inst: dict, input: Context, output: Context):
a = input.get(inst["inputs"][0])
b = input.get(inst["inputs"][1])
result = [a_val * b_val for a_val, b_val in zip(a[:4], b[:4])]
val = functools.reduce(lambda x, y: x + y, result)
result = [val] * 4
for reg in inst["outputs"]:
output.set(reg, tuple(result))
def _mac_dst(inst: dict, input: Context, output: Context):
a = input.get(inst["inputs"][0])
b = input.get(inst["inputs"][1])
result = (1.0, a[1] * b[1], a[2], b[3])
for reg in inst["outputs"]:
output.set(reg, tuple(result))
def _mac_min(inst: dict, input: Context, output: Context):
a = input.get(inst["inputs"][0])
b = input.get(inst["inputs"][1])
result = [a_val if a_val < b_val else b_val for a_val, b_val in zip(a[:4], b[:4])]
for reg in inst["outputs"]:
output.set(reg, tuple(result))
def _mac_max(inst: dict, input: Context, output: Context):
a = input.get(inst["inputs"][0])
b = input.get(inst["inputs"][1])
result = [a_val if a_val >= b_val else b_val for a_val, b_val in zip(a[:4], b[:4])]
for reg in inst["outputs"]:
output.set(reg, tuple(result))
def _mac_slt(inst: dict, input: Context, output: Context):
a = input.get(inst["inputs"][0])
b = input.get(inst["inputs"][1])
result = [1.0 if a_val < b_val else 0.0 for a_val, b_val in zip(a[:4], b[:4])]
for reg in inst["outputs"]:
output.set(reg, tuple(result))
def _mac_sge(inst: dict, input: Context, output: Context):
a = input.get(inst["inputs"][0])
b = input.get(inst["inputs"][1])
result = [1.0 if a_val >= b_val else 0.0 for a_val, b_val in zip(a[:4], b[:4])]
for reg in inst["outputs"]:
output.set(reg, tuple(result))
_MAC_HANDLERS = {
nv2avsh.vsh_instruction.MAC.MAC_MOV: _mov,
nv2avsh.vsh_instruction.MAC.MAC_MUL: _mac_mul,
nv2avsh.vsh_instruction.MAC.MAC_ADD: _mac_add,
nv2avsh.vsh_instruction.MAC.MAC_MAD: _mac_mad,
nv2avsh.vsh_instruction.MAC.MAC_DP3: _mac_dp3,
nv2avsh.vsh_instruction.MAC.MAC_DPH: _mac_dph,
nv2avsh.vsh_instruction.MAC.MAC_DP4: _mac_dp4,
nv2avsh.vsh_instruction.MAC.MAC_DST: _mac_dst,
nv2avsh.vsh_instruction.MAC.MAC_MIN: _mac_min,
nv2avsh.vsh_instruction.MAC.MAC_MAX: _mac_max,
nv2avsh.vsh_instruction.MAC.MAC_SLT: _mac_slt,
nv2avsh.vsh_instruction.MAC.MAC_SGE: _mac_sge,
nv2avsh.vsh_instruction.MAC.MAC_ARL: _arl,
}
def _ilu_rcp(inst: dict, input: Context, output: Context):
a = input.get(inst["inputs"][0])
def compute(val):
if val == 1.0:
return 1.0
if val == 0.0:
return math.inf
return 1.0 / val
result = [compute(val) for val in a[:4]]
for reg in inst["outputs"]:
output.set(reg, (result[0], result[1], result[2], result[3]))
def _ilu_rcc(inst: dict, input: Context, output: Context):
a = input.get(inst["inputs"][0])
def compute(input):
if input < -1.84467e19:
input = -1.84467e19
elif input > -5.42101e-20 and input < 0:
input = -5.42101e-020
elif input >= 0 and input < 5.42101e-20:
input = 5.42101e-20
elif input > 1.84467e19:
input = 1.84467e19
if input == 1.0:
return 1.0
return 1.0 / input
result = [compute(val) for val in a[:4]]
for reg in inst["outputs"]:
output.set(reg, (result[0], result[1], result[2], result[3]))
def _ilu_rsq(inst: dict, input: Context, output: Context):
a = input.get(inst["inputs"][0])
def compute(input):
if input == 1.0:
return 1.0
if input == 0:
return math.inf
return 1.0 / math.sqrt(input)
result = [compute(abs(val)) for val in a[:4]]
for reg in inst["outputs"]:
output.set(reg, (result[0], result[1], result[2], result[3]))
def _ilu_exp(inst: dict, input: Context, output: Context):
a = input.get(inst["inputs"][0])
tmp = math.floor(a[0])
x = math.pow(2, tmp)
y = a[0] - tmp
z = math.pow(2, a[0])
w = 1.0
for reg in inst["outputs"]:
output.set(reg, (x, y, z, w))
def _ilu_log(inst: dict, input: Context, output: Context):
a = input.get(inst["inputs"][0])
tmp = math.floor(a[0])
if tmp == 0.0:
x = -math.inf
y = 1.0
z = -math.inf
w = 1.0
else:
x = math.floor(math.log2(tmp))
y = tmp / math.pow(2, math.floor(math.log2(tmp)))
z = math.log2(tmp)
w = 1.0
for reg in inst["outputs"]:
output.set(reg, (x, y, z, w))
def _clamp(val, min_val, max_val):
return max(min(val, max_val), min_val)
def _ilu_lit(inst: dict, input: Context, output: Context):
a = input.get(inst["inputs"][0])
epsilon = 1.0 / 256.0
sx = max(a[0], 0.0)
sy = max(a[1], 0.0)
sw = _clamp(a[3], -(128 - epsilon), 128 - epsilon)
x = 1.0
y = sx
z = 0.0
if sx > 0:
z = math.pow(2, sw * math.log2(sy))
w = 1.0
output.set(inst["output"], (x, y, z, w))
_ILU_HANDLERS = {
nv2avsh.vsh_instruction.ILU.ILU_MOV: _mov,
nv2avsh.vsh_instruction.ILU.ILU_RCP: _ilu_rcp,
nv2avsh.vsh_instruction.ILU.ILU_RCC: _ilu_rcc,
nv2avsh.vsh_instruction.ILU.ILU_RSQ: _ilu_rsq,
nv2avsh.vsh_instruction.ILU.ILU_EXP: _ilu_exp,
nv2avsh.vsh_instruction.ILU.ILU_LOG: _ilu_log,
nv2avsh.vsh_instruction.ILU.ILU_LIT: _ilu_lit,
}
*/

58
src/nv2a_vsh_cpu.h Normal file
View File

@ -0,0 +1,58 @@
#ifndef NV2A_VSH_CPU_SRC_NV2A_VSH_CPU_H_
#define NV2A_VSH_CPU_SRC_NV2A_VSH_CPU_H_
#ifdef __cplusplus
extern "C" {
#endif
// The four float components of a register, addressed by name.
struct nv2a_vsh_register_components {
  float x, y, z, w;
};

// A four-float register. The same storage can be accessed by component name
// through `reg` or by index through `raw`. `reg` is the first member, so a
// brace initializer fills x, y, z, w in that order.
typedef union nv2a_vsh_register_ {
  struct nv2a_vsh_register_components reg;
  float raw[4];
} nv2a_vsh_register;
// Prototype generators for operations taking one, two or three source
// registers. Every operation is named nv2a_vsh_cpu_<name>, writes its result
// through `out` and reads its sources through const pointers.
//
// The helper macros carry the library prefix so that including this header
// never redefines, or later #undefs, a macro owned by the including code.
#define NV2A_VSH_CPU_OP_1(name)                     \
  void nv2a_vsh_cpu_##name(nv2a_vsh_register *out, \
                           const nv2a_vsh_register *a)
#define NV2A_VSH_CPU_OP_2(name)                       \
  void nv2a_vsh_cpu_##name(nv2a_vsh_register *out,   \
                           const nv2a_vsh_register *a, \
                           const nv2a_vsh_register *b)
#define NV2A_VSH_CPU_OP_3(name)                       \
  void nv2a_vsh_cpu_##name(nv2a_vsh_register *out,   \
                           const nv2a_vsh_register *a, \
                           const nv2a_vsh_register *b, \
                           const nv2a_vsh_register *c)

// Register move and address-register load.
NV2A_VSH_CPU_OP_1(mov);
NV2A_VSH_CPU_OP_1(arl);

// Component-wise arithmetic.
NV2A_VSH_CPU_OP_2(mul);
NV2A_VSH_CPU_OP_2(add);
NV2A_VSH_CPU_OP_3(mad);

// Dot products and distance vector.
NV2A_VSH_CPU_OP_2(dp3);
NV2A_VSH_CPU_OP_2(dph);
NV2A_VSH_CPU_OP_2(dp4);
NV2A_VSH_CPU_OP_2(dst);

// Component-wise selection and comparison.
NV2A_VSH_CPU_OP_2(min);
NV2A_VSH_CPU_OP_2(max);
NV2A_VSH_CPU_OP_2(slt);
NV2A_VSH_CPU_OP_2(sge);

// Single-source operations: reciprocals, exponent/logarithm and lighting.
NV2A_VSH_CPU_OP_1(rcp);
NV2A_VSH_CPU_OP_1(rcc);
NV2A_VSH_CPU_OP_1(rsq);
NV2A_VSH_CPU_OP_1(exp);
NV2A_VSH_CPU_OP_1(log);
NV2A_VSH_CPU_OP_1(lit);

// The generators are an implementation detail of this header.
#undef NV2A_VSH_CPU_OP_1
#undef NV2A_VSH_CPU_OP_2
#undef NV2A_VSH_CPU_OP_3
// Close the extern "C" linkage block opened at the top of this header. No
// semicolon follows the brace: it would be an empty declaration and draws
// warnings under -Wextra-semi / -pedantic.
#ifdef __cplusplus
}  // extern "C"
#endif
#endif // NV2A_VSH_CPU_SRC_NV2A_VSH_CPU_H_

View File

@ -0,0 +1,55 @@
#include <boost/test/unit_test.hpp>
#include "nv2a_vsh_cpu.h"
BOOST_AUTO_TEST_SUITE(basic_operation_suite)
// MOV must reproduce every component of its source register exactly.
BOOST_AUTO_TEST_CASE(mov) {
  nv2a_vsh_register source = {{0.0f, -1000.0f, 1000.0f, 64.123456f}};
  nv2a_vsh_register result;

  nv2a_vsh_cpu_mov(&result, &source);

  BOOST_TEST(result.reg.x == source.reg.x);
  BOOST_TEST(result.reg.y == source.reg.y);
  BOOST_TEST(result.reg.z == source.reg.z);
  BOOST_TEST(result.reg.w == source.reg.w);
}
// ARL on an input whose x component is already a whole number: x must come
// through unchanged and be copied to every output component.
BOOST_AUTO_TEST_CASE(arl_trivial) {
  nv2a_vsh_register source = {{10.0f, -1000.0f, 1000.0f, 64.123456f}};
  nv2a_vsh_register result;

  nv2a_vsh_cpu_arl(&result, &source);

  BOOST_TEST(result.reg.x == source.reg.x);
  BOOST_TEST(result.reg.y == source.reg.x);
  BOOST_TEST(result.reg.z == source.reg.x);
  BOOST_TEST(result.reg.w == source.reg.x);
}
// ARL on an input with a fractional x component: the fraction is discarded
// and the resulting whole number fills every output component.
BOOST_AUTO_TEST_CASE(arl_truncate) {
  nv2a_vsh_register source = {{10.12345f, -1000.0f, 1000.0f, 64.123456f}};
  nv2a_vsh_register result;
  const float expected = 10.0f;

  nv2a_vsh_cpu_arl(&result, &source);

  BOOST_TEST(result.reg.x == expected);
  BOOST_TEST(result.reg.y == expected);
  BOOST_TEST(result.reg.z == expected);
  BOOST_TEST(result.reg.w == expected);
}
// ARL adds a bias of 0.001 before rounding down, so an x component that sits
// slightly below a whole number must still resolve to that whole number.
BOOST_AUTO_TEST_CASE(arl_biased) {
  // 9.9995f is representably below 10.0f yet within the 0.001 bias, so the
  // expected result of 10 can only come from the bias being applied. A literal
  // such as 9.9999999f is unsuitable here: it rounds to exactly 10.0f in
  // single precision and would pass even without any bias.
  nv2a_vsh_register a = {9.9995f, -1000.0f, 1000.0f, 64.123456f};
  nv2a_vsh_register out;

  // Guard against the input literal silently collapsing to the whole number.
  BOOST_TEST(a.reg.x < 10.0f);

  nv2a_vsh_cpu_arl(&out, &a);

  BOOST_TEST(out.reg.x == 10.0f);
  BOOST_TEST(out.reg.y == 10.0f);
  BOOST_TEST(out.reg.z == 10.0f);
  BOOST_TEST(out.reg.w == 10.0f);
}
BOOST_AUTO_TEST_SUITE_END()

View File

@ -0,0 +1,2 @@
#define BOOST_TEST_MODULE OperationTests
#include <boost/test/unit_test.hpp>