diff --git a/.clang-format b/.clang-format index aeb5bb2..2edf43e 100644 --- a/.clang-format +++ b/.clang-format @@ -57,7 +57,7 @@ BreakConstructorInitializersBeforeComma: false BreakConstructorInitializers: BeforeColon BreakAfterJavaFieldAnnotations: false BreakStringLiterals: true -ColumnLimit: 80 +ColumnLimit: 120 CommentPragmas: '^ IWYU pragma:' CompactNamespaces: false ConstructorInitializerAllOnOneLineOrOnePerLine: true diff --git a/.gitignore b/.gitignore index c836bbe..5592ee8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,10 +1,12 @@ *.cpp.d +*.c.d *.obj *.iso *.lib *.exe *.xbe *.pdb +*.inl .DS_Store .vscode/ diff --git a/Makefile b/Makefile index c00ae9c..848c19d 100644 --- a/Makefile +++ b/Makefile @@ -1,10 +1,15 @@ XBE_TITLE = nxdk_texture_format_tests GEN_XISO = $(XBE_TITLE).iso -SRCS = $(CURDIR)/math3d.c $(CURDIR)/main.c $(CURDIR)/swizzle.c -SHADER_OBJS = ps.inl vs.inl NXDK_DIR ?= $(CURDIR)/../nxdk NXDK_SDL = y NXDK_CXX = y +SRCS = \ + $(CURDIR)/main.cpp \ + $(CURDIR)/math3d.c \ + $(CURDIR)/third_party/swizzle.c + +SHADER_OBJS = ps.inl vs.inl + include $(NXDK_DIR)/Makefile diff --git a/githooks/pre-commit b/githooks/pre-commit new file mode 100755 index 0000000..caa0d08 --- /dev/null +++ b/githooks/pre-commit @@ -0,0 +1,102 @@ +#!/bin/bash +# +# To enable this hook, rename this file to "pre-commit" and copy into the +# ../.git/hooks directory. + + +# Cross platform projects tend to avoid non-ASCII filenames; prevent +# them from being added to the repository. We exploit the fact that the +# printable range starts at the space character and ends with tilde. +function check_no_nonascii_characters { + if [ "${allownonascii}" == "true" ]; then + return + fi + + # Note that the use of brackets around a tr range is ok here, (it's + # even required, for portability to Solaris 10's /usr/bin/tr), since + # the square bracket bytes happen to fall in the designated range. + if test $(git diff --cached --name-only --diff-filter=A -z "${against}" | + LC_ALL=C tr -d '[ -~]\0' | wc -c) != 0 + then + cat <<\EOF +Error: Attempt to add a non-ASCII file name. + +This can cause problems if you want to work with people on other platforms. + +To be portable it is advisable to rename the file. + +If you know what you are doing you can disable this check using: + + git config hooks.allownonascii true +EOF + exit 1 + fi +} + + +function check_no_diffmarkers_or_whitespace_errors { + # If there are whitespace errors, print the offending file names and fail. + set -e + git diff-index --check --cached "${against}" -- + set +e +} + + +function run_clang_format { + echo "${changed_c_filenames}" | grep -v '3rdparty' + if [[ "${changed_c_filenames}" == "" ]]; then + return + fi + # Run clang-format against any changed C++ files. + if ! which clang-format > /dev/null; then + cat <<\EOF +Warning: clang-format is not installed or is not in the PATH. + +Please install and amend this commit. + +Debian: + sudo apt install clang-format +EOF + return + fi + + # Reformat the files in-place and re-add any that were changed. + # + # Note that this has the side effect of incorporating changes to staged files + # that were not themselves staged. E.g., if you edit a file, `git add`, then + # edit some more, then commit, all of the changes will be committed, not just + # the staged ones. Depending on typical workflows it might be better to do + # something more complicated here, or to just have the hook fail instead of + # perform an in-place fix. + files_to_format="$(echo "${changed_c_filenames}" | grep -Ev 'third_party|resources')" + echo "${files_to_format}" | xargs clang-format -i + echo "${files_to_format}" | xargs git add +} + + +# If you want to allow non-ASCII filenames set this variable to true. +allownonascii=$(git config --bool hooks.allownonascii) + +if git rev-parse --verify HEAD >/dev/null 2>&1; then + against=HEAD +else + # Initial commit: diff against an empty tree object + against=$(git hash-object -t tree /dev/null) +fi + +# Redirect output to stderr. +exec 1>&2 + + +added_and_modified_filenames="$(git diff --cached --name-only --diff-filter=d)" +changed_c_filenames="$(echo "${added_and_modified_filenames}" | \ + grep -E '.*\.(c|cpp|h|hpp)$')" + + +# Allow blank line at EOF. +git config --local core.whitespace -blank-at-eof + +check_no_nonascii_characters +check_no_diffmarkers_or_whitespace_errors +run_clang_format + diff --git a/main.c b/main.c deleted file mode 100644 index d81031e..0000000 --- a/main.c +++ /dev/null @@ -1,552 +0,0 @@ -/* - * This sample provides a very basic demonstration of 3D rendering on the Xbox, - * using pbkit. Based on the pbkit demo sources. - */ -#include -#include -#define _USE_MATH_DEFINES -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "math3d.h" -#include -#include -#include "swizzle.h" - -typedef struct TextureFormatInfo { - SDL_PixelFormatEnum SdlFormat; - uint32_t XboxFormat; - uint16_t XboxBpp; // bytes per pixel - bool XboxSwizzled; - bool RequireConversion; - char* Name; -} TextureFormatInfo; - -#pragma pack(1) -typedef struct Vertex { - float pos[3]; - float texcoord[2]; - float normal[3]; -} Vertex; - -#pragma pack() - -static Vertex *alloc_vertices; // texcoords 0 to width/height -static Vertex *alloc_vertices_swizzled; // texcoords normalized 0 to 1 -static uint32_t num_vertices; - -MATRIX m_model, m_view, m_proj; - -VECTOR v_cam_pos = { 0, 0.05, 1.07, 1 }; -VECTOR v_cam_rot = { 0, 0, 0, 1 }; -VECTOR v_light_dir = { 0, 0, 1, 1 }; - -#include "verts.h" -#include "texture.h" - -#define MASK(mask, val) (((val) << (ffs(mask)-1)) & (mask)) -#define MAXRAM 0x03FFAFFF - -// TODO: upstream missing nv2a defines -#define NV2A_VERTEX_ATTR_POSITION 0 -#define NV2A_VERTEX_ATTR_NORMAL 2 -#define NV2A_VERTEX_ATTR_TEXTURE0 9 -#define NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_G8B8 0x17 -#define NV097_SET_TEXTURE_FORMAT_COLOR_SZ_B8G8R8A8 0x3B -#define NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8 0x24 -#define NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_YB8CR8YA8CB8 0x25 -#define NV097_SET_TEXTURE_FORMAT_COLOR_D16 0x2C // TODO: proper nvidia name -#define NV097_SET_TEXTURE_FORMAT_COLOR_LIN_F16 0x31 // TODO: proper nvidia name -#define NV097_SET_CONTROL0_COLOR_SPACE_CONVERT 0xF0000000 -#define NV097_SET_CONTROL0_COLOR_SPACE_CONVERT_CRYCB_TO_RGB 0x1 - -static void matrix_viewport(float out[4][4], float x, float y, float width, float height, float z_min, float z_max); -static void init_shader(void); -static void init_textures(void); -static void set_attrib_pointer(unsigned int index, unsigned int format, unsigned int size, unsigned int stride, const void* data); -static void draw_arrays(unsigned int mode, int start, int count); -static int update_texture_memory(void* texMem, TextureFormatInfo format, int width, int height); - -static const TextureFormatInfo format_map[] = { - - // swizzled - { SDL_PIXELFORMAT_ABGR8888, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8B8G8R8, 4, true, false, "A8B8G8R8" }, - { SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R8G8B8A8, 4, true, false, "R8G8B8A8" }, - { SDL_PIXELFORMAT_ARGB8888, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8R8G8B8, 4, true, false, "A8R8G8B8" }, - { SDL_PIXELFORMAT_ARGB8888, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X8R8G8B8, 4, true, false, "X8R8G8B8" }, - { SDL_PIXELFORMAT_BGRA8888, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_B8G8R8A8, 4, true, false, "B8G8R8A8" }, - { SDL_PIXELFORMAT_RGB565, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R5G6B5, 2, true, false, "R5G6B5" }, - { SDL_PIXELFORMAT_ARGB1555, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A1R5G5B5, 2, true, false, "A1R5G5B5" }, - { SDL_PIXELFORMAT_ARGB1555, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X1R5G5B5, 2, true, false, "X1R5G5B5" }, - { SDL_PIXELFORMAT_ARGB4444, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A4R4G4B4, 2, true, false, "A4R4G4B4" }, - - // linear unsigned - { SDL_PIXELFORMAT_ABGR8888, NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8B8G8R8, 4, false, false, "A8B8G8R8" }, - { SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R8G8B8A8, 4, false, false, "R8G8B8A8" }, - { SDL_PIXELFORMAT_ARGB8888, NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8R8G8B8, 4, false, false, "A8R8G8B8" }, - { SDL_PIXELFORMAT_ARGB8888, NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X8R8G8B8, 4, false, false, "X8R8G8B8" }, - { SDL_PIXELFORMAT_BGRA8888, NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_B8G8R8A8, 4, false, false, "B8G8R8A8" }, - { SDL_PIXELFORMAT_RGB565, NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R5G6B5, 2, false, false, "R5G6B5" }, - { SDL_PIXELFORMAT_ARGB1555, NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A1R5G5B5, 2, false, false, "A1R5G5B5" }, - { SDL_PIXELFORMAT_ARGB1555, NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X1R5G5B5, 2, false, false, "X1R5G5B5" }, - { SDL_PIXELFORMAT_ARGB4444, NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A4R4G4B4, 2, false, false, "A4R4G4B4" }, - - // yuv color space - // Each 4 bytes represent the color for 2 neighboring pixels: - // [ U0 | Y0 | V0 | Y1 ] - // Y0 is the brightness of pixel 0, Y1 the brightness of pixel 1. - // U0 and V0 is the color of both pixels. (second pixel is the one sampled? or averaged? doesn't really matter here) - // https://docs.microsoft.com/en-us/windows/win32/medfound/recommended-8-bit-yuv-formats-for-video-rendering#converting-8-bit-yuv-to-rgb888 - { SDL_PIXELFORMAT_BGRA8888, NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8, 2, false, true, "YUY2" }, - { SDL_PIXELFORMAT_BGRA8888, NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_YB8CR8YA8CB8, 2, false, true, "UYVY" }, - - //{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_Y16, false, true, "Y16" }, - //{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_Y8, true, true, "SZ_Y8" }, - //{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_Y8, false, true, "Y8" }, - //{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_AY8, true, true, "SZ_AY8" }, - //{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_AY8, false, true, "AY8" }, - //{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8, true, true, "SZ_A8" }, - //{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8Y8, true, true, "SZ_A8Y8" }, - - // misc formats - //{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT1_A1R5G5B5, false, true, "DXT1" }, - //{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT23_A8R8G8B8, false, true, "DXT3" }, - //{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT45_A8R8G8B8, false, true, "DXT5" }, - //{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_G8B8, true, true, "SZ_G8B8" }, - { SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_G8B8, 2, false, true, "G8B8" }, - //{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_D16, false, true, "D16" }, // TODO: implement in xemu - //{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_LIN_F16, false, true, "LIN_F16" }, // TODO: implement in xemu - //{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R8B8, true, true, "SZ_R8B8" }, - //{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R6G5B5, true, true, "R6G5B5" } - - // TODO: define others here -}; - -// bitscan forward -int bsf(int val) { - __asm bsf eax, val -} - -/* Main program function */ -int main(void) -{ - uint32_t *p; - int i, status; - int width = 640, height = 480; - float m_viewport[4][4]; - int format_map_index = 0; - bool toggleFormat; - int texWidth = 256, texHeight = 256; - SDL_GameController *gameController; - - XVideoSetMode(width, height, 32, REFRESH_DEFAULT); - - // initialize input for the first gamepad - SDL_InitSubSystem(SDL_INIT_GAMECONTROLLER); - gameController = SDL_GameControllerOpen(0); - if (!gameController) { - debugPrint("Failed to initialize input for gamepad 0"); - Sleep(2000); - return 1; - } - - if ((status = pb_init())) { - debugPrint("pb_init Error %d\n", status); - Sleep(2000); - return 1; - } - - pb_show_front_screen(); - - /* Load constant rendering things (shaders, geometry) */ - init_shader(); - - // real nv2a hardware seems to cache this and not honor updates? have separate vertex buffers for swizzled and linear for now... - alloc_vertices = MmAllocateContiguousMemoryEx(sizeof(vertices), 0, MAXRAM, 0, PAGE_WRITECOMBINE | PAGE_READWRITE); - alloc_vertices_swizzled = MmAllocateContiguousMemoryEx(sizeof(vertices), 0, MAXRAM, 0, PAGE_WRITECOMBINE | PAGE_READWRITE); - memcpy(alloc_vertices, vertices, sizeof(vertices)); - memcpy(alloc_vertices_swizzled, vertices, sizeof(vertices)); - num_vertices = sizeof(vertices)/sizeof(vertices[0]); - for (int i = 0; i < num_vertices; i++) { - if (alloc_vertices[i].texcoord[0]) alloc_vertices[i].texcoord[0] = texWidth * 1.0f; - if (alloc_vertices[i].texcoord[1]) alloc_vertices[i].texcoord[1] = texHeight * 1.0f; - } - - // allocate texture memory buffer large enough for all types - void *texMem = MmAllocateContiguousMemoryEx(texWidth * texHeight * 4, 0, MAXRAM, 0, PAGE_WRITECOMBINE | PAGE_READWRITE); - int texError = update_texture_memory(texMem, format_map[format_map_index], texWidth, texHeight); - - /* Create view matrix (our camera is static) */ - matrix_unit(m_view); - create_world_view(m_view, v_cam_pos, v_cam_rot); - - /* Create projection matrix */ - matrix_unit(m_proj); - create_view_screen(m_proj, (float)width/(float)height, -1.0f, 1.0f, -1.0f, 1.0f, 1.0f, 10000.0f); - - /* Create viewport matrix, combine with projection */ - matrix_viewport(m_viewport, 0, 0, width, height, 0, 65536.0f); - matrix_multiply(m_proj, m_proj, (float*)m_viewport); - - /* Create local->world matrix given our updated object */ - matrix_unit(m_model); - - while(1) { - - // cycle current texture based on A or B button presses - SDL_GameControllerUpdate(); - bool aPress = SDL_GameControllerGetButton(gameController, SDL_CONTROLLER_BUTTON_A); - bool bPress = SDL_GameControllerGetButton(gameController, SDL_CONTROLLER_BUTTON_B); - if (aPress || bPress) { - if (toggleFormat) { - // TODO: back doesn't wrap as intended, re-do logic - format_map_index = (format_map_index + (aPress ? 1 : -1)) % (sizeof(format_map) / sizeof(format_map[0])); - texError = update_texture_memory(texMem, format_map[format_map_index], texWidth, texHeight); - } - toggleFormat = false; - } else toggleFormat = true; - - pb_wait_for_vbl(); - pb_reset(); - pb_target_back_buffer(); - - /* Clear depth & stencil buffers */ - pb_erase_depth_stencil_buffer(0, 0, width, height); - pb_fill(0, 0, width, height, 0xff000000); - pb_erase_text_screen(); - - while(pb_busy()) { - /* Wait for completion... */ - } - - /* - * Setup texture stages - */ - - /* Enable texture stage 0 */ - /* FIXME: Use constants instead of the hardcoded values below */ - p = pb_begin(); - - // first one seems to be needed - p = pb_push1(p, NV097_SET_FRONT_FACE, NV097_SET_FRONT_FACE_V_CCW); - p = pb_push1(p, NV097_SET_DEPTH_TEST_ENABLE, true); - - // Enable alpha blending functionality - p = pb_push1(p, NV097_SET_BLEND_ENABLE, true); - - // Set the alpha blend source (s) and destination (d) factors - p = pb_push1(p, NV097_SET_BLEND_FUNC_SFACTOR, NV097_SET_BLEND_FUNC_SFACTOR_V_SRC_ALPHA); - p = pb_push1(p, NV097_SET_BLEND_FUNC_DFACTOR, NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_SRC_ALPHA); - - // yuv requires color space conversion - if (format_map[format_map_index].XboxFormat == NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8 || - format_map[format_map_index].XboxFormat == NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_YB8CR8YA8CB8) { - p = pb_push1(p, NV097_SET_CONTROL0, - MASK(NV097_SET_CONTROL0_COLOR_SPACE_CONVERT, NV097_SET_CONTROL0_COLOR_SPACE_CONVERT_CRYCB_TO_RGB)); - } - - DWORD format_mask = MASK(NV097_SET_TEXTURE_FORMAT_CONTEXT_DMA, 1) | - MASK(NV097_SET_TEXTURE_FORMAT_CUBEMAP_ENABLE, 0) | - MASK(NV097_SET_TEXTURE_FORMAT_BORDER_SOURCE, NV097_SET_TEXTURE_FORMAT_BORDER_SOURCE_COLOR) | - MASK(NV097_SET_TEXTURE_FORMAT_DIMENSIONALITY, 2) | - MASK(NV097_SET_TEXTURE_FORMAT_COLOR, format_map[format_map_index].XboxFormat) | - MASK(NV097_SET_TEXTURE_FORMAT_MIPMAP_LEVELS, 1) | - MASK(NV097_SET_TEXTURE_FORMAT_BASE_SIZE_U, format_map[format_map_index].XboxSwizzled ? bsf(texWidth) : 0) | - MASK(NV097_SET_TEXTURE_FORMAT_BASE_SIZE_V, format_map[format_map_index].XboxSwizzled ? bsf(texHeight) : 0) | - MASK(NV097_SET_TEXTURE_FORMAT_BASE_SIZE_P, 0); - p = pb_push2(p,NV20_TCL_PRIMITIVE_3D_TX_OFFSET(0),(DWORD)texMem & 0x03ffffff,format_mask); //set stage 0 texture address & format - if (!format_map[format_map_index].XboxSwizzled) { - p = pb_push1(p,NV20_TCL_PRIMITIVE_3D_TX_NPOT_PITCH(0),(format_map[format_map_index].XboxBpp * texWidth)<<16); //set stage 0 texture pitch (pitch<<16) - p = pb_push1(p,NV20_TCL_PRIMITIVE_3D_TX_NPOT_SIZE(0),(texWidth<<16)|texHeight); //set stage 0 texture width & height ((witdh<<16)|height) - } - p = pb_push1(p,NV20_TCL_PRIMITIVE_3D_TX_WRAP(0),0x00030303);//set stage 0 texture modes (0x0W0V0U wrapping: 1=wrap 2=mirror 3=clamp 4=border 5=clamp to edge) - p = pb_push1(p,NV20_TCL_PRIMITIVE_3D_TX_ENABLE(0),0x4003ffc0); //set stage 0 texture enable flags - p = pb_push1(p,NV20_TCL_PRIMITIVE_3D_TX_FILTER(0),0x04074000); //set stage 0 texture filters (AA!) - - pb_end(p); - - /* Disable other texture stages */ - p = pb_begin(); - p = pb_push1(p,NV20_TCL_PRIMITIVE_3D_TX_ENABLE(1),0x0003ffc0);//set stage 1 texture enable flags (bit30 disabled) - p = pb_push1(p,NV20_TCL_PRIMITIVE_3D_TX_ENABLE(2),0x0003ffc0);//set stage 2 texture enable flags (bit30 disabled) - p = pb_push1(p,NV20_TCL_PRIMITIVE_3D_TX_ENABLE(3),0x0003ffc0);//set stage 3 texture enable flags (bit30 disabled) - p = pb_push1(p,NV20_TCL_PRIMITIVE_3D_TX_WRAP(1),0x00030303);//set stage 1 texture modes (0x0W0V0U wrapping: 1=wrap 2=mirror 3=clamp 4=border 5=clamp to edge) - p = pb_push1(p,NV20_TCL_PRIMITIVE_3D_TX_WRAP(2),0x00030303);//set stage 2 texture modes (0x0W0V0U wrapping: 1=wrap 2=mirror 3=clamp 4=border 5=clamp to edge) - p = pb_push1(p,NV20_TCL_PRIMITIVE_3D_TX_WRAP(3),0x00030303);//set stage 3 texture modes (0x0W0V0U wrapping: 1=wrap 2=mirror 3=clamp 4=border 5=clamp to edge) - p = pb_push1(p,NV20_TCL_PRIMITIVE_3D_TX_FILTER(1),0x02022000);//set stage 1 texture filters (no AA, stage not even used) - p = pb_push1(p,NV20_TCL_PRIMITIVE_3D_TX_FILTER(2),0x02022000);//set stage 2 texture filters (no AA, stage not even used) - p = pb_push1(p,NV20_TCL_PRIMITIVE_3D_TX_FILTER(3),0x02022000);//set stage 3 texture filters (no AA, stage not even used) - pb_end(p); - - /* Send shader constants - * - * WARNING: Changing shader source code may impact constant locations! - * Check the intermediate file (*.inl) for the expected locations after - * changing the code. - */ - p = pb_begin(); - - /* Set shader constants cursor at C0 */ - p = pb_push1(p, NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_CONST_ID, 96); - - /* Send the model matrix */ - pb_push(p++, NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_CONST_X, 16); - memcpy(p, m_model, 16*4); p+=16; - - /* Send the view matrix */ - pb_push(p++, NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_CONST_X, 16); - memcpy(p, m_view, 16*4); p+=16; - - /* Send the projection matrix */ - pb_push(p++, NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_CONST_X, 16); - memcpy(p, m_proj, 16*4); p+=16; - - /* Send camera position */ - pb_push(p++, NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_CONST_X, 4); - memcpy(p, v_cam_pos, 4*4); p+=4; - - /* Send light direction */ - pb_push(p++, NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_CONST_X, 4); - memcpy(p, v_light_dir, 4*4); p+=4; - - /* Send shader constants */ - float constants_0[4] = {0, 0, 0, 0}; - pb_push(p++, NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_CONST_X, 4); - memcpy(p, constants_0, 4*4); p+=4; - - /* Clear all attributes */ - pb_push(p++,NV097_SET_VERTEX_DATA_ARRAY_FORMAT,16); - for(i = 0; i < 16; i++) { - *(p++) = 2; - } - pb_end(p); - - /* - * Setup vertex attributes - */ - - Vertex *vptr = format_map[format_map_index].XboxSwizzled ? alloc_vertices_swizzled : alloc_vertices; - - /* Set vertex position attribute */ - set_attrib_pointer(NV2A_VERTEX_ATTR_POSITION, NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F, - 3, sizeof(Vertex), &vptr[0].pos); - - /* Set texture coordinate attribute */ - set_attrib_pointer(NV2A_VERTEX_ATTR_TEXTURE0, NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F, - 2, sizeof(Vertex), &vptr[0].texcoord); - - /* Set vertex normal attribute */ - set_attrib_pointer(NV2A_VERTEX_ATTR_NORMAL, NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F, - 3, sizeof(Vertex), &vptr[0].normal); - - /* Begin drawing triangles */ - draw_arrays(NV097_SET_BEGIN_END_OP_TRIANGLES, 0, num_vertices); - - /* Draw some text on the screen */ - pb_print("N: %s\n", format_map[format_map_index].Name); - pb_print("F: 0x%x\n", format_map[format_map_index].XboxFormat); - pb_print("SZ: %d\n", format_map[format_map_index].XboxSwizzled); - pb_print("C: %d\n", format_map[format_map_index].RequireConversion); - pb_print("W: %d\n", texWidth); - pb_print("H: %d\n", texHeight); - pb_print("P: %d\n", format_map[format_map_index].XboxBpp * texWidth); - pb_print("ERR: %d\n", texError); - pb_draw_text_screen(); - - while(pb_busy()) { - /* Wait for completion... */ - } - - /* Swap buffers (if we can) */ - while (pb_finished()) { - /* Not ready to swap yet */ - } - } - - /* Unreachable cleanup code */ - SDL_GameControllerClose(gameController); - SDL_QuitSubSystem(SDL_INIT_GAMECONTROLLER); - MmFreeContiguousMemory(alloc_vertices); - MmFreeContiguousMemory(alloc_vertices_swizzled); - MmFreeContiguousMemory(texMem); - pb_show_debug_screen(); - pb_kill(); - return 0; -} - - -static int update_texture_memory(void *texMem, TextureFormatInfo format, int width, int height) -{ - // create source surface - SDL_Surface *gradient_surf = SDL_CreateRGBSurfaceWithFormat(0, width, height, 32, SDL_PIXELFORMAT_RGBA8888); - if (gradient_surf == NULL) - return 1; - - if (SDL_LockSurface(gradient_surf)) - return 2; - - // TODO: have different color patterns controlled by alternate gamepad button(s) - // generate basic gradient pattern - uint32_t *pixels = gradient_surf->pixels; - for (int y = 0; y < height; y++) - for (int x = 0; x < width; x++) { - int xNorm = x * 255.0f / width; - int yNorm = y * 255.0f / height; - pixels[y * width + x] = SDL_MapRGBA(gradient_surf->format, yNorm, xNorm, 255 - yNorm, xNorm + yNorm); - } - - SDL_UnlockSurface(gradient_surf); - - // if conversion required, do so, otherwise use SDL to convert - if (format.RequireConversion) { - uint8_t *dstP = (uint8_t*)texMem; - - // TODO: potential reference material - https://github.com/scalablecory/colors/blob/master/color.c - switch (format.XboxFormat) { - case NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8: // YUY2 aka YUYV - for (int y = 0; y < height; y++) - for (int x = 0; x < width; x += 2) { - uint8_t R0, G0, B0, R1, G1, B1; - SDL_GetRGB(pixels[y * width + x], gradient_surf->format, &R0, &G0, &B0); - SDL_GetRGB(pixels[y * width + x + 1], gradient_surf->format, &R1, &G1, &B1); - dstP[0] = (0.257f * R0) + (0.504f * G0) + (0.098f * B0) + 16; // Y0 - dstP[1] = -(0.148f * R1) - (0.291f * G1) + (0.439f * B1) + 128; // U - dstP[2] = (0.257f * R1) + (0.504f * G1) + (0.098f * B1) + 16; // Y1 - dstP[3] = (0.439f * R1) - (0.368f * G1) - (0.071f * B1) + 128; // V - dstP += 4; - } - break; - case NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_YB8CR8YA8CB8: // UYVY - for (int y = 0; y < height; y++) - for (int x = 0; x < width; x += 2) { - uint8_t R0, G0, B0, R1, G1, B1; - SDL_GetRGB(pixels[y * width + x], gradient_surf->format, &R0, &G0, &B0); - SDL_GetRGB(pixels[y * width + x + 1], gradient_surf->format, &R1, &G1, &B1); - dstP[0] = -(0.148f * R1) - (0.291f * G1) + (0.439f * B1) + 128; // U - dstP[1] = (0.257f * R0) + (0.504f * G0) + (0.098f * B0) + 16; // Y0 - dstP[2] = (0.439f * R1) - (0.368f * G1) - (0.071f * B1) + 128; // V - dstP[3] = (0.257f * R1) + (0.504f * G1) + (0.098f * B1) + 16; // Y1 - dstP += 4; - } - break; - case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_G8B8: - // TODO: for now, just let default gradient happen - break; - default: - SDL_FreeSurface(gradient_surf); - return 3; - break; - } - - // TODO: swizzling - - SDL_FreeSurface(gradient_surf); - } else { - - // standard SDL conversion to destination format - SDL_Surface *new_surf = SDL_ConvertSurfaceFormat(gradient_surf, format.SdlFormat, 0); - SDL_FreeSurface(gradient_surf); - if (!new_surf) - return 4; - - // copy pixels over to texture memory, swizzling if desired - if (format.XboxSwizzled) { - swizzle_rect((uint8_t*)new_surf->pixels, new_surf->w, new_surf->h, texMem, new_surf->pitch, new_surf->format->BytesPerPixel); - } else { - memcpy(texMem, new_surf->pixels, new_surf->pitch * new_surf->h); - } - - SDL_FreeSurface(new_surf); - } - - return 0; -} - -/* Construct a viewport transformation matrix */ -static void matrix_viewport(float out[4][4], float x, float y, float width, float height, float z_min, float z_max) -{ - memset(out, 0, 4*4*sizeof(float)); - out[0][0] = width/2.0f; - out[1][1] = height/-2.0f; - out[2][2] = (z_max - z_min)/2.0f; - out[3][3] = 1.0f; - out[3][0] = x + width/2.0f; - out[3][1] = y + height/2.0f; - out[3][2] = (z_min + z_max)/2.0f; -} - -/* Load the shader we will render with */ -static void init_shader(void) -{ - uint32_t *p; - int i; - - /* Setup vertex shader */ - uint32_t vs_program[] = { - #include "vs.inl" - }; - - p = pb_begin(); - - /* Set run address of shader */ - p = pb_push1(p, NV097_SET_TRANSFORM_PROGRAM_START, 0); - - /* Set execution mode */ - p = pb_push1(p, NV097_SET_TRANSFORM_EXECUTION_MODE, - MASK(NV097_SET_TRANSFORM_EXECUTION_MODE_MODE, NV097_SET_TRANSFORM_EXECUTION_MODE_MODE_PROGRAM) - | MASK(NV097_SET_TRANSFORM_EXECUTION_MODE_RANGE_MODE, NV097_SET_TRANSFORM_EXECUTION_MODE_RANGE_MODE_PRIV)); - - p = pb_push1(p, NV097_SET_TRANSFORM_PROGRAM_CXT_WRITE_EN, 0); - pb_end(p); - - /* Set cursor and begin copying program */ - p = pb_begin(); - p = pb_push1(p, NV097_SET_TRANSFORM_PROGRAM_LOAD, 0); - pb_end(p); - - /* Copy program instructions (16-bytes each) */ - for (i=0; i +#include + +#pragma clang diagnostic push +#pragma ide diagnostic ignored "OCUnusedMacroInspection" + +// clang format off +#define _USE_MATH_DEFINES +#include +// clang format on + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "math3d.h" +#include "nxdk_missing_defines.h" +#include "third_party/swizzle.h" + +typedef struct TextureFormatInfo { + SDL_PixelFormatEnum SdlFormat; + uint32_t XboxFormat; + uint16_t XboxBpp; // bytes per pixel + bool XboxSwizzled; + bool RequireConversion; + const char *Name; +} TextureFormatInfo; + +#pragma pack(1) +typedef struct Vertex { + float pos[3]; + float texcoord[2]; + float normal[3]; +} Vertex; + +#pragma pack() + +static Vertex *alloc_vertices; // texcoords 0 to kFramebufferWidth/kFramebufferHeight +static Vertex *alloc_vertices_swizzled; // texcoords normalized 0 to 1 + +static constexpr int kFramebufferWidth = 640; +static constexpr int kFramebufferHeight = 480; +static constexpr int kTextureWidth = 256; +static constexpr int kTextureHeight = 256; + +MATRIX m_model, m_view, m_proj; + +VECTOR v_cam_pos = {0, 0.05, 1.07, 1}; +VECTOR v_cam_rot = {0, 0, 0, 1}; +VECTOR v_light_dir = {0, 0, 1, 1}; + +#include "resources/texture.h" +#include "resources/verts.h" + +#define MASK(mask, val) (((val) << (ffs(mask) - 1)) & (mask)) +#define MAXRAM 0x03FFAFFF + +static void init_vertices(); +static void init_matrices(); +static void matrix_viewport(float out[4][4], float x, float y, float width, float height, float z_min, float z_max); +static void init_shader(); +static void set_attrib_pointer(unsigned int index, unsigned int format, unsigned int size, unsigned int stride, + const void *data); +static void draw_arrays(unsigned int mode, int start, int count); +static int update_texture_memory(uint8_t *texture_memory, TextureFormatInfo format, int width, int height); +static void save_framebuffer(uint8_t *framebuffer, int format_map_index); + +static constexpr TextureFormatInfo format_map[] = { + + // swizzled + {SDL_PIXELFORMAT_ABGR8888, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8B8G8R8, 4, true, false, "A8B8G8R8"}, + {SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R8G8B8A8, 4, true, false, "R8G8B8A8"}, + {SDL_PIXELFORMAT_ARGB8888, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8R8G8B8, 4, true, false, "A8R8G8B8"}, + {SDL_PIXELFORMAT_ARGB8888, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X8R8G8B8, 4, true, false, "X8R8G8B8"}, + {SDL_PIXELFORMAT_BGRA8888, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_B8G8R8A8, 4, true, false, "B8G8R8A8"}, + {SDL_PIXELFORMAT_RGB565, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R5G6B5, 2, true, false, "R5G6B5"}, + {SDL_PIXELFORMAT_ARGB1555, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A1R5G5B5, 2, true, false, "A1R5G5B5"}, + {SDL_PIXELFORMAT_ARGB1555, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X1R5G5B5, 2, true, false, "X1R5G5B5"}, + {SDL_PIXELFORMAT_ARGB4444, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A4R4G4B4, 2, true, false, "A4R4G4B4"}, + + // linear unsigned + {SDL_PIXELFORMAT_ABGR8888, NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8B8G8R8, 4, false, false, "A8B8G8R8"}, + {SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R8G8B8A8, 4, false, false, "R8G8B8A8"}, + {SDL_PIXELFORMAT_ARGB8888, NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8R8G8B8, 4, false, false, "A8R8G8B8"}, + {SDL_PIXELFORMAT_ARGB8888, NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X8R8G8B8, 4, false, false, "X8R8G8B8"}, + {SDL_PIXELFORMAT_BGRA8888, NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_B8G8R8A8, 4, false, false, "B8G8R8A8"}, + {SDL_PIXELFORMAT_RGB565, NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R5G6B5, 2, false, false, "R5G6B5"}, + {SDL_PIXELFORMAT_ARGB1555, NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A1R5G5B5, 2, false, false, "A1R5G5B5"}, + {SDL_PIXELFORMAT_ARGB1555, NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X1R5G5B5, 2, false, false, "X1R5G5B5"}, + {SDL_PIXELFORMAT_ARGB4444, NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A4R4G4B4, 2, false, false, "A4R4G4B4"}, + + // yuv color space + // Each 4 bytes represent the color for 2 neighboring pixels: + // [ U0 | Y0 | V0 | Y1 ] + // Y0 is the brightness of pixel 0, Y1 the brightness of pixel 1. + // U0 and V0 is the color of both pixels. (second pixel is the one sampled? + // or averaged? doesn't really matter here) + // https://docs.microsoft.com/en-us/windows/win32/medfound/recommended-8-bit-yuv-formats-for-video-rendering#converting-8-bit-yuv-to-rgb888 + {SDL_PIXELFORMAT_BGRA8888, NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8, 2, false, true, "YUY2"}, + {SDL_PIXELFORMAT_BGRA8888, NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_YB8CR8YA8CB8, 2, false, true, "UYVY"}, + + //{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_Y16, false, true, "Y16" }, + //{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_Y8, true, true, "SZ_Y8" }, + //{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_Y8, false, true, "Y8" }, + //{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_AY8, true, true, "SZ_AY8" }, + //{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_AY8, false, true, "AY8" }, + //{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8, true, true, "SZ_A8" }, + //{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8Y8, true, true, "SZ_A8Y8" }, + + // misc formats + //{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT1_A1R5G5B5, false, true, "DXT1" }, + //{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT23_A8R8G8B8, false, true, "DXT3" }, + //{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT45_A8R8G8B8, false, true, "DXT5" }, + //{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_G8B8, true, true, "SZ_G8B8" }, + {SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_G8B8, 2, false, true, "G8B8"}, + //{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_D16, false, true, "D16" }, // TODO: implement in + //xemu + //{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_LIN_F16, false, true, "LIN_F16" }, // TODO: + //implement in xemu + //{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R8B8, true, true, "SZ_R8B8" }, + //{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R6G5B5, true, true, "R6G5B5" } + + // TODO: define others here +}; +static constexpr int kNumFormats = sizeof(format_map) / sizeof(format_map[0]); + +// bitscan forward +int bsf(int val) { __asm bsf eax, val } + +/* Main program function */ +int main() { + uint32_t *p; + + XVideoSetMode(kFramebufferWidth, kFramebufferHeight, 32, REFRESH_DEFAULT); + + // initialize input for the first gamepad + SDL_InitSubSystem(SDL_INIT_GAMECONTROLLER); + SDL_GameController *gameController = SDL_GameControllerOpen(0); + if (!gameController) { + debugPrint("Failed to initialize input for gamepad 0."); + Sleep(2000); + return 1; + } + + if (!(IMG_Init(IMG_INIT_PNG) & IMG_INIT_PNG)) { + debugPrint("Failed to initialize SDL_image PNG mode."); + Sleep(2000); + return 1; + } + + int status = pb_init(); + if (status) { + debugPrint("pb_init Error %d\n", status); + Sleep(2000); + return 1; + } + + pb_show_front_screen(); + + /* Load constant rendering things (shaders, geometry) */ + init_shader(); + + init_vertices(); + + // allocate texture memory buffer large enough for all types + auto texture_memory = static_cast(MmAllocateContiguousMemoryEx( + kTextureWidth * kTextureHeight * 4, 0, MAXRAM, 0, PAGE_WRITECOMBINE | PAGE_READWRITE)); + int format_map_index = 0; + int update_texture_result = + update_texture_memory(texture_memory, format_map[format_map_index], kTextureWidth, kTextureHeight); + + init_matrices(); + +#pragma clang diagnostic push +#pragma ide diagnostic ignored "EndlessLoop" + + uint8_t *framebuffer = XVideoGetFB(); + + bool toggle_format_allowed = true; + bool render_changed = true; + while (true) { + // cycle current texture based on A or B button presses + SDL_GameControllerUpdate(); + bool a_pressed = SDL_GameControllerGetButton(gameController, SDL_CONTROLLER_BUTTON_A); + bool b_pressed = SDL_GameControllerGetButton(gameController, SDL_CONTROLLER_BUTTON_B); + if (a_pressed || b_pressed) { + if (toggle_format_allowed) { + if (a_pressed) { + format_map_index = (format_map_index + 1) % kNumFormats; + } else { + if (--format_map_index < 0) { + format_map_index = kNumFormats - 1; + } + } + update_texture_result = + update_texture_memory(texture_memory, format_map[format_map_index], kTextureWidth, kTextureHeight); + render_changed = true; + } + toggle_format_allowed = false; + } else { + toggle_format_allowed = true; + } + + pb_wait_for_vbl(); + pb_reset(); + pb_target_back_buffer(); + + /* Clear depth & stencil buffers */ + pb_erase_depth_stencil_buffer(0, 0, kFramebufferWidth, kFramebufferHeight); + pb_fill(0, 0, kFramebufferWidth, kFramebufferHeight, 0xff000000); + pb_erase_text_screen(); + + while (pb_busy()) { + /* Wait for completion... */ + } + + /* + * Setup texture stages + */ + + /* Enable texture stage 0 */ + /* FIXME: Use constants instead of the hardcoded values below */ + p = pb_begin(); + + // first one seems to be needed + p = pb_push1(p, NV097_SET_FRONT_FACE, NV097_SET_FRONT_FACE_V_CCW); + p = pb_push1(p, NV097_SET_DEPTH_TEST_ENABLE, true); + + // Enable alpha blending functionality + p = pb_push1(p, NV097_SET_BLEND_ENABLE, true); + + // Set the alpha blend source (s) and destination (d) factors + p = pb_push1(p, NV097_SET_BLEND_FUNC_SFACTOR, NV097_SET_BLEND_FUNC_SFACTOR_V_SRC_ALPHA); + p = pb_push1(p, NV097_SET_BLEND_FUNC_DFACTOR, NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_SRC_ALPHA); + + // yuv requires color space conversion + if (format_map[format_map_index].XboxFormat == NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8 || + format_map[format_map_index].XboxFormat == NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_YB8CR8YA8CB8) { + p = pb_push1(p, NV097_SET_CONTROL0, + MASK(NV097_SET_CONTROL0_COLOR_SPACE_CONVERT, NV097_SET_CONTROL0_COLOR_SPACE_CONVERT_CRYCB_TO_RGB)); + } + + DWORD format_mask = + MASK(NV097_SET_TEXTURE_FORMAT_CONTEXT_DMA, 1) | MASK(NV097_SET_TEXTURE_FORMAT_CUBEMAP_ENABLE, 0) | + MASK(NV097_SET_TEXTURE_FORMAT_BORDER_SOURCE, NV097_SET_TEXTURE_FORMAT_BORDER_SOURCE_COLOR) | + MASK(NV097_SET_TEXTURE_FORMAT_DIMENSIONALITY, 2) | + MASK(NV097_SET_TEXTURE_FORMAT_COLOR, format_map[format_map_index].XboxFormat) | + MASK(NV097_SET_TEXTURE_FORMAT_MIPMAP_LEVELS, 1) | + MASK(NV097_SET_TEXTURE_FORMAT_BASE_SIZE_U, format_map[format_map_index].XboxSwizzled ? bsf(kTextureWidth) : 0) | + MASK(NV097_SET_TEXTURE_FORMAT_BASE_SIZE_V, + format_map[format_map_index].XboxSwizzled ? bsf(kTextureHeight) : 0) | + MASK(NV097_SET_TEXTURE_FORMAT_BASE_SIZE_P, 0); + + // set stage 0 texture address & format + p = pb_push2(p, NV20_TCL_PRIMITIVE_3D_TX_OFFSET(0), (DWORD)texture_memory & 0x03ffffff, format_mask); + + if (!format_map[format_map_index].XboxSwizzled) { + // set stage 0 texture pitch (pitch<<16) + p = pb_push1(p, NV20_TCL_PRIMITIVE_3D_TX_NPOT_PITCH(0), + (format_map[format_map_index].XboxBpp * kTextureWidth) << 16); + + // set stage 0 texture kFramebufferWidth & kFramebufferHeight + // ((width<<16)|kFramebufferHeight) + p = pb_push1(p, NV20_TCL_PRIMITIVE_3D_TX_NPOT_SIZE(0), (kTextureWidth << 16) | kTextureHeight); + } + + // set stage 0 texture modes + // (0x0W0V0U wrapping: 1=wrap 2=mirror 3=clamp 4=border 5=clamp to edge) + p = pb_push1(p, NV20_TCL_PRIMITIVE_3D_TX_WRAP(0), 0x00030303); + + // set stage 0 texture enable flags + p = pb_push1(p, NV20_TCL_PRIMITIVE_3D_TX_ENABLE(0), 0x4003ffc0); + + // set stage 0 texture filters (AA!) + p = pb_push1(p, NV20_TCL_PRIMITIVE_3D_TX_FILTER(0), 0x04074000); + + pb_end(p); + + /* Disable other texture stages */ + p = pb_begin(); + + // set stage 1 texture enable flags (bit30 disabled) + p = pb_push1(p, NV20_TCL_PRIMITIVE_3D_TX_ENABLE(1), 0x0003ffc0); + + // set stage 2 texture enable flags (bit30 disabled) + p = pb_push1(p, NV20_TCL_PRIMITIVE_3D_TX_ENABLE(2), 0x0003ffc0); + + // set stage 3 texture enable flags (bit30 disabled) + p = pb_push1(p, NV20_TCL_PRIMITIVE_3D_TX_ENABLE(3), 0x0003ffc0); + + // set stage 1 texture modes (0x0W0V0U wrapping: 1=wrap 2=mirror 3=clamp + // 4=border 5=clamp to edge) + p = pb_push1(p, NV20_TCL_PRIMITIVE_3D_TX_WRAP(1), 0x00030303); + + // set stage 2 texture modes (0x0W0V0U wrapping: 1=wrap 2=mirror 3=clamp + // 4=border 5=clamp to edge) + p = pb_push1(p, NV20_TCL_PRIMITIVE_3D_TX_WRAP(2), 0x00030303); + + // set stage 3 texture modes (0x0W0V0U wrapping: 1=wrap 2=mirror 3=clamp + // 4=border 5=clamp to edge) + p = pb_push1(p, NV20_TCL_PRIMITIVE_3D_TX_WRAP(3), 0x00030303); + + // set stage 1 texture filters (no AA, stage not even used) + p = pb_push1(p, NV20_TCL_PRIMITIVE_3D_TX_FILTER(1), 0x02022000); + + // set stage 2 texture filters (no AA, stage not even used) + p = pb_push1(p, NV20_TCL_PRIMITIVE_3D_TX_FILTER(2), 0x02022000); + + // set stage 3 texture filters (no AA, stage not even used) + p = pb_push1(p, NV20_TCL_PRIMITIVE_3D_TX_FILTER(3), 0x02022000); + + pb_end(p); + + /* Send shader constants + * + * WARNING: Changing shader source code may impact constant locations! + * Check the intermediate file (*.inl) for the expected locations after + * changing the code. + */ + p = pb_begin(); + + /* Set shader constants cursor at C0 */ + p = pb_push1(p, NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_CONST_ID, 96); + + /* Send the model matrix */ + pb_push(p++, NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_CONST_X, 16); + memcpy(p, m_model, 16 * 4); + p += 16; + + /* Send the view matrix */ + pb_push(p++, NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_CONST_X, 16); + memcpy(p, m_view, 16 * 4); + p += 16; + + /* Send the projection matrix */ + pb_push(p++, NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_CONST_X, 16); + memcpy(p, m_proj, 16 * 4); + p += 16; + + /* Send camera position */ + pb_push(p++, NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_CONST_X, 4); + memcpy(p, v_cam_pos, 4 * 4); + p += 4; + + /* Send light direction */ + pb_push(p++, NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_CONST_X, 4); + memcpy(p, v_light_dir, 4 * 4); + p += 4; + + /* Send shader constants */ + float constants_0[4] = {0, 0, 0, 0}; + pb_push(p++, NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_CONST_X, 4); + memcpy(p, constants_0, 4 * 4); + p += 4; + + /* Clear all attributes */ + pb_push(p++, NV097_SET_VERTEX_DATA_ARRAY_FORMAT, 16); + for (auto i = 0; i < 16; i++) { + *(p++) = 2; + } + pb_end(p); + + /* + * Setup vertex attributes + */ + + Vertex *vptr = format_map[format_map_index].XboxSwizzled ? alloc_vertices_swizzled : alloc_vertices; + + /* Set vertex position attribute */ + set_attrib_pointer(NV2A_VERTEX_ATTR_POSITION, NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F, 3, sizeof(Vertex), + &vptr[0].pos); + + /* Set texture coordinate attribute */ + set_attrib_pointer(NV2A_VERTEX_ATTR_TEXTURE0, NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F, 2, sizeof(Vertex), + &vptr[0].texcoord); + + /* Set vertex normal attribute */ + set_attrib_pointer(NV2A_VERTEX_ATTR_NORMAL, NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F, 3, sizeof(Vertex), + &vptr[0].normal); + + /* Begin drawing triangles */ + draw_arrays(NV097_SET_BEGIN_END_OP_TRIANGLES, 0, kNumVertices); + + /* Draw some text on the screen */ + pb_print("N: %s\n", format_map[format_map_index].Name); + pb_print("F: 0x%x\n", format_map[format_map_index].XboxFormat); + pb_print("SZ: %d\n", format_map[format_map_index].XboxSwizzled); + pb_print("C: %d\n", format_map[format_map_index].RequireConversion); + pb_print("W: %d\n", kTextureWidth); + pb_print("H: %d\n", kTextureHeight); + pb_print("P: %d\n", format_map[format_map_index].XboxBpp * kTextureWidth); + pb_print("ERR: %d\n", update_texture_result); + pb_draw_text_screen(); + + while (pb_busy()) { + /* Wait for completion... */ + } + + /* Swap buffers (if we can) */ + while (pb_finished()) { + /* Not ready to swap yet */ + } + + if (render_changed) { + render_changed = false; + save_framebuffer(framebuffer, format_map_index); + } + } +#pragma clang diagnostic pop + + /* Unreachable cleanup code */ + SDL_GameControllerClose(gameController); + SDL_QuitSubSystem(SDL_INIT_GAMECONTROLLER); + MmFreeContiguousMemory(alloc_vertices); + MmFreeContiguousMemory(alloc_vertices_swizzled); + MmFreeContiguousMemory(texture_memory); + pb_show_debug_screen(); + pb_kill(); + return 0; +} + +static void save_framebuffer(uint8_t *framebuffer, int format_map_index) {} + +static int update_texture_memory(uint8_t *texture_memory, TextureFormatInfo format, int width, int height) { + // create source surface + SDL_Surface *gradient_surface = SDL_CreateRGBSurfaceWithFormat(0, width, height, 32, SDL_PIXELFORMAT_RGBA8888); + if (gradient_surface == nullptr) { + return 1; + } + + if (SDL_LockSurface(gradient_surface)) { + return 2; + } + + // TODO: have different color patterns controlled by alternate gamepad + // button(s) generate basic gradient pattern + auto pixels = static_cast(gradient_surface->pixels); + for (int y = 0; y < height; y++) + for (int x = 0; x < width; x++) { + int x_normal = static_cast(static_cast(x) * 255.0f / static_cast(width)); + int y_normal = static_cast(static_cast(y) * 255.0f / static_cast(height)); + pixels[y * width + x] = + SDL_MapRGBA(gradient_surface->format, y_normal, x_normal, 255 - y_normal, x_normal + y_normal); + } + + SDL_UnlockSurface(gradient_surface); + + // if conversion required, do so, otherwise use SDL to convert + if (format.RequireConversion) { + uint8_t *dstP = texture_memory; + + // TODO: potential reference material - + // https://github.com/scalablecory/colors/blob/master/color.c + switch (format.XboxFormat) { + case NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8: // YUY2 aka + // YUYV + for (int y = 0; y < height; y++) + for (int x = 0; x < width; x += 2) { + uint8_t R0, G0, B0, R1, G1, B1; + SDL_GetRGB(pixels[y * width + x], gradient_surface->format, &R0, &G0, &B0); + SDL_GetRGB(pixels[y * width + x + 1], gradient_surface->format, &R1, &G1, &B1); + dstP[0] = (0.257f * R0) + (0.504f * G0) + (0.098f * B0) + 16; // Y0 + dstP[1] = -(0.148f * R1) - (0.291f * G1) + (0.439f * B1) + 128; // U + dstP[2] = (0.257f * R1) + (0.504f * G1) + (0.098f * B1) + 16; // Y1 + dstP[3] = (0.439f * R1) - (0.368f * G1) - (0.071f * B1) + 128; // V + dstP += 4; + } + break; + + case NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_YB8CR8YA8CB8: // UYVY + for (int y = 0; y < height; y++) + for (int x = 0; x < width; x += 2) { + uint8_t R0, G0, B0, R1, G1, B1; + SDL_GetRGB(pixels[y * width + x], gradient_surface->format, &R0, &G0, &B0); + SDL_GetRGB(pixels[y * width + x + 1], gradient_surface->format, &R1, &G1, &B1); + dstP[0] = -(0.148f * R1) - (0.291f * G1) + (0.439f * B1) + 128; // U + dstP[1] = (0.257f * R0) + (0.504f * G0) + (0.098f * B0) + 16; // Y0 + dstP[2] = (0.439f * R1) - (0.368f * G1) - (0.071f * B1) + 128; // V + dstP[3] = (0.257f * R1) + (0.504f * G1) + (0.098f * B1) + 16; // Y1 + dstP += 4; + } + break; + + case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_G8B8: + // TODO: for now, just let default gradient happen + break; + + default: + SDL_FreeSurface(gradient_surface); + return 3; + break; + } + + // TODO: swizzling + + SDL_FreeSurface(gradient_surface); + } else { + // standard SDL conversion to destination format + SDL_Surface *new_surf = SDL_ConvertSurfaceFormat(gradient_surface, format.SdlFormat, 0); + SDL_FreeSurface(gradient_surface); + if (!new_surf) { + return 4; + } + + // copy pixels over to texture memory, swizzling if desired + if (format.XboxSwizzled) { + swizzle_rect((uint8_t *)new_surf->pixels, new_surf->w, new_surf->h, texture_memory, new_surf->pitch, + new_surf->format->BytesPerPixel); + } else { + memcpy(texture_memory, new_surf->pixels, new_surf->pitch * new_surf->h); + } + + SDL_FreeSurface(new_surf); + } + + return 0; +} + +static void init_vertices() { + // real nv2a hardware seems to cache this and not honor updates? have separate + // vertex buffers for swizzled and linear for now... + alloc_vertices = static_cast( + MmAllocateContiguousMemoryEx(sizeof(vertices), 0, MAXRAM, 0, PAGE_WRITECOMBINE | PAGE_READWRITE)); + alloc_vertices_swizzled = static_cast( + MmAllocateContiguousMemoryEx(sizeof(vertices), 0, MAXRAM, 0, PAGE_WRITECOMBINE | PAGE_READWRITE)); + memcpy(alloc_vertices, vertices, sizeof(vertices)); + memcpy(alloc_vertices_swizzled, vertices, sizeof(vertices)); + for (int i = 0; i < kNumVertices; i++) { + if (alloc_vertices[i].texcoord[0] != 0.0f) { + alloc_vertices[i].texcoord[0] = static_cast(kTextureWidth); + } + + if (alloc_vertices[i].texcoord[1] != 0.0f) { + alloc_vertices[i].texcoord[1] = static_cast(kTextureHeight); + } + } +} + +static void init_matrices() { + /* Create view matrix (our camera is static) */ + matrix_unit(m_view); + create_world_view(m_view, v_cam_pos, v_cam_rot); + + /* Create projection matrix */ + matrix_unit(m_proj); + create_view_screen(m_proj, (float)kFramebufferWidth / (float)kFramebufferHeight, -1.0f, 1.0f, -1.0f, 1.0f, 1.0f, + 10000.0f); + + /* Create viewport matrix, combine with projection */ + { + float m_viewport[4][4]; + matrix_viewport(m_viewport, 0, 0, (float)kFramebufferWidth, (float)kFramebufferHeight, 0, 65536.0f); + matrix_multiply(m_proj, m_proj, (float *)m_viewport); + } + + /* Create local->world matrix given our updated object */ + matrix_unit(m_model); +} + +/* Construct a viewport transformation matrix */ +static void matrix_viewport(float out[4][4], float x, float y, float width, float height, float z_min, float z_max) { + memset(out, 0, 4 * 4 * sizeof(float)); + out[0][0] = width / 2.0f; + out[1][1] = height / -2.0f; + out[2][2] = (z_max - z_min) / 2.0f; + out[3][3] = 1.0f; + out[3][0] = x + width / 2.0f; + out[3][1] = y + height / 2.0f; + out[3][2] = (z_min + z_max) / 2.0f; +} + +/* Load the shader we will render with */ +static void init_shader() { + uint32_t *p; + int i; + + /* Setup vertex shader */ + uint32_t vs_program[] = { +// clang format off +#include "vs.inl" + // clang format on + }; + + p = pb_begin(); + + /* Set run address of shader */ + p = pb_push1(p, NV097_SET_TRANSFORM_PROGRAM_START, 0); + + /* Set execution mode */ + p = pb_push1( + p, NV097_SET_TRANSFORM_EXECUTION_MODE, + MASK(NV097_SET_TRANSFORM_EXECUTION_MODE_MODE, NV097_SET_TRANSFORM_EXECUTION_MODE_MODE_PROGRAM) | + MASK(NV097_SET_TRANSFORM_EXECUTION_MODE_RANGE_MODE, NV097_SET_TRANSFORM_EXECUTION_MODE_RANGE_MODE_PRIV)); + + p = pb_push1(p, NV097_SET_TRANSFORM_PROGRAM_CXT_WRITE_EN, 0); + pb_end(p); + + /* Set cursor and begin copying program */ + p = pb_begin(); + p = pb_push1(p, NV097_SET_TRANSFORM_PROGRAM_LOAD, 0); + pb_end(p); + + /* Copy program instructions (16-bytes each) */ + for (i = 0; i < sizeof(vs_program) / 16; i++) { + p = pb_begin(); + pb_push(p++, NV097_SET_TRANSFORM_PROGRAM, 4); + memcpy(p, &vs_program[i * 4], 4 * 4); + p += 4; + pb_end(p); + } + + /* Setup fragment shader */ + p = pb_begin(); + +// clang format off +#include "ps.inl" + // clang format on + pb_end(p); +} + +/* Set an attribute pointer */ +static void set_attrib_pointer(uint32_t index, uint32_t format, unsigned int size, uint32_t stride, const void *data) { + uint32_t *p = pb_begin(); + p = pb_push1(p, NV097_SET_VERTEX_DATA_ARRAY_FORMAT + index * 4, + MASK(NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE, format) | + MASK(NV097_SET_VERTEX_DATA_ARRAY_FORMAT_SIZE, size) | + MASK(NV097_SET_VERTEX_DATA_ARRAY_FORMAT_STRIDE, stride)); + p = pb_push1(p, NV097_SET_VERTEX_DATA_ARRAY_OFFSET + index * 4, (uint32_t)data & 0x03ffffff); + pb_end(p); +} + +/* Send draw commands for the triangles */ +static void draw_arrays(unsigned int mode, int start, int count) { + uint32_t *p = pb_begin(); + p = pb_push1(p, NV097_SET_BEGIN_END, mode); + + // bit 30 means all params go to same register 0x1810 + p = pb_push1(p, 0x40000000 | NV097_DRAW_ARRAYS, + MASK(NV097_DRAW_ARRAYS_COUNT, (count - 1)) | MASK(NV097_DRAW_ARRAYS_START_INDEX, start)); + + p = pb_push1(p, NV097_SET_BEGIN_END, NV097_SET_BEGIN_END_OP_END); + + pb_end(p); +} + +#pragma clang diagnostic pop \ No newline at end of file diff --git a/math3d.c b/math3d.c index fc22b78..7a963a6 100644 --- a/math3d.c +++ b/math3d.c @@ -1,352 +1,375 @@ -//port of ooPo's ps2sdk math3d library - -#include -#include -#include - -#include "math3d.h" - -unsigned long times(void *); -#define cpu_ticks() times(0) - - - -//vector functions - -void vector_apply(VECTOR output, VECTOR input0, MATRIX input1) -{ - VECTOR work; - - work[_X]=input0[_X]*input1[_11]+input0[_Y]*input1[_12]+input0[_Z]*input1[_13]+input0[_W]*input1[_14]; - work[_Y]=input0[_X]*input1[_21]+input0[_Y]*input1[_22]+input0[_Z]*input1[_23]+input0[_W]*input1[_24]; - work[_Z]=input0[_X]*input1[_31]+input0[_Y]*input1[_32]+input0[_Z]*input1[_33]+input0[_W]*input1[_34]; - work[_W]=input0[_X]*input1[_41]+input0[_Y]*input1[_42]+input0[_Z]*input1[_43]+input0[_W]*input1[_44]; - - // Output the result. - vector_copy(output, work); -} - -void vector_clamp(VECTOR output, VECTOR input0, float min, float max) -{ - VECTOR work; - - // Copy the vector. - vector_copy(work, input0); - - // Clamp the minimum values. - if (work[_X] < min) { work[_X] = min; } - if (work[_Y] < min) { work[_Y] = min; } - if (work[_Z] < min) { work[_Z] = min; } - if (work[_W] < min) { work[_W] = min; } - - // Clamp the maximum values. - if (work[_X] > max) { work[_X] = max; } - if (work[_Y] > max) { work[_Y] = max; } - if (work[_Z] > max) { work[_Z] = max; } - if (work[_W] > max) { work[_W] = max; } - - // Output the result. - vector_copy(output, work); -} - -void vector_copy(VECTOR output, VECTOR input0) -{ - memcpy(output,input0,sizeof(VECTOR)); -} - -float vector_innerproduct(VECTOR input0, VECTOR input1) -{ - VECTOR work0, work1; - - // Normalize the first vector. - work0[_X] = (input0[_X] / input0[_W]); - work0[_Y] = (input0[_Y] / input0[_W]); - work0[_Z] = (input0[_Z] / input0[_W]); - work0[_W] = 1.00f; - - // Normalize the second vector. - work1[_X] = (input1[_X] / input1[_W]); - work1[_Y] = (input1[_Y] / input1[_W]); - work1[_Z] = (input1[_Z] / input1[_W]); - work1[_W] = 1.00f; - - // Return the inner product. - return (work0[_X] * work1[_X]) + (work0[_Y] * work1[_Y]) + (work0[_Z] * work1[_Z]); -} - -void vector_multiply(VECTOR output, VECTOR input0, VECTOR input1) -{ - VECTOR work; - - // Multiply the vectors together. - work[_X] = input0[_X] * input1[_X]; - work[_Y] = input0[_Y] * input1[_Y]; - work[_Z] = input0[_Z] * input1[_Z]; - work[_W] = input0[_W] * input1[_W]; - - // Output the result. - vector_copy(output, work); -} - -void vector_normalize(VECTOR output, VECTOR input0) -{ - float k; - - k=1.0f/sqrt(input0[_X]*input0[_X]+input0[_Y]*input0[_Y]+input0[_Z]*input0[_Z]); - output[_X]*=k; - output[_Y]*=k; - output[_Z]*=k; -} - -void vector_outerproduct(VECTOR output, VECTOR input0, VECTOR input1) -{ - VECTOR work; - - work[_X]=input0[_Y]*input1[_Z]-input0[_Z]*input1[_Y]; - work[_Y]=input0[_Z]*input1[_X]-input0[_X]*input1[_Z]; - work[_Z]=input0[_X]*input1[_Y]-input0[_Y]*input1[_X]; - - // Output the result. - vector_copy(output, work); -} - -//matrices function - -void matrix_copy(MATRIX output, MATRIX input0) -{ - memcpy(output,input0,sizeof(MATRIX)); -} - -void matrix_inverse(MATRIX output, MATRIX input0) -{ - MATRIX work; - - // Calculate the inverse of the matrix. - matrix_transpose(work, input0); - work[_14] = 0.00f; - work[_24] = 0.00f; - work[_34] = 0.00f; - work[_41] = -(input0[_41] * work[_11] + input0[_42] * work[_21] + input0[_43] * work[_31]); - work[_42] = -(input0[_41] * work[_12] + input0[_42] * work[_22] + input0[_43] * work[_32]); - work[_43] = -(input0[_41] * work[_13] + input0[_42] * work[_23] + input0[_43] * work[_33]); - work[_44] = 1.00f; - - // Output the result. - matrix_copy(output, work); -} - -void matrix_multiply(MATRIX output, MATRIX input0, MATRIX input1) -{ - MATRIX work; - - work[_11]=input0[_11]*input1[_11]+input0[_12]*input1[_21]+input0[_13]*input1[_31]+input0[_14]*input1[_41]; - work[_12]=input0[_11]*input1[_12]+input0[_12]*input1[_22]+input0[_13]*input1[_32]+input0[_14]*input1[_42]; - work[_13]=input0[_11]*input1[_13]+input0[_12]*input1[_23]+input0[_13]*input1[_33]+input0[_14]*input1[_43]; - work[_14]=input0[_11]*input1[_14]+input0[_12]*input1[_24]+input0[_13]*input1[_34]+input0[_14]*input1[_44]; - work[_21]=input0[_21]*input1[_11]+input0[_22]*input1[_21]+input0[_23]*input1[_31]+input0[_24]*input1[_41]; - work[_22]=input0[_21]*input1[_12]+input0[_22]*input1[_22]+input0[_23]*input1[_32]+input0[_24]*input1[_42]; - work[_23]=input0[_21]*input1[_13]+input0[_22]*input1[_23]+input0[_23]*input1[_33]+input0[_24]*input1[_43]; - work[_24]=input0[_21]*input1[_14]+input0[_22]*input1[_24]+input0[_23]*input1[_34]+input0[_24]*input1[_44]; - work[_31]=input0[_31]*input1[_11]+input0[_32]*input1[_21]+input0[_33]*input1[_31]+input0[_34]*input1[_41]; - work[_32]=input0[_31]*input1[_12]+input0[_32]*input1[_22]+input0[_33]*input1[_32]+input0[_34]*input1[_42]; - work[_33]=input0[_31]*input1[_13]+input0[_32]*input1[_23]+input0[_33]*input1[_33]+input0[_34]*input1[_43]; - work[_34]=input0[_31]*input1[_14]+input0[_32]*input1[_24]+input0[_33]*input1[_34]+input0[_34]*input1[_44]; - work[_41]=input0[_41]*input1[_11]+input0[_42]*input1[_21]+input0[_43]*input1[_31]+input0[_44]*input1[_41]; - work[_42]=input0[_41]*input1[_12]+input0[_42]*input1[_22]+input0[_43]*input1[_32]+input0[_44]*input1[_42]; - work[_43]=input0[_41]*input1[_13]+input0[_42]*input1[_23]+input0[_43]*input1[_33]+input0[_44]*input1[_43]; - work[_44]=input0[_41]*input1[_14]+input0[_42]*input1[_24]+input0[_43]*input1[_34]+input0[_44]*input1[_44]; - - // Output the result. - matrix_copy(output, work); -} - -void matrix_rotate(MATRIX output, MATRIX input0, VECTOR input1) -{ - MATRIX work; - - // Apply the z-axis rotation. - matrix_unit(work); - work[_11] = cosf(input1[2]); - work[_12] = sinf(input1[2]); - work[_21] = -sinf(input1[2]); - work[_22] = cosf(input1[2]); - matrix_multiply(output, input0, work); - - // Apply the y-axis rotation. - matrix_unit(work); - work[_11] = cosf(input1[1]); - work[_13] = -sinf(input1[1]); - work[_31] = sinf(input1[1]); - work[_33] = cosf(input1[1]); - matrix_multiply(output, output, work); - - // Apply the x-axis rotation. - matrix_unit(work); - work[_22] = cosf(input1[0]); - work[_23] = sinf(input1[0]); - work[_32] = -sinf(input1[0]); - work[_33] = cosf(input1[0]); - matrix_multiply(output, output, work); -} - -void matrix_scale(MATRIX output, MATRIX input0, VECTOR input1) -{ - MATRIX work; - - // Apply the scaling. - matrix_unit(work); - work[_11] = input1[_X]; - work[_22] = input1[_Y]; - work[_33] = input1[_Z]; - matrix_multiply(output, input0, work); -} - -void matrix_translate(MATRIX output, MATRIX input0, VECTOR input1) -{ - MATRIX work; - - // Apply the translation. - matrix_unit(work); - work[_41] = input1[_X]; - work[_42] = input1[_Y]; - work[_43] = input1[_Z]; - matrix_multiply(output, input0, work); -} - -void matrix_transpose(MATRIX output, MATRIX input0) -{ - MATRIX work; - - // Transpose the matrix. - work[_11] = input0[_11]; - work[_12] = input0[_21]; - work[_13] = input0[_31]; - work[_14] = input0[_41]; - work[_21] = input0[_12]; - work[_22] = input0[_22]; - work[_23] = input0[_32]; - work[_24] = input0[_42]; - work[_31] = input0[_13]; - work[_32] = input0[_23]; - work[_33] = input0[_33]; - work[_34] = input0[_43]; - work[_41] = input0[_14]; - work[_42] = input0[_24]; - work[_43] = input0[_34]; - work[_44] = input0[_44]; - - // Output the result. - matrix_copy(output, work); -} - -void matrix_unit(MATRIX output) -{ - // Create a unit matrix. - memset(output, 0, sizeof(MATRIX)); - output[_11] = 1.00f; - output[_22] = 1.00f; - output[_33] = 1.00f; - output[_44] = 1.00f; -} - -//creation functions - -void create_local_world(MATRIX local_world, VECTOR translation, VECTOR rotation) -{ - // Create the local_world matrix. - matrix_unit(local_world); - matrix_rotate(local_world, local_world, rotation); - matrix_translate(local_world, local_world, translation); -} - -void create_local_light(MATRIX local_light, VECTOR rotation) -{ - // Create the local_light matrix. - matrix_unit(local_light); - matrix_rotate(local_light, local_light, rotation); -} - - -void create_world_view(MATRIX world_view, VECTOR translation, VECTOR rotation) -{ - VECTOR work0, work1; - - // Reverse the translation. - work0[_X] = -translation[_X]; - work0[_Y] = -translation[_Y]; - work0[_Z] = -translation[_Z]; - work0[_W] = translation[_W]; - - // Reverse the rotation. - work1[_X] = -rotation[_X]; - work1[_Y] = -rotation[_Y]; - work1[_Z] = -rotation[_Z]; - work1[_W] = rotation[_W]; - - // Create the world_view matrix. - matrix_unit(world_view); - matrix_translate(world_view, world_view, work0); - matrix_rotate(world_view, world_view, work1); -} - -void create_view_screen(MATRIX view_screen, float aspect, float left, float right, float bottom, float top, float near, float far) -{ -/* We want to create a matrix that transforms - field of view frustum (a truncated pyramid) - into a normalized cuboid (for fast hardware clipping): - w, 0, 0, 0, - 0, -h, 0, 0, - 0, 0, (f+n) / (f-n), -1, - 0, 0, (2*f*n) / (f-n), 0 - (w:width,h:height,n:z near,f:z far) -*/ - - // Apply the aspect ratio adjustment. - left = (left * aspect); right = (right * aspect); - - // Create the view_screen matrix. -/* matrix_unit(view_screen); - view_screen[_11] = (2 * near) / (right - left); - view_screen[_22] = (2 * near) / (top - bottom); - view_screen[_31] = (right + left) / (right - left); - view_screen[_32] = (top + bottom) / (top - bottom); - view_screen[_33] = (far + near) / (far - near); - view_screen[_34] = -1.00f; - view_screen[_43] = (2 * far * near) / (far - near); - view_screen[_44] = 0.00f; - - //This is good for ps2 clipping, where pixel is considered visible if: - //-w < x < w - //-w < y < w - //-w < z < w - //It's not automatic, it's done by using 'clipw' and testing flags in vu1 code - //Result of the test allows to exclude entire triangle -*/ - - //For xbox1 clipping, pixel is considered visible if: - //-w < x < w - //-w < y < w - // 0 < z < w - //It's automatic and verified for each pixel before pixel shader is called - - //so we need this : - matrix_unit(view_screen); - view_screen[_11] = (2 * near) / (right - left); - view_screen[_22] = (2 * near) / (top - bottom); - view_screen[_31] = - (right + left) / (right - left); - view_screen[_32] = - (top + bottom) / (top - bottom); - view_screen[_33] = - far / (far - near); - view_screen[_34] = - 1.00f; - view_screen[_43] = near * far / (far - near); - view_screen[_44] = 0.00f; -} - -void create_local_screen(MATRIX local_screen, MATRIX local_world, MATRIX world_view, MATRIX view_screen) -{ - // Create the local_screen matrix. - matrix_unit(local_screen); - matrix_multiply(local_screen, local_screen, local_world); - matrix_multiply(local_screen, local_screen, world_view); - matrix_multiply(local_screen, local_screen, view_screen); -} - +// port of ooPo's ps2sdk math3d library + +#include "math3d.h" + +#include +#include +#include + +unsigned long times(void *); +#define cpu_ticks() times(0) + +// vector functions + +void vector_apply(VECTOR output, VECTOR input0, MATRIX input1) { + VECTOR work; + + work[_X] = input0[_X] * input1[_11] + input0[_Y] * input1[_12] + + input0[_Z] * input1[_13] + input0[_W] * input1[_14]; + work[_Y] = input0[_X] * input1[_21] + input0[_Y] * input1[_22] + + input0[_Z] * input1[_23] + input0[_W] * input1[_24]; + work[_Z] = input0[_X] * input1[_31] + input0[_Y] * input1[_32] + + input0[_Z] * input1[_33] + input0[_W] * input1[_34]; + work[_W] = input0[_X] * input1[_41] + input0[_Y] * input1[_42] + + input0[_Z] * input1[_43] + input0[_W] * input1[_44]; + + // Output the result. + vector_copy(output, work); +} + +void vector_clamp(VECTOR output, VECTOR input0, float min, float max) { + VECTOR work; + + // Copy the vector. + vector_copy(work, input0); + + // Clamp the minimum values. + if (work[_X] < min) { + work[_X] = min; + } + if (work[_Y] < min) { + work[_Y] = min; + } + if (work[_Z] < min) { + work[_Z] = min; + } + if (work[_W] < min) { + work[_W] = min; + } + + // Clamp the maximum values. + if (work[_X] > max) { + work[_X] = max; + } + if (work[_Y] > max) { + work[_Y] = max; + } + if (work[_Z] > max) { + work[_Z] = max; + } + if (work[_W] > max) { + work[_W] = max; + } + + // Output the result. + vector_copy(output, work); +} + +void vector_copy(VECTOR output, VECTOR input0) { + memcpy(output, input0, sizeof(VECTOR)); +} + +float vector_innerproduct(VECTOR input0, VECTOR input1) { + VECTOR work0, work1; + + // Normalize the first vector. + work0[_X] = (input0[_X] / input0[_W]); + work0[_Y] = (input0[_Y] / input0[_W]); + work0[_Z] = (input0[_Z] / input0[_W]); + work0[_W] = 1.00f; + + // Normalize the second vector. + work1[_X] = (input1[_X] / input1[_W]); + work1[_Y] = (input1[_Y] / input1[_W]); + work1[_Z] = (input1[_Z] / input1[_W]); + work1[_W] = 1.00f; + + // Return the inner product. + return (work0[_X] * work1[_X]) + (work0[_Y] * work1[_Y]) + + (work0[_Z] * work1[_Z]); +} + +void vector_multiply(VECTOR output, VECTOR input0, VECTOR input1) { + VECTOR work; + + // Multiply the vectors together. + work[_X] = input0[_X] * input1[_X]; + work[_Y] = input0[_Y] * input1[_Y]; + work[_Z] = input0[_Z] * input1[_Z]; + work[_W] = input0[_W] * input1[_W]; + + // Output the result. + vector_copy(output, work); +} + +void vector_normalize(VECTOR output, VECTOR input0) { + float k; + + k = 1.0f / sqrt(input0[_X] * input0[_X] + input0[_Y] * input0[_Y] + + input0[_Z] * input0[_Z]); + output[_X] *= k; + output[_Y] *= k; + output[_Z] *= k; +} + +void vector_outerproduct(VECTOR output, VECTOR input0, VECTOR input1) { + VECTOR work; + + work[_X] = input0[_Y] * input1[_Z] - input0[_Z] * input1[_Y]; + work[_Y] = input0[_Z] * input1[_X] - input0[_X] * input1[_Z]; + work[_Z] = input0[_X] * input1[_Y] - input0[_Y] * input1[_X]; + + // Output the result. + vector_copy(output, work); +} + +// matrices function + +void matrix_copy(MATRIX output, MATRIX input0) { + memcpy(output, input0, sizeof(MATRIX)); +} + +void matrix_inverse(MATRIX output, MATRIX input0) { + MATRIX work; + + // Calculate the inverse of the matrix. + matrix_transpose(work, input0); + work[_14] = 0.00f; + work[_24] = 0.00f; + work[_34] = 0.00f; + work[_41] = -(input0[_41] * work[_11] + input0[_42] * work[_21] + + input0[_43] * work[_31]); + work[_42] = -(input0[_41] * work[_12] + input0[_42] * work[_22] + + input0[_43] * work[_32]); + work[_43] = -(input0[_41] * work[_13] + input0[_42] * work[_23] + + input0[_43] * work[_33]); + work[_44] = 1.00f; + + // Output the result. + matrix_copy(output, work); +} + +void matrix_multiply(MATRIX output, MATRIX input0, MATRIX input1) { + MATRIX work; + + work[_11] = input0[_11] * input1[_11] + input0[_12] * input1[_21] + + input0[_13] * input1[_31] + input0[_14] * input1[_41]; + work[_12] = input0[_11] * input1[_12] + input0[_12] * input1[_22] + + input0[_13] * input1[_32] + input0[_14] * input1[_42]; + work[_13] = input0[_11] * input1[_13] + input0[_12] * input1[_23] + + input0[_13] * input1[_33] + input0[_14] * input1[_43]; + work[_14] = input0[_11] * input1[_14] + input0[_12] * input1[_24] + + input0[_13] * input1[_34] + input0[_14] * input1[_44]; + work[_21] = input0[_21] * input1[_11] + input0[_22] * input1[_21] + + input0[_23] * input1[_31] + input0[_24] * input1[_41]; + work[_22] = input0[_21] * input1[_12] + input0[_22] * input1[_22] + + input0[_23] * input1[_32] + input0[_24] * input1[_42]; + work[_23] = input0[_21] * input1[_13] + input0[_22] * input1[_23] + + input0[_23] * input1[_33] + input0[_24] * input1[_43]; + work[_24] = input0[_21] * input1[_14] + input0[_22] * input1[_24] + + input0[_23] * input1[_34] + input0[_24] * input1[_44]; + work[_31] = input0[_31] * input1[_11] + input0[_32] * input1[_21] + + input0[_33] * input1[_31] + input0[_34] * input1[_41]; + work[_32] = input0[_31] * input1[_12] + input0[_32] * input1[_22] + + input0[_33] * input1[_32] + input0[_34] * input1[_42]; + work[_33] = input0[_31] * input1[_13] + input0[_32] * input1[_23] + + input0[_33] * input1[_33] + input0[_34] * input1[_43]; + work[_34] = input0[_31] * input1[_14] + input0[_32] * input1[_24] + + input0[_33] * input1[_34] + input0[_34] * input1[_44]; + work[_41] = input0[_41] * input1[_11] + input0[_42] * input1[_21] + + input0[_43] * input1[_31] + input0[_44] * input1[_41]; + work[_42] = input0[_41] * input1[_12] + input0[_42] * input1[_22] + + input0[_43] * input1[_32] + input0[_44] * input1[_42]; + work[_43] = input0[_41] * input1[_13] + input0[_42] * input1[_23] + + input0[_43] * input1[_33] + input0[_44] * input1[_43]; + work[_44] = input0[_41] * input1[_14] + input0[_42] * input1[_24] + + input0[_43] * input1[_34] + input0[_44] * input1[_44]; + + // Output the result. + matrix_copy(output, work); +} + +void matrix_rotate(MATRIX output, MATRIX input0, VECTOR input1) { + MATRIX work; + + // Apply the z-axis rotation. + matrix_unit(work); + work[_11] = cosf(input1[2]); + work[_12] = sinf(input1[2]); + work[_21] = -sinf(input1[2]); + work[_22] = cosf(input1[2]); + matrix_multiply(output, input0, work); + + // Apply the y-axis rotation. + matrix_unit(work); + work[_11] = cosf(input1[1]); + work[_13] = -sinf(input1[1]); + work[_31] = sinf(input1[1]); + work[_33] = cosf(input1[1]); + matrix_multiply(output, output, work); + + // Apply the x-axis rotation. + matrix_unit(work); + work[_22] = cosf(input1[0]); + work[_23] = sinf(input1[0]); + work[_32] = -sinf(input1[0]); + work[_33] = cosf(input1[0]); + matrix_multiply(output, output, work); +} + +void matrix_scale(MATRIX output, MATRIX input0, VECTOR input1) { + MATRIX work; + + // Apply the scaling. + matrix_unit(work); + work[_11] = input1[_X]; + work[_22] = input1[_Y]; + work[_33] = input1[_Z]; + matrix_multiply(output, input0, work); +} + +void matrix_translate(MATRIX output, MATRIX input0, VECTOR input1) { + MATRIX work; + + // Apply the translation. + matrix_unit(work); + work[_41] = input1[_X]; + work[_42] = input1[_Y]; + work[_43] = input1[_Z]; + matrix_multiply(output, input0, work); +} + +void matrix_transpose(MATRIX output, MATRIX input0) { + MATRIX work; + + // Transpose the matrix. + work[_11] = input0[_11]; + work[_12] = input0[_21]; + work[_13] = input0[_31]; + work[_14] = input0[_41]; + work[_21] = input0[_12]; + work[_22] = input0[_22]; + work[_23] = input0[_32]; + work[_24] = input0[_42]; + work[_31] = input0[_13]; + work[_32] = input0[_23]; + work[_33] = input0[_33]; + work[_34] = input0[_43]; + work[_41] = input0[_14]; + work[_42] = input0[_24]; + work[_43] = input0[_34]; + work[_44] = input0[_44]; + + // Output the result. + matrix_copy(output, work); +} + +void matrix_unit(MATRIX output) { + // Create a unit matrix. + memset(output, 0, sizeof(MATRIX)); + output[_11] = 1.00f; + output[_22] = 1.00f; + output[_33] = 1.00f; + output[_44] = 1.00f; +} + +// creation functions + +void create_local_world(MATRIX local_world, VECTOR translation, + VECTOR rotation) { + // Create the local_world matrix. + matrix_unit(local_world); + matrix_rotate(local_world, local_world, rotation); + matrix_translate(local_world, local_world, translation); +} + +void create_local_light(MATRIX local_light, VECTOR rotation) { + // Create the local_light matrix. + matrix_unit(local_light); + matrix_rotate(local_light, local_light, rotation); +} + +void create_world_view(MATRIX world_view, VECTOR translation, VECTOR rotation) { + VECTOR work0, work1; + + // Reverse the translation. + work0[_X] = -translation[_X]; + work0[_Y] = -translation[_Y]; + work0[_Z] = -translation[_Z]; + work0[_W] = translation[_W]; + + // Reverse the rotation. + work1[_X] = -rotation[_X]; + work1[_Y] = -rotation[_Y]; + work1[_Z] = -rotation[_Z]; + work1[_W] = rotation[_W]; + + // Create the world_view matrix. + matrix_unit(world_view); + matrix_translate(world_view, world_view, work0); + matrix_rotate(world_view, world_view, work1); +} + +void create_view_screen(MATRIX view_screen, float aspect, float left, + float right, float bottom, float top, float near, + float far) { + /* We want to create a matrix that transforms + field of view frustum (a truncated pyramid) + into a normalized cuboid (for fast hardware clipping): + w, 0, 0, 0, + 0, -h, 0, 0, + 0, 0, (f+n) / (f-n), -1, + 0, 0, (2*f*n) / (f-n), 0 + (w:kFramebufferWidth,h:kFramebufferHeight,n:z near,f:z far) + */ + + // Apply the aspect ratio adjustment. + left = (left * aspect); + right = (right * aspect); + + // Create the view_screen matrix. + /* matrix_unit(view_screen); + view_screen[_11] = (2 * near) / (right - left); + view_screen[_22] = (2 * near) / (top - bottom); + view_screen[_31] = (right + left) / (right - left); + view_screen[_32] = (top + bottom) / (top - bottom); + view_screen[_33] = (far + near) / (far - near); + view_screen[_34] = -1.00f; + view_screen[_43] = (2 * far * near) / (far - near); + view_screen[_44] = 0.00f; + + //This is good for ps2 clipping, where pixel is considered visible if: + //-w < x < w + //-w < y < w + //-w < z < w + //It's not automatic, it's done by using 'clipw' and testing flags in vu1 + code + //Result of the test allows to exclude entire triangle + */ + + // For xbox1 clipping, pixel is considered visible if: + //-w < x < w + //-w < y < w + // 0 < z < w + // It's automatic and verified for each pixel before pixel shader is called + + // so we need this : + matrix_unit(view_screen); + view_screen[_11] = (2 * near) / (right - left); + view_screen[_22] = (2 * near) / (top - bottom); + view_screen[_31] = -(right + left) / (right - left); + view_screen[_32] = -(top + bottom) / (top - bottom); + view_screen[_33] = -far / (far - near); + view_screen[_34] = -1.00f; + view_screen[_43] = near * far / (far - near); + view_screen[_44] = 0.00f; +} + +void create_local_screen(MATRIX local_screen, MATRIX local_world, + MATRIX world_view, MATRIX view_screen) { + // Create the local_screen matrix. + matrix_unit(local_screen); + matrix_multiply(local_screen, local_screen, local_world); + matrix_multiply(local_screen, local_screen, world_view); + matrix_multiply(local_screen, local_screen, view_screen); +} diff --git a/math3d.h b/math3d.h index 2b11815..194a3f1 100644 --- a/math3d.h +++ b/math3d.h @@ -1,37 +1,40 @@ -//port of ooPo's ps2sdk math3d library +// port of ooPo's ps2sdk math3d library #ifndef _MATH3D_H_ #define _MATH3D_H_ +#ifdef __cplusplus +extern "C" { +#endif + typedef float VECTOR[4]; typedef float MATRIX[16]; -//vector indices -#define _X 0 -#define _Y 1 -#define _Z 2 -#define _W 3 +// vector indices +#define _X 0 +#define _Y 1 +#define _Z 2 +#define _W 3 -//4x4 matrices indices -#define _11 0 -#define _12 1 -#define _13 2 -#define _14 3 -#define _21 4 -#define _22 5 -#define _23 6 -#define _24 7 -#define _31 8 -#define _32 9 -#define _33 10 -#define _34 11 -#define _41 12 -#define _42 13 -#define _43 14 -#define _44 15 +// 4x4 matrices indices +#define _11 0 +#define _12 1 +#define _13 2 +#define _14 3 +#define _21 4 +#define _22 5 +#define _23 6 +#define _24 7 +#define _31 8 +#define _32 9 +#define _33 10 +#define _34 11 +#define _41 12 +#define _42 13 +#define _43 14 +#define _44 15 - -//vector functions +// vector functions void vector_apply(VECTOR output, VECTOR input0, MATRIX input1); // Multiply a vector by a matrix, returning a vector. @@ -49,12 +52,13 @@ void vector_multiply(VECTOR output, VECTOR input0, VECTOR input1); // Multiply two vectors together. void vector_normalize(VECTOR output, VECTOR input0); -// Normalize a vector by determining its length and dividing its values by this value. +// Normalize a vector by determining its length and dividing its values by this +// value. void vector_outerproduct(VECTOR output, VECTOR input0, VECTOR input1); // Calculate the outer product of two vectors. -//matrices functions +// matrices functions void matrix_copy(MATRIX output, MATRIX input0); // Copy a matrix. @@ -80,9 +84,10 @@ void matrix_transpose(MATRIX output, MATRIX input0); void matrix_unit(MATRIX output); // Create a unit matrix. -//creation functions +// creation functions -void create_local_world(MATRIX local_world, VECTOR translation, VECTOR rotation); +void create_local_world(MATRIX local_world, VECTOR translation, + VECTOR rotation); // Create a local_world matrix given a translation and rotation. // Commonly used to describe an object's position and orientation. @@ -94,13 +99,20 @@ void create_world_view(MATRIX world_view, VECTOR translation, VECTOR rotation); // Create a world_view matrix given a translation and rotation. // Commonly used to describe a camera's position and rotation. -void create_view_screen(MATRIX view_screen, float aspect, float left, float right, float bottom, float top, float near, float far); +void create_view_screen(MATRIX view_screen, float aspect, float left, + float right, float bottom, float top, float near, + float far); // Create a view_screen matrix given an aspect and clipping plane values. // Functionally similar to the opengl function: glFrustum() -void create_local_screen(MATRIX local_screen, MATRIX local_world, MATRIX world_view, MATRIX view_screen); -// Create a local_screen matrix given a local_world, world_view and view_screen matrix. -// Commonly used with vector_apply() to transform vertices for rendering. +void create_local_screen(MATRIX local_screen, MATRIX local_world, + MATRIX world_view, MATRIX view_screen); +// Create a local_screen matrix given a local_world, world_view and view_screen +// matrix. Commonly used with vector_apply() to transform vertices for +// rendering. + +#ifdef __cplusplus +}; +#endif - #endif diff --git a/nxdk_missing_defines.h b/nxdk_missing_defines.h new file mode 100644 index 0000000..4321833 --- /dev/null +++ b/nxdk_missing_defines.h @@ -0,0 +1,17 @@ +#ifndef NXDK_ZBUFFER_TESTS_NXDK_MISSING_DEFINES_H +#define NXDK_ZBUFFER_TESTS_NXDK_MISSING_DEFINES_H + +// TODO: upstream missing nv2a defines +#define NV2A_VERTEX_ATTR_POSITION 0 +#define NV2A_VERTEX_ATTR_NORMAL 2 +#define NV2A_VERTEX_ATTR_TEXTURE0 9 +#define NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_G8B8 0x17 +#define NV097_SET_TEXTURE_FORMAT_COLOR_SZ_B8G8R8A8 0x3B +#define NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8 0x24 +#define NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_YB8CR8YA8CB8 0x25 +#define NV097_SET_TEXTURE_FORMAT_COLOR_D16 0x2C // TODO: proper nvidia name +#define NV097_SET_TEXTURE_FORMAT_COLOR_LIN_F16 0x31 // TODO: proper nvidia name +#define NV097_SET_CONTROL0_COLOR_SPACE_CONVERT 0xF0000000 +#define NV097_SET_CONTROL0_COLOR_SPACE_CONVERT_CRYCB_TO_RGB 0x1 + +#endif // NXDK_ZBUFFER_TESTS_NXDK_MISSING_DEFINES_H diff --git a/texture.h b/resources/texture.h similarity index 100% rename from texture.h rename to resources/texture.h diff --git a/verts.h b/resources/verts.h similarity index 97% rename from verts.h rename to resources/verts.h index 1d843d7..1f4968c 100644 --- a/verts.h +++ b/resources/verts.h @@ -36,3 +36,5 @@ struct Vertex vertices[] = { { {-0.500000, 0.500000, -0.500000}, {0.000000, 0.000000}, {-1.000000, 0.000000, -0.000000} }, { {-0.500000, -0.500000, -0.500000}, {0.000000, 1.000000}, {-1.000000, 0.000000, -0.000000} } }; + +static constexpr uint32_t kNumVertices = sizeof(vertices) / sizeof(vertices[0]); diff --git a/swizzle.c b/swizzle.c deleted file mode 100644 index 68dbf2a..0000000 --- a/swizzle.c +++ /dev/null @@ -1,163 +0,0 @@ -/* - * QEMU texture swizzling routines - * - * Copyright (c) 2015 Jannik Vogel - * Copyright (c) 2013 espes - * Copyright (c) 2007-2010 The Nouveau Project. - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see . - */ - -#include -#include -#include -#include - -#include "swizzle.h" - -/* This should be pretty straightforward. - * It creates a bit pattern like ..zyxzyxzyx from ..xxx, ..yyy and ..zzz - * If there are no bits left from any component it will pack the other masks - * more tighly (Example: zzxzxzyx = Fewer x than z and even fewer y) - */ -static void generate_swizzle_masks(unsigned int width, - unsigned int height, - unsigned int depth, - uint32_t* mask_x, - uint32_t* mask_y, - uint32_t* mask_z) -{ - uint32_t x = 0, y = 0, z = 0; - uint32_t bit = 1; - uint32_t mask_bit = 1; - bool done; - do { - done = true; - if (bit < width) { x |= mask_bit; mask_bit <<= 1; done = false; } - if (bit < height) { y |= mask_bit; mask_bit <<= 1; done = false; } - if (bit < depth) { z |= mask_bit; mask_bit <<= 1; done = false; } - bit <<= 1; - } while(!done); - assert(x ^ y ^ z == (mask_bit - 1)); - *mask_x = x; - *mask_y = y; - *mask_z = z; -} - -/* This fills a pattern with a value if your value has bits abcd and your - * pattern is 11010100100 this will return: 0a0b0c00d00 - */ -static uint32_t fill_pattern(uint32_t pattern, uint32_t value) -{ - uint32_t result = 0; - uint32_t bit = 1; - while(value) { - if (pattern & bit) { - /* Copy bit to result */ - result |= value & 1 ? bit : 0; - value >>= 1; - } - bit <<= 1; - } - return result; -} - -static unsigned int get_swizzled_offset( - unsigned int x, unsigned int y, unsigned int z, - uint32_t mask_x, uint32_t mask_y, uint32_t mask_z, - unsigned int bytes_per_pixel) -{ - return bytes_per_pixel * (fill_pattern(mask_x, x) - | fill_pattern(mask_y, y) - | fill_pattern(mask_z, z)); -} - -void swizzle_box( - const uint8_t *src_buf, - unsigned int width, - unsigned int height, - unsigned int depth, - uint8_t *dst_buf, - unsigned int row_pitch, - unsigned int slice_pitch, - unsigned int bytes_per_pixel) -{ - uint32_t mask_x, mask_y, mask_z; - generate_swizzle_masks(width, height, depth, &mask_x, &mask_y, &mask_z); - - int x, y, z; - for (z = 0; z < depth; z++) { - for (y = 0; y < height; y++) { - for (x = 0; x < width; x++) { - const uint8_t *src = src_buf - + y * row_pitch + x * bytes_per_pixel; - uint8_t *dst = dst_buf + get_swizzled_offset(x, y, 0, - mask_x, mask_y, 0, - bytes_per_pixel); - memcpy(dst, src, bytes_per_pixel); - } - } - src_buf += slice_pitch; - } -} - -void unswizzle_box( - const uint8_t *src_buf, - unsigned int width, - unsigned int height, - unsigned int depth, - uint8_t *dst_buf, - unsigned int row_pitch, - unsigned int slice_pitch, - unsigned int bytes_per_pixel) -{ - uint32_t mask_x, mask_y, mask_z; - generate_swizzle_masks(width, height, depth, &mask_x, &mask_y, &mask_z); - - int x, y, z; - for (z = 0; z < depth; z++) { - for (y = 0; y < height; y++) { - for (x = 0; x < width; x++) { - const uint8_t *src = src_buf - + get_swizzled_offset(x, y, z, mask_x, mask_y, mask_z, - bytes_per_pixel); - uint8_t *dst = dst_buf + y * row_pitch + x * bytes_per_pixel; - memcpy(dst, src, bytes_per_pixel); - } - } - dst_buf += slice_pitch; - } -} - -void unswizzle_rect( - const uint8_t *src_buf, - unsigned int width, - unsigned int height, - uint8_t *dst_buf, - unsigned int pitch, - unsigned int bytes_per_pixel) -{ - unswizzle_box(src_buf, width, height, 1, dst_buf, pitch, 0, bytes_per_pixel); -} - -void swizzle_rect( - const uint8_t *src_buf, - unsigned int width, - unsigned int height, - uint8_t *dst_buf, - unsigned int pitch, - unsigned int bytes_per_pixel) -{ - swizzle_box(src_buf, width, height, 1, dst_buf, pitch, 0, bytes_per_pixel); -} diff --git a/swizzle.h b/swizzle.h deleted file mode 100644 index 21889b3..0000000 --- a/swizzle.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * QEMU texture swizzling routines - * - * Copyright (c) 2015 Jannik Vogel - * Copyright (c) 2013 espes - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see . - */ - -#ifndef HW_XBOX_SWIZZLE_H -#define HW_XBOX_SWIZZLE_H - -void swizzle_box( - const uint8_t *src_buf, - unsigned int width, - unsigned int height, - unsigned int depth, - uint8_t *dst_buf, - unsigned int row_pitch, - unsigned int slice_pitch, - unsigned int bytes_per_pixel); - -void unswizzle_box( - const uint8_t *src_buf, - unsigned int width, - unsigned int height, - unsigned int depth, - uint8_t *dst_buf, - unsigned int row_pitch, - unsigned int slice_pitch, - unsigned int bytes_per_pixel); - -void unswizzle_rect( - const uint8_t *src_buf, - unsigned int width, - unsigned int height, - uint8_t *dst_buf, - unsigned int pitch, - unsigned int bytes_per_pixel); - -void swizzle_rect( - const uint8_t *src_buf, - unsigned int width, - unsigned int height, - uint8_t *dst_buf, - unsigned int pitch, - unsigned int bytes_per_pixel); - -#endif diff --git a/third_party/swizzle.c b/third_party/swizzle.c new file mode 100644 index 0000000..94b1a9e --- /dev/null +++ b/third_party/swizzle.c @@ -0,0 +1,144 @@ +/* + * QEMU texture swizzling routines + * + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2013 espes + * Copyright (c) 2007-2010 The Nouveau Project. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "swizzle.h" + +#include +#include +#include +#include + +/* This should be pretty straightforward. + * It creates a bit pattern like ..zyxzyxzyx from ..xxx, ..yyy and ..zzz + * If there are no bits left from any component it will pack the other masks + * more tighly (Example: zzxzxzyx = Fewer x than z and even fewer y) + */ +static void generate_swizzle_masks(unsigned int width, unsigned int height, + unsigned int depth, uint32_t *mask_x, + uint32_t *mask_y, uint32_t *mask_z) { + uint32_t x = 0, y = 0, z = 0; + uint32_t bit = 1; + uint32_t mask_bit = 1; + bool done; + do { + done = true; + if (bit < width) { + x |= mask_bit; + mask_bit <<= 1; + done = false; + } + if (bit < height) { + y |= mask_bit; + mask_bit <<= 1; + done = false; + } + if (bit < depth) { + z |= mask_bit; + mask_bit <<= 1; + done = false; + } + bit <<= 1; + } while (!done); + assert(x ^ y ^ z == (mask_bit - 1)); + *mask_x = x; + *mask_y = y; + *mask_z = z; +} + +/* This fills a pattern with a value if your value has bits abcd and your + * pattern is 11010100100 this will return: 0a0b0c00d00 + */ +static uint32_t fill_pattern(uint32_t pattern, uint32_t value) { + uint32_t result = 0; + uint32_t bit = 1; + while (value) { + if (pattern & bit) { + /* Copy bit to result */ + result |= value & 1 ? bit : 0; + value >>= 1; + } + bit <<= 1; + } + return result; +} + +static unsigned int get_swizzled_offset(unsigned int x, unsigned int y, + unsigned int z, uint32_t mask_x, + uint32_t mask_y, uint32_t mask_z, + unsigned int bytes_per_pixel) { + return bytes_per_pixel * (fill_pattern(mask_x, x) | fill_pattern(mask_y, y) | + fill_pattern(mask_z, z)); +} + +void swizzle_box(const uint8_t *src_buf, unsigned int width, + unsigned int height, unsigned int depth, uint8_t *dst_buf, + unsigned int row_pitch, unsigned int slice_pitch, + unsigned int bytes_per_pixel) { + uint32_t mask_x, mask_y, mask_z; + generate_swizzle_masks(width, height, depth, &mask_x, &mask_y, &mask_z); + + int x, y, z; + for (z = 0; z < depth; z++) { + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) { + const uint8_t *src = src_buf + y * row_pitch + x * bytes_per_pixel; + uint8_t *dst = dst_buf + get_swizzled_offset(x, y, 0, mask_x, mask_y, 0, + bytes_per_pixel); + memcpy(dst, src, bytes_per_pixel); + } + } + src_buf += slice_pitch; + } +} + +void unswizzle_box(const uint8_t *src_buf, unsigned int width, + unsigned int height, unsigned int depth, uint8_t *dst_buf, + unsigned int row_pitch, unsigned int slice_pitch, + unsigned int bytes_per_pixel) { + uint32_t mask_x, mask_y, mask_z; + generate_swizzle_masks(width, height, depth, &mask_x, &mask_y, &mask_z); + + int x, y, z; + for (z = 0; z < depth; z++) { + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) { + const uint8_t *src = + src_buf + get_swizzled_offset(x, y, z, mask_x, mask_y, mask_z, + bytes_per_pixel); + uint8_t *dst = dst_buf + y * row_pitch + x * bytes_per_pixel; + memcpy(dst, src, bytes_per_pixel); + } + } + dst_buf += slice_pitch; + } +} + +void unswizzle_rect(const uint8_t *src_buf, unsigned int width, + unsigned int height, uint8_t *dst_buf, unsigned int pitch, + unsigned int bytes_per_pixel) { + unswizzle_box(src_buf, width, height, 1, dst_buf, pitch, 0, bytes_per_pixel); +} + +void swizzle_rect(const uint8_t *src_buf, unsigned int width, + unsigned int height, uint8_t *dst_buf, unsigned int pitch, + unsigned int bytes_per_pixel) { + swizzle_box(src_buf, width, height, 1, dst_buf, pitch, 0, bytes_per_pixel); +} diff --git a/third_party/swizzle.h b/third_party/swizzle.h new file mode 100644 index 0000000..848badc --- /dev/null +++ b/third_party/swizzle.h @@ -0,0 +1,52 @@ +/* + * QEMU texture swizzling routines + * + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2013 espes + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#ifndef HW_XBOX_SWIZZLE_H +#define HW_XBOX_SWIZZLE_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +void swizzle_box(const uint8_t *src_buf, unsigned int width, + unsigned int height, unsigned int depth, uint8_t *dst_buf, + unsigned int row_pitch, unsigned int slice_pitch, + unsigned int bytes_per_pixel); + +void unswizzle_box(const uint8_t *src_buf, unsigned int width, + unsigned int height, unsigned int depth, uint8_t *dst_buf, + unsigned int row_pitch, unsigned int slice_pitch, + unsigned int bytes_per_pixel); + +void unswizzle_rect(const uint8_t *src_buf, unsigned int width, + unsigned int height, uint8_t *dst_buf, unsigned int pitch, + unsigned int bytes_per_pixel); + +void swizzle_rect(const uint8_t *src_buf, unsigned int width, + unsigned int height, uint8_t *dst_buf, unsigned int pitch, + unsigned int bytes_per_pixel); + +#ifdef __cplusplus +}; +#endif + +#endif