Refactoring.

This commit is contained in:
Erik Abair 2021-11-10 06:41:15 -08:00
parent 1c7f719e3a
commit 56e5a21367
15 changed files with 1405 additions and 1164 deletions

View File

@ -57,7 +57,7 @@ BreakConstructorInitializersBeforeComma: false
BreakConstructorInitializers: BeforeColon
BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: true
ColumnLimit: 80
ColumnLimit: 120
CommentPragmas: '^ IWYU pragma:'
CompactNamespaces: false
ConstructorInitializerAllOnOneLineOrOnePerLine: true

2
.gitignore vendored
View File

@ -1,10 +1,12 @@
*.cpp.d
*.c.d
*.obj
*.iso
*.lib
*.exe
*.xbe
*.pdb
*.inl
.DS_Store
.vscode/

View File

@ -1,10 +1,15 @@
XBE_TITLE = nxdk_texture_format_tests
GEN_XISO = $(XBE_TITLE).iso
SRCS = $(CURDIR)/math3d.c $(CURDIR)/main.c $(CURDIR)/swizzle.c
SHADER_OBJS = ps.inl vs.inl
NXDK_DIR ?= $(CURDIR)/../nxdk
NXDK_SDL = y
NXDK_CXX = y
SRCS = \
$(CURDIR)/main.cpp \
$(CURDIR)/math3d.c \
$(CURDIR)/third_party/swizzle.c
SHADER_OBJS = ps.inl vs.inl
include $(NXDK_DIR)/Makefile

102
githooks/pre-commit Executable file
View File

@ -0,0 +1,102 @@
#!/bin/bash
#
# To enable this hook, copy this file into the ../.git/hooks directory,
# keeping the name "pre-commit" and its executable bit.
# Cross platform projects tend to avoid non-ASCII filenames; prevent
# them from being added to the repository. We exploit the fact that the
# printable range starts at the space character and ends with tilde.
# Abort the commit when a newly added path contains a byte outside the
# printable ASCII range. Reads the globals "allownonascii" and "against".
function check_no_nonascii_characters {
  # Opt-out: the check is skipped when hooks.allownonascii is "true".
  [[ "${allownonascii}" != "true" ]] || return 0

  # Count the bytes left over once printable ASCII and the NUL separators are
  # deleted from the list of added paths. The brackets around the tr range are
  # harmless (and required by Solaris 10's /usr/bin/tr) because both bracket
  # bytes fall inside the range being deleted.
  local stray_bytes
  stray_bytes=$(git diff --cached --name-only --diff-filter=A -z "${against}" |
    LC_ALL=C tr -d '[ -~]\0' | wc -c)
  if (( stray_bytes == 0 )); then
    return 0
  fi

  cat <<\EOF
Error: Attempt to add a non-ASCII file name.
This can cause problems if you want to work with people on other platforms.
To be portable it is advisable to rename the file.
If you know what you are doing you can disable this check using:
git config hooks.allownonascii true
EOF
  exit 1
}
# Fail the commit when the staged changes trip `git diff-index --check`;
# git prints the offending locations itself.
function check_no_diffmarkers_or_whitespace_errors {
  # On failure, terminate the hook with git's own exit status.
  git diff-index --check --cached "${against}" -- || exit $?
}
# Reformat the staged C/C++ sources in place with clang-format and re-stage
# them. Reads the global "changed_c_filenames" (one path per line).
function run_clang_format {
  # Paths under third_party or resources are never reformatted. Filter before
  # the emptiness check so that a commit touching only such files does not run
  # clang-format or `git add` with an empty argument list.
  local files_to_format
  files_to_format="$(echo "${changed_c_filenames}" | grep -Ev 'third_party|resources')"
  if [[ -z "${files_to_format}" ]]; then
    return
  fi

  # Without clang-format the commit is allowed through with a warning.
  if ! command -v clang-format > /dev/null; then
    cat <<\EOF
Warning: clang-format is not installed or is not in the PATH.
Please install and amend this commit.
Debian:
sudo apt install clang-format
EOF
    return
  fi

  # Reformat the files in-place and re-add any that were changed.
  #
  # Note that this has the side effect of incorporating changes to staged files
  # that were not themselves staged. E.g., if you edit a file, `git add`, then
  # edit some more, then commit, all of the changes will be committed, not just
  # the staged ones. Depending on typical workflows it might be better to do
  # something more complicated here, or to just have the hook fail instead of
  # perform an in-place fix.
  #
  # The list is converted to NUL-separated form so that a path containing
  # spaces reaches clang-format and git as a single argument.
  echo "${files_to_format}" | tr '\n' '\0' | xargs -0 clang-format -i
  echo "${files_to_format}" | tr '\n' '\0' | xargs -0 git add
}
# If you want to allow non-ASCII filenames set this variable to true.
# It is read by check_no_nonascii_characters.
allownonascii=$(git config --bool hooks.allownonascii)
# Choose the tree that the staged changes are compared against.
if git rev-parse --verify HEAD >/dev/null 2>&1; then
against=HEAD
else
# Initial commit: diff against an empty tree object
against=$(git hash-object -t tree /dev/null)
fi
# Redirect output to stderr.
exec 1>&2
# Staged paths, with deleted files excluded (lowercase "d" in --diff-filter).
added_and_modified_filenames="$(git diff --cached --name-only --diff-filter=d)"
# Keep only paths ending in .c, .cpp, .h or .hpp; consumed by run_clang_format.
changed_c_filenames="$(echo "${added_and_modified_filenames}" | \
grep -E '.*\.(c|cpp|h|hpp)$')"
# Allow blank line at EOF.
# NOTE(review): this writes to the repository's local git config on every run.
git config --local core.whitespace -blank-at-eof
# Run the checks in order. The first two terminate the hook with a non-zero
# status on failure; run_clang_format only returns.
check_no_nonascii_characters
check_no_diffmarkers_or_whitespace_errors
run_clang_format

552
main.c
View File

@ -1,552 +0,0 @@
/*
* This sample provides a very basic demonstration of 3D rendering on the Xbox,
* using pbkit. Based on the pbkit demo sources.
*/
#include <hal/video.h>
#include <hal/xbox.h>
#define _USE_MATH_DEFINES
#include <math.h>
#include <pbkit/pbkit.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <windows.h>
#include <xboxkrnl/xboxkrnl.h>
#include <hal/debug.h>
#include "math3d.h"
#include <SDL.h>
#include <SDL_image.h>
#include "swizzle.h"
// Describes one texture format that the test can display.
typedef struct TextureFormatInfo {
    SDL_PixelFormatEnum SdlFormat; // SDL format the gradient is converted to when RequireConversion is false
    uint32_t XboxFormat;           // value placed in the NV097_SET_TEXTURE_FORMAT_COLOR field
    uint16_t XboxBpp;              // bytes per pixel
    bool XboxSwizzled;             // true: texture data is swizzled; false: copied linearly
    bool RequireConversion;        // true: update_texture_memory converts by hand instead of via SDL
    const char *Name;              // label printed on screen; only ever points at string literals
} TextureFormatInfo;
// One vertex as stored in the vertex buffers: position, texture coordinate
// and normal, laid out back to back with byte packing requested.
#pragma pack(1)
struct Vertex {
    float pos[3];
    float texcoord[2];
    float normal[3];
};
#pragma pack()
typedef struct Vertex Vertex;
// Two copies of the `vertices` array; both are allocated and filled in main().
// main() selects one of them per frame depending on XboxSwizzled.
static Vertex *alloc_vertices; // texcoords 0 to width/height
static Vertex *alloc_vertices_swizzled; // texcoords normalized 0 to 1
// Element count of `vertices`; set in main().
static uint32_t num_vertices;
// Model, view and projection matrices. main() multiplies the viewport matrix
// into m_proj and uploads all three as vertex shader constants.
MATRIX m_model, m_view, m_proj;
// Camera position and rotation are passed to create_world_view(); the camera
// position and light direction are also uploaded as shader constants.
VECTOR v_cam_pos = { 0, 0.05, 1.07, 1 };
VECTOR v_cam_rot = { 0, 0, 0, 1 };
VECTOR v_light_dir = { 0, 0, 1, 1 };
// NOTE(review): presumably verts.h defines the `vertices` array used in main() - confirm.
#include "verts.h"
#include "texture.h"
// Shift val up to the lowest set bit of mask (located with ffs) and drop any
// bits that fall outside mask.
#define MASK(mask, val) (((val) << (ffs(mask)-1)) & (mask))
// Passed as the third argument of MmAllocateContiguousMemoryEx() in main().
#define MAXRAM 0x03FFAFFF
// TODO: upstream missing nv2a defines
// Vertex attribute slot indices passed to set_attrib_pointer().
#define NV2A_VERTEX_ATTR_POSITION 0
#define NV2A_VERTEX_ATTR_NORMAL 2
#define NV2A_VERTEX_ATTR_TEXTURE0 9
// Texture format codes used as XboxFormat values in format_map.
#define NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_G8B8 0x17
#define NV097_SET_TEXTURE_FORMAT_COLOR_SZ_B8G8R8A8 0x3B
#define NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8 0x24
#define NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_YB8CR8YA8CB8 0x25
#define NV097_SET_TEXTURE_FORMAT_COLOR_D16 0x2C // TODO: proper nvidia name
#define NV097_SET_TEXTURE_FORMAT_COLOR_LIN_F16 0x31 // TODO: proper nvidia name
// Field mask and value that main() writes to NV097_SET_CONTROL0 for the YUV formats.
#define NV097_SET_CONTROL0_COLOR_SPACE_CONVERT 0xF0000000
#define NV097_SET_CONTROL0_COLOR_SPACE_CONVERT_CRYCB_TO_RGB 0x1
// Forward declarations for the helpers defined after main().
static void matrix_viewport(float out[4][4], float x, float y, float width, float height, float z_min, float z_max);
static void init_shader(void);
// NOTE(review): init_textures is declared here, but no definition or call is visible in this file.
static void init_textures(void);
static void set_attrib_pointer(unsigned int index, unsigned int format, unsigned int size, unsigned int stride, const void* data);
static void draw_arrays(unsigned int mode, int start, int count);
static int update_texture_memory(void* texMem, TextureFormatInfo format, int width, int height);
// Table of the texture formats the test cycles through, in display order.
// Columns: SdlFormat, XboxFormat, XboxBpp, XboxSwizzled, RequireConversion, Name.
// NOTE: the commented-out rows have no XboxBpp column; it must be added
// before any of them is re-enabled.
static const TextureFormatInfo format_map[] = {
// swizzled
{ SDL_PIXELFORMAT_ABGR8888, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8B8G8R8, 4, true, false, "A8B8G8R8" },
{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R8G8B8A8, 4, true, false, "R8G8B8A8" },
{ SDL_PIXELFORMAT_ARGB8888, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8R8G8B8, 4, true, false, "A8R8G8B8" },
{ SDL_PIXELFORMAT_ARGB8888, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X8R8G8B8, 4, true, false, "X8R8G8B8" },
{ SDL_PIXELFORMAT_BGRA8888, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_B8G8R8A8, 4, true, false, "B8G8R8A8" },
{ SDL_PIXELFORMAT_RGB565, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R5G6B5, 2, true, false, "R5G6B5" },
{ SDL_PIXELFORMAT_ARGB1555, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A1R5G5B5, 2, true, false, "A1R5G5B5" },
{ SDL_PIXELFORMAT_ARGB1555, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X1R5G5B5, 2, true, false, "X1R5G5B5" },
{ SDL_PIXELFORMAT_ARGB4444, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A4R4G4B4, 2, true, false, "A4R4G4B4" },
// linear unsigned
{ SDL_PIXELFORMAT_ABGR8888, NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8B8G8R8, 4, false, false, "A8B8G8R8" },
{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R8G8B8A8, 4, false, false, "R8G8B8A8" },
{ SDL_PIXELFORMAT_ARGB8888, NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8R8G8B8, 4, false, false, "A8R8G8B8" },
{ SDL_PIXELFORMAT_ARGB8888, NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X8R8G8B8, 4, false, false, "X8R8G8B8" },
{ SDL_PIXELFORMAT_BGRA8888, NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_B8G8R8A8, 4, false, false, "B8G8R8A8" },
{ SDL_PIXELFORMAT_RGB565, NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R5G6B5, 2, false, false, "R5G6B5" },
{ SDL_PIXELFORMAT_ARGB1555, NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A1R5G5B5, 2, false, false, "A1R5G5B5" },
{ SDL_PIXELFORMAT_ARGB1555, NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X1R5G5B5, 2, false, false, "X1R5G5B5" },
{ SDL_PIXELFORMAT_ARGB4444, NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A4R4G4B4, 2, false, false, "A4R4G4B4" },
// yuv color space
// Each 4 bytes represent the color for 2 neighboring pixels. The byte order
// written by update_texture_memory is [ Y0 | U | Y1 | V ] for "YUY2" and
// [ U | Y0 | V | Y1 ] for "UYVY".
// Y0 is the brightness of pixel 0, Y1 the brightness of pixel 1.
// U and V are the color of both pixels (update_texture_memory derives them from the second pixel of the pair).
// https://docs.microsoft.com/en-us/windows/win32/medfound/recommended-8-bit-yuv-formats-for-video-rendering#converting-8-bit-yuv-to-rgb888
{ SDL_PIXELFORMAT_BGRA8888, NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8, 2, false, true, "YUY2" },
{ SDL_PIXELFORMAT_BGRA8888, NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_YB8CR8YA8CB8, 2, false, true, "UYVY" },
//{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_Y16, false, true, "Y16" },
//{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_Y8, true, true, "SZ_Y8" },
//{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_Y8, false, true, "Y8" },
//{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_AY8, true, true, "SZ_AY8" },
//{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_AY8, false, true, "AY8" },
//{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8, true, true, "SZ_A8" },
//{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8Y8, true, true, "SZ_A8Y8" },
// misc formats
//{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT1_A1R5G5B5, false, true, "DXT1" },
//{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT23_A8R8G8B8, false, true, "DXT3" },
//{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT45_A8R8G8B8, false, true, "DXT5" },
//{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_G8B8, true, true, "SZ_G8B8" },
// G8B8 has no conversion yet: update_texture_memory leaves the texture memory untouched for it.
{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_G8B8, 2, false, true, "G8B8" },
//{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_D16, false, true, "D16" }, // TODO: implement in xemu
//{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_LIN_F16, false, true, "LIN_F16" }, // TODO: implement in xemu
//{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R8B8, true, true, "SZ_R8B8" },
//{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R6G5B5, true, true, "R6G5B5" }
// TODO: define others here
};
/*
 * Bit scan forward: return the zero-based index of the least significant set
 * bit of val, or -1 when val is 0.
 *
 * Implemented with ffs() (already used by the MASK macro) rather than inline
 * assembly, so the function has an explicit return value and does not depend
 * on a compiler-specific __asm extension.
 */
int bsf(int val) {
    return ffs(val) - 1;
}
/*
 * Main program function.
 *
 * Initialises video, the first gamepad and pbkit, uploads the test geometry
 * and the first texture, then renders forever. Pressing A selects the next
 * entry of format_map and pressing B the previous one; both directions wrap.
 *
 * Returns 1 when initialisation fails. The render loop never exits, so the
 * cleanup code after it is unreachable.
 */
int main(void)
{
    uint32_t *p;
    int i, status;
    int width = 640, height = 480;
    float m_viewport[4][4];
    int format_map_index = 0;
    const int num_formats = (int)(sizeof(format_map) / sizeof(format_map[0]));
    /* Starts armed so the first button press is honoured (this flag used to be read uninitialised). */
    bool toggleFormat = true;
    int texWidth = 256, texHeight = 256;
    SDL_GameController *gameController;

    XVideoSetMode(width, height, 32, REFRESH_DEFAULT);

    // initialize input for the first gamepad
    SDL_InitSubSystem(SDL_INIT_GAMECONTROLLER);
    gameController = SDL_GameControllerOpen(0);
    if (!gameController) {
        debugPrint("Failed to initialize input for gamepad 0");
        Sleep(2000);
        return 1;
    }

    if ((status = pb_init())) {
        debugPrint("pb_init Error %d\n", status);
        Sleep(2000);
        return 1;
    }

    pb_show_front_screen();

    /* Load constant rendering things (shaders, geometry) */
    init_shader();

    // real nv2a hardware seems to cache this and not honor updates? have separate vertex buffers for swizzled and linear for now...
    alloc_vertices = MmAllocateContiguousMemoryEx(sizeof(vertices), 0, MAXRAM, 0, PAGE_WRITECOMBINE | PAGE_READWRITE);
    alloc_vertices_swizzled = MmAllocateContiguousMemoryEx(sizeof(vertices), 0, MAXRAM, 0, PAGE_WRITECOMBINE | PAGE_READWRITE);
    if (!alloc_vertices || !alloc_vertices_swizzled) {
        pb_show_debug_screen();
        debugPrint("Failed to allocate vertex buffers\n");
        Sleep(2000);
        return 1;
    }
    memcpy(alloc_vertices, vertices, sizeof(vertices));
    memcpy(alloc_vertices_swizzled, vertices, sizeof(vertices));
    num_vertices = sizeof(vertices)/sizeof(vertices[0]);
    /* The linear copy uses texel coordinates: every non-zero texcoord becomes the texture size. */
    for (uint32_t v = 0; v < num_vertices; v++) {
        if (alloc_vertices[v].texcoord[0]) alloc_vertices[v].texcoord[0] = texWidth * 1.0f;
        if (alloc_vertices[v].texcoord[1]) alloc_vertices[v].texcoord[1] = texHeight * 1.0f;
    }

    // allocate texture memory buffer large enough for all types
    void *texMem = MmAllocateContiguousMemoryEx(texWidth * texHeight * 4, 0, MAXRAM, 0, PAGE_WRITECOMBINE | PAGE_READWRITE);
    if (!texMem) {
        pb_show_debug_screen();
        debugPrint("Failed to allocate texture memory\n");
        Sleep(2000);
        return 1;
    }
    int texError = update_texture_memory(texMem, format_map[format_map_index], texWidth, texHeight);

    /* Create view matrix (our camera is static) */
    matrix_unit(m_view);
    create_world_view(m_view, v_cam_pos, v_cam_rot);

    /* Create projection matrix */
    matrix_unit(m_proj);
    create_view_screen(m_proj, (float)width/(float)height, -1.0f, 1.0f, -1.0f, 1.0f, 1.0f, 10000.0f);

    /* Create viewport matrix, combine with projection */
    matrix_viewport(m_viewport, 0, 0, width, height, 0, 65536.0f);
    matrix_multiply(m_proj, m_proj, (float*)m_viewport);

    /* Create local->world matrix given our updated object */
    matrix_unit(m_model);

    while(1) {
        // cycle current texture based on A or B button presses
        SDL_GameControllerUpdate();
        bool aPress = SDL_GameControllerGetButton(gameController, SDL_CONTROLLER_BUTTON_A);
        bool bPress = SDL_GameControllerGetButton(gameController, SDL_CONTROLLER_BUTTON_B);
        if (aPress || bPress) {
            if (toggleFormat) {
                /*
                 * Step forwards (A) or backwards (B). Stepping back adds
                 * num_formats - 1 instead of -1 so the left operand of %
                 * is never negative and index 0 wraps to the last entry.
                 */
                format_map_index = (format_map_index + (aPress ? 1 : num_formats - 1)) % num_formats;
                texError = update_texture_memory(texMem, format_map[format_map_index], texWidth, texHeight);
            }
            /* Disarm until both buttons are released so a held button steps only once. */
            toggleFormat = false;
        } else toggleFormat = true;

        pb_wait_for_vbl();
        pb_reset();
        pb_target_back_buffer();

        /* Clear depth & stencil buffers */
        pb_erase_depth_stencil_buffer(0, 0, width, height);
        pb_fill(0, 0, width, height, 0xff000000);
        pb_erase_text_screen();

        while(pb_busy()) {
            /* Wait for completion... */
        }

        /*
         * Setup texture stages
         */

        /* Enable texture stage 0 */
        /* FIXME: Use constants instead of the hardcoded values below */
        p = pb_begin();

        // first one seems to be needed
        p = pb_push1(p, NV097_SET_FRONT_FACE, NV097_SET_FRONT_FACE_V_CCW);
        p = pb_push1(p, NV097_SET_DEPTH_TEST_ENABLE, true);

        // Enable alpha blending functionality
        p = pb_push1(p, NV097_SET_BLEND_ENABLE, true);

        // Set the alpha blend source (s) and destination (d) factors
        p = pb_push1(p, NV097_SET_BLEND_FUNC_SFACTOR, NV097_SET_BLEND_FUNC_SFACTOR_V_SRC_ALPHA);
        p = pb_push1(p, NV097_SET_BLEND_FUNC_DFACTOR, NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_SRC_ALPHA);

        // yuv requires color space conversion
        // NOTE(review): nothing visible here clears this setting again after leaving a YUV format - confirm intended.
        if (format_map[format_map_index].XboxFormat == NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8 ||
            format_map[format_map_index].XboxFormat == NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_YB8CR8YA8CB8) {
            p = pb_push1(p, NV097_SET_CONTROL0,
                MASK(NV097_SET_CONTROL0_COLOR_SPACE_CONVERT, NV097_SET_CONTROL0_COLOR_SPACE_CONVERT_CRYCB_TO_RGB));
        }

        /* The base size fields hold log2 of the dimensions and are only filled in for swizzled formats. */
        DWORD format_mask = MASK(NV097_SET_TEXTURE_FORMAT_CONTEXT_DMA, 1) |
                            MASK(NV097_SET_TEXTURE_FORMAT_CUBEMAP_ENABLE, 0) |
                            MASK(NV097_SET_TEXTURE_FORMAT_BORDER_SOURCE, NV097_SET_TEXTURE_FORMAT_BORDER_SOURCE_COLOR) |
                            MASK(NV097_SET_TEXTURE_FORMAT_DIMENSIONALITY, 2) |
                            MASK(NV097_SET_TEXTURE_FORMAT_COLOR, format_map[format_map_index].XboxFormat) |
                            MASK(NV097_SET_TEXTURE_FORMAT_MIPMAP_LEVELS, 1) |
                            MASK(NV097_SET_TEXTURE_FORMAT_BASE_SIZE_U, format_map[format_map_index].XboxSwizzled ? bsf(texWidth) : 0) |
                            MASK(NV097_SET_TEXTURE_FORMAT_BASE_SIZE_V, format_map[format_map_index].XboxSwizzled ? bsf(texHeight) : 0) |
                            MASK(NV097_SET_TEXTURE_FORMAT_BASE_SIZE_P, 0);
        p = pb_push2(p,NV20_TCL_PRIMITIVE_3D_TX_OFFSET(0),(DWORD)texMem & 0x03ffffff,format_mask); //set stage 0 texture address & format
        if (!format_map[format_map_index].XboxSwizzled) {
            p = pb_push1(p,NV20_TCL_PRIMITIVE_3D_TX_NPOT_PITCH(0),(format_map[format_map_index].XboxBpp * texWidth)<<16); //set stage 0 texture pitch (pitch<<16)
            p = pb_push1(p,NV20_TCL_PRIMITIVE_3D_TX_NPOT_SIZE(0),(texWidth<<16)|texHeight); //set stage 0 texture width & height ((width<<16)|height)
        }
        p = pb_push1(p,NV20_TCL_PRIMITIVE_3D_TX_WRAP(0),0x00030303);//set stage 0 texture modes (0x0W0V0U wrapping: 1=wrap 2=mirror 3=clamp 4=border 5=clamp to edge)
        p = pb_push1(p,NV20_TCL_PRIMITIVE_3D_TX_ENABLE(0),0x4003ffc0); //set stage 0 texture enable flags
        p = pb_push1(p,NV20_TCL_PRIMITIVE_3D_TX_FILTER(0),0x04074000); //set stage 0 texture filters (AA!)
        pb_end(p);

        /* Disable other texture stages */
        p = pb_begin();
        p = pb_push1(p,NV20_TCL_PRIMITIVE_3D_TX_ENABLE(1),0x0003ffc0);//set stage 1 texture enable flags (bit30 disabled)
        p = pb_push1(p,NV20_TCL_PRIMITIVE_3D_TX_ENABLE(2),0x0003ffc0);//set stage 2 texture enable flags (bit30 disabled)
        p = pb_push1(p,NV20_TCL_PRIMITIVE_3D_TX_ENABLE(3),0x0003ffc0);//set stage 3 texture enable flags (bit30 disabled)
        p = pb_push1(p,NV20_TCL_PRIMITIVE_3D_TX_WRAP(1),0x00030303);//set stage 1 texture modes (0x0W0V0U wrapping: 1=wrap 2=mirror 3=clamp 4=border 5=clamp to edge)
        p = pb_push1(p,NV20_TCL_PRIMITIVE_3D_TX_WRAP(2),0x00030303);//set stage 2 texture modes (0x0W0V0U wrapping: 1=wrap 2=mirror 3=clamp 4=border 5=clamp to edge)
        p = pb_push1(p,NV20_TCL_PRIMITIVE_3D_TX_WRAP(3),0x00030303);//set stage 3 texture modes (0x0W0V0U wrapping: 1=wrap 2=mirror 3=clamp 4=border 5=clamp to edge)
        p = pb_push1(p,NV20_TCL_PRIMITIVE_3D_TX_FILTER(1),0x02022000);//set stage 1 texture filters (no AA, stage not even used)
        p = pb_push1(p,NV20_TCL_PRIMITIVE_3D_TX_FILTER(2),0x02022000);//set stage 2 texture filters (no AA, stage not even used)
        p = pb_push1(p,NV20_TCL_PRIMITIVE_3D_TX_FILTER(3),0x02022000);//set stage 3 texture filters (no AA, stage not even used)
        pb_end(p);

        /* Send shader constants
         *
         * WARNING: Changing shader source code may impact constant locations!
         * Check the intermediate file (*.inl) for the expected locations after
         * changing the code.
         */
        p = pb_begin();

        /* Set shader constants cursor at C0 */
        p = pb_push1(p, NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_CONST_ID, 96);

        /* Send the model matrix */
        pb_push(p++, NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_CONST_X, 16);
        memcpy(p, m_model, 16*4); p+=16;

        /* Send the view matrix */
        pb_push(p++, NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_CONST_X, 16);
        memcpy(p, m_view, 16*4); p+=16;

        /* Send the projection matrix */
        pb_push(p++, NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_CONST_X, 16);
        memcpy(p, m_proj, 16*4); p+=16;

        /* Send camera position */
        pb_push(p++, NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_CONST_X, 4);
        memcpy(p, v_cam_pos, 4*4); p+=4;

        /* Send light direction */
        pb_push(p++, NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_CONST_X, 4);
        memcpy(p, v_light_dir, 4*4); p+=4;

        /* Send shader constants */
        float constants_0[4] = {0, 0, 0, 0};
        pb_push(p++, NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_CONST_X, 4);
        memcpy(p, constants_0, 4*4); p+=4;

        /* Clear all attributes */
        pb_push(p++,NV097_SET_VERTEX_DATA_ARRAY_FORMAT,16);
        for(i = 0; i < 16; i++) {
            *(p++) = 2;
        }
        pb_end(p);

        /*
         * Setup vertex attributes
         */
        Vertex *vptr = format_map[format_map_index].XboxSwizzled ? alloc_vertices_swizzled : alloc_vertices;

        /* Set vertex position attribute */
        set_attrib_pointer(NV2A_VERTEX_ATTR_POSITION, NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F,
                           3, sizeof(Vertex), &vptr[0].pos);

        /* Set texture coordinate attribute */
        set_attrib_pointer(NV2A_VERTEX_ATTR_TEXTURE0, NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F,
                           2, sizeof(Vertex), &vptr[0].texcoord);

        /* Set vertex normal attribute */
        set_attrib_pointer(NV2A_VERTEX_ATTR_NORMAL, NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F,
                           3, sizeof(Vertex), &vptr[0].normal);

        /* Begin drawing triangles */
        draw_arrays(NV097_SET_BEGIN_END_OP_TRIANGLES, 0, num_vertices);

        /* Draw some text on the screen */
        pb_print("N: %s\n", format_map[format_map_index].Name);
        pb_print("F: 0x%x\n", format_map[format_map_index].XboxFormat);
        pb_print("SZ: %d\n", format_map[format_map_index].XboxSwizzled);
        pb_print("C: %d\n", format_map[format_map_index].RequireConversion);
        pb_print("W: %d\n", texWidth);
        pb_print("H: %d\n", texHeight);
        pb_print("P: %d\n", format_map[format_map_index].XboxBpp * texWidth);
        pb_print("ERR: %d\n", texError);
        pb_draw_text_screen();

        while(pb_busy()) {
            /* Wait for completion... */
        }

        /* Swap buffers (if we can) */
        while (pb_finished()) {
            /* Not ready to swap yet */
        }
    }

    /* Unreachable cleanup code */
    SDL_GameControllerClose(gameController);
    SDL_QuitSubSystem(SDL_INIT_GAMECONTROLLER);
    MmFreeContiguousMemory(alloc_vertices);
    MmFreeContiguousMemory(alloc_vertices_swizzled);
    MmFreeContiguousMemory(texMem);
    pb_show_debug_screen();
    pb_kill();
    return 0;
}
static int update_texture_memory(void *texMem, TextureFormatInfo format, int width, int height)
{
// create source surface
SDL_Surface *gradient_surf = SDL_CreateRGBSurfaceWithFormat(0, width, height, 32, SDL_PIXELFORMAT_RGBA8888);
if (gradient_surf == NULL)
return 1;
if (SDL_LockSurface(gradient_surf))
return 2;
// TODO: have different color patterns controlled by alternate gamepad button(s)
// generate basic gradient pattern
uint32_t *pixels = gradient_surf->pixels;
for (int y = 0; y < height; y++)
for (int x = 0; x < width; x++) {
int xNorm = x * 255.0f / width;
int yNorm = y * 255.0f / height;
pixels[y * width + x] = SDL_MapRGBA(gradient_surf->format, yNorm, xNorm, 255 - yNorm, xNorm + yNorm);
}
SDL_UnlockSurface(gradient_surf);
// if conversion required, do so, otherwise use SDL to convert
if (format.RequireConversion) {
uint8_t *dstP = (uint8_t*)texMem;
// TODO: potential reference material - https://github.com/scalablecory/colors/blob/master/color.c
switch (format.XboxFormat) {
case NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8: // YUY2 aka YUYV
for (int y = 0; y < height; y++)
for (int x = 0; x < width; x += 2) {
uint8_t R0, G0, B0, R1, G1, B1;
SDL_GetRGB(pixels[y * width + x], gradient_surf->format, &R0, &G0, &B0);
SDL_GetRGB(pixels[y * width + x + 1], gradient_surf->format, &R1, &G1, &B1);
dstP[0] = (0.257f * R0) + (0.504f * G0) + (0.098f * B0) + 16; // Y0
dstP[1] = -(0.148f * R1) - (0.291f * G1) + (0.439f * B1) + 128; // U
dstP[2] = (0.257f * R1) + (0.504f * G1) + (0.098f * B1) + 16; // Y1
dstP[3] = (0.439f * R1) - (0.368f * G1) - (0.071f * B1) + 128; // V
dstP += 4;
}
break;
case NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_YB8CR8YA8CB8: // UYVY
for (int y = 0; y < height; y++)
for (int x = 0; x < width; x += 2) {
uint8_t R0, G0, B0, R1, G1, B1;
SDL_GetRGB(pixels[y * width + x], gradient_surf->format, &R0, &G0, &B0);
SDL_GetRGB(pixels[y * width + x + 1], gradient_surf->format, &R1, &G1, &B1);
dstP[0] = -(0.148f * R1) - (0.291f * G1) + (0.439f * B1) + 128; // U
dstP[1] = (0.257f * R0) + (0.504f * G0) + (0.098f * B0) + 16; // Y0
dstP[2] = (0.439f * R1) - (0.368f * G1) - (0.071f * B1) + 128; // V
dstP[3] = (0.257f * R1) + (0.504f * G1) + (0.098f * B1) + 16; // Y1
dstP += 4;
}
break;
case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_G8B8:
// TODO: for now, just let default gradient happen
break;
default:
SDL_FreeSurface(gradient_surf);
return 3;
break;
}
// TODO: swizzling
SDL_FreeSurface(gradient_surf);
} else {
// standard SDL conversion to destination format
SDL_Surface *new_surf = SDL_ConvertSurfaceFormat(gradient_surf, format.SdlFormat, 0);
SDL_FreeSurface(gradient_surf);
if (!new_surf)
return 4;
// copy pixels over to texture memory, swizzling if desired
if (format.XboxSwizzled) {
swizzle_rect((uint8_t*)new_surf->pixels, new_surf->w, new_surf->h, texMem, new_surf->pitch, new_surf->format->BytesPerPixel);
} else {
memcpy(texMem, new_surf->pixels, new_surf->pitch * new_surf->h);
}
SDL_FreeSurface(new_surf);
}
return 0;
}
/*
 * Build a viewport transformation matrix in `out`.
 *
 * Every element is cleared first. The diagonal then receives the half
 * extents (with the Y scale negated) and row 3 receives the centre of the
 * viewport rectangle and of the depth range.
 */
static void matrix_viewport(float out[4][4], float x, float y, float width, float height, float z_min, float z_max)
{
    for (int row = 0; row < 4; row++) {
        for (int col = 0; col < 4; col++) {
            out[row][col] = 0.0f;
        }
    }

    /* Scale terms. */
    out[0][0] = width / 2.0f;
    out[1][1] = height / -2.0f;
    out[2][2] = (z_max - z_min) / 2.0f;

    /* Offset terms. */
    out[3][0] = x + width / 2.0f;
    out[3][1] = y + height / 2.0f;
    out[3][2] = (z_min + z_max) / 2.0f;
    out[3][3] = 1.0f;
}
/*
 * Load the shader we will render with.
 *
 * Uploads the vertex program whose words come from vs.inl, four 32-bit words
 * per push, then emits the contents of ps.inl between pb_begin() and pb_end().
 */
static void init_shader(void)
{
uint32_t *p;
int i;
/* Setup vertex shader */
/* vs.inl supplies the initialiser list for the program words. */
uint32_t vs_program[] = {
#include "vs.inl"
};
p = pb_begin();
/* Set run address of shader */
p = pb_push1(p, NV097_SET_TRANSFORM_PROGRAM_START, 0);
/* Set execution mode */
p = pb_push1(p, NV097_SET_TRANSFORM_EXECUTION_MODE,
MASK(NV097_SET_TRANSFORM_EXECUTION_MODE_MODE, NV097_SET_TRANSFORM_EXECUTION_MODE_MODE_PROGRAM)
| MASK(NV097_SET_TRANSFORM_EXECUTION_MODE_RANGE_MODE, NV097_SET_TRANSFORM_EXECUTION_MODE_RANGE_MODE_PRIV));
p = pb_push1(p, NV097_SET_TRANSFORM_PROGRAM_CXT_WRITE_EN, 0);
pb_end(p);
/* Set cursor and begin copying program */
p = pb_begin();
p = pb_push1(p, NV097_SET_TRANSFORM_PROGRAM_LOAD, 0);
pb_end(p);
/* Copy program instructions (16-bytes each) */
/* Each iteration pushes one instruction in its own pb_begin()/pb_end() pair. */
for (i=0; i<sizeof(vs_program)/16; i++) {
p = pb_begin();
pb_push(p++, NV097_SET_TRANSFORM_PROGRAM, 4);
memcpy(p, &vs_program[i*4], 4*4);
p+=4;
pb_end(p);
}
/* Setup fragment shader */
/* NOTE(review): ps.inl is expanded here as statements; presumably it writes
 * through the local `p`, so that variable name must not change - confirm
 * against the generated file. */
p = pb_begin();
#include "ps.inl"
pb_end(p);
}
/*
 * Describe one vertex attribute to the GPU.
 *
 * index  - attribute slot; each slot's registers are 4 bytes apart
 * format - value for the TYPE field of the array format register
 * size   - value for the SIZE field
 * stride - value for the STRIDE field
 * data   - address of the first element; only the low 26 bits are sent
 */
static void set_attrib_pointer(unsigned int index, unsigned int format, unsigned int size, unsigned int stride, const void* data)
{
    uint32_t *cursor = pb_begin();

    /* Array format: type, component count and stride packed into one word. */
    cursor = pb_push1(cursor, NV097_SET_VERTEX_DATA_ARRAY_FORMAT + index*4,
                      MASK(NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE, format) |
                      MASK(NV097_SET_VERTEX_DATA_ARRAY_FORMAT_SIZE, size) |
                      MASK(NV097_SET_VERTEX_DATA_ARRAY_FORMAT_STRIDE, stride));

    /* Array offset: where the attribute data starts. */
    cursor = pb_push1(cursor, NV097_SET_VERTEX_DATA_ARRAY_OFFSET + index*4, (uint32_t)data & 0x03ffffff);

    pb_end(cursor);
}
/*
 * Queue a draw of `count` consecutive vertices starting at `start`,
 * bracketed by a begin (with primitive type `mode`) and an end command.
 */
static void draw_arrays(unsigned int mode, int start, int count)
{
    uint32_t *cursor = pb_begin();

    cursor = pb_push1(cursor, NV097_SET_BEGIN_END, mode);

    /* bit 30 means all params go to same register 0x1810; the count field holds count - 1. */
    cursor = pb_push1(cursor, 0x40000000|NV097_DRAW_ARRAYS,
                      MASK(NV097_DRAW_ARRAYS_COUNT, (count-1)) | MASK(NV097_DRAW_ARRAYS_START_INDEX, start));

    cursor = pb_push1(cursor, NV097_SET_BEGIN_END, NV097_SET_BEGIN_END_OP_END);

    pb_end(cursor);
}

657
main.cpp Normal file
View File

@ -0,0 +1,657 @@
/*
* This sample provides a very basic demonstration of 3D rendering on the Xbox,
* using pbkit. Based on the pbkit demo sources.
*/
#include <hal/video.h>
#include <hal/xbox.h>
#pragma clang diagnostic push
#pragma ide diagnostic ignored "OCUnusedMacroInspection"
// clang-format off
#define _USE_MATH_DEFINES
#include <cmath>
// clang-format on
#include <SDL.h>
#include <SDL_image.h>
#include <hal/debug.h>
#include <pbkit/pbkit.h>
#include <strings.h>
#include <windows.h>
#include <xboxkrnl/xboxkrnl.h>
#include <cstdint>
#include <cstring>
#include "math3d.h"
#include "nxdk_missing_defines.h"
#include "third_party/swizzle.h"
// One row of format_map: how a texture format is produced and labelled.
struct TextureFormatInfo {
  SDL_PixelFormatEnum SdlFormat;  // SDL pixel format paired with this entry
  uint32_t XboxFormat;            // NV097_SET_TEXTURE_FORMAT_COLOR_* constant
  uint16_t XboxBpp;               // bytes per pixel
  bool XboxSwizzled;              // true for the SZ entries of format_map, false for LU/LC entries
  bool RequireConversion;         // true for the YUV and G8B8 entries of format_map
  const char *Name;               // label; the table entries use string literals
};
typedef struct TextureFormatInfo TextureFormatInfo;
// One vertex record: position, texture coordinate and normal stored back to
// back, with byte packing requested.
#pragma pack(1)
struct Vertex {
  float pos[3];
  float texcoord[2];
  float normal[3];
};
#pragma pack()
typedef struct Vertex Vertex;
// Vertex buffers for linear and swizzled texture formats.
// NOTE(review): the first comment below names the framebuffer constants, but
// presumably the texture dimensions (kTextureWidth/kTextureHeight) are meant -
// confirm against init_vertices.
static Vertex *alloc_vertices; // texcoords 0 to kFramebufferWidth/kFramebufferHeight
static Vertex *alloc_vertices_swizzled; // texcoords normalized 0 to 1
// Video mode passed to XVideoSetMode() in main().
static constexpr int kFramebufferWidth = 640;
static constexpr int kFramebufferHeight = 480;
// Dimensions of the test texture; main() sizes the texture allocation as width * height * 4 bytes.
static constexpr int kTextureWidth = 256;
static constexpr int kTextureHeight = 256;
// Model, view and projection matrices.
MATRIX m_model, m_view, m_proj;
// Camera position, camera rotation and light direction.
VECTOR v_cam_pos = {0, 0.05, 1.07, 1};
VECTOR v_cam_rot = {0, 0, 0, 1};
VECTOR v_light_dir = {0, 0, 1, 1};
#include "resources/texture.h"
#include "resources/verts.h"
// Shift val up to the lowest set bit of mask (located with ffs) and drop any
// bits that fall outside mask.
#define MASK(mask, val) (((val) << (ffs(mask) - 1)) & (mask))
// Passed as the third argument of MmAllocateContiguousMemoryEx() in main().
#define MAXRAM 0x03FFAFFF
// Forward declarations for the helpers called from main().
static void init_vertices();
static void init_matrices();
static void matrix_viewport(float out[4][4], float x, float y, float width, float height, float z_min, float z_max);
static void init_shader();
static void set_attrib_pointer(unsigned int index, unsigned int format, unsigned int size, unsigned int stride,
const void *data);
static void draw_arrays(unsigned int mode, int start, int count);
static int update_texture_memory(uint8_t *texture_memory, TextureFormatInfo format, int width, int height);
static void save_framebuffer(uint8_t *framebuffer, int format_map_index);
static constexpr TextureFormatInfo format_map[] = {
// swizzled
{SDL_PIXELFORMAT_ABGR8888, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8B8G8R8, 4, true, false, "A8B8G8R8"},
{SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R8G8B8A8, 4, true, false, "R8G8B8A8"},
{SDL_PIXELFORMAT_ARGB8888, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8R8G8B8, 4, true, false, "A8R8G8B8"},
{SDL_PIXELFORMAT_ARGB8888, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X8R8G8B8, 4, true, false, "X8R8G8B8"},
{SDL_PIXELFORMAT_BGRA8888, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_B8G8R8A8, 4, true, false, "B8G8R8A8"},
{SDL_PIXELFORMAT_RGB565, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R5G6B5, 2, true, false, "R5G6B5"},
{SDL_PIXELFORMAT_ARGB1555, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A1R5G5B5, 2, true, false, "A1R5G5B5"},
{SDL_PIXELFORMAT_ARGB1555, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X1R5G5B5, 2, true, false, "X1R5G5B5"},
{SDL_PIXELFORMAT_ARGB4444, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A4R4G4B4, 2, true, false, "A4R4G4B4"},
// linear unsigned
{SDL_PIXELFORMAT_ABGR8888, NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8B8G8R8, 4, false, false, "A8B8G8R8"},
{SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R8G8B8A8, 4, false, false, "R8G8B8A8"},
{SDL_PIXELFORMAT_ARGB8888, NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8R8G8B8, 4, false, false, "A8R8G8B8"},
{SDL_PIXELFORMAT_ARGB8888, NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X8R8G8B8, 4, false, false, "X8R8G8B8"},
{SDL_PIXELFORMAT_BGRA8888, NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_B8G8R8A8, 4, false, false, "B8G8R8A8"},
{SDL_PIXELFORMAT_RGB565, NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R5G6B5, 2, false, false, "R5G6B5"},
{SDL_PIXELFORMAT_ARGB1555, NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A1R5G5B5, 2, false, false, "A1R5G5B5"},
{SDL_PIXELFORMAT_ARGB1555, NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X1R5G5B5, 2, false, false, "X1R5G5B5"},
{SDL_PIXELFORMAT_ARGB4444, NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A4R4G4B4, 2, false, false, "A4R4G4B4"},
// yuv color space
// Each 4 bytes represent the color for 2 neighboring pixels:
// [ U0 | Y0 | V0 | Y1 ]
// Y0 is the brightness of pixel 0, Y1 the brightness of pixel 1.
// U0 and V0 is the color of both pixels. (second pixel is the one sampled?
// or averaged? doesn't really matter here)
// https://docs.microsoft.com/en-us/windows/win32/medfound/recommended-8-bit-yuv-formats-for-video-rendering#converting-8-bit-yuv-to-rgb888
{SDL_PIXELFORMAT_BGRA8888, NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8, 2, false, true, "YUY2"},
{SDL_PIXELFORMAT_BGRA8888, NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_YB8CR8YA8CB8, 2, false, true, "UYVY"},
//{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_Y16, false, true, "Y16" },
//{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_Y8, true, true, "SZ_Y8" },
//{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_Y8, false, true, "Y8" },
//{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_AY8, true, true, "SZ_AY8" },
//{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_AY8, false, true, "AY8" },
//{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8, true, true, "SZ_A8" },
//{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8Y8, true, true, "SZ_A8Y8" },
// misc formats
//{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT1_A1R5G5B5, false, true, "DXT1" },
//{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT23_A8R8G8B8, false, true, "DXT3" },
//{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT45_A8R8G8B8, false, true, "DXT5" },
//{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_G8B8, true, true, "SZ_G8B8" },
{SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_G8B8, 2, false, true, "G8B8"},
//{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_D16, false, true, "D16" }, // TODO: implement in
//xemu
//{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_LIN_F16, false, true, "LIN_F16" }, // TODO:
//implement in xemu
//{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R8B8, true, true, "SZ_R8B8" },
//{ SDL_PIXELFORMAT_RGBA8888, NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R6G5B5, true, true, "R6G5B5" }
// TODO: define others here
};
// Number of entries in format_map, derived from the array itself so the two cannot drift apart.
static constexpr int kNumFormats = sizeof(format_map) / sizeof(*format_map);
// bitscan forward
//
// Returns the index of the least-significant set bit of `val` (bit 0 is the lowest bit), which for a power of two is
// its base-2 logarithm.
//
// The x86 BSF instruction leaves its destination undefined for a zero source, so zero is handled explicitly and
// reported as 0. Using the compiler builtin also gives the function a real return statement instead of relying on
// an inline-assembly block leaving the result in EAX.
int bsf(int val) {
  if (val == 0) {
    return 0;
  }
  return __builtin_ctz((unsigned int)val);
}
/* Main program function */
int main() {
uint32_t *p;
XVideoSetMode(kFramebufferWidth, kFramebufferHeight, 32, REFRESH_DEFAULT);
// initialize input for the first gamepad
SDL_InitSubSystem(SDL_INIT_GAMECONTROLLER);
SDL_GameController *gameController = SDL_GameControllerOpen(0);
if (!gameController) {
debugPrint("Failed to initialize input for gamepad 0.");
Sleep(2000);
return 1;
}
if (!(IMG_Init(IMG_INIT_PNG) & IMG_INIT_PNG)) {
debugPrint("Failed to initialize SDL_image PNG mode.");
Sleep(2000);
return 1;
}
int status = pb_init();
if (status) {
debugPrint("pb_init Error %d\n", status);
Sleep(2000);
return 1;
}
pb_show_front_screen();
/* Load constant rendering things (shaders, geometry) */
init_shader();
init_vertices();
// allocate texture memory buffer large enough for all types
auto texture_memory = static_cast<uint8_t *>(MmAllocateContiguousMemoryEx(
kTextureWidth * kTextureHeight * 4, 0, MAXRAM, 0, PAGE_WRITECOMBINE | PAGE_READWRITE));
int format_map_index = 0;
int update_texture_result =
update_texture_memory(texture_memory, format_map[format_map_index], kTextureWidth, kTextureHeight);
init_matrices();
#pragma clang diagnostic push
#pragma ide diagnostic ignored "EndlessLoop"
uint8_t *framebuffer = XVideoGetFB();
bool toggle_format_allowed = true;
bool render_changed = true;
while (true) {
// cycle current texture based on A or B button presses
SDL_GameControllerUpdate();
bool a_pressed = SDL_GameControllerGetButton(gameController, SDL_CONTROLLER_BUTTON_A);
bool b_pressed = SDL_GameControllerGetButton(gameController, SDL_CONTROLLER_BUTTON_B);
if (a_pressed || b_pressed) {
if (toggle_format_allowed) {
if (a_pressed) {
format_map_index = (format_map_index + 1) % kNumFormats;
} else {
if (--format_map_index < 0) {
format_map_index = kNumFormats - 1;
}
}
update_texture_result =
update_texture_memory(texture_memory, format_map[format_map_index], kTextureWidth, kTextureHeight);
render_changed = true;
}
toggle_format_allowed = false;
} else {
toggle_format_allowed = true;
}
pb_wait_for_vbl();
pb_reset();
pb_target_back_buffer();
/* Clear depth & stencil buffers */
pb_erase_depth_stencil_buffer(0, 0, kFramebufferWidth, kFramebufferHeight);
pb_fill(0, 0, kFramebufferWidth, kFramebufferHeight, 0xff000000);
pb_erase_text_screen();
while (pb_busy()) {
/* Wait for completion... */
}
/*
* Setup texture stages
*/
/* Enable texture stage 0 */
/* FIXME: Use constants instead of the hardcoded values below */
p = pb_begin();
// first one seems to be needed
p = pb_push1(p, NV097_SET_FRONT_FACE, NV097_SET_FRONT_FACE_V_CCW);
p = pb_push1(p, NV097_SET_DEPTH_TEST_ENABLE, true);
// Enable alpha blending functionality
p = pb_push1(p, NV097_SET_BLEND_ENABLE, true);
// Set the alpha blend source (s) and destination (d) factors
p = pb_push1(p, NV097_SET_BLEND_FUNC_SFACTOR, NV097_SET_BLEND_FUNC_SFACTOR_V_SRC_ALPHA);
p = pb_push1(p, NV097_SET_BLEND_FUNC_DFACTOR, NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_SRC_ALPHA);
// yuv requires color space conversion
if (format_map[format_map_index].XboxFormat == NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8 ||
format_map[format_map_index].XboxFormat == NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_YB8CR8YA8CB8) {
p = pb_push1(p, NV097_SET_CONTROL0,
MASK(NV097_SET_CONTROL0_COLOR_SPACE_CONVERT, NV097_SET_CONTROL0_COLOR_SPACE_CONVERT_CRYCB_TO_RGB));
}
DWORD format_mask =
MASK(NV097_SET_TEXTURE_FORMAT_CONTEXT_DMA, 1) | MASK(NV097_SET_TEXTURE_FORMAT_CUBEMAP_ENABLE, 0) |
MASK(NV097_SET_TEXTURE_FORMAT_BORDER_SOURCE, NV097_SET_TEXTURE_FORMAT_BORDER_SOURCE_COLOR) |
MASK(NV097_SET_TEXTURE_FORMAT_DIMENSIONALITY, 2) |
MASK(NV097_SET_TEXTURE_FORMAT_COLOR, format_map[format_map_index].XboxFormat) |
MASK(NV097_SET_TEXTURE_FORMAT_MIPMAP_LEVELS, 1) |
MASK(NV097_SET_TEXTURE_FORMAT_BASE_SIZE_U, format_map[format_map_index].XboxSwizzled ? bsf(kTextureWidth) : 0) |
MASK(NV097_SET_TEXTURE_FORMAT_BASE_SIZE_V,
format_map[format_map_index].XboxSwizzled ? bsf(kTextureHeight) : 0) |
MASK(NV097_SET_TEXTURE_FORMAT_BASE_SIZE_P, 0);
// set stage 0 texture address & format
p = pb_push2(p, NV20_TCL_PRIMITIVE_3D_TX_OFFSET(0), (DWORD)texture_memory & 0x03ffffff, format_mask);
if (!format_map[format_map_index].XboxSwizzled) {
// set stage 0 texture pitch (pitch<<16)
p = pb_push1(p, NV20_TCL_PRIMITIVE_3D_TX_NPOT_PITCH(0),
(format_map[format_map_index].XboxBpp * kTextureWidth) << 16);
// set stage 0 texture kFramebufferWidth & kFramebufferHeight
// ((width<<16)|kFramebufferHeight)
p = pb_push1(p, NV20_TCL_PRIMITIVE_3D_TX_NPOT_SIZE(0), (kTextureWidth << 16) | kTextureHeight);
}
// set stage 0 texture modes
// (0x0W0V0U wrapping: 1=wrap 2=mirror 3=clamp 4=border 5=clamp to edge)
p = pb_push1(p, NV20_TCL_PRIMITIVE_3D_TX_WRAP(0), 0x00030303);
// set stage 0 texture enable flags
p = pb_push1(p, NV20_TCL_PRIMITIVE_3D_TX_ENABLE(0), 0x4003ffc0);
// set stage 0 texture filters (AA!)
p = pb_push1(p, NV20_TCL_PRIMITIVE_3D_TX_FILTER(0), 0x04074000);
pb_end(p);
/* Disable other texture stages */
p = pb_begin();
// set stage 1 texture enable flags (bit30 disabled)
p = pb_push1(p, NV20_TCL_PRIMITIVE_3D_TX_ENABLE(1), 0x0003ffc0);
// set stage 2 texture enable flags (bit30 disabled)
p = pb_push1(p, NV20_TCL_PRIMITIVE_3D_TX_ENABLE(2), 0x0003ffc0);
// set stage 3 texture enable flags (bit30 disabled)
p = pb_push1(p, NV20_TCL_PRIMITIVE_3D_TX_ENABLE(3), 0x0003ffc0);
// set stage 1 texture modes (0x0W0V0U wrapping: 1=wrap 2=mirror 3=clamp
// 4=border 5=clamp to edge)
p = pb_push1(p, NV20_TCL_PRIMITIVE_3D_TX_WRAP(1), 0x00030303);
// set stage 2 texture modes (0x0W0V0U wrapping: 1=wrap 2=mirror 3=clamp
// 4=border 5=clamp to edge)
p = pb_push1(p, NV20_TCL_PRIMITIVE_3D_TX_WRAP(2), 0x00030303);
// set stage 3 texture modes (0x0W0V0U wrapping: 1=wrap 2=mirror 3=clamp
// 4=border 5=clamp to edge)
p = pb_push1(p, NV20_TCL_PRIMITIVE_3D_TX_WRAP(3), 0x00030303);
// set stage 1 texture filters (no AA, stage not even used)
p = pb_push1(p, NV20_TCL_PRIMITIVE_3D_TX_FILTER(1), 0x02022000);
// set stage 2 texture filters (no AA, stage not even used)
p = pb_push1(p, NV20_TCL_PRIMITIVE_3D_TX_FILTER(2), 0x02022000);
// set stage 3 texture filters (no AA, stage not even used)
p = pb_push1(p, NV20_TCL_PRIMITIVE_3D_TX_FILTER(3), 0x02022000);
pb_end(p);
/* Send shader constants
*
* WARNING: Changing shader source code may impact constant locations!
* Check the intermediate file (*.inl) for the expected locations after
* changing the code.
*/
p = pb_begin();
/* Set shader constants cursor at C0 */
p = pb_push1(p, NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_CONST_ID, 96);
/* Send the model matrix */
pb_push(p++, NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_CONST_X, 16);
memcpy(p, m_model, 16 * 4);
p += 16;
/* Send the view matrix */
pb_push(p++, NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_CONST_X, 16);
memcpy(p, m_view, 16 * 4);
p += 16;
/* Send the projection matrix */
pb_push(p++, NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_CONST_X, 16);
memcpy(p, m_proj, 16 * 4);
p += 16;
/* Send camera position */
pb_push(p++, NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_CONST_X, 4);
memcpy(p, v_cam_pos, 4 * 4);
p += 4;
/* Send light direction */
pb_push(p++, NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_CONST_X, 4);
memcpy(p, v_light_dir, 4 * 4);
p += 4;
/* Send shader constants */
float constants_0[4] = {0, 0, 0, 0};
pb_push(p++, NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_CONST_X, 4);
memcpy(p, constants_0, 4 * 4);
p += 4;
/* Clear all attributes */
pb_push(p++, NV097_SET_VERTEX_DATA_ARRAY_FORMAT, 16);
for (auto i = 0; i < 16; i++) {
*(p++) = 2;
}
pb_end(p);
/*
* Setup vertex attributes
*/
Vertex *vptr = format_map[format_map_index].XboxSwizzled ? alloc_vertices_swizzled : alloc_vertices;
/* Set vertex position attribute */
set_attrib_pointer(NV2A_VERTEX_ATTR_POSITION, NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F, 3, sizeof(Vertex),
&vptr[0].pos);
/* Set texture coordinate attribute */
set_attrib_pointer(NV2A_VERTEX_ATTR_TEXTURE0, NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F, 2, sizeof(Vertex),
&vptr[0].texcoord);
/* Set vertex normal attribute */
set_attrib_pointer(NV2A_VERTEX_ATTR_NORMAL, NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F, 3, sizeof(Vertex),
&vptr[0].normal);
/* Begin drawing triangles */
draw_arrays(NV097_SET_BEGIN_END_OP_TRIANGLES, 0, kNumVertices);
/* Draw some text on the screen */
pb_print("N: %s\n", format_map[format_map_index].Name);
pb_print("F: 0x%x\n", format_map[format_map_index].XboxFormat);
pb_print("SZ: %d\n", format_map[format_map_index].XboxSwizzled);
pb_print("C: %d\n", format_map[format_map_index].RequireConversion);
pb_print("W: %d\n", kTextureWidth);
pb_print("H: %d\n", kTextureHeight);
pb_print("P: %d\n", format_map[format_map_index].XboxBpp * kTextureWidth);
pb_print("ERR: %d\n", update_texture_result);
pb_draw_text_screen();
while (pb_busy()) {
/* Wait for completion... */
}
/* Swap buffers (if we can) */
while (pb_finished()) {
/* Not ready to swap yet */
}
if (render_changed) {
render_changed = false;
save_framebuffer(framebuffer, format_map_index);
}
}
#pragma clang diagnostic pop
/* Unreachable cleanup code */
SDL_GameControllerClose(gameController);
SDL_QuitSubSystem(SDL_INIT_GAMECONTROLLER);
MmFreeContiguousMemory(alloc_vertices);
MmFreeContiguousMemory(alloc_vertices_swizzled);
MmFreeContiguousMemory(texture_memory);
pb_show_debug_screen();
pb_kill();
return 0;
}
/*
 * Placeholder for saving the rendered frame.
 *
 * Currently does nothing: both arguments are accepted and ignored.
 */
static void save_framebuffer(uint8_t *framebuffer, int format_map_index) {
  (void)framebuffer;
  (void)format_map_index;
}
static int update_texture_memory(uint8_t *texture_memory, TextureFormatInfo format, int width, int height) {
// create source surface
SDL_Surface *gradient_surface = SDL_CreateRGBSurfaceWithFormat(0, width, height, 32, SDL_PIXELFORMAT_RGBA8888);
if (gradient_surface == nullptr) {
return 1;
}
if (SDL_LockSurface(gradient_surface)) {
return 2;
}
// TODO: have different color patterns controlled by alternate gamepad
// button(s) generate basic gradient pattern
auto pixels = static_cast<uint32_t *>(gradient_surface->pixels);
for (int y = 0; y < height; y++)
for (int x = 0; x < width; x++) {
int x_normal = static_cast<int>(static_cast<float>(x) * 255.0f / static_cast<float>(width));
int y_normal = static_cast<int>(static_cast<float>(y) * 255.0f / static_cast<float>(height));
pixels[y * width + x] =
SDL_MapRGBA(gradient_surface->format, y_normal, x_normal, 255 - y_normal, x_normal + y_normal);
}
SDL_UnlockSurface(gradient_surface);
// if conversion required, do so, otherwise use SDL to convert
if (format.RequireConversion) {
uint8_t *dstP = texture_memory;
// TODO: potential reference material -
// https://github.com/scalablecory/colors/blob/master/color.c
switch (format.XboxFormat) {
case NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8: // YUY2 aka
// YUYV
for (int y = 0; y < height; y++)
for (int x = 0; x < width; x += 2) {
uint8_t R0, G0, B0, R1, G1, B1;
SDL_GetRGB(pixels[y * width + x], gradient_surface->format, &R0, &G0, &B0);
SDL_GetRGB(pixels[y * width + x + 1], gradient_surface->format, &R1, &G1, &B1);
dstP[0] = (0.257f * R0) + (0.504f * G0) + (0.098f * B0) + 16; // Y0
dstP[1] = -(0.148f * R1) - (0.291f * G1) + (0.439f * B1) + 128; // U
dstP[2] = (0.257f * R1) + (0.504f * G1) + (0.098f * B1) + 16; // Y1
dstP[3] = (0.439f * R1) - (0.368f * G1) - (0.071f * B1) + 128; // V
dstP += 4;
}
break;
case NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_YB8CR8YA8CB8: // UYVY
for (int y = 0; y < height; y++)
for (int x = 0; x < width; x += 2) {
uint8_t R0, G0, B0, R1, G1, B1;
SDL_GetRGB(pixels[y * width + x], gradient_surface->format, &R0, &G0, &B0);
SDL_GetRGB(pixels[y * width + x + 1], gradient_surface->format, &R1, &G1, &B1);
dstP[0] = -(0.148f * R1) - (0.291f * G1) + (0.439f * B1) + 128; // U
dstP[1] = (0.257f * R0) + (0.504f * G0) + (0.098f * B0) + 16; // Y0
dstP[2] = (0.439f * R1) - (0.368f * G1) - (0.071f * B1) + 128; // V
dstP[3] = (0.257f * R1) + (0.504f * G1) + (0.098f * B1) + 16; // Y1
dstP += 4;
}
break;
case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_G8B8:
// TODO: for now, just let default gradient happen
break;
default:
SDL_FreeSurface(gradient_surface);
return 3;
break;
}
// TODO: swizzling
SDL_FreeSurface(gradient_surface);
} else {
// standard SDL conversion to destination format
SDL_Surface *new_surf = SDL_ConvertSurfaceFormat(gradient_surface, format.SdlFormat, 0);
SDL_FreeSurface(gradient_surface);
if (!new_surf) {
return 4;
}
// copy pixels over to texture memory, swizzling if desired
if (format.XboxSwizzled) {
swizzle_rect((uint8_t *)new_surf->pixels, new_surf->w, new_surf->h, texture_memory, new_surf->pitch,
new_surf->format->BytesPerPixel);
} else {
memcpy(texture_memory, new_surf->pixels, new_surf->pitch * new_surf->h);
}
SDL_FreeSurface(new_surf);
}
return 0;
}
static void init_vertices() {
// real nv2a hardware seems to cache this and not honor updates? have separate
// vertex buffers for swizzled and linear for now...
alloc_vertices = static_cast<Vertex *>(
MmAllocateContiguousMemoryEx(sizeof(vertices), 0, MAXRAM, 0, PAGE_WRITECOMBINE | PAGE_READWRITE));
alloc_vertices_swizzled = static_cast<Vertex *>(
MmAllocateContiguousMemoryEx(sizeof(vertices), 0, MAXRAM, 0, PAGE_WRITECOMBINE | PAGE_READWRITE));
memcpy(alloc_vertices, vertices, sizeof(vertices));
memcpy(alloc_vertices_swizzled, vertices, sizeof(vertices));
for (int i = 0; i < kNumVertices; i++) {
if (alloc_vertices[i].texcoord[0] != 0.0f) {
alloc_vertices[i].texcoord[0] = static_cast<float>(kTextureWidth);
}
if (alloc_vertices[i].texcoord[1] != 0.0f) {
alloc_vertices[i].texcoord[1] = static_cast<float>(kTextureHeight);
}
}
}
/*
 * Builds the three matrices uploaded as shader constants:
 *   m_view  - from the static camera position and rotation,
 *   m_proj  - the projection multiplied by a viewport transform sized to the framebuffer,
 *   m_model - identity.
 */
static void init_matrices() {
  const float fb_width = static_cast<float>(kFramebufferWidth);
  const float fb_height = static_cast<float>(kFramebufferHeight);

  /* View matrix (our camera is static) */
  matrix_unit(m_view);
  create_world_view(m_view, v_cam_pos, v_cam_rot);

  /* Projection matrix */
  matrix_unit(m_proj);
  create_view_screen(m_proj, fb_width / fb_height, -1.0f, 1.0f, -1.0f, 1.0f, 1.0f, 10000.0f);

  /* Viewport matrix, folded into the projection */
  float m_viewport[4][4];
  matrix_viewport(m_viewport, 0, 0, fb_width, fb_height, 0, 65536.0f);
  matrix_multiply(m_proj, m_proj, &m_viewport[0][0]);

  /* Local->world matrix: the object is left untransformed */
  matrix_unit(m_model);
}
/*
 * Construct a viewport transformation matrix.
 *
 * out is cleared, then given a scale of (width / 2, -height / 2, (z_max - z_min) / 2) on its diagonal and an offset
 * of (x + width / 2, y + height / 2, (z_min + z_max) / 2) in row 3. out[3][3] is 1.
 */
static void matrix_viewport(float out[4][4], float x, float y, float width, float height, float z_min, float z_max) {
  const float half_width = width / 2.0f;
  const float half_height = height / 2.0f;
  int row;
  int col;

  for (row = 0; row < 4; row++) {
    for (col = 0; col < 4; col++) {
      out[row][col] = 0.0f;
    }
  }

  /* Scale terms; Y is negated. */
  out[0][0] = half_width;
  out[1][1] = height / -2.0f;
  out[2][2] = (z_max - z_min) / 2.0f;
  out[3][3] = 1.0f;

  /* Offset terms */
  out[3][0] = x + half_width;
  out[3][1] = y + half_height;
  out[3][2] = (z_min + z_max) / 2.0f;
}
/*
 * Load the shader we will render with.
 *
 * Uploads the vertex program from vs.inl four 32-bit words (one 16-byte instruction) at a time, starting at program
 * slot 0, then emits the pixel shader setup from ps.inl into its own push buffer section.
 */
static void init_shader() {
  uint32_t *p;
  int i;

  /* Vertex shader program words */
  uint32_t vs_program[] = {
// clang-format off
#include "vs.inl"
      // clang-format on
  };
  const int instruction_count = sizeof(vs_program) / 16;
  const uint32_t *instruction = vs_program;

  /* Run the program from slot 0, in program execution mode */
  p = pb_begin();
  p = pb_push1(p, NV097_SET_TRANSFORM_PROGRAM_START, 0);
  p = pb_push1(
      p, NV097_SET_TRANSFORM_EXECUTION_MODE,
      MASK(NV097_SET_TRANSFORM_EXECUTION_MODE_MODE, NV097_SET_TRANSFORM_EXECUTION_MODE_MODE_PROGRAM) |
          MASK(NV097_SET_TRANSFORM_EXECUTION_MODE_RANGE_MODE, NV097_SET_TRANSFORM_EXECUTION_MODE_RANGE_MODE_PRIV));
  p = pb_push1(p, NV097_SET_TRANSFORM_PROGRAM_CXT_WRITE_EN, 0);
  pb_end(p);

  /* Point the upload cursor at slot 0 */
  p = pb_begin();
  p = pb_push1(p, NV097_SET_TRANSFORM_PROGRAM_LOAD, 0);
  pb_end(p);

  /* Copy program instructions (16 bytes each), one push buffer section per instruction */
  for (i = 0; i < instruction_count; i++, instruction += 4) {
    p = pb_begin();
    pb_push(p++, NV097_SET_TRANSFORM_PROGRAM, 4);
    memcpy(p, instruction, 4 * 4);
    p += 4;
    pb_end(p);
  }

  /* Fragment shader: ps.inl is included as statements between pb_begin and pb_end */
  p = pb_begin();
// clang-format off
#include "ps.inl"
  // clang-format on
  pb_end(p);
}
/*
 * Set an attribute pointer.
 *
 * index  - vertex attribute slot; selects which FORMAT/OFFSET register pair is written
 * format - component type
 * size   - number of components
 * stride - bytes from one vertex to the next
 * data   - address of the first element; only the low 26 bits are sent
 */
static void set_attrib_pointer(uint32_t index, uint32_t format, unsigned int size, uint32_t stride, const void *data) {
  const uint32_t slot_offset = index * 4;
  const uint32_t format_word = MASK(NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE, format) |
                               MASK(NV097_SET_VERTEX_DATA_ARRAY_FORMAT_SIZE, size) |
                               MASK(NV097_SET_VERTEX_DATA_ARRAY_FORMAT_STRIDE, stride);
  const uint32_t address = (uint32_t)data & 0x03ffffff;

  uint32_t *p = pb_begin();
  p = pb_push1(p, NV097_SET_VERTEX_DATA_ARRAY_FORMAT + slot_offset, format_word);
  p = pb_push1(p, NV097_SET_VERTEX_DATA_ARRAY_OFFSET + slot_offset, address);
  pb_end(p);
}
/*
 * Send draw commands for the triangles.
 *
 * Opens a primitive of type `mode`, issues one DRAW_ARRAYS covering `count` vertices starting at index `start`
 * (the count field is sent as count - 1), then closes the primitive.
 */
static void draw_arrays(unsigned int mode, int start, int count) {
  // bit 30 means all params go to same register 0x1810
  const uint32_t draw_command = 0x40000000 | NV097_DRAW_ARRAYS;
  const uint32_t draw_range =
      MASK(NV097_DRAW_ARRAYS_COUNT, (count - 1)) | MASK(NV097_DRAW_ARRAYS_START_INDEX, start);

  uint32_t *p = pb_begin();
  p = pb_push1(p, NV097_SET_BEGIN_END, mode);
  p = pb_push1(p, draw_command, draw_range);
  p = pb_push1(p, NV097_SET_BEGIN_END, NV097_SET_BEGIN_END_OP_END);
  pb_end(p);
}
#pragma clang diagnostic pop

727
math3d.c
View File

@ -1,352 +1,375 @@
//port of ooPo's ps2sdk math3d library
#include <stdio.h>
#include <string.h>
#include <math.h>
#include "math3d.h"
// Hand-written declaration of times(), used only through the cpu_ticks() macro below.
// NOTE(review): presumably resolved by the platform C library at link time -- confirm.
unsigned long times(void *);
// cpu_ticks() expands to times(0).
#define cpu_ticks() times(0)
//vector functions
/*
 * Transforms input0 by the matrix input1.
 *
 * Each output component is the sum of the four input0 components weighted by one group of four matrix entries
 * (_11.._14 for X, _21.._24 for Y, _31.._34 for Z, _41.._44 for W). The result is staged in a temporary, so output
 * may be the same vector as input0.
 */
void vector_apply(VECTOR output, VECTOR input0, MATRIX input1) {
  const float x = input0[_X];
  const float y = input0[_Y];
  const float z = input0[_Z];
  const float w = input0[_W];
  VECTOR result;

  result[_X] = x * input1[_11] + y * input1[_12] + z * input1[_13] + w * input1[_14];
  result[_Y] = x * input1[_21] + y * input1[_22] + z * input1[_23] + w * input1[_24];
  result[_Z] = x * input1[_31] + y * input1[_32] + z * input1[_33] + w * input1[_34];
  result[_W] = x * input1[_41] + y * input1[_42] + z * input1[_43] + w * input1[_44];

  vector_copy(output, result);
}
/*
 * Clamps every component of input0 (X, Y, Z and W) to the range [min, max] and stores the result in output.
 *
 * The lower bound is applied before the upper bound, so if min > max the result is max.
 */
void vector_clamp(VECTOR output, VECTOR input0, float min, float max) {
  const int lanes[4] = {_X, _Y, _Z, _W};
  VECTOR clamped;
  int i;

  vector_copy(clamped, input0);

  for (i = 0; i < 4; i++) {
    float *component = &clamped[lanes[i]];
    if (*component < min) {
      *component = min;
    }
    if (*component > max) {
      *component = max;
    }
  }

  vector_copy(output, clamped);
}
/* Copies sizeof(VECTOR) bytes from input0 to output using memcpy. */
void vector_copy(VECTOR output, VECTOR input0) {
  const size_t vector_bytes = sizeof(VECTOR);

  memcpy(output, input0, vector_bytes);
}
/*
 * Returns the three-component dot product of input0 and input1 after dividing each vector's X, Y and Z by its own W.
 *
 * Neither input is modified. A W of zero is not guarded against.
 */
float vector_innerproduct(VECTOR input0, VECTOR input1) {
  /* First vector, divided through by its W */
  const float ax = input0[_X] / input0[_W];
  const float ay = input0[_Y] / input0[_W];
  const float az = input0[_Z] / input0[_W];

  /* Second vector, divided through by its W */
  const float bx = input1[_X] / input1[_W];
  const float by = input1[_Y] / input1[_W];
  const float bz = input1[_Z] / input1[_W];

  return (ax * bx) + (ay * by) + (az * bz);
}
/*
 * Component-wise product: output[c] = input0[c] * input1[c] for X, Y, Z and W.
 *
 * The result is staged in a temporary, so output may be the same vector as either input.
 */
void vector_multiply(VECTOR output, VECTOR input0, VECTOR input1) {
  const int lanes[4] = {_X, _Y, _Z, _W};
  VECTOR product;
  int i;

  for (i = 0; i < 4; i++) {
    product[lanes[i]] = input0[lanes[i]] * input1[lanes[i]];
  }

  vector_copy(output, product);
}
/*
 * Writes to output the vector input0 with X, Y and Z scaled to unit length; W is copied unchanged.
 *
 * output may be the same vector as input0, which gives the in-place behavior of the previous implementation. The
 * previous code scaled whatever was already stored in output, so it was only correct for that in-place case.
 *
 * A zero-length input is not guarded against: the scale factor is then infinite.
 */
void vector_normalize(VECTOR output, VECTOR input0) {
  const float k =
      1.0f / sqrt(input0[_X] * input0[_X] + input0[_Y] * input0[_Y] + input0[_Z] * input0[_Z]);

  /* Read every input component before writing so aliasing output and input0 stays safe. */
  const float x = input0[_X] * k;
  const float y = input0[_Y] * k;
  const float z = input0[_Z] * k;
  const float w = input0[_W];

  output[_X] = x;
  output[_Y] = y;
  output[_Z] = z;
  output[_W] = w;
}
/*
 * Cross product: output = input0 x input1, computed from the X, Y and Z components only.
 *
 * W of the result is set to 0. It was previously left uninitialized in the temporary and then copied into output.
 * The result is staged, so output may be the same vector as either input.
 */
void vector_outerproduct(VECTOR output, VECTOR input0, VECTOR input1) {
  VECTOR work;

  work[_X] = input0[_Y] * input1[_Z] - input0[_Z] * input1[_Y];
  work[_Y] = input0[_Z] * input1[_X] - input0[_X] * input1[_Z];
  work[_Z] = input0[_X] * input1[_Y] - input0[_Y] * input1[_X];
  work[_W] = 0.00f;

  // Output the result.
  vector_copy(output, work);
}
//matrices function
/* Copies sizeof(MATRIX) bytes from input0 to output using memcpy. */
void matrix_copy(MATRIX output, MATRIX input0) {
  const size_t matrix_bytes = sizeof(MATRIX);

  memcpy(output, input0, matrix_bytes);
}
/*
 * Inverts input0 by transposing it and rebuilding the translation entries (_41, _42, _43) from the negated original
 * translation; _14, _24 and _34 are forced to 0 and _44 to 1.
 *
 * This shortcut is exact only when the upper-left 3x3 part is orthonormal (a pure rotation); it is not a general 4x4
 * inverse. The result is staged, so output may be the same matrix as input0.
 */
void matrix_inverse(MATRIX output, MATRIX input0) {
  const float tx = input0[_41];
  const float ty = input0[_42];
  const float tz = input0[_43];
  MATRIX inverse;

  matrix_transpose(inverse, input0);

  inverse[_14] = 0.00f;
  inverse[_24] = 0.00f;
  inverse[_34] = 0.00f;

  inverse[_41] = -(tx * inverse[_11] + ty * inverse[_21] + tz * inverse[_31]);
  inverse[_42] = -(tx * inverse[_12] + ty * inverse[_22] + tz * inverse[_32]);
  inverse[_43] = -(tx * inverse[_13] + ty * inverse[_23] + tz * inverse[_33]);
  inverse[_44] = 1.00f;

  matrix_copy(output, inverse);
}
/*
 * Multiplies two 4x4 matrices: output = input0 * input1, reading each _RC index as row R, column C.
 *
 * Every entry of the result is the sum of four products pairing the _R1.._R4 entries of input0 with the _1C.._4C
 * entries of input1. The product is built in a temporary and copied out at the end, so output may be the same matrix
 * as input0 and/or input1.
 */
void matrix_multiply(MATRIX output, MATRIX input0, MATRIX input1)
{
MATRIX work;
// Row 1 of the result.
work[_11]=input0[_11]*input1[_11]+input0[_12]*input1[_21]+input0[_13]*input1[_31]+input0[_14]*input1[_41];
work[_12]=input0[_11]*input1[_12]+input0[_12]*input1[_22]+input0[_13]*input1[_32]+input0[_14]*input1[_42];
work[_13]=input0[_11]*input1[_13]+input0[_12]*input1[_23]+input0[_13]*input1[_33]+input0[_14]*input1[_43];
work[_14]=input0[_11]*input1[_14]+input0[_12]*input1[_24]+input0[_13]*input1[_34]+input0[_14]*input1[_44];
// Row 2 of the result.
work[_21]=input0[_21]*input1[_11]+input0[_22]*input1[_21]+input0[_23]*input1[_31]+input0[_24]*input1[_41];
work[_22]=input0[_21]*input1[_12]+input0[_22]*input1[_22]+input0[_23]*input1[_32]+input0[_24]*input1[_42];
work[_23]=input0[_21]*input1[_13]+input0[_22]*input1[_23]+input0[_23]*input1[_33]+input0[_24]*input1[_43];
work[_24]=input0[_21]*input1[_14]+input0[_22]*input1[_24]+input0[_23]*input1[_34]+input0[_24]*input1[_44];
// Row 3 of the result.
work[_31]=input0[_31]*input1[_11]+input0[_32]*input1[_21]+input0[_33]*input1[_31]+input0[_34]*input1[_41];
work[_32]=input0[_31]*input1[_12]+input0[_32]*input1[_22]+input0[_33]*input1[_32]+input0[_34]*input1[_42];
work[_33]=input0[_31]*input1[_13]+input0[_32]*input1[_23]+input0[_33]*input1[_33]+input0[_34]*input1[_43];
work[_34]=input0[_31]*input1[_14]+input0[_32]*input1[_24]+input0[_33]*input1[_34]+input0[_34]*input1[_44];
// Row 4 of the result.
work[_41]=input0[_41]*input1[_11]+input0[_42]*input1[_21]+input0[_43]*input1[_31]+input0[_44]*input1[_41];
work[_42]=input0[_41]*input1[_12]+input0[_42]*input1[_22]+input0[_43]*input1[_32]+input0[_44]*input1[_42];
work[_43]=input0[_41]*input1[_13]+input0[_42]*input1[_23]+input0[_43]*input1[_33]+input0[_44]*input1[_43];
work[_44]=input0[_41]*input1[_14]+input0[_42]*input1[_24]+input0[_43]*input1[_34]+input0[_44]*input1[_44];
// Output the result.
matrix_copy(output, work);
}
/*
 * Appends three axis rotations to input0 and stores the result in output.
 *
 * The angles come from input1 (passed to cosf/sinf, i.e. radians): element 2 rotates about Z, element 1 about Y and
 * element 0 about X, multiplied onto input0 in that order. output may be the same matrix as input0.
 */
void matrix_rotate(MATRIX output, MATRIX input0, VECTOR input1) {
  MATRIX axis;
  float c;
  float s;

  /* Z axis */
  c = cosf(input1[2]);
  s = sinf(input1[2]);
  matrix_unit(axis);
  axis[_11] = c;
  axis[_12] = s;
  axis[_21] = -s;
  axis[_22] = c;
  matrix_multiply(output, input0, axis);

  /* Y axis */
  c = cosf(input1[1]);
  s = sinf(input1[1]);
  matrix_unit(axis);
  axis[_11] = c;
  axis[_13] = -s;
  axis[_31] = s;
  axis[_33] = c;
  matrix_multiply(output, output, axis);

  /* X axis */
  c = cosf(input1[0]);
  s = sinf(input1[0]);
  matrix_unit(axis);
  axis[_22] = c;
  axis[_23] = s;
  axis[_32] = -s;
  axis[_33] = c;
  matrix_multiply(output, output, axis);
}
/*
 * Appends a scale to input0: output = input0 * S, where S is the unit matrix with the X, Y and Z components of
 * input1 on its first three diagonal entries. output may be the same matrix as input0.
 */
void matrix_scale(MATRIX output, MATRIX input0, VECTOR input1) {
  MATRIX scale;

  matrix_unit(scale);
  scale[_11] = input1[_X];
  scale[_22] = input1[_Y];
  scale[_33] = input1[_Z];

  matrix_multiply(output, input0, scale);
}
/*
 * Appends a translation to input0: output = input0 * T, where T is the unit matrix with the X, Y and Z components of
 * input1 stored in entries _41, _42 and _43. output may be the same matrix as input0.
 */
void matrix_translate(MATRIX output, MATRIX input0, VECTOR input1) {
  MATRIX translation;

  matrix_unit(translation);
  translation[_41] = input1[_X];
  translation[_42] = input1[_Y];
  translation[_43] = input1[_Z];

  matrix_multiply(output, input0, translation);
}
/*
 * Stores the transpose of input0 in output: entry _RC of the result is entry _CR of the input.
 *
 * The result is staged in a temporary, so output may be the same matrix as input0.
 */
void matrix_transpose(MATRIX output, MATRIX input0) {
  MATRIX flipped;

  /* The diagonal keeps its place. */
  flipped[_11] = input0[_11];
  flipped[_22] = input0[_22];
  flipped[_33] = input0[_33];
  flipped[_44] = input0[_44];

  /* Each off-diagonal pair trades places. */
  flipped[_12] = input0[_21];
  flipped[_21] = input0[_12];

  flipped[_13] = input0[_31];
  flipped[_31] = input0[_13];

  flipped[_14] = input0[_41];
  flipped[_41] = input0[_14];

  flipped[_23] = input0[_32];
  flipped[_32] = input0[_23];

  flipped[_24] = input0[_42];
  flipped[_42] = input0[_24];

  flipped[_34] = input0[_43];
  flipped[_43] = input0[_34];

  matrix_copy(output, flipped);
}
/* Overwrites output with the unit (identity) matrix: all zero bytes, then 1 on the four diagonal entries. */
void matrix_unit(MATRIX output) {
  memset(output, 0, sizeof(MATRIX));
  output[_11] = output[_22] = output[_33] = output[_44] = 1.00f;
}
//creation functions
/*
 * Builds the local->world matrix for an object: starting from the unit matrix, applies `rotation` and then
 * `translation`. Any previous contents of local_world are discarded.
 */
void create_local_world(MATRIX local_world, VECTOR translation, VECTOR rotation) {
  /* Start from a clean slate. */
  matrix_unit(local_world);

  /* Rotate first, then move into place. */
  matrix_rotate(local_world, local_world, rotation);
  matrix_translate(local_world, local_world, translation);
}
/*
 * Builds the local_light matrix: the unit matrix with `rotation` applied. Any previous contents of local_light are
 * discarded.
 */
void create_local_light(MATRIX local_light, VECTOR rotation) {
  /* Start from a clean slate. */
  matrix_unit(local_light);

  /* Only the orientation is applied; there is no translation step. */
  matrix_rotate(local_light, local_light, rotation);
}
/*
 * Builds the world->view matrix for a camera located at `translation` with orientation `rotation`.
 *
 * The camera transform is undone: the unit matrix is translated by the negated position and then rotated by the
 * negated angles. The W components are carried over unnegated. Neither input vector is modified.
 */
void create_world_view(MATRIX world_view, VECTOR translation, VECTOR rotation) {
  VECTOR reverse_translation;
  VECTOR reverse_rotation;

  reverse_translation[_X] = -translation[_X];
  reverse_translation[_Y] = -translation[_Y];
  reverse_translation[_Z] = -translation[_Z];
  reverse_translation[_W] = translation[_W];

  reverse_rotation[_X] = -rotation[_X];
  reverse_rotation[_Y] = -rotation[_Y];
  reverse_rotation[_Z] = -rotation[_Z];
  reverse_rotation[_W] = rotation[_W];

  matrix_unit(world_view);
  matrix_translate(world_view, world_view, reverse_translation);
  matrix_rotate(world_view, world_view, reverse_rotation);
}
/*
 * Builds the view->screen (projection) matrix that maps the view frustum into a clip volume.
 *
 * `left` and `right` are multiplied by `aspect` before use. The matrix is laid out for Xbox clipping, where a pixel
 * is visible when
 *     -w < x < w
 *     -w < y < w
 *      0 < z < w
 * and the test is applied automatically before the pixel shader runs. This differs from the PS2 variant of this
 * library, which clips z to -w < z < w and therefore uses (far + near) / (far - near) and
 * (2 * far * near) / (far - near) for the two depth terms, with positive _31 / _32.
 */
void create_view_screen(MATRIX view_screen, float aspect, float left, float right, float bottom, float top, float near, float far) {
  float width;
  float height;
  float depth;

  /* Apply the aspect ratio adjustment. */
  left = (left * aspect);
  right = (right * aspect);

  width = right - left;
  height = top - bottom;
  depth = far - near;

  matrix_unit(view_screen);

  /* Scale */
  view_screen[_11] = (2 * near) / width;
  view_screen[_22] = (2 * near) / height;

  /* Off-center shift */
  view_screen[_31] = -(right + left) / width;
  view_screen[_32] = -(top + bottom) / height;

  /* Depth terms for the 0 < z < w clip range */
  view_screen[_33] = -far / depth;
  view_screen[_34] = -1.00f;
  view_screen[_43] = near * far / depth;
  view_screen[_44] = 0.00f;
}
/*
 * Builds the combined local->screen matrix: unit * local_world * world_view * view_screen, multiplied in that order.
 * Any previous contents of local_screen are discarded.
 */
void create_local_screen(MATRIX local_screen, MATRIX local_world, MATRIX world_view, MATRIX view_screen) {
  matrix_unit(local_screen);

  /* local -> world -> view -> screen */
  matrix_multiply(local_screen, local_screen, local_world);
  matrix_multiply(local_screen, local_screen, world_view);
  matrix_multiply(local_screen, local_screen, view_screen);
}
// port of ooPo's ps2sdk math3d library
#include "math3d.h"
#include <math.h>
#include <stdio.h>
#include <string.h>
// Hand-written declaration of times(), used only through the cpu_ticks() macro below.
// NOTE(review): presumably resolved by the platform C library at link time -- confirm.
unsigned long times(void *);
// cpu_ticks() expands to times(0).
#define cpu_ticks() times(0)
// vector functions
/*
 * Transforms input0 by the matrix input1.
 *
 * Each output component is the sum of the four input0 components weighted by one group of four matrix entries
 * (_11.._14 for X, _21.._24 for Y, _31.._34 for Z, _41.._44 for W). The result is staged in a temporary, so output
 * may be the same vector as input0.
 */
void vector_apply(VECTOR output, VECTOR input0, MATRIX input1) {
  const float x = input0[_X];
  const float y = input0[_Y];
  const float z = input0[_Z];
  const float w = input0[_W];
  VECTOR result;

  result[_X] = x * input1[_11] + y * input1[_12] + z * input1[_13] + w * input1[_14];
  result[_Y] = x * input1[_21] + y * input1[_22] + z * input1[_23] + w * input1[_24];
  result[_Z] = x * input1[_31] + y * input1[_32] + z * input1[_33] + w * input1[_34];
  result[_W] = x * input1[_41] + y * input1[_42] + z * input1[_43] + w * input1[_44];

  vector_copy(output, result);
}
/*
 * Clamps every component of input0 (X, Y, Z and W) to the range [min, max] and stores the result in output.
 *
 * The lower bound is applied before the upper bound, so if min > max the result is max.
 */
void vector_clamp(VECTOR output, VECTOR input0, float min, float max) {
  const int lanes[4] = {_X, _Y, _Z, _W};
  VECTOR clamped;
  int i;

  vector_copy(clamped, input0);

  for (i = 0; i < 4; i++) {
    float *component = &clamped[lanes[i]];
    if (*component < min) {
      *component = min;
    }
    if (*component > max) {
      *component = max;
    }
  }

  vector_copy(output, clamped);
}
/* Copies sizeof(VECTOR) bytes from input0 to output using memcpy. */
void vector_copy(VECTOR output, VECTOR input0) {
  const size_t vector_bytes = sizeof(VECTOR);

  memcpy(output, input0, vector_bytes);
}
/*
 * Returns the three-component dot product of input0 and input1 after dividing each vector's X, Y and Z by its own W.
 *
 * Neither input is modified. A W of zero is not guarded against.
 */
float vector_innerproduct(VECTOR input0, VECTOR input1) {
  /* First vector, divided through by its W */
  const float ax = input0[_X] / input0[_W];
  const float ay = input0[_Y] / input0[_W];
  const float az = input0[_Z] / input0[_W];

  /* Second vector, divided through by its W */
  const float bx = input1[_X] / input1[_W];
  const float by = input1[_Y] / input1[_W];
  const float bz = input1[_Z] / input1[_W];

  return (ax * bx) + (ay * by) + (az * bz);
}
/*
 * Component-wise product: output[c] = input0[c] * input1[c] for X, Y, Z and W.
 *
 * The result is staged in a temporary, so output may be the same vector as either input.
 */
void vector_multiply(VECTOR output, VECTOR input0, VECTOR input1) {
  const int lanes[4] = {_X, _Y, _Z, _W};
  VECTOR product;
  int i;

  for (i = 0; i < 4; i++) {
    product[lanes[i]] = input0[lanes[i]] * input1[lanes[i]];
  }

  vector_copy(output, product);
}
/*
 * Writes to output the vector input0 with X, Y and Z scaled to unit length; W is copied unchanged.
 *
 * output may be the same vector as input0, which gives the in-place behavior of the previous implementation. The
 * previous code scaled whatever was already stored in output, so it was only correct for that in-place case.
 *
 * A zero-length input is not guarded against: the scale factor is then infinite.
 */
void vector_normalize(VECTOR output, VECTOR input0) {
  const float k =
      1.0f / sqrt(input0[_X] * input0[_X] + input0[_Y] * input0[_Y] + input0[_Z] * input0[_Z]);

  /* Read every input component before writing so aliasing output and input0 stays safe. */
  const float x = input0[_X] * k;
  const float y = input0[_Y] * k;
  const float z = input0[_Z] * k;
  const float w = input0[_W];

  output[_X] = x;
  output[_Y] = y;
  output[_Z] = z;
  output[_W] = w;
}
/*
 * Cross product: output = input0 x input1, computed from the X, Y and Z components only.
 *
 * W of the result is set to 0. It was previously left uninitialized in the temporary and then copied into output.
 * The result is staged, so output may be the same vector as either input.
 */
void vector_outerproduct(VECTOR output, VECTOR input0, VECTOR input1) {
  VECTOR work;

  work[_X] = input0[_Y] * input1[_Z] - input0[_Z] * input1[_Y];
  work[_Y] = input0[_Z] * input1[_X] - input0[_X] * input1[_Z];
  work[_Z] = input0[_X] * input1[_Y] - input0[_Y] * input1[_X];
  work[_W] = 0.00f;

  // Output the result.
  vector_copy(output, work);
}
// matrices function
/* Copies sizeof(MATRIX) bytes from input0 to output using memcpy. */
void matrix_copy(MATRIX output, MATRIX input0) {
  const size_t matrix_bytes = sizeof(MATRIX);

  memcpy(output, input0, matrix_bytes);
}
/*
 * Inverts input0 by transposing it and rebuilding the translation entries (_41, _42, _43) from the negated original
 * translation; _14, _24 and _34 are forced to 0 and _44 to 1.
 *
 * This shortcut is exact only when the upper-left 3x3 part is orthonormal (a pure rotation); it is not a general 4x4
 * inverse. The result is staged, so output may be the same matrix as input0.
 */
void matrix_inverse(MATRIX output, MATRIX input0) {
  const float tx = input0[_41];
  const float ty = input0[_42];
  const float tz = input0[_43];
  MATRIX inverse;

  matrix_transpose(inverse, input0);

  inverse[_14] = 0.00f;
  inverse[_24] = 0.00f;
  inverse[_34] = 0.00f;

  inverse[_41] = -(tx * inverse[_11] + ty * inverse[_21] + tz * inverse[_31]);
  inverse[_42] = -(tx * inverse[_12] + ty * inverse[_22] + tz * inverse[_32]);
  inverse[_43] = -(tx * inverse[_13] + ty * inverse[_23] + tz * inverse[_33]);
  inverse[_44] = 1.00f;

  matrix_copy(output, inverse);
}
/*
 * Multiplies two 4x4 matrices: output = input0 * input1, reading each _RC index as row R, column C.
 *
 * Every entry of the result is the sum of four products pairing the _R1.._R4 entries of input0 with the _1C.._4C
 * entries of input1. The product is built in a temporary and copied out at the end, so output may be the same matrix
 * as input0 and/or input1.
 */
void matrix_multiply(MATRIX output, MATRIX input0, MATRIX input1) {
MATRIX work;
// Row 1 of the result.
work[_11] = input0[_11] * input1[_11] + input0[_12] * input1[_21] +
input0[_13] * input1[_31] + input0[_14] * input1[_41];
work[_12] = input0[_11] * input1[_12] + input0[_12] * input1[_22] +
input0[_13] * input1[_32] + input0[_14] * input1[_42];
work[_13] = input0[_11] * input1[_13] + input0[_12] * input1[_23] +
input0[_13] * input1[_33] + input0[_14] * input1[_43];
work[_14] = input0[_11] * input1[_14] + input0[_12] * input1[_24] +
input0[_13] * input1[_34] + input0[_14] * input1[_44];
// Row 2 of the result.
work[_21] = input0[_21] * input1[_11] + input0[_22] * input1[_21] +
input0[_23] * input1[_31] + input0[_24] * input1[_41];
work[_22] = input0[_21] * input1[_12] + input0[_22] * input1[_22] +
input0[_23] * input1[_32] + input0[_24] * input1[_42];
work[_23] = input0[_21] * input1[_13] + input0[_22] * input1[_23] +
input0[_23] * input1[_33] + input0[_24] * input1[_43];
work[_24] = input0[_21] * input1[_14] + input0[_22] * input1[_24] +
input0[_23] * input1[_34] + input0[_24] * input1[_44];
// Row 3 of the result.
work[_31] = input0[_31] * input1[_11] + input0[_32] * input1[_21] +
input0[_33] * input1[_31] + input0[_34] * input1[_41];
work[_32] = input0[_31] * input1[_12] + input0[_32] * input1[_22] +
input0[_33] * input1[_32] + input0[_34] * input1[_42];
work[_33] = input0[_31] * input1[_13] + input0[_32] * input1[_23] +
input0[_33] * input1[_33] + input0[_34] * input1[_43];
work[_34] = input0[_31] * input1[_14] + input0[_32] * input1[_24] +
input0[_33] * input1[_34] + input0[_34] * input1[_44];
// Row 4 of the result.
work[_41] = input0[_41] * input1[_11] + input0[_42] * input1[_21] +
input0[_43] * input1[_31] + input0[_44] * input1[_41];
work[_42] = input0[_41] * input1[_12] + input0[_42] * input1[_22] +
input0[_43] * input1[_32] + input0[_44] * input1[_42];
work[_43] = input0[_41] * input1[_13] + input0[_42] * input1[_23] +
input0[_43] * input1[_33] + input0[_44] * input1[_43];
work[_44] = input0[_41] * input1[_14] + input0[_42] * input1[_24] +
input0[_43] * input1[_34] + input0[_44] * input1[_44];
// Output the result.
matrix_copy(output, work);
}
// Rotate input0 by the angles in input1 and store the result in output.
// input1[0], input1[1] and input1[2] are the rotations about the x, y and z
// axes, in radians (they are passed straight to sinf/cosf).
// The rotations are applied by post-multiplication in the order z, y, x.
void matrix_rotate(MATRIX output, MATRIX input0, VECTOR input1) {
  MATRIX axis_rotation;
  float s, c;

  // Rotation about the z axis.
  s = sinf(input1[2]);
  c = cosf(input1[2]);
  matrix_unit(axis_rotation);
  axis_rotation[_11] = c;
  axis_rotation[_12] = s;
  axis_rotation[_21] = -s;
  axis_rotation[_22] = c;
  matrix_multiply(output, input0, axis_rotation);

  // Rotation about the y axis, applied on top of the result so far.
  s = sinf(input1[1]);
  c = cosf(input1[1]);
  matrix_unit(axis_rotation);
  axis_rotation[_11] = c;
  axis_rotation[_13] = -s;
  axis_rotation[_31] = s;
  axis_rotation[_33] = c;
  matrix_multiply(output, output, axis_rotation);

  // Rotation about the x axis, applied last.
  s = sinf(input1[0]);
  c = cosf(input1[0]);
  matrix_unit(axis_rotation);
  axis_rotation[_22] = c;
  axis_rotation[_23] = s;
  axis_rotation[_32] = -s;
  axis_rotation[_33] = c;
  matrix_multiply(output, output, axis_rotation);
}
// Scale input0 by the per-axis factors in input1 (x, y, z) and store the
// result in output. The fourth component of input1 is not used.
void matrix_scale(MATRIX output, MATRIX input0, VECTOR input1) {
  MATRIX scaling;
  // Start from the identity and place the factors on the diagonal.
  matrix_unit(scaling);
  // The diagonal slots _11, _22 and _33 are 5 elements apart (0, 5, 10).
  for (int axis = 0; axis < 3; ++axis) {
    scaling[axis * 5] = input1[axis];
  }
  matrix_multiply(output, input0, scaling);
}
// Translate input0 by the offsets in input1 (x, y, z) and store the result in
// output. The fourth component of input1 is not used.
void matrix_translate(MATRIX output, MATRIX input0, VECTOR input1) {
  MATRIX offset;
  // Identity with the offsets written into the fourth row (_41, _42, _43).
  matrix_unit(offset);
  for (int axis = 0; axis < 3; ++axis) {
    offset[_41 + axis] = input1[axis];
  }
  matrix_multiply(output, input0, offset);
}
// Store the transpose of input0 in output.
// A scratch matrix is used so that output may be the same matrix as input0.
void matrix_transpose(MATRIX output, MATRIX input0) {
  MATRIX flipped;
  for (int row = 0; row < 4; ++row) {
    for (int col = 0; col < 4; ++col) {
      // Element (row, col) of the result is element (col, row) of the input.
      flipped[row * 4 + col] = input0[col * 4 + row];
    }
  }
  // Publish the finished transpose.
  matrix_copy(output, flipped);
}
// Overwrite output with the 4x4 identity matrix.
void matrix_unit(MATRIX output) {
  const int count = (int)(sizeof(MATRIX) / sizeof(float));
  for (int i = 0; i < count; ++i) {
    // The diagonal slots (_11, _22, _33, _44) are indices 0, 5, 10 and 15.
    output[i] = (i % 5 == 0) ? 1.00f : 0.00f;
  }
}
// creation functions
// Build a local_world matrix: identity, then rotated by `rotation`, then
// translated by `translation`.
void create_local_world(MATRIX local_world, VECTOR translation,
                        VECTOR rotation) {
  matrix_unit(local_world);

  // Orientation first, position second.
  matrix_rotate(local_world, local_world, rotation);
  matrix_translate(local_world, local_world, translation);
}
// Build a local_light matrix: the identity rotated by `rotation`
// (no translation is applied).
void create_local_light(MATRIX local_light, VECTOR rotation) {
  matrix_unit(local_light);

  matrix_rotate(local_light, local_light, rotation);
}
// Build a world_view matrix from a translation and a rotation.
// The x/y/z components of both inputs are negated, then the matrix is built
// in the reverse order of create_local_world: translate first, rotate second.
void create_world_view(MATRIX world_view, VECTOR translation, VECTOR rotation) {
  // Negated x/y/z; the w component is carried over unchanged.
  VECTOR reverse_translation = {-translation[_X], -translation[_Y],
                                -translation[_Z], translation[_W]};
  VECTOR reverse_rotation = {-rotation[_X], -rotation[_Y], -rotation[_Z],
                             rotation[_W]};

  matrix_unit(world_view);
  matrix_translate(world_view, world_view, reverse_translation);
  matrix_rotate(world_view, world_view, reverse_rotation);
}
// Build a view_screen (projection) matrix from frustum extents.
//
// left/right are multiplied by `aspect` before use. With
//   l = left * aspect, r = right * aspect, b = bottom, t = top,
//   n = near, f = far
// the matrix written to view_screen is (rows are _1x.._4x):
//
//   2n/(r-l),      0,             0,          0,
//   0,             2n/(t-b),      0,          0,
//   -(r+l)/(r-l),  -(t+b)/(t-b),  -f/(f-n),  -1,
//   0,             0,             n*f/(f-n),  0
//
// This is the variant for xbox1 clipping, where a pixel is considered
// visible if:
//   -w < x < w
//   -w < y < w
//    0 < z < w
// That test is automatic and verified for each pixel before the pixel shader
// is called.
//
// The ps2 original this was ported from instead used
//   _31 = (r+l)/(r-l), _32 = (t+b)/(t-b),
//   _33 = (f+n)/(f-n), _43 = (2*f*n)/(f-n)
// which suits ps2 clipping (-w < z < w, done via 'clipw' and flag tests in
// vu1 code, allowing a whole triangle to be excluded).
void create_view_screen(MATRIX view_screen, float aspect, float left,
                        float right, float bottom, float top, float near,
                        float far) {
  // Apply the aspect ratio to the horizontal extents.
  left *= aspect;
  right *= aspect;

  matrix_unit(view_screen);

  // x and y scale.
  view_screen[_11] = (2 * near) / (right - left);
  view_screen[_22] = (2 * near) / (top - bottom);

  // Off-centre frustum terms.
  view_screen[_31] = -(right + left) / (right - left);
  view_screen[_32] = -(top + bottom) / (top - bottom);

  // Depth mapping and perspective divide terms.
  view_screen[_33] = -far / (far - near);
  view_screen[_34] = -1.00f;
  view_screen[_43] = near * far / (far - near);
  view_screen[_44] = 0.00f;
}
// Build a local_screen matrix by starting from the identity and multiplying
// in local_world, world_view and view_screen, in that order.
void create_local_screen(MATRIX local_screen, MATRIX local_world,
                         MATRIX world_view, MATRIX view_screen) {
  // Transform stages in the order they are applied.
  float *const stages[] = {local_world, world_view, view_screen};
  const unsigned int stage_count = sizeof(stages) / sizeof(stages[0]);

  matrix_unit(local_screen);
  for (unsigned int i = 0; i < stage_count; ++i) {
    matrix_multiply(local_screen, local_screen, stages[i]);
  }
}

View File

@ -1,37 +1,40 @@
//port of ooPo's ps2sdk math3d library
// port of ooPo's ps2sdk math3d library
#ifndef _MATH3D_H_
#define _MATH3D_H_
#ifdef __cplusplus
extern "C" {
#endif
// A 4-component float vector; components are addressed with _X, _Y, _Z, _W.
typedef float VECTOR[4];
// A 4x4 float matrix stored as a flat array of 16 elements; elements are
// addressed with the _11.._44 index macros.
typedef float MATRIX[16];
//vector indices
#define _X 0
#define _Y 1
#define _Z 2
#define _W 3
// vector indices
// Positions of the x, y, z and w components inside a VECTOR.
#define _X 0
#define _Y 1
#define _Z 2
#define _W 3
//4x4 matrices indices
#define _11 0
#define _12 1
#define _13 2
#define _14 3
#define _21 4
#define _22 5
#define _23 6
#define _24 7
#define _31 8
#define _32 9
#define _33 10
#define _34 11
#define _41 12
#define _42 13
#define _43 14
#define _44 15
// 4x4 matrices indices
// _RC is the flat-array index of the element in row R, column C (both
// 1-based) of a MATRIX: index = (R - 1) * 4 + (C - 1).
#define _11 0
#define _12 1
#define _13 2
#define _14 3
#define _21 4
#define _22 5
#define _23 6
#define _24 7
#define _31 8
#define _32 9
#define _33 10
#define _34 11
#define _41 12
#define _42 13
#define _43 14
#define _44 15
//vector functions
// vector functions
void vector_apply(VECTOR output, VECTOR input0, MATRIX input1);
// Multiply a vector by a matrix, returning a vector.
@ -49,12 +52,13 @@ void vector_multiply(VECTOR output, VECTOR input0, VECTOR input1);
// Multiply two vectors together.
void vector_normalize(VECTOR output, VECTOR input0);
// Normalize a vector by determining its length and dividing its values by this value.
// Normalize a vector by determining its length and dividing its values by this
// value.
void vector_outerproduct(VECTOR output, VECTOR input0, VECTOR input1);
// Calculate the outer product of two vectors.
//matrices functions
// matrices functions
void matrix_copy(MATRIX output, MATRIX input0);
// Copy a matrix.
@ -80,9 +84,10 @@ void matrix_transpose(MATRIX output, MATRIX input0);
void matrix_unit(MATRIX output);
// Create a unit matrix.
//creation functions
// creation functions
void create_local_world(MATRIX local_world, VECTOR translation, VECTOR rotation);
void create_local_world(MATRIX local_world, VECTOR translation,
VECTOR rotation);
// Create a local_world matrix given a translation and rotation.
// Commonly used to describe an object's position and orientation.
@ -94,13 +99,20 @@ void create_world_view(MATRIX world_view, VECTOR translation, VECTOR rotation);
// Create a world_view matrix given a translation and rotation.
// Commonly used to describe a camera's position and rotation.
void create_view_screen(MATRIX view_screen, float aspect, float left, float right, float bottom, float top, float near, float far);
void create_view_screen(MATRIX view_screen, float aspect, float left,
float right, float bottom, float top, float near,
float far);
// Create a view_screen matrix given an aspect and clipping plane values.
// Functionally similar to the opengl function: glFrustum()
void create_local_screen(MATRIX local_screen, MATRIX local_world, MATRIX world_view, MATRIX view_screen);
// Create a local_screen matrix given a local_world, world_view and view_screen matrix.
// Commonly used with vector_apply() to transform vertices for rendering.
void create_local_screen(MATRIX local_screen, MATRIX local_world,
MATRIX world_view, MATRIX view_screen);
// Create a local_screen matrix given a local_world, world_view and view_screen
// matrix. Commonly used with vector_apply() to transform vertices for
// rendering.
#ifdef __cplusplus
};
#endif
#endif

17
nxdk_missing_defines.h Normal file
View File

@ -0,0 +1,17 @@
#ifndef NXDK_ZBUFFER_TESTS_NXDK_MISSING_DEFINES_H
#define NXDK_ZBUFFER_TESTS_NXDK_MISSING_DEFINES_H
// Local definitions of NV2A constants that, per the TODO below, are expected
// to move upstream eventually.
// TODO: upstream missing nv2a defines
// Vertex attribute indices.
// NOTE(review): presumably the NV2A vertex attribute slot numbers - confirm.
#define NV2A_VERTEX_ATTR_POSITION 0
#define NV2A_VERTEX_ATTR_NORMAL 2
#define NV2A_VERTEX_ATTR_TEXTURE0 9
// Texture color format codes.
// NOTE(review): presumably values for the COLOR field of
// NV097_SET_TEXTURE_FORMAT - confirm against the nv2a register definitions.
#define NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_G8B8 0x17
#define NV097_SET_TEXTURE_FORMAT_COLOR_SZ_B8G8R8A8 0x3B
#define NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8 0x24
#define NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_YB8CR8YA8CB8 0x25
#define NV097_SET_TEXTURE_FORMAT_COLOR_D16 0x2C // TODO: proper nvidia name
#define NV097_SET_TEXTURE_FORMAT_COLOR_LIN_F16 0x31 // TODO: proper nvidia name
// NOTE(review): 0xF0000000 looks like a bit mask selecting the color space
// conversion field of NV097_SET_CONTROL0, and 0x1 a value for that field -
// confirm before relying on it.
#define NV097_SET_CONTROL0_COLOR_SPACE_CONVERT 0xF0000000
#define NV097_SET_CONTROL0_COLOR_SPACE_CONVERT_CRYCB_TO_RGB 0x1
#endif // NXDK_ZBUFFER_TESTS_NXDK_MISSING_DEFINES_H

View File

@ -36,3 +36,5 @@ struct Vertex vertices[] = {
{ {-0.500000, 0.500000, -0.500000}, {0.000000, 0.000000}, {-1.000000, 0.000000, -0.000000} },
{ {-0.500000, -0.500000, -0.500000}, {0.000000, 1.000000}, {-1.000000, 0.000000, -0.000000} }
};
// Number of elements in the vertices[] array, computed at compile time.
static constexpr uint32_t kNumVertices = sizeof(vertices) / sizeof(vertices[0]);

163
swizzle.c
View File

@ -1,163 +0,0 @@
/*
* QEMU texture swizzling routines
*
* Copyright (c) 2015 Jannik Vogel
* Copyright (c) 2013 espes
* Copyright (c) 2007-2010 The Nouveau Project.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include <stdint.h>
#include <stdbool.h>
#include <string.h>
#include <assert.h>
#include "swizzle.h"
/* Build the per-axis bit masks used to interleave x, y and z coordinates
 * into a swizzled texel index.
 *
 * It creates a bit pattern like ..zyxzyxzyx from ..xxx, ..yyy and ..zzz
 * If there are no bits left from any component it will pack the other masks
 * more tightly (Example: zzxzxzyx = Fewer x than z and even fewer y)
 *
 * width/height/depth: box dimensions in texels.
 * mask_x/mask_y/mask_z: receive the bit positions owned by each axis. The
 * three masks are disjoint and together cover the low bits contiguously.
 */
static void generate_swizzle_masks(unsigned int width,
                                   unsigned int height,
                                   unsigned int depth,
                                   uint32_t* mask_x,
                                   uint32_t* mask_y,
                                   uint32_t* mask_z)
{
    uint32_t x = 0, y = 0, z = 0;
    uint32_t bit = 1;
    uint32_t mask_bit = 1;
    bool done;
    do {
        done = true;
        /* Each axis that still needs coordinate bit `bit` claims the next
         * free position in the interleaved index. */
        if (bit < width) { x |= mask_bit; mask_bit <<= 1; done = false; }
        if (bit < height) { y |= mask_bit; mask_bit <<= 1; done = false; }
        if (bit < depth) { z |= mask_bit; mask_bit <<= 1; done = false; }
        bit <<= 1;
    } while(!done);
    /* The masks must partition the bits below mask_bit. The parentheses
     * matter: == binds tighter than ^, so without them the check would
     * compare only z against the expected value. */
    assert((x ^ y ^ z) == (mask_bit - 1));
    *mask_x = x;
    *mask_y = y;
    *mask_z = z;
}
/* Scatter the low bits of `value` into the set-bit positions of `pattern`.
 *
 * The least significant bit of value goes to the lowest set bit of pattern,
 * the next bit to the next set bit, and so on. If your value has bits abcd
 * and your pattern is 11010100100 this will return: 0a0b0c00d00
 *
 * Bits of value that do not fit (value has more significant bits than
 * pattern has set bits) are dropped.
 */
static uint32_t fill_pattern(uint32_t pattern, uint32_t value)
{
    uint32_t result = 0;
    uint32_t bit = 1;
    /* `bit` becomes 0 once it has been shifted past bit 31; stopping there
     * prevents an endless loop when value cannot be fully placed. */
    while(value && bit) {
        if (pattern & bit) {
            /* Copy bit to result */
            if (value & 1) {
                result |= bit;
            }
            value >>= 1;
        }
        bit <<= 1;
    }
    return result;
}
/* Byte offset of texel (x, y, z) inside a swizzled buffer: each coordinate
 * is scattered into the bit positions of its mask, the three results are
 * OR-ed into one texel index, and the index is scaled by the texel size.
 */
static unsigned int get_swizzled_offset(
    unsigned int x, unsigned int y, unsigned int z,
    uint32_t mask_x, uint32_t mask_y, uint32_t mask_z,
    unsigned int bytes_per_pixel)
{
    const uint32_t part_x = fill_pattern(mask_x, x);
    const uint32_t part_y = fill_pattern(mask_y, y);
    const uint32_t part_z = fill_pattern(mask_z, z);
    const uint32_t texel_index = part_x | part_y | part_z;

    return bytes_per_pixel * texel_index;
}
/* Copy a linear 3D box of texels into swizzled order.
 *
 * src_buf: linear source; rows are row_pitch bytes apart and slices are
 *          slice_pitch bytes apart.
 * width/height/depth: box dimensions in texels.
 * dst_buf: swizzled destination.
 * bytes_per_pixel: size of one texel in bytes.
 *
 * The z coordinate and mask_z take part in the destination offset, mirroring
 * unswizzle_box; for depth == 1 both are zero, so 2D results are unaffected.
 */
void swizzle_box(
    const uint8_t *src_buf,
    unsigned int width,
    unsigned int height,
    unsigned int depth,
    uint8_t *dst_buf,
    unsigned int row_pitch,
    unsigned int slice_pitch,
    unsigned int bytes_per_pixel)
{
    uint32_t mask_x, mask_y, mask_z;
    generate_swizzle_masks(width, height, depth, &mask_x, &mask_y, &mask_z);

    unsigned int x, y, z;
    for (z = 0; z < depth; z++) {
        for (y = 0; y < height; y++) {
            for (x = 0; x < width; x++) {
                const uint8_t *src = src_buf
                                     + y * row_pitch + x * bytes_per_pixel;
                /* Every slice needs its own destination, so z must be part
                 * of the swizzled offset. */
                uint8_t *dst = dst_buf + get_swizzled_offset(x, y, z,
                                                             mask_x, mask_y,
                                                             mask_z,
                                                             bytes_per_pixel);
                memcpy(dst, src, bytes_per_pixel);
            }
        }
        /* Step to the next linear source slice. */
        src_buf += slice_pitch;
    }
}
/* Copy a swizzled 3D box of texels into linear order.
 *
 * src_buf: swizzled source.
 * width/height/depth: box dimensions in texels.
 * dst_buf: linear destination; rows are row_pitch bytes apart and slices are
 *          slice_pitch bytes apart.
 * bytes_per_pixel: size of one texel in bytes.
 */
void unswizzle_box(
    const uint8_t *src_buf,
    unsigned int width,
    unsigned int height,
    unsigned int depth,
    uint8_t *dst_buf,
    unsigned int row_pitch,
    unsigned int slice_pitch,
    unsigned int bytes_per_pixel)
{
    uint32_t mask_x, mask_y, mask_z;
    generate_swizzle_masks(width, height, depth, &mask_x, &mask_y, &mask_z);

    uint8_t *slice = dst_buf;
    for (unsigned int z = 0; z < depth; z++, slice += slice_pitch) {
        for (unsigned int y = 0; y < height; y++) {
            uint8_t *row = slice + y * row_pitch;
            for (unsigned int x = 0; x < width; x++) {
                const unsigned int swizzled =
                    get_swizzled_offset(x, y, z, mask_x, mask_y, mask_z,
                                        bytes_per_pixel);
                memcpy(row + x * bytes_per_pixel, src_buf + swizzled,
                       bytes_per_pixel);
            }
        }
    }
}
/* Unswizzle a 2D rectangle: a box with a single slice, so the slice pitch is
 * irrelevant and passed as 0.
 */
void unswizzle_rect(
    const uint8_t *src_buf,
    unsigned int width,
    unsigned int height,
    uint8_t *dst_buf,
    unsigned int pitch,
    unsigned int bytes_per_pixel)
{
    const unsigned int single_slice = 1;
    const unsigned int no_slice_pitch = 0;

    unswizzle_box(src_buf, width, height, single_slice, dst_buf, pitch,
                  no_slice_pitch, bytes_per_pixel);
}
/* Swizzle a 2D rectangle: a box with a single slice, so the slice pitch is
 * irrelevant and passed as 0.
 */
void swizzle_rect(
    const uint8_t *src_buf,
    unsigned int width,
    unsigned int height,
    uint8_t *dst_buf,
    unsigned int pitch,
    unsigned int bytes_per_pixel)
{
    const unsigned int single_slice = 1;
    const unsigned int no_slice_pitch = 0;

    swizzle_box(src_buf, width, height, single_slice, dst_buf, pitch,
                no_slice_pitch, bytes_per_pixel);
}

View File

@ -1,60 +0,0 @@
/*
* QEMU texture swizzling routines
*
* Copyright (c) 2015 Jannik Vogel
* Copyright (c) 2013 espes
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef HW_XBOX_SWIZZLE_H
#define HW_XBOX_SWIZZLE_H
void swizzle_box(
const uint8_t *src_buf,
unsigned int width,
unsigned int height,
unsigned int depth,
uint8_t *dst_buf,
unsigned int row_pitch,
unsigned int slice_pitch,
unsigned int bytes_per_pixel);
void unswizzle_box(
const uint8_t *src_buf,
unsigned int width,
unsigned int height,
unsigned int depth,
uint8_t *dst_buf,
unsigned int row_pitch,
unsigned int slice_pitch,
unsigned int bytes_per_pixel);
void unswizzle_rect(
const uint8_t *src_buf,
unsigned int width,
unsigned int height,
uint8_t *dst_buf,
unsigned int pitch,
unsigned int bytes_per_pixel);
void swizzle_rect(
const uint8_t *src_buf,
unsigned int width,
unsigned int height,
uint8_t *dst_buf,
unsigned int pitch,
unsigned int bytes_per_pixel);
#endif

144
third_party/swizzle.c vendored Normal file
View File

@ -0,0 +1,144 @@
/*
* QEMU texture swizzling routines
*
* Copyright (c) 2015 Jannik Vogel
* Copyright (c) 2013 espes
* Copyright (c) 2007-2010 The Nouveau Project.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "swizzle.h"
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <string.h>
/* Build the per-axis bit masks used to interleave x, y and z coordinates
 * into a swizzled texel index.
 *
 * It creates a bit pattern like ..zyxzyxzyx from ..xxx, ..yyy and ..zzz
 * If there are no bits left from any component it will pack the other masks
 * more tightly (Example: zzxzxzyx = Fewer x than z and even fewer y)
 *
 * width/height/depth: box dimensions in texels.
 * mask_x/mask_y/mask_z: receive the bit positions owned by each axis. The
 * three masks are disjoint and together cover the low bits contiguously.
 */
static void generate_swizzle_masks(unsigned int width, unsigned int height,
                                   unsigned int depth, uint32_t *mask_x,
                                   uint32_t *mask_y, uint32_t *mask_z) {
  uint32_t x = 0, y = 0, z = 0;
  uint32_t bit = 1;
  uint32_t mask_bit = 1;
  bool done;
  do {
    done = true;
    /* Each axis that still needs coordinate bit `bit` claims the next free
     * position in the interleaved index. */
    if (bit < width) {
      x |= mask_bit;
      mask_bit <<= 1;
      done = false;
    }
    if (bit < height) {
      y |= mask_bit;
      mask_bit <<= 1;
      done = false;
    }
    if (bit < depth) {
      z |= mask_bit;
      mask_bit <<= 1;
      done = false;
    }
    bit <<= 1;
  } while (!done);
  /* The masks must partition the bits below mask_bit. The parentheses
   * matter: == binds tighter than ^, so without them the check would compare
   * only z against the expected value. */
  assert((x ^ y ^ z) == (mask_bit - 1));
  *mask_x = x;
  *mask_y = y;
  *mask_z = z;
}
/* Scatter the low bits of `value` into the set-bit positions of `pattern`.
 *
 * The least significant bit of value goes to the lowest set bit of pattern,
 * the next bit to the next set bit, and so on. If your value has bits abcd
 * and your pattern is 11010100100 this will return: 0a0b0c00d00
 *
 * Bits of value that do not fit (value has more significant bits than
 * pattern has set bits) are dropped.
 */
static uint32_t fill_pattern(uint32_t pattern, uint32_t value) {
  uint32_t result = 0;
  uint32_t bit = 1;
  /* `bit` becomes 0 once it has been shifted past bit 31; stopping there
   * prevents an endless loop when value cannot be fully placed. */
  while (value && bit) {
    if (pattern & bit) {
      /* Copy bit to result */
      if (value & 1) {
        result |= bit;
      }
      value >>= 1;
    }
    bit <<= 1;
  }
  return result;
}
/* Byte offset of texel (x, y, z) inside a swizzled buffer: each coordinate
 * is scattered into the bit positions of its mask, the three results are
 * OR-ed into one texel index, and the index is scaled by the texel size.
 */
static unsigned int get_swizzled_offset(unsigned int x, unsigned int y,
                                        unsigned int z, uint32_t mask_x,
                                        uint32_t mask_y, uint32_t mask_z,
                                        unsigned int bytes_per_pixel) {
  const uint32_t part_x = fill_pattern(mask_x, x);
  const uint32_t part_y = fill_pattern(mask_y, y);
  const uint32_t part_z = fill_pattern(mask_z, z);
  const uint32_t texel_index = part_x | part_y | part_z;

  return bytes_per_pixel * texel_index;
}
/* Copy a linear 3D box of texels into swizzled order.
 *
 * src_buf: linear source; rows are row_pitch bytes apart and slices are
 *          slice_pitch bytes apart.
 * width/height/depth: box dimensions in texels.
 * dst_buf: swizzled destination.
 * bytes_per_pixel: size of one texel in bytes.
 *
 * The z coordinate and mask_z take part in the destination offset, mirroring
 * unswizzle_box; for depth == 1 both are zero, so 2D results are unaffected.
 */
void swizzle_box(const uint8_t *src_buf, unsigned int width,
                 unsigned int height, unsigned int depth, uint8_t *dst_buf,
                 unsigned int row_pitch, unsigned int slice_pitch,
                 unsigned int bytes_per_pixel) {
  uint32_t mask_x, mask_y, mask_z;
  generate_swizzle_masks(width, height, depth, &mask_x, &mask_y, &mask_z);

  unsigned int x, y, z;
  for (z = 0; z < depth; z++) {
    for (y = 0; y < height; y++) {
      for (x = 0; x < width; x++) {
        const uint8_t *src = src_buf + y * row_pitch + x * bytes_per_pixel;
        /* Every slice needs its own destination, so z must be part of the
         * swizzled offset. */
        uint8_t *dst = dst_buf + get_swizzled_offset(x, y, z, mask_x, mask_y,
                                                     mask_z, bytes_per_pixel);
        memcpy(dst, src, bytes_per_pixel);
      }
    }
    /* Step to the next linear source slice. */
    src_buf += slice_pitch;
  }
}
/* Copy a swizzled 3D box of texels into linear order.
 *
 * src_buf: swizzled source.
 * width/height/depth: box dimensions in texels.
 * dst_buf: linear destination; rows are row_pitch bytes apart and slices are
 *          slice_pitch bytes apart.
 * bytes_per_pixel: size of one texel in bytes.
 */
void unswizzle_box(const uint8_t *src_buf, unsigned int width,
                   unsigned int height, unsigned int depth, uint8_t *dst_buf,
                   unsigned int row_pitch, unsigned int slice_pitch,
                   unsigned int bytes_per_pixel) {
  uint32_t mask_x, mask_y, mask_z;
  generate_swizzle_masks(width, height, depth, &mask_x, &mask_y, &mask_z);

  uint8_t *slice = dst_buf;
  for (unsigned int z = 0; z < depth; z++, slice += slice_pitch) {
    for (unsigned int y = 0; y < height; y++) {
      uint8_t *row = slice + y * row_pitch;
      for (unsigned int x = 0; x < width; x++) {
        const unsigned int swizzled = get_swizzled_offset(
            x, y, z, mask_x, mask_y, mask_z, bytes_per_pixel);
        memcpy(row + x * bytes_per_pixel, src_buf + swizzled,
               bytes_per_pixel);
      }
    }
  }
}
/* Unswizzle a 2D rectangle: a box with a single slice, so the slice pitch is
 * irrelevant and passed as 0.
 */
void unswizzle_rect(const uint8_t *src_buf, unsigned int width,
                    unsigned int height, uint8_t *dst_buf, unsigned int pitch,
                    unsigned int bytes_per_pixel) {
  const unsigned int single_slice = 1;
  const unsigned int no_slice_pitch = 0;

  unswizzle_box(src_buf, width, height, single_slice, dst_buf, pitch,
                no_slice_pitch, bytes_per_pixel);
}
/* Swizzle a 2D rectangle: a box with a single slice, so the slice pitch is
 * irrelevant and passed as 0.
 */
void swizzle_rect(const uint8_t *src_buf, unsigned int width,
                  unsigned int height, uint8_t *dst_buf, unsigned int pitch,
                  unsigned int bytes_per_pixel) {
  const unsigned int single_slice = 1;
  const unsigned int no_slice_pitch = 0;

  swizzle_box(src_buf, width, height, single_slice, dst_buf, pitch,
              no_slice_pitch, bytes_per_pixel);
}

52
third_party/swizzle.h vendored Normal file
View File

@ -0,0 +1,52 @@
/*
* QEMU texture swizzling routines
*
* Copyright (c) 2015 Jannik Vogel
* Copyright (c) 2013 espes
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef HW_XBOX_SWIZZLE_H
#define HW_XBOX_SWIZZLE_H
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
void swizzle_box(const uint8_t *src_buf, unsigned int width,
unsigned int height, unsigned int depth, uint8_t *dst_buf,
unsigned int row_pitch, unsigned int slice_pitch,
unsigned int bytes_per_pixel);
void unswizzle_box(const uint8_t *src_buf, unsigned int width,
unsigned int height, unsigned int depth, uint8_t *dst_buf,
unsigned int row_pitch, unsigned int slice_pitch,
unsigned int bytes_per_pixel);
void unswizzle_rect(const uint8_t *src_buf, unsigned int width,
unsigned int height, uint8_t *dst_buf, unsigned int pitch,
unsigned int bytes_per_pixel);
void swizzle_rect(const uint8_t *src_buf, unsigned int width,
unsigned int height, uint8_t *dst_buf, unsigned int pitch,
unsigned int bytes_per_pixel);
#ifdef __cplusplus
};
#endif
#endif