diff --git a/common/texture/texture_conversion.h b/common/texture/texture_conversion.h index 5e9159d58..cdcb62f2b 100644 --- a/common/texture/texture_conversion.h +++ b/common/texture/texture_conversion.h @@ -59,7 +59,8 @@ inline u32 psmt8_addr(u32 x, u32 y, u32 width) { // column is 16, 4 // first determine the page - u32 pages_per_row = width / 128; + // Note: not actually sure what the GS does here... + u32 pages_per_row = std::max(1u, width / 128); u32 page_col = x / 128; u32 page_row = y / 64; u32 page_x = x % 128; diff --git a/game/graphics/opengl_renderer/OpenGLRenderer.cpp b/game/graphics/opengl_renderer/OpenGLRenderer.cpp index 805d431fe..db82a53e4 100644 --- a/game/graphics/opengl_renderer/OpenGLRenderer.cpp +++ b/game/graphics/opengl_renderer/OpenGLRenderer.cpp @@ -818,6 +818,11 @@ void OpenGLRenderer::draw_renderer_selection_window() { ImGui::Checkbox("Occlusion Cull", &m_render_state.use_occlusion_culling); ImGui::Checkbox("Blackout Loads", &m_enable_fast_blackout_loads); + if (m_texture_animator && ImGui::TreeNode("Texture Animator")) { + m_texture_animator->draw_debug_window(); + ImGui::TreePop(); + } + for (size_t i = 0; i < m_bucket_renderers.size(); i++) { auto renderer = m_bucket_renderers[i].get(); if (renderer && !renderer->empty()) { diff --git a/game/graphics/opengl_renderer/TextureAnimator.cpp b/game/graphics/opengl_renderer/TextureAnimator.cpp index 1bc487435..3cf926962 100644 --- a/game/graphics/opengl_renderer/TextureAnimator.cpp +++ b/game/graphics/opengl_renderer/TextureAnimator.cpp @@ -7,6 +7,8 @@ #include "game/graphics/texture/TexturePool.h" +#include "third-party/imgui/imgui.h" + //#define dprintf(...) printf(__VA_ARGS__) //#define dfmt(...) fmt::print(__VA_ARGS__) #define dprintf(...) 
@@ -193,7 +195,11 @@ GLuint ClutBlender::run(const float* weights) { } TextureAnimator::TextureAnimator(ShaderLibrary& shaders, const tfrag3::Level* common_level) - : m_common_level(common_level) { + : m_common_level(common_level), + m_psm32_to_psm8_8_8(8, 8, 8, 64), + m_psm32_to_psm8_16_16(16, 16, 16, 64), + m_psm32_to_psm8_32_32(32, 32, 16, 64), + m_psm32_to_psm8_64_64(64, 64, 64, 64) { glGenVertexArrays(1, &m_vao); glGenBuffers(1, &m_vertex_buffer); glBindVertexArray(m_vao); @@ -297,6 +303,10 @@ TextureAnimator::TextureAnimator(ShaderLibrary& shaders, const tfrag3::Level* co "-start", "-end", {}); } +void TextureAnimator::draw_debug_window() { + ImGui::Checkbox("fast-scrambler", &m_debug.use_fast_scrambler); +} + int TextureAnimator::create_clut_blender_group(const std::vector& textures, const std::string& suffix0, const std::string& suffix1, @@ -1131,30 +1141,75 @@ GLuint TextureAnimator::make_or_get_gpu_texture_for_current_shader(TexturePool& switch (m_current_shader.tex0.psm()) { // reading as a different format, needs scrambler. 
case GsTex0::PSM::PSMT8: { + auto p = scoped_prof("scrambler"); int w = 1 << m_current_shader.tex0.tw(); int h = 1 << m_current_shader.tex0.th(); ASSERT(w == vram_entry->tex_width * 2); ASSERT(h == vram_entry->tex_height * 2); + ASSERT(m_current_shader.tex0.tbw() == 1); + std::vector rgba_data(w * h); - Timer timer; - m_converter.upload_width(vram_entry->data.data(), m_current_shader.tex0.tbp0(), - vram_entry->tex_width, vram_entry->tex_height); - - // also needs clut lookup - load_clut_to_converter(); - { - std::vector rgba_data(w * h); - m_converter.download_rgba8888( - (u8*)rgba_data.data(), m_current_shader.tex0.tbp0(), m_current_shader.tex0.tbw(), w, - h, (int)m_current_shader.tex0.psm(), (int)m_current_shader.tex0.cpsm(), - m_current_shader.tex0.cbp(), rgba_data.size() * 4); - // file_util::write_rgba_png("out.png", rgba_data.data(), 1 << - // m_current_shader.tex0.tw(), - // 1 << m_current_shader.tex0.th()); - dprintf("processing %d x %d took %.3f ms\n", w, h, timer.getMs()); - return make_temp_gpu_texture(rgba_data.data(), w, h); + const auto& clut_lookup = m_textures.find(m_current_shader.tex0.cbp()); + if (clut_lookup == m_textures.end()) { + printf("set shader referenced an unknown clut texture in %d\n", + m_current_shader.tex0.cbp()); + ASSERT_NOT_REACHED(); } + switch (clut_lookup->second.kind) { + case VramEntry::Kind::CLUT16_16_IN_PSM32: + break; + default: + printf("unhandled clut source kind: %d\n", (int)clut_lookup->second.kind); + ASSERT_NOT_REACHED(); + } + + const u32* clut_u32s = (const u32*)clut_lookup->second.data.data(); + + if (w == 8 && h == 8 && m_debug.use_fast_scrambler) { + ASSERT_NOT_REACHED(); + } else if (w == 16 && h == 16) { + for (int i = 0; i < 16 * 16; i++) { + memcpy(&rgba_data[m_psm32_to_psm8_8_8.destinations_per_byte[i]], + &clut_u32s[m_clut_table.addrs[vram_entry->data[i]]], 4); + } + } else if (w == 32 && h == 32 && m_debug.use_fast_scrambler) { + for (int i = 0; i < 32 * 32; i++) { + 
rgba_data[m_psm32_to_psm8_16_16.destinations_per_byte[i]] = + clut_u32s[m_clut_table.addrs[vram_entry->data[i]]]; + } + } else if (w == 64 && h == 64 && m_debug.use_fast_scrambler) { + for (int i = 0; i < 64 * 64; i++) { + rgba_data[m_psm32_to_psm8_32_32.destinations_per_byte[i]] = + clut_u32s[m_clut_table.addrs[vram_entry->data[i]]]; + } + } else if (w == 128 && h == 128 && m_debug.use_fast_scrambler) { + for (int i = 0; i < 128 * 128; i++) { + rgba_data[m_psm32_to_psm8_64_64.destinations_per_byte[i]] = + clut_u32s[m_clut_table.addrs[vram_entry->data[i]]]; + } + } else { + Timer timer; + m_converter.upload_width(vram_entry->data.data(), m_current_shader.tex0.tbp0(), + vram_entry->tex_width, vram_entry->tex_height); + + // also needs clut lookup + load_clut_to_converter(); + { + m_converter.download_rgba8888( + (u8*)rgba_data.data(), m_current_shader.tex0.tbp0(), m_current_shader.tex0.tbw(), + w, h, (int)m_current_shader.tex0.psm(), (int)m_current_shader.tex0.cpsm(), + m_current_shader.tex0.cbp(), rgba_data.size() * 4); + // file_util::write_rgba_png("out.png", rgba_data.data(), 1 << + // m_current_shader.tex0.tw(), + // 1 << m_current_shader.tex0.th()); + printf("Scrambler took the slow path %d x %d took %.3f ms\n", w, h, timer.getMs()); + } + } + auto ret = make_temp_gpu_texture(rgba_data.data(), w, h); + // debug_save_opengl_texture(fmt::format("tex_{}.png", w), ret); + return ret; + ASSERT_NOT_REACHED(); } break; default: @@ -1165,26 +1220,6 @@ GLuint TextureAnimator::make_or_get_gpu_texture_for_current_shader(TexturePool& case VramEntry::Kind::CLUT16_16_IN_PSM32: ASSERT_NOT_REACHED(); - /* - case VramEntry::Kind::GENERIC_PSMT8: { - fmt::print("drawing: {}\n", (int)m_current_shader.tex0.psm()); - ASSERT(m_current_shader.tex0.psm() == GsTex0::PSM::PSMT8); - ASSERT(m_current_shader.tex0.cpsm() == 0); // psm32. 
- int tw = 1 << m_current_shader.tex0.tw(); - int th = 1 << m_current_shader.tex0.th(); - ASSERT(tw == vram_entry->tex_width); - ASSERT(th == vram_entry->tex_height); - std::vector rgba_data(tw * th); - const u32* clut = get_current_clut_16_16_psm32(); - for (int r = 0; r < th; r++) { - for (int c = 0; c < tw; c++) { - rgba_data[c + r * tw] = clut[vram_entry->data[c + r * tw]]; - } - } - return make_temp_gpu_texture(rgba_data.data(), tw, th); - } - */ - break; default: ASSERT_NOT_REACHED(); diff --git a/game/graphics/opengl_renderer/TextureAnimator.h b/game/graphics/opengl_renderer/TextureAnimator.h index 50b21a48d..24af3bde7 100644 --- a/game/graphics/opengl_renderer/TextureAnimator.h +++ b/game/graphics/opengl_renderer/TextureAnimator.h @@ -9,6 +9,7 @@ #include "common/dma/dma_chain_read.h" #include "common/dma/gs.h" #include "common/math/Vector.h" +#include "common/texture/texture_conversion.h" #include "game/graphics/opengl_renderer/Shader.h" #include "game/graphics/opengl_renderer/opengl_utils.h" @@ -80,6 +81,79 @@ class ClutBlender { std::vector m_temp_rgba; }; +struct Psm32ToPsm8Scrambler { + Psm32ToPsm8Scrambler(int w, int h, int write_tex_width, int read_tex_width) { + struct InAddr { + int x = -1, y = -1, c = -1; + }; + struct OutAddr { + int x = -1, y = -1; + }; + + std::vector vram_from_in(w * h * 4); + std::vector vram_from_out(w * h * 4); + + // loop over pixels in input + for (int y = 0; y < h; y++) { + for (int x = 0; x < w; x++) { + int byte_addr = psmct32_addr(x, y, write_tex_width); + for (int c = 0; c < 4; c++) { + auto& s = vram_from_in.at(byte_addr + c); + s.x = x; + s.y = y; + s.c = c; + } + } + } + + // output + for (int y = 0; y < h * 2; y++) { + for (int x = 0; x < w * 2; x++) { + int byte_addr = psmt8_addr(x, y, read_tex_width); + auto& s = vram_from_out.at(byte_addr); + s.x = x; + s.y = y; + } + } + + destinations_per_byte.resize(4 * w * h); + for (size_t i = 0; i < vram_from_out.size(); i++) { + auto& in = vram_from_in.at(i); + auto& 
out = vram_from_out.at(i); + if (in.c >= 0) { + destinations_per_byte.at(in.c + in.x * 4 + in.y * 4 * w) = out.x + out.y * w * 2; + } + } + } + + std::vector destinations_per_byte; +}; + +struct ClutReader { + std::array addrs; + ClutReader() { + for (int i = 0; i < 256; i++) { + u32 clut_chunk = i / 16; + u32 off_in_chunk = i % 16; + u8 clx = 0, cly = 0; + if (clut_chunk & 1) { + clx = 8; + } + cly = (clut_chunk >> 1) * 2; + if (off_in_chunk >= 8) { + off_in_chunk -= 8; + cly++; + } + clx += off_in_chunk; + + // the x, y CLUT value is looked up in PSMCT32 mode + u32 clut_addr = clx + cly * 16; + ASSERT(clut_addr < 256); + addrs[i] = clut_addr; + } + } +}; + class TexturePool; class TextureAnimator { @@ -88,6 +162,7 @@ class TextureAnimator { ~TextureAnimator(); void handle_texture_anim_data(DmaFollower& dma, const u8* ee_mem, TexturePool* texture_pool); GLuint get_by_slot(int idx); + void draw_debug_window(); const std::vector* slots() { return &m_output_slots; } private: @@ -170,6 +245,10 @@ class TextureAnimator { GLuint tcc; } m_uniforms; + struct { + bool use_fast_scrambler = true; + } m_debug; + GLuint m_shader_id; GLuint m_dummy_texture; @@ -201,15 +280,7 @@ class TextureAnimator { const std::optional& dgo); void run_clut_blender_group(DmaTransfer& tf, int idx); - // std::vector m_darkjak_blenders; - // std::vector m_darkjak_output_slots; - // - // std::vector m_jakb_prison_blenders; - // std::vector m_jakb_prison_output_slots; - // - // std::vector m_jakb_oracle_blenders; - // std::vector m_jakb_oracle_slots; - // - // std::vector m_jakb_nest_blenders; - // std::vector m_jakb_nest_slots; + Psm32ToPsm8Scrambler m_psm32_to_psm8_8_8, m_psm32_to_psm8_16_16, m_psm32_to_psm8_32_32, + m_psm32_to_psm8_64_64; + ClutReader m_clut_table; }; diff --git a/goal_src/jak2/engine/common_objs/collectables.gc b/goal_src/jak2/engine/common_objs/collectables.gc index bd887b57b..b18d6bd6e 100644 --- a/goal_src/jak2/engine/common_objs/collectables.gc +++ 
b/goal_src/jak2/engine/common_objs/collectables.gc @@ -2745,3 +2745,14 @@ This commonly includes things such as: :size-assert #x80 :flag-assert #xf00000080 ) + + +(defmacro spawn-gem-near-target! (&key (count 5)) + `(dotimes (i ,count) + (birth-pickup-at-point + (vector+! (new 'stack 'vector) (target-pos 0) (new 'static 'vector :y (meters 2.0))) + (pickup-type gem) + 1.0 + #t + *entity-pool* + (the fact-info #f)))) \ No newline at end of file diff --git a/goal_src/jak2/engine/gfx/texture/texture-anim.gc b/goal_src/jak2/engine/gfx/texture/texture-anim.gc index 6e0493383..61390715d 100644 --- a/goal_src/jak2/engine/gfx/texture/texture-anim.gc +++ b/goal_src/jak2/engine/gfx/texture/texture-anim.gc @@ -583,6 +583,13 @@ (cond ((or (= anim-array *sky-texture-anim-array*) ) + + (when (= bucket (bucket-id tex-lcom-sky-post)) + ;; skip. I believe this is only used to generate the envmap texture for the ocean. + ;; it generates the exact same thing, so if we want this on PC one day, we can just + ;; steal it from the beginning of the frame. + (return #f) + ) ;; for sky, we basically emulate the full thing ;; (format *stdcon* "doing sky to bucket ~d~%" bucket) )