mirror of
https://github.com/open-goal/jak-project.git
synced 2024-11-23 06:09:57 +00:00
[jak2] speed up the sky texture animation (#2829)
This saved about 1.6 ms per frame in the city for me (~1.3 ms saved by not doing the sky twice, ~0.3 ms saved by the format lookup tables). The big texture animator takes about 1.0 ms. ![image](https://github.com/open-goal/jak-project/assets/48171810/c7bc7743-308c-4425-ad14-118e2d483fad)
This commit is contained in:
parent
6f244b11ef
commit
d80b1b8119
@ -59,7 +59,8 @@ inline u32 psmt8_addr(u32 x, u32 y, u32 width) {
|
||||
// column is 16, 4
|
||||
|
||||
// first determine the page
|
||||
u32 pages_per_row = width / 128;
|
||||
// Note: not actually sure what the GS does here...
|
||||
u32 pages_per_row = std::max(1u, width / 128);
|
||||
u32 page_col = x / 128;
|
||||
u32 page_row = y / 64;
|
||||
u32 page_x = x % 128;
|
||||
|
@ -818,6 +818,11 @@ void OpenGLRenderer::draw_renderer_selection_window() {
|
||||
ImGui::Checkbox("Occlusion Cull", &m_render_state.use_occlusion_culling);
|
||||
ImGui::Checkbox("Blackout Loads", &m_enable_fast_blackout_loads);
|
||||
|
||||
if (m_texture_animator && ImGui::TreeNode("Texture Animator")) {
|
||||
m_texture_animator->draw_debug_window();
|
||||
ImGui::TreePop();
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < m_bucket_renderers.size(); i++) {
|
||||
auto renderer = m_bucket_renderers[i].get();
|
||||
if (renderer && !renderer->empty()) {
|
||||
|
@ -7,6 +7,8 @@
|
||||
|
||||
#include "game/graphics/texture/TexturePool.h"
|
||||
|
||||
#include "third-party/imgui/imgui.h"
|
||||
|
||||
//#define dprintf(...) printf(__VA_ARGS__)
|
||||
//#define dfmt(...) fmt::print(__VA_ARGS__)
|
||||
#define dprintf(...)
|
||||
@ -193,7 +195,11 @@ GLuint ClutBlender::run(const float* weights) {
|
||||
}
|
||||
|
||||
TextureAnimator::TextureAnimator(ShaderLibrary& shaders, const tfrag3::Level* common_level)
|
||||
: m_common_level(common_level) {
|
||||
: m_common_level(common_level),
|
||||
m_psm32_to_psm8_8_8(8, 8, 8, 64),
|
||||
m_psm32_to_psm8_16_16(16, 16, 16, 64),
|
||||
m_psm32_to_psm8_32_32(32, 32, 16, 64),
|
||||
m_psm32_to_psm8_64_64(64, 64, 64, 64) {
|
||||
glGenVertexArrays(1, &m_vao);
|
||||
glGenBuffers(1, &m_vertex_buffer);
|
||||
glBindVertexArray(m_vao);
|
||||
@ -297,6 +303,10 @@ TextureAnimator::TextureAnimator(ShaderLibrary& shaders, const tfrag3::Level* co
|
||||
"-start", "-end", {});
|
||||
}
|
||||
|
||||
/*!
 * Emit the texture animator's debug widgets into the current ImGui context.
 * Currently this is a single "fast-scrambler" checkbox that toggles
 * m_debug.use_fast_scrambler. No window or tree node is opened here; the caller
 * is expected to have set one up.
 */
void TextureAnimator::draw_debug_window() {
  bool* fast_scrambler_toggle = &m_debug.use_fast_scrambler;
  ImGui::Checkbox("fast-scrambler", fast_scrambler_toggle);
}
|
||||
|
||||
int TextureAnimator::create_clut_blender_group(const std::vector<std::string>& textures,
|
||||
const std::string& suffix0,
|
||||
const std::string& suffix1,
|
||||
@ -1131,30 +1141,75 @@ GLuint TextureAnimator::make_or_get_gpu_texture_for_current_shader(TexturePool&
|
||||
switch (m_current_shader.tex0.psm()) {
|
||||
// reading as a different format, needs scrambler.
|
||||
case GsTex0::PSM::PSMT8: {
|
||||
auto p = scoped_prof("scrambler");
|
||||
int w = 1 << m_current_shader.tex0.tw();
|
||||
int h = 1 << m_current_shader.tex0.th();
|
||||
ASSERT(w == vram_entry->tex_width * 2);
|
||||
ASSERT(h == vram_entry->tex_height * 2);
|
||||
ASSERT(m_current_shader.tex0.tbw() == 1);
|
||||
std::vector<u32> rgba_data(w * h);
|
||||
|
||||
Timer timer;
|
||||
m_converter.upload_width(vram_entry->data.data(), m_current_shader.tex0.tbp0(),
|
||||
vram_entry->tex_width, vram_entry->tex_height);
|
||||
|
||||
// also needs clut lookup
|
||||
load_clut_to_converter();
|
||||
{
|
||||
std::vector<u32> rgba_data(w * h);
|
||||
m_converter.download_rgba8888(
|
||||
(u8*)rgba_data.data(), m_current_shader.tex0.tbp0(), m_current_shader.tex0.tbw(), w,
|
||||
h, (int)m_current_shader.tex0.psm(), (int)m_current_shader.tex0.cpsm(),
|
||||
m_current_shader.tex0.cbp(), rgba_data.size() * 4);
|
||||
// file_util::write_rgba_png("out.png", rgba_data.data(), 1 <<
|
||||
// m_current_shader.tex0.tw(),
|
||||
// 1 << m_current_shader.tex0.th());
|
||||
dprintf("processing %d x %d took %.3f ms\n", w, h, timer.getMs());
|
||||
return make_temp_gpu_texture(rgba_data.data(), w, h);
|
||||
const auto& clut_lookup = m_textures.find(m_current_shader.tex0.cbp());
|
||||
if (clut_lookup == m_textures.end()) {
|
||||
printf("set shader referenced an unknown clut texture in %d\n",
|
||||
m_current_shader.tex0.cbp());
|
||||
ASSERT_NOT_REACHED();
|
||||
}
|
||||
|
||||
switch (clut_lookup->second.kind) {
|
||||
case VramEntry::Kind::CLUT16_16_IN_PSM32:
|
||||
break;
|
||||
default:
|
||||
printf("unhandled clut source kind: %d\n", (int)clut_lookup->second.kind);
|
||||
ASSERT_NOT_REACHED();
|
||||
}
|
||||
|
||||
const u32* clut_u32s = (const u32*)clut_lookup->second.data.data();
|
||||
|
||||
if (w == 8 && h == 8 && m_debug.use_fast_scrambler) {
|
||||
ASSERT_NOT_REACHED();
|
||||
} else if (w == 16 && h == 16) {
|
||||
for (int i = 0; i < 16 * 16; i++) {
|
||||
memcpy(&rgba_data[m_psm32_to_psm8_8_8.destinations_per_byte[i]],
|
||||
&clut_u32s[m_clut_table.addrs[vram_entry->data[i]]], 4);
|
||||
}
|
||||
} else if (w == 32 && h == 32 && m_debug.use_fast_scrambler) {
|
||||
for (int i = 0; i < 32 * 32; i++) {
|
||||
rgba_data[m_psm32_to_psm8_16_16.destinations_per_byte[i]] =
|
||||
clut_u32s[m_clut_table.addrs[vram_entry->data[i]]];
|
||||
}
|
||||
} else if (w == 64 && h == 64 && m_debug.use_fast_scrambler) {
|
||||
for (int i = 0; i < 64 * 64; i++) {
|
||||
rgba_data[m_psm32_to_psm8_32_32.destinations_per_byte[i]] =
|
||||
clut_u32s[m_clut_table.addrs[vram_entry->data[i]]];
|
||||
}
|
||||
} else if (w == 128 && h == 128 && m_debug.use_fast_scrambler) {
|
||||
for (int i = 0; i < 128 * 128; i++) {
|
||||
rgba_data[m_psm32_to_psm8_64_64.destinations_per_byte[i]] =
|
||||
clut_u32s[m_clut_table.addrs[vram_entry->data[i]]];
|
||||
}
|
||||
} else {
|
||||
Timer timer;
|
||||
m_converter.upload_width(vram_entry->data.data(), m_current_shader.tex0.tbp0(),
|
||||
vram_entry->tex_width, vram_entry->tex_height);
|
||||
|
||||
// also needs clut lookup
|
||||
load_clut_to_converter();
|
||||
{
|
||||
m_converter.download_rgba8888(
|
||||
(u8*)rgba_data.data(), m_current_shader.tex0.tbp0(), m_current_shader.tex0.tbw(),
|
||||
w, h, (int)m_current_shader.tex0.psm(), (int)m_current_shader.tex0.cpsm(),
|
||||
m_current_shader.tex0.cbp(), rgba_data.size() * 4);
|
||||
// file_util::write_rgba_png("out.png", rgba_data.data(), 1 <<
|
||||
// m_current_shader.tex0.tw(),
|
||||
// 1 << m_current_shader.tex0.th());
|
||||
printf("Scrambler took the slow path %d x %d took %.3f ms\n", w, h, timer.getMs());
|
||||
}
|
||||
}
|
||||
auto ret = make_temp_gpu_texture(rgba_data.data(), w, h);
|
||||
// debug_save_opengl_texture(fmt::format("tex_{}.png", w), ret);
|
||||
return ret;
|
||||
|
||||
ASSERT_NOT_REACHED();
|
||||
} break;
|
||||
default:
|
||||
@ -1165,26 +1220,6 @@ GLuint TextureAnimator::make_or_get_gpu_texture_for_current_shader(TexturePool&
|
||||
case VramEntry::Kind::CLUT16_16_IN_PSM32:
|
||||
ASSERT_NOT_REACHED();
|
||||
|
||||
/*
|
||||
case VramEntry::Kind::GENERIC_PSMT8: {
|
||||
fmt::print("drawing: {}\n", (int)m_current_shader.tex0.psm());
|
||||
ASSERT(m_current_shader.tex0.psm() == GsTex0::PSM::PSMT8);
|
||||
ASSERT(m_current_shader.tex0.cpsm() == 0); // psm32.
|
||||
int tw = 1 << m_current_shader.tex0.tw();
|
||||
int th = 1 << m_current_shader.tex0.th();
|
||||
ASSERT(tw == vram_entry->tex_width);
|
||||
ASSERT(th == vram_entry->tex_height);
|
||||
std::vector<u32> rgba_data(tw * th);
|
||||
const u32* clut = get_current_clut_16_16_psm32();
|
||||
for (int r = 0; r < th; r++) {
|
||||
for (int c = 0; c < tw; c++) {
|
||||
rgba_data[c + r * tw] = clut[vram_entry->data[c + r * tw]];
|
||||
}
|
||||
}
|
||||
return make_temp_gpu_texture(rgba_data.data(), tw, th);
|
||||
}
|
||||
*/
|
||||
|
||||
break;
|
||||
default:
|
||||
ASSERT_NOT_REACHED();
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include "common/dma/dma_chain_read.h"
|
||||
#include "common/dma/gs.h"
|
||||
#include "common/math/Vector.h"
|
||||
#include "common/texture/texture_conversion.h"
|
||||
|
||||
#include "game/graphics/opengl_renderer/Shader.h"
|
||||
#include "game/graphics/opengl_renderer/opengl_utils.h"
|
||||
@ -80,6 +81,79 @@ class ClutBlender {
|
||||
std::vector<u32> m_temp_rgba;
|
||||
};
|
||||
|
||||
struct Psm32ToPsm8Scrambler {
|
||||
Psm32ToPsm8Scrambler(int w, int h, int write_tex_width, int read_tex_width) {
|
||||
struct InAddr {
|
||||
int x = -1, y = -1, c = -1;
|
||||
};
|
||||
struct OutAddr {
|
||||
int x = -1, y = -1;
|
||||
};
|
||||
|
||||
std::vector<InAddr> vram_from_in(w * h * 4);
|
||||
std::vector<OutAddr> vram_from_out(w * h * 4);
|
||||
|
||||
// loop over pixels in input
|
||||
for (int y = 0; y < h; y++) {
|
||||
for (int x = 0; x < w; x++) {
|
||||
int byte_addr = psmct32_addr(x, y, write_tex_width);
|
||||
for (int c = 0; c < 4; c++) {
|
||||
auto& s = vram_from_in.at(byte_addr + c);
|
||||
s.x = x;
|
||||
s.y = y;
|
||||
s.c = c;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// output
|
||||
for (int y = 0; y < h * 2; y++) {
|
||||
for (int x = 0; x < w * 2; x++) {
|
||||
int byte_addr = psmt8_addr(x, y, read_tex_width);
|
||||
auto& s = vram_from_out.at(byte_addr);
|
||||
s.x = x;
|
||||
s.y = y;
|
||||
}
|
||||
}
|
||||
|
||||
destinations_per_byte.resize(4 * w * h);
|
||||
for (size_t i = 0; i < vram_from_out.size(); i++) {
|
||||
auto& in = vram_from_in.at(i);
|
||||
auto& out = vram_from_out.at(i);
|
||||
if (in.c >= 0) {
|
||||
destinations_per_byte.at(in.c + in.x * 4 + in.y * 4 * w) = out.x + out.y * w * 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<int> destinations_per_byte;
|
||||
};
|
||||
|
||||
struct ClutReader {
|
||||
std::array<int, 256> addrs;
|
||||
ClutReader() {
|
||||
for (int i = 0; i < 256; i++) {
|
||||
u32 clut_chunk = i / 16;
|
||||
u32 off_in_chunk = i % 16;
|
||||
u8 clx = 0, cly = 0;
|
||||
if (clut_chunk & 1) {
|
||||
clx = 8;
|
||||
}
|
||||
cly = (clut_chunk >> 1) * 2;
|
||||
if (off_in_chunk >= 8) {
|
||||
off_in_chunk -= 8;
|
||||
cly++;
|
||||
}
|
||||
clx += off_in_chunk;
|
||||
|
||||
// the x, y CLUT value is looked up in PSMCT32 mode
|
||||
u32 clut_addr = clx + cly * 16;
|
||||
ASSERT(clut_addr < 256);
|
||||
addrs[i] = clut_addr;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
class TexturePool;
|
||||
|
||||
class TextureAnimator {
|
||||
@ -88,6 +162,7 @@ class TextureAnimator {
|
||||
~TextureAnimator();
|
||||
void handle_texture_anim_data(DmaFollower& dma, const u8* ee_mem, TexturePool* texture_pool);
|
||||
GLuint get_by_slot(int idx);
|
||||
void draw_debug_window();
|
||||
const std::vector<GLuint>* slots() { return &m_output_slots; }
|
||||
|
||||
private:
|
||||
@ -170,6 +245,10 @@ class TextureAnimator {
|
||||
GLuint tcc;
|
||||
} m_uniforms;
|
||||
|
||||
struct {
|
||||
bool use_fast_scrambler = true;
|
||||
} m_debug;
|
||||
|
||||
GLuint m_shader_id;
|
||||
GLuint m_dummy_texture;
|
||||
|
||||
@ -201,15 +280,7 @@ class TextureAnimator {
|
||||
const std::optional<std::string>& dgo);
|
||||
void run_clut_blender_group(DmaTransfer& tf, int idx);
|
||||
|
||||
// std::vector<ClutBlender> m_darkjak_blenders;
|
||||
// std::vector<int> m_darkjak_output_slots;
|
||||
//
|
||||
// std::vector<ClutBlender> m_jakb_prison_blenders;
|
||||
// std::vector<int> m_jakb_prison_output_slots;
|
||||
//
|
||||
// std::vector<ClutBlender> m_jakb_oracle_blenders;
|
||||
// std::vector<int> m_jakb_oracle_slots;
|
||||
//
|
||||
// std::vector<ClutBlender> m_jakb_nest_blenders;
|
||||
// std::vector<int> m_jakb_nest_slots;
|
||||
Psm32ToPsm8Scrambler m_psm32_to_psm8_8_8, m_psm32_to_psm8_16_16, m_psm32_to_psm8_32_32,
|
||||
m_psm32_to_psm8_64_64;
|
||||
ClutReader m_clut_table;
|
||||
};
|
||||
|
@ -2745,3 +2745,14 @@ This commonly includes things such as:
|
||||
:size-assert #x80
|
||||
:flag-assert #xf00000080
|
||||
)
|
||||
|
||||
|
||||
;; Expands to a loop that calls birth-pickup-at-point `count` times (default 5) with
;; pickup type `gem` and an amount of 1.0. The spawn point is built in a fresh stack
;; vector by vector+! from (target-pos 0) and a static vector whose y is (meters 2.0).
;; NOTE: the expansion binds the loop variable `i`, so a `count` form that refers to an
;; outer `i` would see the loop's binding instead.
(defmacro spawn-gem-near-target! (&key (count 5))
  `(dotimes (i ,count)
     (birth-pickup-at-point
       (vector+! (new 'stack 'vector)
                 (target-pos 0)
                 (new 'static 'vector :y (meters 2.0)))
       (pickup-type gem)
       1.0
       #t
       *entity-pool*
       (the fact-info #f))))
|
@ -583,6 +583,13 @@
|
||||
(cond
|
||||
((or (= anim-array *sky-texture-anim-array*)
|
||||
)
|
||||
|
||||
(when (= bucket (bucket-id tex-lcom-sky-post))
|
||||
;; skip. I believe this is only used to generate the envmap texture for the ocean.
|
||||
;; it generates the exact same thing, so if we want this on PC one day, we can just
|
||||
;; steal it from the beginning of the frame.
|
||||
(return #f)
|
||||
)
|
||||
;; for sky, we basically emulate the full thing
|
||||
;; (format *stdcon* "doing sky to bucket ~d~%" bucket)
|
||||
)
|
||||
|
Loading…
Reference in New Issue
Block a user