[tie] add wind effect (#1046)

* wip tie wind stuff

* wind

* clang
This commit is contained in:
water111 2022-01-02 19:02:28 -05:00 committed by GitHub
parent c9204f2a9b
commit b999422305
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 648 additions and 60 deletions

View File

@ -11,6 +11,20 @@ void StripDraw::serialize(Serializer& ser) {
ser.from_ptr(&num_triangles);
}
// Pass every field of this instanced draw through the serializer.
// The same routine is used for both directions: callers in this file branch on ser.is_saving()
// around calls like this one, so `ser` presumably either writes the fields out or fills them in.
// NOTE(review): the call order here looks like it defines the stored layout - keep it in sync
// with the struct and with TFRAG3_VERSION; confirm against Serializer.
void InstancedStripDraw::serialize(Serializer& ser) {
ser.from_ptr(&mode);
ser.from_ptr(&tree_tex_id);
// the two vectors go through the POD-vector path (InstanceGroup is three plain u32's).
ser.from_pod_vector(&vertex_index_stream);
ser.from_pod_vector(&instance_groups);
ser.from_ptr(&num_triangles);
}
// Pass the per-instance wind data (matrix, wind index, stiffness) through the serializer.
// Each member is handled individually with from_ptr, in declaration order.
// NOTE(review): presumably from_ptr copies the raw bytes of the pointed-to object - confirm
// against Serializer before adding non-trivially-copyable members.
void TieWindInstance::serialize(Serializer& ser) {
ser.from_ptr(&matrix);
ser.from_ptr(&wind_idx);
ser.from_ptr(&stiffness);
}
void TfragTree::serialize(Serializer& ser) {
ser.from_ptr(&kind);
@ -38,6 +52,24 @@ void TieTree::serialize(Serializer& ser) {
draw.serialize(ser);
}
if (ser.is_saving()) {
ser.save<size_t>(instanced_wind_draws.size());
} else {
instanced_wind_draws.resize(ser.load<size_t>());
}
for (auto& draw : instanced_wind_draws) {
draw.serialize(ser);
}
if (ser.is_saving()) {
ser.save<size_t>(instance_info.size());
} else {
instance_info.resize(ser.load<size_t>());
}
for (auto& inst : instance_info) {
inst.serialize(ser);
}
ser.from_pod_vector(&vertices);
ser.from_pod_vector(&colors);
bvh.serialize(ser);

View File

@ -1,6 +1,7 @@
#pragma once
// Data format for the tfrag3 renderer.
#include <array>
#include "common/common_types.h"
#include "common/dma/gs.h"
@ -10,7 +11,7 @@
namespace tfrag3 {
constexpr int TFRAG3_VERSION = 7;
constexpr int TFRAG3_VERSION = 8;
// These vertices should be uploaded to the GPU at load time and don't change
struct PreloadedVertex {
@ -51,6 +52,27 @@ struct StripDraw {
void serialize(Serializer& ser);
};
// A draw whose index stream is split into per-instance groups, so that each group can be drawn
// with its own matrix. Used for the wind-animated TIE instances (TieTree::instanced_wind_draws).
struct InstancedStripDraw {
DrawMode mode; // the OpenGL draw settings.
u32 tree_tex_id = 0; // the texture that should be bound for the draw
// the list of vertices in the draw. This includes the restart code of UINT32_MAX that OpenGL
// will use to start a new strip.
std::vector<u32> vertex_index_stream;
// the vertex stream above is segmented by instance.
// Groups are stored in stream order: the renderer walks them front to back and advances its
// offset into vertex_index_stream by `num` for every group, drawn or skipped.
// This struct is serialized with from_pod_vector, so it should stay plain data.
struct InstanceGroup {
u32 num = 0; // number of vertex indices in this group
u32 instance_idx = 0; // the instance they belong to
// visibility index used for culling. The extractor first stores the instance's vis id here and
// then remaps it to the parent node's index, or to UINT32_MAX when no parent is found.
u32 vis_idx = 0;
};
std::vector<InstanceGroup> instance_groups;
// for debug counting.
u32 num_triangles = 0;
// reads or writes all of the fields above through the given serializer.
void serialize(Serializer& ser);
};
// node in the BVH.
struct VisNode {
math::Vector<float, 4> bsphere; // the bounding sphere, in meters (4096 = 1 game meter). w = rad
@ -114,6 +136,13 @@ struct TfragTree {
void serialize(Serializer& ser);
};
// Per-instance data needed to animate one wind-affected TIE instance at render time.
// One entry is created for every instance of a prototype with nonzero stiffness, and
// InstancedStripDraw::InstanceGroup::instance_idx indexes into the tree's list of these.
struct TieWindInstance {
  // the instance's matrix. Wind vertices are stored untransformed, so the renderer combines
  // this (after applying wind) with the camera matrix for each instance.
  std::array<math::Vector4f, 4> matrix;
  // which wind vector this instance uses. Defaulted so that a default-constructed instance never
  // carries an indeterminate index (matches the zero defaults used by the other draw structs).
  u16 wind_idx = 0;
  // copied from the prototype; scales how far the wind displaces the instance.
  float stiffness = 0.f;
  // reads or writes matrix, wind_idx and stiffness through the given serializer.
  void serialize(Serializer& ser);
};
// A tie model
struct TieTree {
BVH bvh;
@ -121,7 +150,8 @@ struct TieTree {
std::vector<PreloadedVertex> vertices; // mesh vertices
std::vector<TimeOfDayColor> colors; // vertex colors (pre-interpolation)
// TODO wind stuff
std::vector<InstancedStripDraw> instanced_wind_draws;
std::vector<TieWindInstance> instance_info;
void serialize(Serializer& ser);
};

View File

@ -1041,7 +1041,7 @@ void ProxyPrototypeArrayTie::read_from_file(TypedRef ref,
prototype_array_tie.read_from_file(
get_and_check_ref_to_basic(ref, "prototype-array-tie", "prototype-array-tie", dts), dts,
stats);
// TODO wind
wind_vectors = deref_label(get_field_ref(ref, "wind-vectors", dts));
}
std::string ProxyPrototypeArrayTie::print(const PrintSettings& settings, int indent) const {

View File

@ -352,7 +352,7 @@ struct ProxyPrototypeArrayTie {
std::string print(const PrintSettings& settings, int indent) const;
PrototypeArrayTie prototype_array_tie;
// todo wind vectors.
Ref wind_vectors;
};
struct DrawableTreeInstanceTie : public DrawableTree {

View File

@ -2114,6 +2114,19 @@ void extract_time_of_day(const level_tools::DrawableTreeTfrag* tree, tfrag3::Tfr
}
}
/*!
 * Collapse each run of adjacent groups that share a vis_idx into a single group whose num is
 * the sum of the run. Only neighbours are merged: the relative order of the groups is kept.
 * The merge is done in place. An empty list is left untouched (previously this threw from
 * grps.at(0)).
 */
void merge_groups(std::vector<tfrag3::StripDraw::VisGroup>& grps) {
  if (grps.empty()) {
    return;
  }

  // index of the group currently being accumulated into.
  size_t last_kept = 0;
  for (size_t i = 1; i < grps.size(); i++) {
    if (grps[i].vis_idx == grps[last_kept].vis_idx) {
      grps[last_kept].num += grps[i].num;
    } else {
      last_kept++;
      if (last_kept != i) {
        grps[last_kept] = grps[i];
      }
    }
  }

  // drop the tail entries that were folded into earlier groups.
  grps.erase(grps.begin() + last_kept + 1, grps.end());
}
} // namespace
void extract_tfrag(const level_tools::DrawableTreeTfrag* tree,
@ -2190,6 +2203,7 @@ void extract_tfrag(const level_tools::DrawableTreeTfrag* tree,
str.vis_idx = it->second;
}
}
merge_groups(draw.vis_groups);
}
out.tfrag_trees.push_back(this_tree);
}

View File

@ -165,6 +165,8 @@ struct TieInstanceFragInfo {
// this contains indices into the shared palette.
std::vector<u8> color_indices;
// in the PC port format, we upload a single giant time of day color. this points to the offset
// of the colors from this frag instance.
u16 color_index_offset_in_big_palette = -1;
math::Vector<u32, 4> lq_colors_ui(u32 qw) const {
@ -336,6 +338,25 @@ std::array<math::Vector4f, 4> extract_tie_matrix(const u16* data) {
constexpr int GEOM_IDX = 1; // todo 0 or 1??
/*!
* Confirm that the initial value of all wind vectors is 0.
*/
void check_wind_vectors_zero(const std::vector<TieProtoInfo>& protos, Ref wind_ref) {
u16 max_wind = 0;
for (auto& proto : protos) {
for (auto& inst : proto.instances) {
max_wind = std::max(inst.wind_index, max_wind);
}
}
u32 wind_words = max_wind;
wind_words *= 4;
for (size_t i = 0; i < wind_words; i++) {
auto& word = wind_ref.data->words_by_seg.at(wind_ref.seg).at(wind_ref.byte_offset / 4 + i);
assert(word.kind() == LinkedWord::PLAIN_DATA);
assert(word.data == 0);
}
}
std::vector<TieProtoInfo> collect_instance_info(
const level_tools::DrawableInlineArrayInstanceTie* instances,
const std::vector<level_tools::PrototypeBucketTie>* protos) {
@ -416,6 +437,9 @@ void update_proto_info(std::vector<TieProtoInfo>* out,
info.uses_generic = (proto.flags == 2);
info.name = proto.name;
info.stiffness = proto.stiffness;
if (info.stiffness != 0) {
fmt::print("--------------------proto {} wind {}\n", info.name, info.stiffness);
}
info.generic_flag = proto.flags & 2;
info.time_of_day_colors.resize(proto.time_of_day.height);
@ -1674,13 +1698,6 @@ std::string debug_dump_proto_to_obj(const TieProtoInfo& proto) {
math::Vector<float, 3> transform_tie(const std::array<math::Vector4f, 4> mat,
const math::Vector3f& pt) {
auto temp = mat[0] * pt.x() + mat[1] * pt.y() + mat[2] * pt.z() + mat[3];
// math::Vector4f temp;
// temp.x() = pt.x();
// temp.y() = pt.y();
// temp.z() = pt.z();
// temp += mat[3];
math::Vector3f result;
result.x() = temp.x();
result.y() = temp.y();
@ -1872,17 +1889,33 @@ void add_vertices_and_static_draw(tfrag3::TieTree& tree,
tfrag3::Level& lev,
const TextureDB& tdb,
const std::vector<TieProtoInfo>& protos) {
// our current approach for static draws is just to flatten to giant mesh.
// our current approach for static draws is just to flatten to giant mesh, except for wind stuff.
std::unordered_map<u32, std::vector<u32>> static_draws_by_tex;
std::unordered_map<u32, std::vector<u32>> wind_draws_by_tex;
std::unordered_map<u32, std::vector<u32>> draws_by_tex;
std::unordered_map<u32, u32> interp_hack_colors;
// renumbering instances.
// loop over all prototypes
for (auto& proto : protos) {
// bool using_wind = true; // hack, for testing
bool using_wind = proto.stiffness != 0.f;
// loop over instances of the prototypes
for (auto& inst : proto.instances) {
u32 wind_instance_idx = tree.instance_info.size();
if (using_wind) {
tfrag3::TieWindInstance wind_instance_info;
wind_instance_info.wind_idx = inst.wind_index;
wind_instance_info.stiffness = proto.stiffness;
wind_instance_info.matrix = inst.mat;
tree.instance_info.push_back(wind_instance_info);
}
// loop over fragments of the prototype
for (size_t frag_idx = 0; frag_idx < proto.frags.size(); frag_idx++) {
auto& frag = proto.frags[frag_idx];
auto& ifrag = inst.frags.at(frag_idx);
// loop over triangle strips within the fragment
for (auto& strip : frag.strips) {
// what texture are we using?
u32 combo_tex = strip.adgif.combo_tex;
@ -1930,57 +1963,111 @@ void add_vertices_and_static_draw(tfrag3::TieTree& tree,
DrawMode mode =
process_draw_mode(strip.adgif, frag.prog_info.misc_x == 0, frag.has_magic_tex0_bit);
// okay, we now have a texture and draw mode, let's see if we can add to an existing...
auto existing_draws_in_tex = draws_by_tex.find(idx_in_lev_data);
tfrag3::StripDraw* draw_to_add_to = nullptr;
if (existing_draws_in_tex != draws_by_tex.end()) {
for (auto idx : existing_draws_in_tex->second) {
if (tree.static_draws.at(idx).mode == mode) {
draw_to_add_to = &tree.static_draws[idx];
if (using_wind) {
// okay, we now have a texture and draw mode, let's see if we can add to an existing...
auto existing_draws_in_tex = wind_draws_by_tex.find(idx_in_lev_data);
tfrag3::InstancedStripDraw* draw_to_add_to = nullptr;
if (existing_draws_in_tex != wind_draws_by_tex.end()) {
for (auto idx : existing_draws_in_tex->second) {
if (tree.instanced_wind_draws.at(idx).mode == mode) {
draw_to_add_to = &tree.instanced_wind_draws[idx];
}
}
}
}
if (!draw_to_add_to) {
// nope, need to create a new draw
tree.static_draws.emplace_back();
draws_by_tex[idx_in_lev_data].push_back(tree.static_draws.size() - 1);
draw_to_add_to = &tree.static_draws.back();
draw_to_add_to->mode = mode;
draw_to_add_to->tree_tex_id = idx_in_lev_data;
}
// now we have a draw, time to add vertices
tfrag3::StripDraw::VisGroup vgroup;
vgroup.vis_idx = inst.vis_id; // associate with the tfrag for culling
vgroup.num = strip.verts.size() + 1; // one for the primitive restart!
draw_to_add_to->num_triangles += strip.verts.size() - 2;
for (auto& vert : strip.verts) {
tfrag3::PreloadedVertex vtx;
// todo fields
auto tf = transform_tie(inst.mat, vert.pos);
vtx.x = tf.x();
vtx.y = tf.y();
vtx.z = tf.z();
vtx.s = vert.tex.x();
vtx.t = vert.tex.y();
vtx.q = vert.tex.z();
// if this is true, we can remove a divide in the shader
assert(vtx.q == 1.f);
if (vert.color_index_index == UINT32_MAX) {
vtx.color_index = 0;
} else {
vtx.color_index = ifrag.color_indices.at(vert.color_index_index);
assert(vert.color_index_index < ifrag.color_indices.size());
vtx.color_index += ifrag.color_index_offset_in_big_palette;
if (!draw_to_add_to) {
// nope, need to create a new draw
tree.instanced_wind_draws.emplace_back();
wind_draws_by_tex[idx_in_lev_data].push_back(tree.instanced_wind_draws.size() - 1);
draw_to_add_to = &tree.instanced_wind_draws.back();
draw_to_add_to->mode = mode;
draw_to_add_to->tree_tex_id = idx_in_lev_data;
}
size_t vert_idx = tree.vertices.size();
tree.vertices.push_back(vtx);
draw_to_add_to->vertex_index_stream.push_back(vert_idx);
// now we have a draw, time to add vertices
tfrag3::InstancedStripDraw::InstanceGroup igroup;
igroup.vis_idx = inst.vis_id; // associate with the tfrag for culling
igroup.num = strip.verts.size() + 1; // one for the primitive restart!
igroup.instance_idx = wind_instance_idx;
draw_to_add_to->num_triangles += strip.verts.size() - 2;
// note: this is a bit wasteful to duplicate the xyz/stq.
for (auto& vert : strip.verts) {
tfrag3::PreloadedVertex vtx;
vtx.x = vert.pos.x();
vtx.y = vert.pos.y();
vtx.z = vert.pos.z();
vtx.s = vert.tex.x();
vtx.t = vert.tex.y();
vtx.q = vert.tex.z();
// if this is true, we can remove a divide in the shader
assert(vtx.q == 1.f);
if (vert.color_index_index == UINT32_MAX) {
vtx.color_index = 0;
} else {
vtx.color_index = ifrag.color_indices.at(vert.color_index_index);
assert(vert.color_index_index < ifrag.color_indices.size());
vtx.color_index += ifrag.color_index_offset_in_big_palette;
}
size_t vert_idx = tree.vertices.size();
tree.vertices.push_back(vtx);
draw_to_add_to->vertex_index_stream.push_back(vert_idx);
}
draw_to_add_to->vertex_index_stream.push_back(UINT32_MAX);
draw_to_add_to->instance_groups.push_back(igroup);
} else {
// okay, we now have a texture and draw mode, let's see if we can add to an existing...
auto existing_draws_in_tex = static_draws_by_tex.find(idx_in_lev_data);
tfrag3::StripDraw* draw_to_add_to = nullptr;
if (existing_draws_in_tex != static_draws_by_tex.end()) {
for (auto idx : existing_draws_in_tex->second) {
if (tree.static_draws.at(idx).mode == mode) {
draw_to_add_to = &tree.static_draws[idx];
}
}
}
if (!draw_to_add_to) {
// nope, need to create a new draw
tree.static_draws.emplace_back();
static_draws_by_tex[idx_in_lev_data].push_back(tree.static_draws.size() - 1);
draw_to_add_to = &tree.static_draws.back();
draw_to_add_to->mode = mode;
draw_to_add_to->tree_tex_id = idx_in_lev_data;
}
// now we have a draw, time to add vertices
tfrag3::StripDraw::VisGroup vgroup;
vgroup.vis_idx = inst.vis_id; // associate with the tfrag for culling
vgroup.num = strip.verts.size() + 1; // one for the primitive restart!
draw_to_add_to->num_triangles += strip.verts.size() - 2;
for (auto& vert : strip.verts) {
tfrag3::PreloadedVertex vtx;
// todo fields
auto tf = transform_tie(inst.mat, vert.pos);
vtx.x = tf.x();
vtx.y = tf.y();
vtx.z = tf.z();
vtx.s = vert.tex.x();
vtx.t = vert.tex.y();
vtx.q = vert.tex.z();
// if this is true, we can remove a divide in the shader
assert(vtx.q == 1.f);
if (vert.color_index_index == UINT32_MAX) {
vtx.color_index = 0;
} else {
vtx.color_index = ifrag.color_indices.at(vert.color_index_index);
assert(vert.color_index_index < ifrag.color_indices.size());
vtx.color_index += ifrag.color_index_offset_in_big_palette;
}
size_t vert_idx = tree.vertices.size();
tree.vertices.push_back(vtx);
draw_to_add_to->vertex_index_stream.push_back(vert_idx);
}
draw_to_add_to->vertex_index_stream.push_back(UINT32_MAX);
draw_to_add_to->vis_groups.push_back(vgroup);
}
draw_to_add_to->vertex_index_stream.push_back(UINT32_MAX);
draw_to_add_to->vis_groups.push_back(vgroup);
}
}
}
@ -1992,6 +2079,33 @@ void add_vertices_and_static_draw(tfrag3::TieTree& tree,
});
}
/*!
 * Collapse each run of adjacent instance groups that share both vis_idx and instance_idx into a
 * single group whose num is the sum of the run. Only neighbours are merged, so the groups stay
 * in index-stream order (the renderer advances its stream offset by num for every group).
 * The merge is done in place. An empty list is left untouched (previously this threw from
 * grps.at(0)).
 */
void merge_groups(std::vector<tfrag3::InstancedStripDraw::InstanceGroup>& grps) {
  if (grps.empty()) {
    return;
  }

  // index of the group currently being accumulated into.
  size_t last_kept = 0;
  for (size_t i = 1; i < grps.size(); i++) {
    const bool same_run = grps[i].vis_idx == grps[last_kept].vis_idx &&
                          grps[i].instance_idx == grps[last_kept].instance_idx;
    if (same_run) {
      grps[last_kept].num += grps[i].num;
    } else {
      last_kept++;
      if (last_kept != i) {
        grps[last_kept] = grps[i];
      }
    }
  }

  // drop the tail entries that were folded into earlier groups.
  grps.erase(grps.begin() + last_kept + 1, grps.end());
}
/*!
 * Collapse each run of adjacent vis groups that share a vis_idx into a single group whose num
 * is the sum of the run. Only neighbours are merged: the relative order of the groups is kept.
 * The merge is done in place. An empty list is left untouched (previously this threw from
 * grps.at(0)).
 */
void merge_groups(std::vector<tfrag3::StripDraw::VisGroup>& grps) {
  if (grps.empty()) {
    return;
  }

  // index of the group currently being accumulated into.
  size_t last_kept = 0;
  for (size_t i = 1; i < grps.size(); i++) {
    if (grps[i].vis_idx == grps[last_kept].vis_idx) {
      grps[last_kept].num += grps[i].num;
    } else {
      last_kept++;
      if (last_kept != i) {
        grps[last_kept] = grps[i];
      }
    }
  }

  // drop the tail entries that were folded into earlier groups.
  grps.erase(grps.begin() + last_kept + 1, grps.end());
}
void extract_tie(const level_tools::DrawableTreeInstanceTie* tree,
const std::string& debug_name,
const std::vector<level_tools::TextureRemap>& tex_map,
@ -2034,6 +2148,7 @@ void extract_tie(const level_tools::DrawableTreeInstanceTie* tree,
auto info = collect_instance_info(as_instance_array, &tree->prototypes.prototype_array_tie.data);
update_proto_info(&info, tex_map, tex_db, tree->prototypes.prototype_array_tie.data);
check_wind_vectors_zero(info, tree->prototypes.wind_vectors);
// debug_print_info(info);
emulate_tie_prototype_program(info);
emulate_tie_instance_program(info);
@ -2064,6 +2179,20 @@ void extract_tie(const level_tools::DrawableTreeInstanceTie* tree,
str.vis_idx = it->second;
}
}
merge_groups(draw.vis_groups);
}
for (auto& draw : this_tree.instanced_wind_draws) {
for (auto& str : draw.instance_groups) {
auto it = instance_parents.find(str.vis_idx);
if (it == instance_parents.end()) {
str.vis_idx = UINT32_MAX;
} else {
str.vis_idx = it->second;
}
}
merge_groups(draw.instance_groups);
}
this_tree.colors = full_palette.colors;

View File

@ -39,12 +39,15 @@ vf10+ is origin, vf20+ is the SHRUB MATRIX!!!
vmulax.xyzw acc, vf20, vf10
vmadday.xyzw acc, vf21, vf10
vmaddz.xyzw vf10, vf22, vf10
vmulax.xyzw acc, vf20, vf11
vmadday.xyzw acc, vf21, vf11
vmaddz.xyzw vf11, vf22, vf11
vmulax.xyzw acc, vf20, vf12
vmadday.xyzw acc, vf21, vf12
vmaddz.xyzw vf12, vf22, vf12
vmulax.xyzw acc, vf20, vf13
vmadday.xyzw acc, vf21, vf13
vmaddaz.xyzw acc, vf22, vf13
@ -61,3 +64,63 @@ For the final instance in the bucket, it will be a ret.
96: color0
# Wind
Wind is only applied if "stiffness" is nonzero.
The first wind data is from the prototype wind-vectors:
s5 = wind_idx * 16 + wind_vectors
The second wind data is
s3 = wind_work + ((wind_idx + wind_work.wind_time) & 63) * 16
= wind_work.wind_array[(wind_idx + wind_work.wind_time) & 63]
```asm
ld s1, 8(s5) # load wind vector 1
pextlw s1, r0, s1 # convert to 2x 64 bits, by shifting left
qmtc2.i vf18, s1 # put in vf
ld s2, 0(s5) # load wind vector 0
pextlw s3, r0, s2 # convert to 2x 64 bits, by shifting left
qmtc2.i vf17, s3 # put in vf
lqc2 vf16, 12(s3) # load wind vector
vmula.xyzw acc, vf16, vf1 # acc = vf16
vmsubax.xyzw acc, vf18, vf19 # acc = vf16 - vf18 * wind_const.x
vmsuby.xyzw vf16, vf17, vf19
# vf16 -= (vf18 * wind_const.x) + (vf17 * wind_const.y)
vmulaz.xyzw acc, vf16, vf19 # acc = vf16 * wind_const.z
vmadd.xyzw vf18, vf1, vf18
# vf18 += vf16 * wind_const.z
vmulaz.xyzw acc, vf18, vf19 # acc = vf18 * wind_const.z
vmadd.xyzw vf17, vf17, vf1
# vf17 += vf18 * wind_const.z
vitof12.xyzw vf11, vf11 # normal convert
vitof12.xyzw vf12, vf12 # normal convert
vminiw.xyzw vf17, vf17, vf0
qmfc2.i s3, vf18
vmaxw.xyzw vf27, vf17, vf19
ppacw s3, r0, s3
vmulw.xyzw vf27, vf27, vf15
vmulax.yw acc, vf0, vf0
vmulay.xz acc, vf27, vf10
vmadd.xyzw vf10, vf1, vf10
qmfc2.i s2, vf27
vmulax.yw acc, vf0, vf0
vmulay.xz acc, vf27, vf11
vmadd.xyzw vf11, vf1, vf11
ppacw s2, r0, s2
vmulax.yw acc, vf0, vf0
vmulay.xz acc, vf27, vf12
vmadd.xyzw vf12, vf1, vf12
if not paused
sd s3, 8(s5)
sd s2, 0(s5)
```

View File

@ -24,6 +24,7 @@ void Tie3::setup_for_level(const std::string& level, SharedRenderState* render_s
if (m_level_name != level) {
Timer tie_setup_timer;
m_wind_vectors.clear();
// We changed level!
fmt::print("TIE3 level change! {} -> {}\n", m_level_name, level);
fmt::print(" Removing old level...\n");
@ -35,16 +36,25 @@ void Tie3::setup_for_level(const std::string& level, SharedRenderState* render_s
size_t vis_temp_len = 0;
size_t max_draw = 0;
size_t max_idx_per_draw = 0;
u16 max_wind_idx = 0;
// set up each tree
for (size_t tree_idx = 0; tree_idx < lev_data->tie_trees.size(); tree_idx++) {
size_t idx_buffer_len = 0;
size_t wind_idx_buffer_len = 0;
const auto& tree = lev_data->tie_trees[tree_idx];
max_draw = std::max(tree.static_draws.size(), max_draw);
for (auto& draw : tree.static_draws) {
idx_buffer_len += draw.vertex_index_stream.size();
max_idx_per_draw = std::max(max_idx_per_draw, draw.vertex_index_stream.size());
}
for (auto& draw : tree.instanced_wind_draws) {
wind_idx_buffer_len += draw.vertex_index_stream.size();
max_idx_per_draw = std::max(max_idx_per_draw, draw.vertex_index_stream.size());
}
for (auto& inst : tree.instance_info) {
max_wind_idx = std::max(max_wind_idx, inst.wind_idx);
}
time_of_day_count = std::max(tree.colors.size(), time_of_day_count);
u32 verts = tree.vertices.size();
fmt::print(" tree {} has {} verts ({} kB) and {} draws\n", tree_idx, verts,
@ -56,6 +66,8 @@ void Tie3::setup_for_level(const std::string& level, SharedRenderState* render_s
m_trees[tree_idx].draws = &tree.static_draws; // todo - should we just copy this?
m_trees[tree_idx].colors = &tree.colors;
m_trees[tree_idx].vis = &tree.bvh;
m_trees[tree_idx].instance_info = &tree.instance_info;
m_trees[tree_idx].wind_draws = &tree.instanced_wind_draws;
vis_temp_len = std::max(vis_temp_len, tree.bvh.vis_nodes.size());
m_trees[tree_idx].tod_cache = swizzle_time_of_day(tree.colors);
glBindBuffer(GL_ARRAY_BUFFER, m_trees[tree_idx].vertex_buffer);
@ -96,6 +108,25 @@ void Tie3::setup_for_level(const std::string& level, SharedRenderState* render_s
glBufferData(GL_ELEMENT_ARRAY_BUFFER, idx_buffer_len * sizeof(u32), nullptr, GL_STREAM_DRAW);
m_trees[tree_idx].index_list.resize(idx_buffer_len);
if (wind_idx_buffer_len > 0) {
m_trees[tree_idx].wind_matrix_cache.resize(tree.instance_info.size());
m_trees[tree_idx].has_wind = true;
glGenBuffers(1, &m_trees[tree_idx].wind_vertex_index_buffer);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_trees[tree_idx].wind_vertex_index_buffer);
std::vector<u32> temp;
temp.resize(wind_idx_buffer_len);
u32 off = 0;
for (auto& draw : tree.instanced_wind_draws) {
m_trees[tree_idx].wind_vertex_index_offsets.push_back(off);
memcpy(temp.data() + off, draw.vertex_index_stream.data(),
draw.vertex_index_stream.size() * sizeof(u32));
off += draw.vertex_index_stream.size();
}
glBufferData(GL_ELEMENT_ARRAY_BUFFER, wind_idx_buffer_len * sizeof(u32), temp.data(),
GL_STATIC_DRAW);
}
glActiveTexture(GL_TEXTURE1);
glGenTextures(1, &m_trees[tree_idx].time_of_day_texture);
glBindTexture(GL_TEXTURE_1D, m_trees[tree_idx].time_of_day_texture);
@ -136,11 +167,124 @@ void Tie3::setup_for_level(const std::string& level, SharedRenderState* render_s
fmt::print("level max time of day: {}\n", time_of_day_count);
assert(time_of_day_count <= TIME_OF_DAY_COLOR_COUNT);
fmt::print("wind: {}\n", max_wind_idx);
m_wind_vectors.resize(4 * max_wind_idx + 4); // 4x u32's per wind.
m_level_name = level;
fmt::print("TIE setup: {:.3f}\n", tie_setup_timer.getSeconds());
}
}
/*!
 * Cap every component of v at val: any of the four components larger than val is replaced by
 * val, the others are left as they are.
 */
void vector_min_in_place(math::Vector4f& v, float val) {
  int lane = 0;
  while (lane < 4) {
    const float current = v[lane];
    if (current > val) {
      v[lane] = val;
    }
    ++lane;
  }
}
/*!
 * Return a copy of v in which every one of the four components is at least val
 * (component-wise maximum of v and val). v itself is not modified.
 */
math::Vector4f vector_max(const math::Vector4f& v, float val) {
  math::Vector4f floored;
  int lane = 0;
  while (lane < 4) {
    const float component = v[lane];
    // same selection as std::max(val, component): keep val unless component is strictly larger.
    floored[lane] = (val < component) ? component : val;
    ++lane;
  }
  return floored;
}
/*!
 * Apply the wind effect to one instance matrix and advance that instance's wind state.
 * This is a port of the original assembly, which is quoted in the comments below (vfNN are the
 * vector registers used there).
 *
 * wind_idx:         which wind vector this instance uses.
 * wind_vector_data: persistent wind state, 4 floats per wind index. In this function floats 0/1
 *                   are the x/z of "vf17" and floats 2/3 are the x/z of "vf18".
 * wind_work:        wind data sent by the game (wind_array, wind_time, paused).
 * stiffness:        scale applied to the final displacement.
 * mat:              instance matrix, modified in place. Only x and z of mat[0..2] are changed;
 *                   mat[3] is not touched.
 *
 * The persistent state is only written back when wind_work.paused is zero.
 * NOTE(review): the update looks like a damped spring driven by the global wind sample
 * (vf18 acting as a velocity, vf17 as a displacement) - that reading is an interpretation.
 */
void do_wind_math(u16 wind_idx,
float* wind_vector_data,
const Tie3::WindWork& wind_work,
float stiffness,
std::array<math::Vector4f, 4>& mat) {
// this instance's 4 floats of persistent state.
float* my_vector = wind_vector_data + (4 * wind_idx);
// the global wind sample for this instance: the 64-entry array is indexed by time + wind index,
// wrapped with & 63.
const auto& work_vector = wind_work.wind_array[(wind_work.wind_time + wind_idx) & 63];
// the x/y/z/w of the "wind_const" vector (vf19) referred to in the assembly comments.
constexpr float cx = 0.5;
constexpr float cy = 100.0;
constexpr float cz = 0.0166;
constexpr float cw = -1.0;
// ld s1, 8(s5) # load wind vector 1
// pextlw s1, r0, s1 # convert to 2x 64 bits, by shifting left
// qmtc2.i vf18, s1 # put in vf
// the two stored words end up in the x and z lanes.
float vf18_x = my_vector[2];
float vf18_z = my_vector[3];
// ld s2, 0(s5) # load wind vector 0
// pextlw s3, r0, s2 # convert to 2x 64 bits, by shifting left
// qmtc2.i vf17, s3 # put in vf
float vf17_x = my_vector[0];
float vf17_z = my_vector[1];
// lqc2 vf16, 12(s3) # load wind vector
math::Vector4f vf16 = work_vector;
// vmula.xyzw acc, vf16, vf1 # acc = vf16
// vmsubax.xyzw acc, vf18, vf19 # acc = vf16 - vf18 * wind_const.x
// vmsuby.xyzw vf16, vf17, vf19
//# vf16 -= (vf18 * wind_const.x) + (vf17 * wind_const.y)
// only the x and z lanes are computed here; y and w of vf16 keep the values from work_vector.
vf16.x() -= cx * vf18_x + cy * vf17_x;
vf16.z() -= cx * vf18_z + cy * vf17_z;
// vmulaz.xyzw acc, vf16, vf19 # acc = vf16 * wind_const.z
// vmadd.xyzw vf18, vf1, vf18
//# vf18 += vf16 * wind_const.z
math::Vector4f vf18(vf18_x, 0.f, vf18_z, 0.f);
vf18 += vf16 * cz;
// vmulaz.xyzw acc, vf18, vf19 # acc = vf18 * wind_const.z
// vmadd.xyzw vf17, vf17, vf1
//# vf17 += vf18 * wind_const.z
math::Vector4f vf17(vf17_x, 0.f, vf17_z, 0.f);
vf17 += vf18 * cz;
// vitof12.xyzw vf11, vf11 # normal convert
// vitof12.xyzw vf12, vf12 # normal convert
// vminiw.xyzw vf17, vf17, vf0
// cap every component at 1.
vector_min_in_place(vf17, 1.f);
// qmfc2.i s3, vf18
// ppacw s3, r0, s3
// vmaxw.xyzw vf27, vf17, vf19
// ...and floor at cw (-1), so vf27 is vf17 clamped to [-1, 1].
auto vf27 = vector_max(vf17, cw);
// vmulw.xyzw vf27, vf27, vf15
vf27 *= stiffness;
// vmulax.yw acc, vf0, vf0
// vmulay.xz acc, vf27, vf10
// vmadd.xyzw vf10, vf1, vf10
// add the row's y, scaled by the wind displacement, to the row's x and z.
mat[0].x() += vf27.x() * mat[0].y();
mat[0].z() += vf27.z() * mat[0].y();
// qmfc2.i s2, vf27
// write the state back for the next call, unless the game is paused.
// note that floats 0/1 receive vf27, i.e. the clamped value after the stiffness multiply.
if (!wind_work.paused) {
my_vector[0] = vf27.x();
my_vector[1] = vf27.z();
my_vector[2] = vf18.x();
my_vector[3] = vf18.z();
}
// vmulax.yw acc, vf0, vf0
// vmulay.xz acc, vf27, vf11
// vmadd.xyzw vf11, vf1, vf11
mat[1].x() += vf27.x() * mat[1].y();
mat[1].z() += vf27.z() * mat[1].y();
// ppacw s2, r0, s2
// vmulax.yw acc, vf0, vf0
// vmulay.xz acc, vf27, vf12
// vmadd.xyzw vf12, vf1, vf12
mat[2].x() += vf27.x() * mat[2].y();
mat[2].z() += vf27.z() * mat[2].y();
//
// if not paused
// sd s3, 8(s5)
// sd s2, 0(s5)
}
void Tie3::discard_tree_cache() {
for (auto tex : m_textures) {
glBindTexture(GL_TEXTURE_2D, tex);
@ -154,6 +298,9 @@ void Tie3::discard_tree_cache() {
glDeleteBuffers(1, &tree.vertex_buffer);
glDeleteBuffers(1, &tree.index_buffer);
glDeleteVertexArrays(1, &tree.vao);
if (tree.has_wind) {
glDeleteBuffers(1, &tree.wind_vertex_index_buffer);
}
}
m_trees.clear();
@ -206,6 +353,10 @@ void Tie3::render(DmaFollower& dma, SharedRenderState* render_state, ScopedProfi
memcpy(&m_pc_port_data, pc_port_data.data, sizeof(TfragPcPortData));
m_pc_port_data.level_name[11] = '\0';
auto wind_data = dma.read_and_advance();
assert(wind_data.size_bytes == sizeof(WindWork));
memcpy(&m_wind_data, wind_data.data, sizeof(WindWork));
while (dma.current_tag_offset() != render_state->next_bucket) {
dma.read_and_advance();
}
@ -252,6 +403,118 @@ void Tie3::render_all_trees(const TfragRenderSettings& settings,
m_all_tree_time.add(all_tree_timer.getSeconds());
}
void Tie3::render_tree_wind(int idx,
const TfragRenderSettings& settings,
SharedRenderState* render_state,
ScopedProfilerNode& prof) {
auto& tree = m_trees.at(idx);
if (tree.wind_draws->empty()) {
return;
}
// note: this isn't the most efficient because we might compute wind matrices for invisible
// instances. TODO: add vis ids to the instance info to avoid this
memset(tree.wind_matrix_cache.data(), 0, sizeof(float) * 16 * tree.wind_matrix_cache.size());
auto& cam_bad = settings.math_camera;
std::array<math::Vector4f, 4> cam;
for (int i = 0; i < 4; i++) {
for (int j = 0; j < 4; j++) {
cam[i][j] = cam_bad.data()[i * 4 + j];
}
}
for (size_t inst_id = 0; inst_id < tree.instance_info->size(); inst_id++) {
auto& info = tree.instance_info->operator[](inst_id);
auto& out = tree.wind_matrix_cache[inst_id];
// auto& mat = tree.instance_info->operator[](inst_id).matrix;
auto mat = info.matrix;
assert(info.wind_idx * 4 <= m_wind_vectors.size());
do_wind_math(info.wind_idx, m_wind_vectors.data(), m_wind_data,
info.stiffness * m_wind_multiplier, mat);
// vmulax.xyzw acc, vf20, vf10
// vmadday.xyzw acc, vf21, vf10
// vmaddz.xyzw vf10, vf22, vf10
out[0] = cam[0] * mat[0].x() + cam[1] * mat[0].y() + cam[2] * mat[0].z();
// vmulax.xyzw acc, vf20, vf11
// vmadday.xyzw acc, vf21, vf11
// vmaddz.xyzw vf11, vf22, vf11
out[1] = cam[0] * mat[1].x() + cam[1] * mat[1].y() + cam[2] * mat[1].z();
// vmulax.xyzw acc, vf20, vf12
// vmadday.xyzw acc, vf21, vf12
// vmaddz.xyzw vf12, vf22, vf12
out[2] = cam[0] * mat[2].x() + cam[1] * mat[2].y() + cam[2] * mat[2].z();
// vmulax.xyzw acc, vf20, vf13
// vmadday.xyzw acc, vf21, vf13
// vmaddaz.xyzw acc, vf22, vf13
// vmaddw.xyzw vf13, vf23, vf0
out[3] = cam[0] * mat[3].x() + cam[1] * mat[3].y() + cam[2] * mat[3].z() + cam[3];
}
int last_texture = -1;
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, tree.wind_vertex_index_buffer);
for (size_t draw_idx = 0; draw_idx < tree.wind_draws->size(); draw_idx++) {
const auto& draw = tree.wind_draws->operator[](draw_idx);
if ((int)draw.tree_tex_id != last_texture) {
glBindTexture(GL_TEXTURE_2D, m_textures.at(draw.tree_tex_id));
last_texture = draw.tree_tex_id;
}
auto double_draw = setup_tfrag_shader(settings, render_state, draw.mode);
int off = 0;
for (auto& grp : draw.instance_groups) {
if (!m_debug_all_visible && !m_cache.vis_temp.at(grp.vis_idx)) {
off += grp.num;
continue; // invisible, skip.
}
glUniformMatrix4fv(
glGetUniformLocation(render_state->shaders[ShaderId::TFRAG3].id(), "camera"), 1, GL_FALSE,
tree.wind_matrix_cache.at(grp.instance_idx)[0].data());
prof.add_draw_call();
prof.add_tri(grp.num);
tree.perf.draws++;
tree.perf.wind_draws++;
tree.perf.verts += grp.num;
glDrawElements(GL_TRIANGLE_STRIP, grp.num, GL_UNSIGNED_INT,
(void*)((off + tree.wind_vertex_index_offsets.at(draw_idx)) * sizeof(u32)));
off += grp.num;
switch (double_draw.kind) {
case DoubleDrawKind::NONE:
break;
case DoubleDrawKind::AFAIL_NO_DEPTH_WRITE:
tree.perf.draws++;
tree.perf.wind_draws++;
tree.perf.verts += grp.num;
prof.add_draw_call();
prof.add_tri(grp.num);
glUniform1f(
glGetUniformLocation(render_state->shaders[ShaderId::TFRAG3].id(), "alpha_min"),
-10.f);
glUniform1f(
glGetUniformLocation(render_state->shaders[ShaderId::TFRAG3].id(), "alpha_max"),
double_draw.aref);
glDepthMask(GL_FALSE);
glDrawElements(GL_TRIANGLE_STRIP, draw.vertex_index_stream.size(), GL_UNSIGNED_INT,
(void*)0);
break;
default:
assert(false);
}
}
}
}
void Tie3::render_tree(int idx,
const TfragRenderSettings& settings,
SharedRenderState* render_state,
@ -261,6 +524,7 @@ void Tie3::render_tree(int idx,
tree.perf.draws = 0;
tree.perf.verts = 0;
tree.perf.full_draws = 0;
tree.perf.wind_draws = 0;
if (m_color_result.size() < tree.colors->size()) {
m_color_result.resize(tree.colors->size());
@ -388,6 +652,12 @@ void Tie3::render_tree(int idx,
render_state->shaders[ShaderId::TFRAG3].activate();
}
}
if (!m_hide_wind) {
auto wind_prof = prof.make_scoped_child("wind");
render_tree_wind(idx, settings, render_state, wind_prof);
}
glBindVertexArray(0);
tree.perf.draw_time.add(draw_timer.getSeconds());
tree.perf.tree_time.add(tree_timer.getSeconds());
@ -403,6 +673,8 @@ void Tie3::draw_debug_window() {
ImGui::Checkbox("Wireframe", &m_debug_wireframe);
ImGui::SameLine();
ImGui::Checkbox("All Visible", &m_debug_all_visible);
ImGui::Checkbox("Hide Wind", &m_hide_wind);
ImGui::SliderFloat("Wind Multiplier", &m_wind_multiplier, 0., 40.f);
ImGui::Separator();
for (u32 i = 0; i < m_trees.size(); i++) {
auto& perf = m_trees[i].perf;
@ -410,6 +682,7 @@ void Tie3::draw_debug_window() {
ImGui::Text("index data bytes: %d", perf.index_upload);
ImGui::Text("time of days: %d", (int)m_trees[i].colors->size());
ImGui::Text("draw: %d, full: %d, verts: %d", perf.draws, perf.full_draws, perf.verts);
ImGui::Text("wind draw: %d", perf.wind_draws);
ImGui::Text("total: %.2f", perf.tree_time.get());
ImGui::Text("cull: %.2f index: %.2f tod: %.2f setup: %.2f draw: %.2f",
perf.cull_time.get() * 1000.f, perf.index_time.get() * 1000.f,

View File

@ -23,8 +23,23 @@ class Tie3 : public BucketRenderer {
ScopedProfilerNode& prof);
void setup_for_level(const std::string& str, SharedRenderState* render_state);
// Wind data sent by the game. Tie3::render fills m_wind_data with a raw memcpy of a DMA transfer
// (after asserting that the transfer is exactly sizeof(WindWork) bytes), so the member order and
// sizes must match what the game sends. The total size is checked by a static_assert in this
// class to be 84 quadwords (84 * 16 bytes).
struct WindWork {
// nonzero when the game is paused: the sender overwrites the first word of the copied data with
// 1 or 0. While this is set, do_wind_math does not write back the per-instance wind state.
u32 paused;
u32 pad[3];
// global wind samples; do_wind_math picks entry (wind_time + wind_idx) & 63.
math::Vector4f wind_array[64];
math::Vector4f wind_normal;
math::Vector4f wind_temp;
float wind_force[64];
// added to the wind index when selecting an entry of wind_array.
u32 wind_time;
u32 pad2[3];
} m_wind_data;
private:
void discard_tree_cache();
void render_tree_wind(int idx,
const TfragRenderSettings& settings,
SharedRenderState* render_state,
ScopedProfilerNode& prof);
struct Tree {
GLuint vertex_buffer;
GLuint index_buffer;
@ -33,15 +48,24 @@ class Tie3 : public BucketRenderer {
GLuint vao;
u32 vert_count;
const std::vector<tfrag3::StripDraw>* draws = nullptr;
const std::vector<tfrag3::InstancedStripDraw>* wind_draws = nullptr;
const std::vector<tfrag3::TieWindInstance>* instance_info = nullptr;
const std::vector<tfrag3::TimeOfDayColor>* colors = nullptr;
const tfrag3::BVH* vis = nullptr;
SwizzledTimeOfDay tod_cache;
std::vector<std::array<math::Vector4f, 4>> wind_matrix_cache;
bool has_wind = false;
GLuint wind_vertex_index_buffer;
std::vector<u32> wind_vertex_index_offsets;
struct {
u32 index_upload = 0;
u32 verts = 0;
u32 draws = 0;
u32 full_draws = 0; // ones that have all visible
u32 wind_draws = 0;
Filtered<float> cull_time;
Filtered<float> index_time;
Filtered<float> tod_time;
@ -70,7 +94,14 @@ class Tie3 : public BucketRenderer {
bool m_use_fast_time_of_day = true;
bool m_debug_wireframe = false;
bool m_debug_all_visible = false;
bool m_hide_wind = false;
Filtered<float> m_all_tree_time;
TfragPcPortData m_pc_port_data;
std::vector<float> m_wind_vectors; // note: I suspect these are shared with shrub.
float m_wind_multiplier = 1.f;
static_assert(sizeof(WindWork) == 84 * 16);
};

View File

@ -254,6 +254,21 @@
obj
)
;; Append the wind data for the PC renderer to dma-buf:
;; one 16-byte dma packet header followed by 84 quadwords copied from *wind-work*.
;; The C++ TIE renderer reads this transfer back as a single 84-quadword struct.
(defun add-pc-wind-data ((dma-buf dma-buffer))
;; packet to send 84 qw's
(let ((packet (the-as dma-packet (-> dma-buf base))))
(set! (-> packet dma) (new 'static 'dma-tag :id (dma-tag-id cnt) :qwc 84))
(set! (-> packet vif0) (new 'static 'vif-tag))
;; tag the data with the pc-port vif command.
(set! (-> packet vif1) (new 'static 'vif-tag :cmd (vif-cmd pc-port)))
;; skip past the 16-byte header that was just written.
(set! (-> dma-buf base) (the pointer (&+ packet 16)))
)
;; copy 84 quadwords, starting 4 bytes before the *wind-work* pointer.
;; NOTE(review): presumably the 4 bytes before the pointer are the object's type tag, and
;; starting there keeps the copy quadword-aligned - confirm.
(quad-copy! (-> dma-buf base) (the pointer (&- (the pointer *wind-work*) 4)) 84)
;; overwrite the first word of the copied data with the paused flag (1 if paused, else 0).
(set! (-> (the (pointer uint32) (-> dma-buf base)) 0)
(if (paused?) 1 0)
)
;; advance past the 84 quadwords of data.
(&+! (-> dma-buf base) (* 16 84))
)
(defun draw-drawable-tree-instance-tie ((arg0 drawable-tree-instance-tie) (arg1 level))
"Actually draw TIE instances.
Will draw TIE, TIE-NEAR, and GENERIC"
@ -418,6 +433,7 @@
(reset! (-> *perf-stats* data 11))
;;(draw-inline-array-prototype-tie-asm s1-1 s5-1 s4-1)
(add-pc-tfrag3-data s1-1 (-> *level* data (-> (scratchpad-object terrain-context) bsp lev-index)))
(add-pc-wind-data s1-1)
(read! (-> *perf-stats* data 11))
(update-wait-stats (-> *perf-stats* data 11) (the-as uint 0)
(-> *prototype-tie-work* wait-to-spr)