Comment fixes, reindentation.

This commit is contained in:
Henrik Rydgård 2018-03-02 14:57:35 +01:00
parent fcdb816235
commit 6fa9fcefb2
10 changed files with 319 additions and 384 deletions

View File

@ -126,7 +126,7 @@ protected:
TransformedVertex *transformed = nullptr;
TransformedVertex *transformedExpanded = nullptr;
// Defer all vertex decoding to a "Flush" (except when software skinning)
// Defer all vertex decoding to a "Flush" (except when skinning)
struct DeferredDrawCall {
void *verts;
void *inds;

View File

@ -41,7 +41,7 @@ static const u8 nrmsize[4] = { 0, 3, 6, 12 }, nrmalign[4] = { 0, 1, 2, 4 };
static const u8 possize[4] = { 3, 3, 6, 12 }, posalign[4] = { 1, 1, 2, 4 };
static const u8 wtsize[4] = { 0, 1, 2, 4 }, wtalign[4] = { 0, 1, 2, 4 };
// When software skinning. This array is only used when non-jitted - when jitted, the matrix
// This array is only used when non-jitted - when jitted, the matrix
// is kept in registers.
alignas(16) static float skinMatrix[12];
@ -49,13 +49,6 @@ inline int align(int n, int align) {
return (n + (align - 1)) & ~(align - 1);
}
// Maps a bone count to the count that is actually used:
//   0           -> 0
//   below 4     -> 4
//   4 and above -> unchanged
int TranslateNumBones(int bones) {
	if (bones == 0) {
		return 0;
	}
	// Rounding counts below 8 up to 8 is intentionally left out:
	// "I get drawing problems in FF:CC with this!"
	return bones < 4 ? 4 : bones;
}
int DecFmtSize(u8 fmt) {
switch (fmt) {
case DEC_NONE: return 0;

View File

@ -431,9 +431,6 @@ struct JitLookup {
JitStepFunction jitFunc;
};
// Collapse to fewer skinning shaders to reduce shader switching, which is expensive.
int TranslateNumBones(int bones);
typedef void(*JittedVertexDecoder)(const u8 *src, u8 *dst, int count);
struct VertexDecoderOptions {

View File

@ -343,7 +343,7 @@ void DrawEngineD3D11::DoFlush() {
// Cannot cache vertex data with morph enabled.
bool useCache = g_Config.bVertexCache && !(lastVType_ & GE_VTYPE_MORPHCOUNT_MASK);
// Also avoid caching when software skinning.
// Also avoid caching when skinning.
if (lastVType_ & GE_VTYPE_WEIGHT_MASK)
useCache = false;

View File

@ -324,7 +324,7 @@ void DrawEngineDX9::DoFlush() {
// Cannot cache vertex data with morph enabled.
bool useCache = g_Config.bVertexCache && !(lastVType_ & GE_VTYPE_MORPHCOUNT_MASK);
// Also avoid caching when software skinning.
// Also avoid caching when skinning.
if (lastVType_ & GE_VTYPE_WEIGHT_MASK)
useCache = false;

View File

@ -358,116 +358,113 @@ void GenerateVertexShaderHLSL(const VShaderID &id, char *buffer, ShaderLanguage
}
}
} else {
// Step 1: World Transform / Skinning
if (true) {
// Hardware tessellation
if (doSpline || doBezier) {
WRITE(p, " uint num_patches_u = %s;\n", doBezier ? "(u_spline_count_u - 1) / 3u" : "u_spline_count_u - 3");
WRITE(p, " float2 tess_pos = In.position.xy;\n");
WRITE(p, " int u = In.instanceId %% num_patches_u;\n");
WRITE(p, " int v = In.instanceId / num_patches_u;\n");
WRITE(p, " int2 patch_pos = int2(u, v);\n");
WRITE(p, " float3 _pos[16];\n");
WRITE(p, " float2 _tex[16];\n");
WRITE(p, " float4 _col[16];\n");
WRITE(p, " int idx;\n");
WRITE(p, " int2 index;\n");
for (int i = 0; i < 4; i++) {
for (int j = 0; j < 4; j++) {
WRITE(p, " idx = (%i + v%s) * u_spline_count_u + (%i + u%s);\n", i, doBezier ? " * 3" : "", j, doBezier ? " * 3" : "");
WRITE(p, " index = int2(idx, 0);\n");
WRITE(p, " _pos[%i] = u_tess_pos_tex.Load(index).xyz;\n", i * 4 + j);
if (doTexture && hasTexcoord && hasTexcoordTess)
WRITE(p, " _tex[%i] = u_tess_tex_tex.Load(index).xy;\n", i * 4 + j);
if (hasColor && hasColorTess)
WRITE(p, " _col[%i] = u_tess_col_tex.Load(index).rgba;\n", i * 4 + j);
}
// Step 1: World Transform
// Hardware tessellation
if (doSpline || doBezier) {
WRITE(p, " uint num_patches_u = %s;\n", doBezier ? "(u_spline_count_u - 1) / 3u" : "u_spline_count_u - 3");
WRITE(p, " float2 tess_pos = In.position.xy;\n");
WRITE(p, " int u = In.instanceId %% num_patches_u;\n");
WRITE(p, " int v = In.instanceId / num_patches_u;\n");
WRITE(p, " int2 patch_pos = int2(u, v);\n");
WRITE(p, " float3 _pos[16];\n");
WRITE(p, " float2 _tex[16];\n");
WRITE(p, " float4 _col[16];\n");
WRITE(p, " int idx;\n");
WRITE(p, " int2 index;\n");
for (int i = 0; i < 4; i++) {
for (int j = 0; j < 4; j++) {
WRITE(p, " idx = (%i + v%s) * u_spline_count_u + (%i + u%s);\n", i, doBezier ? " * 3" : "", j, doBezier ? " * 3" : "");
WRITE(p, " index = int2(idx, 0);\n");
WRITE(p, " _pos[%i] = u_tess_pos_tex.Load(index).xyz;\n", i * 4 + j);
if (doTexture && hasTexcoord && hasTexcoordTess)
WRITE(p, " _tex[%i] = u_tess_tex_tex.Load(index).xy;\n", i * 4 + j);
if (hasColor && hasColorTess)
WRITE(p, " _col[%i] = u_tess_col_tex.Load(index).rgba;\n", i * 4 + j);
}
WRITE(p, " float2 weights[4];\n");
if (doBezier) {
// Bernstein 3D
WRITE(p, " weights[0] = (1.0 - tess_pos) * (1.0 - tess_pos) * (1.0 - tess_pos);\n");
WRITE(p, " weights[1] = 3.0 * tess_pos * (1.0 - tess_pos) * (1.0 - tess_pos);\n");
WRITE(p, " weights[2] = 3.0 * tess_pos * tess_pos * (1.0 - tess_pos);\n");
WRITE(p, " weights[3] = tess_pos * tess_pos * tess_pos;\n");
} else if (doSpline) {
WRITE(p, " int2 spline_num_patches = int2(u_spline_count_u - 3, u_spline_count_v - 3);\n");
WRITE(p, " int2 spline_type = int2(u_spline_type_u, u_spline_type_v);\n");
WRITE(p, " float2 knots[6];\n");
WRITE(p, " spline_knot(spline_num_patches, spline_type, knots, patch_pos);\n");
WRITE(p, " spline_weight(tess_pos + patch_pos, knots, weights);\n");
}
WRITE(p, " float3 pos = tess_sample(_pos, weights);\n");
if (doTexture && hasTexcoord) {
if (hasTexcoordTess)
WRITE(p, " float2 tex = tess_sample(_tex, weights);\n");
else
WRITE(p, " float2 tex = tess_pos + patch_pos;\n");
}
if (hasColor) {
if (hasColorTess)
WRITE(p, " float4 col = tess_sample(_col, weights);\n");
else
WRITE(p, " float4 col = u_tess_col_tex.Load(int2(0, 0)).rgba;\n");
}
if (hasNormal) {
// Curved surfaces probably always need their normal computed (rather than sampled from the control points).
if (doBezier) {
// Bernstein derivative
WRITE(p, " float2 bernderiv[4];\n");
WRITE(p, " bernderiv[0] = -3.0 * (tess_pos - 1.0) * (tess_pos - 1.0); \n");
WRITE(p, " bernderiv[1] = 9.0 * tess_pos * tess_pos - 12.0 * tess_pos + 3.0; \n");
WRITE(p, " bernderiv[2] = 3.0 * (2.0 - 3.0 * tess_pos) * tess_pos; \n");
WRITE(p, " bernderiv[3] = 3.0 * tess_pos * tess_pos; \n");
WRITE(p, " float2 bernderiv_u[4];\n");
WRITE(p, " float2 bernderiv_v[4];\n");
WRITE(p, " for (int i = 0; i < 4; i++) {\n");
WRITE(p, " bernderiv_u[i] = float2(bernderiv[i].x, weights[i].y);\n");
WRITE(p, " bernderiv_v[i] = float2(weights[i].x, bernderiv[i].y);\n");
WRITE(p, " }\n");
WRITE(p, " float3 du = tess_sample(_pos, bernderiv_u);\n");
WRITE(p, " float3 dv = tess_sample(_pos, bernderiv_v);\n");
} else if (doSpline) {
WRITE(p, " float2 tess_next_u = float2(In.normal.x, 0.0);\n");
WRITE(p, " float2 tess_next_v = float2(0.0, In.normal.y);\n");
// Right
WRITE(p, " float2 tess_pos_r = tess_pos + tess_next_u;\n");
WRITE(p, " spline_weight(tess_pos_r + patch_pos, knots, weights);\n");
WRITE(p, " float3 pos_r = tess_sample(_pos, weights);\n");
// Left
WRITE(p, " float2 tess_pos_l = tess_pos - tess_next_u;\n");
WRITE(p, " spline_weight(tess_pos_l + patch_pos, knots, weights);\n");
WRITE(p, " float3 pos_l = tess_sample(_pos, weights);\n");
// Down
WRITE(p, " float2 tess_pos_d = tess_pos + tess_next_v;\n");
WRITE(p, " spline_weight(tess_pos_d + patch_pos, knots, weights);\n");
WRITE(p, " float3 pos_d = tess_sample(_pos, weights);\n");
// Up
WRITE(p, " float2 tess_pos_u = tess_pos - tess_next_v;\n");
WRITE(p, " spline_weight(tess_pos_u + patch_pos, knots, weights);\n");
WRITE(p, " float3 pos_u = tess_sample(_pos, weights);\n");
WRITE(p, " float3 du = pos_r - pos_l;\n");
WRITE(p, " float3 dv = pos_d - pos_u;\n");
}
WRITE(p, " float3 nrm = cross(du, dv);\n");
WRITE(p, " nrm = normalize(nrm);\n");
}
WRITE(p, " float3 worldpos = mul(float4(pos.xyz, 1.0), u_world);\n");
if (hasNormal)
WRITE(p, " float3 worldnormal = normalize(mul(float4(%snrm, 0.0), u_world));\n", flipNormalTess ? "-" : "");
else
WRITE(p, " float3 worldnormal = float3(0.0, 0.0, 1.0);\n");
} else {
// No skinning, just standard T&L.
WRITE(p, " float3 worldpos = mul(float4(In.position.xyz, 1.0), u_world);\n");
if (hasNormal)
WRITE(p, " float3 worldnormal = normalize(mul(float4(%sIn.normal, 0.0), u_world));\n", flipNormal ? "-" : "");
else
WRITE(p, " float3 worldnormal = float3(0.0, 0.0, 1.0);\n");
}
WRITE(p, " float2 weights[4];\n");
if (doBezier) {
// Bernstein 3D
WRITE(p, " weights[0] = (1.0 - tess_pos) * (1.0 - tess_pos) * (1.0 - tess_pos);\n");
WRITE(p, " weights[1] = 3.0 * tess_pos * (1.0 - tess_pos) * (1.0 - tess_pos);\n");
WRITE(p, " weights[2] = 3.0 * tess_pos * tess_pos * (1.0 - tess_pos);\n");
WRITE(p, " weights[3] = tess_pos * tess_pos * tess_pos;\n");
} else if (doSpline) {
WRITE(p, " int2 spline_num_patches = int2(u_spline_count_u - 3, u_spline_count_v - 3);\n");
WRITE(p, " int2 spline_type = int2(u_spline_type_u, u_spline_type_v);\n");
WRITE(p, " float2 knots[6];\n");
WRITE(p, " spline_knot(spline_num_patches, spline_type, knots, patch_pos);\n");
WRITE(p, " spline_weight(tess_pos + patch_pos, knots, weights);\n");
}
WRITE(p, " float3 pos = tess_sample(_pos, weights);\n");
if (doTexture && hasTexcoord) {
if (hasTexcoordTess)
WRITE(p, " float2 tex = tess_sample(_tex, weights);\n");
else
WRITE(p, " float2 tex = tess_pos + patch_pos;\n");
}
if (hasColor) {
if (hasColorTess)
WRITE(p, " float4 col = tess_sample(_col, weights);\n");
else
WRITE(p, " float4 col = u_tess_col_tex.Load(int2(0, 0)).rgba;\n");
}
if (hasNormal) {
// Curved surfaces probably always need their normal computed (rather than sampled from the control points).
if (doBezier) {
// Bernstein derivative
WRITE(p, " float2 bernderiv[4];\n");
WRITE(p, " bernderiv[0] = -3.0 * (tess_pos - 1.0) * (tess_pos - 1.0); \n");
WRITE(p, " bernderiv[1] = 9.0 * tess_pos * tess_pos - 12.0 * tess_pos + 3.0; \n");
WRITE(p, " bernderiv[2] = 3.0 * (2.0 - 3.0 * tess_pos) * tess_pos; \n");
WRITE(p, " bernderiv[3] = 3.0 * tess_pos * tess_pos; \n");
WRITE(p, " float2 bernderiv_u[4];\n");
WRITE(p, " float2 bernderiv_v[4];\n");
WRITE(p, " for (int i = 0; i < 4; i++) {\n");
WRITE(p, " bernderiv_u[i] = float2(bernderiv[i].x, weights[i].y);\n");
WRITE(p, " bernderiv_v[i] = float2(weights[i].x, bernderiv[i].y);\n");
WRITE(p, " }\n");
WRITE(p, " float3 du = tess_sample(_pos, bernderiv_u);\n");
WRITE(p, " float3 dv = tess_sample(_pos, bernderiv_v);\n");
} else if (doSpline) {
WRITE(p, " float2 tess_next_u = float2(In.normal.x, 0.0);\n");
WRITE(p, " float2 tess_next_v = float2(0.0, In.normal.y);\n");
// Right
WRITE(p, " float2 tess_pos_r = tess_pos + tess_next_u;\n");
WRITE(p, " spline_weight(tess_pos_r + patch_pos, knots, weights);\n");
WRITE(p, " float3 pos_r = tess_sample(_pos, weights);\n");
// Left
WRITE(p, " float2 tess_pos_l = tess_pos - tess_next_u;\n");
WRITE(p, " spline_weight(tess_pos_l + patch_pos, knots, weights);\n");
WRITE(p, " float3 pos_l = tess_sample(_pos, weights);\n");
// Down
WRITE(p, " float2 tess_pos_d = tess_pos + tess_next_v;\n");
WRITE(p, " spline_weight(tess_pos_d + patch_pos, knots, weights);\n");
WRITE(p, " float3 pos_d = tess_sample(_pos, weights);\n");
// Up
WRITE(p, " float2 tess_pos_u = tess_pos - tess_next_v;\n");
WRITE(p, " spline_weight(tess_pos_u + patch_pos, knots, weights);\n");
WRITE(p, " float3 pos_u = tess_sample(_pos, weights);\n");
WRITE(p, " float3 du = pos_r - pos_l;\n");
WRITE(p, " float3 dv = pos_d - pos_u;\n");
}
WRITE(p, " float3 nrm = cross(du, dv);\n");
WRITE(p, " nrm = normalize(nrm);\n");
}
WRITE(p, " float3 worldpos = mul(float4(pos.xyz, 1.0), u_world);\n");
if (hasNormal)
WRITE(p, " float3 worldnormal = normalize(mul(float4(%snrm, 0.0), u_world));\n", flipNormalTess ? "-" : "");
else
WRITE(p, " float3 worldnormal = float3(0.0, 0.0, 1.0);\n");
} else {
WRITE(p, " float3 worldpos = mul(float4(In.position.xyz, 1.0), u_world);\n");
if (hasNormal)
WRITE(p, " float3 worldnormal = normalize(mul(float4(%sIn.normal, 0.0), u_world));\n", flipNormal ? "-" : "");
else
WRITE(p, " float3 worldnormal = float3(0.0, 0.0, 1.0);\n");
}
WRITE(p, " float4 viewPos = float4(mul(float4(worldpos, 1.0), u_view), 1.0);\n");

View File

@ -15,52 +15,6 @@
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
// Ideas for speeding things up on mobile OpenGL ES implementations
//
// Use superbuffers! Yes I just invented that name.
//
// The idea is to avoid respecifying the vertex format between every draw call (multiple glVertexAttribPointer ...)
// by combining the contents of multiple draw calls into one buffer, as long as
// they have exactly the same output vertex format. (Different input formats are fine! This way
// we can combine the data for multiple draws with different numbers of bones, as we consider numbones < 4 to be = 4)
// into one VBO.
//
// This will likely be a win because I believe that between every change of VBO + glVertexAttribPointer*N, the driver will
// perform a lot of validation, probably at draw call time, while all the validation can be skipped if the only thing
// that changes between two draw calls is simple state or texture or a matrix etc, not anything vertex related.
// Also the driver will have to manage hundreds instead of thousands of VBOs in games like GTA.
//
// * Every 10 frames or something, do the following:
// - Frame 1:
// + Mark all drawn buffers with in-frame sequence numbers (alternatively,
// just log them in an array)
// - Frame 2 (beginning?):
// + Take adjacent buffers that have the same output vertex format, and add them
// to a list of buffers to combine. Create said buffers with appropriate sizes
// and precompute the offsets that the draws should be written into.
// - Frame 2 (end):
// + Actually do the work of combining the buffers. This probably means re-decoding
// the vertices into a new one. Will also have to apply index offsets.
//
// Also need to change the drawing code so that we don't glBindBuffer and respecify glVAP if
// two subsequent drawcalls come from the same superbuffer.
//
// Or we ignore all of this including vertex caching and simply find a way to do highly optimized vertex streaming,
// like Dolphin is trying to. That will likely never be able to reach the same speed as perfectly optimized
// superbuffers though. For this we will have to JIT the vertex decoder but that's not too hard.
//
// Now, when do we delete superbuffers? Maybe when half the buffers within have been killed?
//
// Another idea for GTA which switches textures a lot while not changing much other state is to use ES 3 Array
// textures, if they are the same size (even if they aren't, might be okay to simply resize the textures to match
// if they're just a multiple of 2 away) or something. Then we'd have to add a W texture coordinate to choose the
// texture within the bound texture array to the vertex data when merging into superbuffers.
//
// There are even more things to try. For games that do matrix palette skinning by quickly switching bones and
// just drawing a few triangles per call (NBA, FF:CC, Tekken 6 etc) we could even collect matrices, upload them
// all at once, writing matrix indices into the vertices in addition to the weights, and then doing a single
// draw call with specially generated shader to draw the whole mesh. This code will be seriously complex though.
#include "base/logging.h"
#include "base/timeutil.h"
@ -376,7 +330,7 @@ void DrawEngineGLES::DoFlush() {
// Cannot cache vertex data with morph enabled.
bool useCache = g_Config.bVertexCache && !(lastVType_ & GE_VTYPE_MORPHCOUNT_MASK);
// Also avoid caching when software skinning.
// Also avoid caching when skinning.
if (lastVType_ & GE_VTYPE_WEIGHT_MASK)
useCache = false;
@ -522,7 +476,7 @@ void DrawEngineGLES::DoFlush() {
vai->lastFrame = gpuStats.numFlips;
} else {
if (lastVType_ & GE_VTYPE_WEIGHT_MASK) {
// If software skinning, we've already predecoded into "decoded". So push that content.
// If skinning, we've already predecoded into "decoded". So push that content.
size_t size = decodedVerts_ * dec_->GetDecVtxFmt().stride;
u8 *dest = (u8 *)frameData.pushVertex->Push(size, &vertexBufferOffset, &vertexBuffer);
memcpy(dest, decoded, size);

View File

@ -436,114 +436,111 @@ void GenerateVertexShader(const VShaderID &id, char *buffer, uint32_t *attrMask,
}
}
} else {
// Step 1: World Transform / Skinning
if (true) {
// Hardware tessellation
if (doBezier || doSpline) {
WRITE(p, " vec3 _pos[16];\n");
WRITE(p, " vec2 _tex[16];\n");
WRITE(p, " vec4 _col[16];\n");
WRITE(p, " int num_patches_u = %s;\n", doBezier ? "(u_spline_count_u - 1) / 3" : "u_spline_count_u - 3");
WRITE(p, " int u = int(mod(float(gl_InstanceID), float(num_patches_u)));\n");
WRITE(p, " int v = gl_InstanceID / num_patches_u;\n");
WRITE(p, " ivec2 patch_pos = ivec2(u, v);\n");
WRITE(p, " for (int i = 0; i < 4; i++) {\n");
WRITE(p, " for (int j = 0; j < 4; j++) {\n");
WRITE(p, " int index = (i + v%s) * u_spline_count_u + (j + u%s);\n", doBezier ? " * 3" : "", doBezier ? " * 3" : "");
WRITE(p, " _pos[i * 4 + j] = %s(u_tess_pos_tex, ivec2(index, 0), 0).xyz;\n", texelFetch);
if (doTexture && hasTexcoord && hasTexcoordTess)
WRITE(p, " _tex[i * 4 + j] = %s(u_tess_tex_tex, ivec2(index, 0), 0).xy;\n", texelFetch);
if (hasColor && hasColorTess)
WRITE(p, " _col[i * 4 + j] = %s(u_tess_col_tex, ivec2(index, 0), 0).rgba;\n", texelFetch);
WRITE(p, " }\n");
WRITE(p, " }\n");
WRITE(p, " vec2 tess_pos = position.xy;\n");
WRITE(p, " vec2 weights[4];\n");
if (doBezier) {
// Bernstein 3D
WRITE(p, " weights[0] = (1.0 - tess_pos) * (1.0 - tess_pos) * (1.0 - tess_pos);\n");
WRITE(p, " weights[1] = 3.0 * tess_pos * (1.0 - tess_pos) * (1.0 - tess_pos);\n");
WRITE(p, " weights[2] = 3.0 * tess_pos * tess_pos * (1.0 - tess_pos);\n");
WRITE(p, " weights[3] = tess_pos * tess_pos * tess_pos;\n");
} else { // Spline
WRITE(p, " ivec2 spline_num_patches = ivec2(u_spline_count_u - 3, u_spline_count_v - 3);\n");
WRITE(p, " ivec2 spline_type = ivec2(u_spline_type_u, u_spline_type_v);\n");
WRITE(p, " vec2 knots[6];\n");
WRITE(p, " spline_knot(spline_num_patches, spline_type, knots, patch_pos);\n");
WRITE(p, " spline_weight(tess_pos + vec2(patch_pos), knots, weights);\n");
}
WRITE(p, " vec3 pos = tess_sample(_pos, weights);\n");
if (doTexture && hasTexcoord) {
if (hasTexcoordTess)
WRITE(p, " vec2 tex = tess_sample(_tex, weights);\n");
else
WRITE(p, " vec2 tex = tess_pos + vec2(patch_pos);\n");
}
if (hasColor) {
if (hasColorTess)
WRITE(p, " vec4 col = tess_sample(_col, weights);\n");
else
WRITE(p, " vec4 col = %s(u_tess_col_tex, ivec2(0, 0), 0).rgba;\n", texelFetch);
}
if (hasNormal) {
// Curved surfaces probably always need their normal computed (rather than sampled from the control points).
if (doBezier) {
// Bernstein derivative
WRITE(p, " vec2 bernderiv[4];\n");
WRITE(p, " bernderiv[0] = -3.0 * (tess_pos - 1.0) * (tess_pos - 1.0); \n");
WRITE(p, " bernderiv[1] = 9.0 * tess_pos * tess_pos - 12.0 * tess_pos + 3.0; \n");
WRITE(p, " bernderiv[2] = 3.0 * (2.0 - 3.0 * tess_pos) * tess_pos; \n");
WRITE(p, " bernderiv[3] = 3.0 * tess_pos * tess_pos; \n");
WRITE(p, " vec2 bernderiv_u[4];\n");
WRITE(p, " vec2 bernderiv_v[4];\n");
WRITE(p, " for (int i = 0; i < 4; i++) {\n");
WRITE(p, " bernderiv_u[i] = vec2(bernderiv[i].x, weights[i].y);\n");
WRITE(p, " bernderiv_v[i] = vec2(weights[i].x, bernderiv[i].y);\n");
WRITE(p, " }\n");
WRITE(p, " vec3 du = tess_sample(_pos, bernderiv_u);\n");
WRITE(p, " vec3 dv = tess_sample(_pos, bernderiv_v);\n");
} else { // Spline
WRITE(p, " vec2 tess_next_u = vec2(normal.x, 0.0);\n");
WRITE(p, " vec2 tess_next_v = vec2(0.0, normal.y);\n");
// Right
WRITE(p, " vec2 tess_pos_r = tess_pos + tess_next_u;\n");
WRITE(p, " spline_weight(tess_pos_r + vec2(patch_pos), knots, weights);\n");
WRITE(p, " vec3 pos_r = tess_sample(_pos, weights);\n");
// Left
WRITE(p, " vec2 tess_pos_l = tess_pos - tess_next_u;\n");
WRITE(p, " spline_weight(tess_pos_l + vec2(patch_pos), knots, weights);\n");
WRITE(p, " vec3 pos_l = tess_sample(_pos, weights);\n");
// Down
WRITE(p, " vec2 tess_pos_d = tess_pos + tess_next_v;\n");
WRITE(p, " spline_weight(tess_pos_d + vec2(patch_pos), knots, weights);\n");
WRITE(p, " vec3 pos_d = tess_sample(_pos, weights);\n");
// Up
WRITE(p, " vec2 tess_pos_u = tess_pos - tess_next_v;\n");
WRITE(p, " spline_weight(tess_pos_u + vec2(patch_pos), knots, weights);\n");
WRITE(p, " vec3 pos_u = tess_sample(_pos, weights);\n");
WRITE(p, " vec3 du = pos_r - pos_l;\n");
WRITE(p, " vec3 dv = pos_d - pos_u;\n");
}
WRITE(p, " vec3 nrm = cross(du, dv);\n");
WRITE(p, " nrm = normalize(nrm);\n");
}
WRITE(p, " vec3 worldpos = (u_world * vec4(pos.xyz, 1.0)).xyz;\n");
if (hasNormal) {
WRITE(p, " mediump vec3 worldnormal = normalize((u_world * vec4(%snrm, 0.0)).xyz);\n", flipNormalTess ? "-" : "");
} else {
WRITE(p, " mediump vec3 worldnormal = vec3(0.0, 0.0, 1.0);\n");
}
} else {
// No skinning, just standard T&L.
WRITE(p, " vec3 worldpos = (u_world * vec4(position.xyz, 1.0)).xyz;\n");
if (hasNormal)
WRITE(p, " mediump vec3 worldnormal = normalize((u_world * vec4(%snormal, 0.0)).xyz);\n", flipNormal ? "-" : "");
else
WRITE(p, " mediump vec3 worldnormal = vec3(0.0, 0.0, 1.0);\n");
// Step 1: World Transform
// Hardware tessellation
if (doBezier || doSpline) {
WRITE(p, " vec3 _pos[16];\n");
WRITE(p, " vec2 _tex[16];\n");
WRITE(p, " vec4 _col[16];\n");
WRITE(p, " int num_patches_u = %s;\n", doBezier ? "(u_spline_count_u - 1) / 3" : "u_spline_count_u - 3");
WRITE(p, " int u = int(mod(float(gl_InstanceID), float(num_patches_u)));\n");
WRITE(p, " int v = gl_InstanceID / num_patches_u;\n");
WRITE(p, " ivec2 patch_pos = ivec2(u, v);\n");
WRITE(p, " for (int i = 0; i < 4; i++) {\n");
WRITE(p, " for (int j = 0; j < 4; j++) {\n");
WRITE(p, " int index = (i + v%s) * u_spline_count_u + (j + u%s);\n", doBezier ? " * 3" : "", doBezier ? " * 3" : "");
WRITE(p, " _pos[i * 4 + j] = %s(u_tess_pos_tex, ivec2(index, 0), 0).xyz;\n", texelFetch);
if (doTexture && hasTexcoord && hasTexcoordTess)
WRITE(p, " _tex[i * 4 + j] = %s(u_tess_tex_tex, ivec2(index, 0), 0).xy;\n", texelFetch);
if (hasColor && hasColorTess)
WRITE(p, " _col[i * 4 + j] = %s(u_tess_col_tex, ivec2(index, 0), 0).rgba;\n", texelFetch);
WRITE(p, " }\n");
WRITE(p, " }\n");
WRITE(p, " vec2 tess_pos = position.xy;\n");
WRITE(p, " vec2 weights[4];\n");
if (doBezier) {
// Bernstein 3D
WRITE(p, " weights[0] = (1.0 - tess_pos) * (1.0 - tess_pos) * (1.0 - tess_pos);\n");
WRITE(p, " weights[1] = 3.0 * tess_pos * (1.0 - tess_pos) * (1.0 - tess_pos);\n");
WRITE(p, " weights[2] = 3.0 * tess_pos * tess_pos * (1.0 - tess_pos);\n");
WRITE(p, " weights[3] = tess_pos * tess_pos * tess_pos;\n");
} else { // Spline
WRITE(p, " ivec2 spline_num_patches = ivec2(u_spline_count_u - 3, u_spline_count_v - 3);\n");
WRITE(p, " ivec2 spline_type = ivec2(u_spline_type_u, u_spline_type_v);\n");
WRITE(p, " vec2 knots[6];\n");
WRITE(p, " spline_knot(spline_num_patches, spline_type, knots, patch_pos);\n");
WRITE(p, " spline_weight(tess_pos + vec2(patch_pos), knots, weights);\n");
}
WRITE(p, " vec3 pos = tess_sample(_pos, weights);\n");
if (doTexture && hasTexcoord) {
if (hasTexcoordTess)
WRITE(p, " vec2 tex = tess_sample(_tex, weights);\n");
else
WRITE(p, " vec2 tex = tess_pos + vec2(patch_pos);\n");
}
if (hasColor) {
if (hasColorTess)
WRITE(p, " vec4 col = tess_sample(_col, weights);\n");
else
WRITE(p, " vec4 col = %s(u_tess_col_tex, ivec2(0, 0), 0).rgba;\n", texelFetch);
}
if (hasNormal) {
// Curved surfaces probably always need their normal computed (rather than sampled from the control points).
if (doBezier) {
// Bernstein derivative
WRITE(p, " vec2 bernderiv[4];\n");
WRITE(p, " bernderiv[0] = -3.0 * (tess_pos - 1.0) * (tess_pos - 1.0); \n");
WRITE(p, " bernderiv[1] = 9.0 * tess_pos * tess_pos - 12.0 * tess_pos + 3.0; \n");
WRITE(p, " bernderiv[2] = 3.0 * (2.0 - 3.0 * tess_pos) * tess_pos; \n");
WRITE(p, " bernderiv[3] = 3.0 * tess_pos * tess_pos; \n");
WRITE(p, " vec2 bernderiv_u[4];\n");
WRITE(p, " vec2 bernderiv_v[4];\n");
WRITE(p, " for (int i = 0; i < 4; i++) {\n");
WRITE(p, " bernderiv_u[i] = vec2(bernderiv[i].x, weights[i].y);\n");
WRITE(p, " bernderiv_v[i] = vec2(weights[i].x, bernderiv[i].y);\n");
WRITE(p, " }\n");
WRITE(p, " vec3 du = tess_sample(_pos, bernderiv_u);\n");
WRITE(p, " vec3 dv = tess_sample(_pos, bernderiv_v);\n");
} else { // Spline
WRITE(p, " vec2 tess_next_u = vec2(normal.x, 0.0);\n");
WRITE(p, " vec2 tess_next_v = vec2(0.0, normal.y);\n");
// Right
WRITE(p, " vec2 tess_pos_r = tess_pos + tess_next_u;\n");
WRITE(p, " spline_weight(tess_pos_r + vec2(patch_pos), knots, weights);\n");
WRITE(p, " vec3 pos_r = tess_sample(_pos, weights);\n");
// Left
WRITE(p, " vec2 tess_pos_l = tess_pos - tess_next_u;\n");
WRITE(p, " spline_weight(tess_pos_l + vec2(patch_pos), knots, weights);\n");
WRITE(p, " vec3 pos_l = tess_sample(_pos, weights);\n");
// Down
WRITE(p, " vec2 tess_pos_d = tess_pos + tess_next_v;\n");
WRITE(p, " spline_weight(tess_pos_d + vec2(patch_pos), knots, weights);\n");
WRITE(p, " vec3 pos_d = tess_sample(_pos, weights);\n");
// Up
WRITE(p, " vec2 tess_pos_u = tess_pos - tess_next_v;\n");
WRITE(p, " spline_weight(tess_pos_u + vec2(patch_pos), knots, weights);\n");
WRITE(p, " vec3 pos_u = tess_sample(_pos, weights);\n");
WRITE(p, " vec3 du = pos_r - pos_l;\n");
WRITE(p, " vec3 dv = pos_d - pos_u;\n");
}
WRITE(p, " vec3 nrm = cross(du, dv);\n");
WRITE(p, " nrm = normalize(nrm);\n");
}
WRITE(p, " vec3 worldpos = (u_world * vec4(pos.xyz, 1.0)).xyz;\n");
if (hasNormal) {
WRITE(p, " mediump vec3 worldnormal = normalize((u_world * vec4(%snrm, 0.0)).xyz);\n", flipNormalTess ? "-" : "");
} else {
WRITE(p, " mediump vec3 worldnormal = vec3(0.0, 0.0, 1.0);\n");
}
} else {
WRITE(p, " vec3 worldpos = (u_world * vec4(position.xyz, 1.0)).xyz;\n");
if (hasNormal)
WRITE(p, " mediump vec3 worldnormal = normalize((u_world * vec4(%snormal, 0.0)).xyz);\n", flipNormal ? "-" : "");
else
WRITE(p, " mediump vec3 worldnormal = vec3(0.0, 0.0, 1.0);\n");
}
WRITE(p, " vec4 viewPos = u_view * vec4(worldpos, 1.0);\n");

View File

@ -578,7 +578,7 @@ void DrawEngineVulkan::DoFlush() {
// Cannot cache vertex data with morph enabled.
bool useCache = g_Config.bVertexCache && !(lastVType_ & GE_VTYPE_MORPHCOUNT_MASK);
// Also avoid caching when software skinning.
// Also avoid caching when skinning.
VkBuffer vbuf = VK_NULL_HANDLE;
VkBuffer ibuf = VK_NULL_HANDLE;
if (lastVType_ & GE_VTYPE_WEIGHT_MASK) {
@ -724,7 +724,7 @@ void DrawEngineVulkan::DoFlush() {
}
} else {
if (lastVType_ & GE_VTYPE_WEIGHT_MASK) {
// If software skinning, we've already predecoded into "decoded". So push that content.
// If skinning, we've already predecoded into "decoded". So push that content.
VkDeviceSize size = decodedVerts_ * dec_->GetDecVtxFmt().stride;
u8 *dest = (u8 *)frame->pushVertex->Push(size, &vbOffset, &vbuf);
memcpy(dest, decoded, size);

View File

@ -311,112 +311,109 @@ bool GenerateVulkanGLSLVertexShader(const VShaderID &id, char *buffer, bool *use
}
} else {
// Step 1: World Transform
if (true) {
if (doBezier || doSpline) {
WRITE(p, " vec3 _pos[16];\n");
WRITE(p, " vec2 _tex[16];\n");
WRITE(p, " vec4 _col[16];\n");
WRITE(p, " int num_patches_u = %s;\n", doBezier ? "(base.spline_count_u - 1) / 3" : "base.spline_count_u - 3");
WRITE(p, " int u = int(mod(gl_InstanceIndex, num_patches_u));\n");
WRITE(p, " int v = gl_InstanceIndex / num_patches_u;\n");
WRITE(p, " ivec2 patch_pos = ivec2(u, v);\n");
WRITE(p, " for (int i = 0; i < 4; i++) {\n");
WRITE(p, " for (int j = 0; j < 4; j++) {\n");
WRITE(p, " int idx = (i + v%s) * base.spline_count_u + (j + u%s);\n", doBezier ? " * 3" : "", doBezier ? " * 3" : "");
WRITE(p, " _pos[i * 4 + j] = tess_data.data[idx].pos.xyz;\n");
if (doTexture && hasTexcoord && hasTexcoordTess)
WRITE(p, " _tex[i * 4 + j] = tess_data.data[idx].uv.xy;\n");
if (hasColor && hasColorTess)
WRITE(p, " _col[i * 4 + j] = tess_data.data[idx].color;\n");
WRITE(p, " }\n");
WRITE(p, " }\n");
WRITE(p, " vec2 tess_pos = position.xy;\n");
WRITE(p, " vec2 weights[4];\n");
if (doBezier) {
// Bernstein 3D
WRITE(p, " weights[0] = (1 - tess_pos) * (1 - tess_pos) * (1 - tess_pos);\n");
WRITE(p, " weights[1] = 3 * tess_pos * (1 - tess_pos) * (1 - tess_pos);\n");
WRITE(p, " weights[2] = 3 * tess_pos * tess_pos * (1 - tess_pos);\n");
WRITE(p, " weights[3] = tess_pos * tess_pos * tess_pos;\n");
} else { // Spline
WRITE(p, " ivec2 spline_num_patches = ivec2(base.spline_count_u - 3, base.spline_count_v - 3);\n");
WRITE(p, " ivec2 spline_type = ivec2(base.spline_type_u, base.spline_type_v);\n");
WRITE(p, " vec2 knots[6];\n");
WRITE(p, " spline_knot(spline_num_patches, spline_type, knots, patch_pos);\n");
WRITE(p, " spline_weight(tess_pos + patch_pos, knots, weights);\n");
}
WRITE(p, " vec3 pos = tess_sample(_pos, weights);\n");
if (doTexture && hasTexcoord) {
if (hasTexcoordTess)
WRITE(p, " vec2 tex = tess_sample(_tex, weights);\n");
else
WRITE(p, " vec2 tex = tess_pos + patch_pos;\n");
}
if (hasColor) {
if (hasColorTess)
WRITE(p, " vec4 col = tess_sample(_col, weights);\n");
else
WRITE(p, " vec4 col = tess_data.data[0].color;\n");
}
if (hasNormal) {
// Curved surfaces probably always need their normal computed (rather than sampled from the control points).
if (doBezier) {
// Bernstein derivative
WRITE(p, " vec2 bernderiv[4];\n");
WRITE(p, " bernderiv[0] = -3 * (tess_pos - 1) * (tess_pos - 1); \n");
WRITE(p, " bernderiv[1] = 9 * tess_pos * tess_pos - 12 * tess_pos + 3; \n");
WRITE(p, " bernderiv[2] = 3 * (2 - 3 * tess_pos) * tess_pos; \n");
WRITE(p, " bernderiv[3] = 3 * tess_pos * tess_pos; \n");
WRITE(p, " vec2 bernderiv_u[4];\n");
WRITE(p, " vec2 bernderiv_v[4];\n");
WRITE(p, " for (int i = 0; i < 4; i++) {\n");
WRITE(p, " bernderiv_u[i] = vec2(bernderiv[i].x, weights[i].y);\n");
WRITE(p, " bernderiv_v[i] = vec2(weights[i].x, bernderiv[i].y);\n");
WRITE(p, " }\n");
WRITE(p, " vec3 du = tess_sample(_pos, bernderiv_u);\n");
WRITE(p, " vec3 dv = tess_sample(_pos, bernderiv_v);\n");
} else { // Spline
WRITE(p, " vec2 tess_next_u = vec2(normal.x, 0);\n");
WRITE(p, " vec2 tess_next_v = vec2(0, normal.y);\n");
// Right
WRITE(p, " vec2 tess_pos_r = tess_pos + tess_next_u;\n");
WRITE(p, " spline_weight(tess_pos_r + patch_pos, knots, weights);\n");
WRITE(p, " vec3 pos_r = tess_sample(_pos, weights);\n");
// Left
WRITE(p, " vec2 tess_pos_l = tess_pos - tess_next_u;\n");
WRITE(p, " spline_weight(tess_pos_l + patch_pos, knots, weights);\n");
WRITE(p, " vec3 pos_l = tess_sample(_pos, weights);\n");
// Down
WRITE(p, " vec2 tess_pos_d = tess_pos + tess_next_v;\n");
WRITE(p, " spline_weight(tess_pos_d + patch_pos, knots, weights);\n");
WRITE(p, " vec3 pos_d = tess_sample(_pos, weights);\n");
// Up
WRITE(p, " vec2 tess_pos_u = tess_pos - tess_next_v;\n");
WRITE(p, " spline_weight(tess_pos_u + patch_pos, knots, weights);\n");
WRITE(p, " vec3 pos_u = tess_sample(_pos, weights);\n");
WRITE(p, " vec3 du = pos_r - pos_l;\n");
WRITE(p, " vec3 dv = pos_d - pos_u;\n");
}
WRITE(p, " vec3 nrm = cross(du, dv);\n");
WRITE(p, " nrm = normalize(nrm);\n");
}
WRITE(p, " vec3 worldpos = vec4(pos.xyz, 1.0) * base.world_mtx;\n");
if (hasNormal) {
WRITE(p, " mediump vec3 worldnormal = normalize(vec4(%snrm, 0.0) * base.world_mtx);\n", flipNormalTess ? "-" : "");
} else {
WRITE(p, " mediump vec3 worldnormal = vec3(0.0, 0.0, 1.0);\n");
}
} else {
// No skinning, just standard T&L.
WRITE(p, " vec3 worldpos = vec4(position.xyz, 1.0) * base.world_mtx;\n");
if (hasNormal)
WRITE(p, " mediump vec3 worldnormal = normalize(vec4(%snormal, 0.0) * base.world_mtx);\n", flipNormal ? "-" : "");
else
WRITE(p, " mediump vec3 worldnormal = vec3(0.0, 0.0, 1.0);\n");
if (doBezier || doSpline) {
WRITE(p, " vec3 _pos[16];\n");
WRITE(p, " vec2 _tex[16];\n");
WRITE(p, " vec4 _col[16];\n");
WRITE(p, " int num_patches_u = %s;\n", doBezier ? "(base.spline_count_u - 1) / 3" : "base.spline_count_u - 3");
WRITE(p, " int u = int(mod(gl_InstanceIndex, num_patches_u));\n");
WRITE(p, " int v = gl_InstanceIndex / num_patches_u;\n");
WRITE(p, " ivec2 patch_pos = ivec2(u, v);\n");
WRITE(p, " for (int i = 0; i < 4; i++) {\n");
WRITE(p, " for (int j = 0; j < 4; j++) {\n");
WRITE(p, " int idx = (i + v%s) * base.spline_count_u + (j + u%s);\n", doBezier ? " * 3" : "", doBezier ? " * 3" : "");
WRITE(p, " _pos[i * 4 + j] = tess_data.data[idx].pos.xyz;\n");
if (doTexture && hasTexcoord && hasTexcoordTess)
WRITE(p, " _tex[i * 4 + j] = tess_data.data[idx].uv.xy;\n");
if (hasColor && hasColorTess)
WRITE(p, " _col[i * 4 + j] = tess_data.data[idx].color;\n");
WRITE(p, " }\n");
WRITE(p, " }\n");
WRITE(p, " vec2 tess_pos = position.xy;\n");
WRITE(p, " vec2 weights[4];\n");
if (doBezier) {
// Bernstein 3D
WRITE(p, " weights[0] = (1 - tess_pos) * (1 - tess_pos) * (1 - tess_pos);\n");
WRITE(p, " weights[1] = 3 * tess_pos * (1 - tess_pos) * (1 - tess_pos);\n");
WRITE(p, " weights[2] = 3 * tess_pos * tess_pos * (1 - tess_pos);\n");
WRITE(p, " weights[3] = tess_pos * tess_pos * tess_pos;\n");
} else { // Spline
WRITE(p, " ivec2 spline_num_patches = ivec2(base.spline_count_u - 3, base.spline_count_v - 3);\n");
WRITE(p, " ivec2 spline_type = ivec2(base.spline_type_u, base.spline_type_v);\n");
WRITE(p, " vec2 knots[6];\n");
WRITE(p, " spline_knot(spline_num_patches, spline_type, knots, patch_pos);\n");
WRITE(p, " spline_weight(tess_pos + patch_pos, knots, weights);\n");
}
WRITE(p, " vec3 pos = tess_sample(_pos, weights);\n");
if (doTexture && hasTexcoord) {
if (hasTexcoordTess)
WRITE(p, " vec2 tex = tess_sample(_tex, weights);\n");
else
WRITE(p, " vec2 tex = tess_pos + patch_pos;\n");
}
if (hasColor) {
if (hasColorTess)
WRITE(p, " vec4 col = tess_sample(_col, weights);\n");
else
WRITE(p, " vec4 col = tess_data.data[0].color;\n");
}
if (hasNormal) {
// A curved surface probably always needs its normal computed (rather than sampled from the control points).
if (doBezier) {
// Bernstein derivative
WRITE(p, " vec2 bernderiv[4];\n");
WRITE(p, " bernderiv[0] = -3 * (tess_pos - 1) * (tess_pos - 1); \n");
WRITE(p, " bernderiv[1] = 9 * tess_pos * tess_pos - 12 * tess_pos + 3; \n");
WRITE(p, " bernderiv[2] = 3 * (2 - 3 * tess_pos) * tess_pos; \n");
WRITE(p, " bernderiv[3] = 3 * tess_pos * tess_pos; \n");
WRITE(p, " vec2 bernderiv_u[4];\n");
WRITE(p, " vec2 bernderiv_v[4];\n");
WRITE(p, " for (int i = 0; i < 4; i++) {\n");
WRITE(p, " bernderiv_u[i] = vec2(bernderiv[i].x, weights[i].y);\n");
WRITE(p, " bernderiv_v[i] = vec2(weights[i].x, bernderiv[i].y);\n");
WRITE(p, " }\n");
WRITE(p, " vec3 du = tess_sample(_pos, bernderiv_u);\n");
WRITE(p, " vec3 dv = tess_sample(_pos, bernderiv_v);\n");
} else { // Spline
WRITE(p, " vec2 tess_next_u = vec2(normal.x, 0);\n");
WRITE(p, " vec2 tess_next_v = vec2(0, normal.y);\n");
// Right
WRITE(p, " vec2 tess_pos_r = tess_pos + tess_next_u;\n");
WRITE(p, " spline_weight(tess_pos_r + patch_pos, knots, weights);\n");
WRITE(p, " vec3 pos_r = tess_sample(_pos, weights);\n");
// Left
WRITE(p, " vec2 tess_pos_l = tess_pos - tess_next_u;\n");
WRITE(p, " spline_weight(tess_pos_l + patch_pos, knots, weights);\n");
WRITE(p, " vec3 pos_l = tess_sample(_pos, weights);\n");
// Down
WRITE(p, " vec2 tess_pos_d = tess_pos + tess_next_v;\n");
WRITE(p, " spline_weight(tess_pos_d + patch_pos, knots, weights);\n");
WRITE(p, " vec3 pos_d = tess_sample(_pos, weights);\n");
// Up
WRITE(p, " vec2 tess_pos_u = tess_pos - tess_next_v;\n");
WRITE(p, " spline_weight(tess_pos_u + patch_pos, knots, weights);\n");
WRITE(p, " vec3 pos_u = tess_sample(_pos, weights);\n");
WRITE(p, " vec3 du = pos_r - pos_l;\n");
WRITE(p, " vec3 dv = pos_d - pos_u;\n");
}
WRITE(p, " vec3 nrm = cross(du, dv);\n");
WRITE(p, " nrm = normalize(nrm);\n");
}
WRITE(p, " vec3 worldpos = vec4(pos.xyz, 1.0) * base.world_mtx;\n");
if (hasNormal) {
WRITE(p, " mediump vec3 worldnormal = normalize(vec4(%snrm, 0.0) * base.world_mtx);\n", flipNormalTess ? "-" : "");
} else {
WRITE(p, " mediump vec3 worldnormal = vec3(0.0, 0.0, 1.0);\n");
}
} else {
WRITE(p, " vec3 worldpos = vec4(position.xyz, 1.0) * base.world_mtx;\n");
if (hasNormal)
WRITE(p, " mediump vec3 worldnormal = normalize(vec4(%snormal, 0.0) * base.world_mtx);\n", flipNormal ? "-" : "");
else
WRITE(p, " mediump vec3 worldnormal = vec3(0.0, 0.0, 1.0);\n");
}
WRITE(p, " vec4 viewPos = vec4(vec4(worldpos, 1.0) * base.view_mtx, 1.0);\n");