[spline/bezier]Implement hardware tessellation on OpenGL.

This commit is contained in:
xebra 2017-01-09 03:37:21 +09:00 committed by Henrik Rydgård
parent 38293adc78
commit 5aeaa95f2c
7 changed files with 498 additions and 15 deletions

View File

@ -96,6 +96,7 @@ protected:
int prevSizeTex;
int prevSizeCol;
public:
// Send spline/bezier's control points to vertex shader through floating point texture.
virtual void SendDataToShader(const float *pos, const float *tex, const float *col, int size, bool hasColor, bool hasTexCoords) = 0;
};
TessellationDataTransfer *tessDataTransfer;

View File

@ -143,6 +143,8 @@ DrawEngineGLES::DrawEngineGLES()
InitDeviceObjects();
register_gl_resource_holder(this);
tessDataTransfer = new TessellationDataTransferGLES();
}
DrawEngineGLES::~DrawEngineGLES() {
@ -154,6 +156,8 @@ DrawEngineGLES::~DrawEngineGLES() {
FreeMemoryPages(transformedExpanded, 3 * TRANSFORMED_VERTEX_BUFFER_SIZE);
unregister_gl_resource_holder(this);
delete tessDataTransfer;
}
void DrawEngineGLES::RestoreVAO() {
@ -844,7 +848,11 @@ rotateVBO:
SetupDecFmtForDraw(program, dec_->GetDecVtxFmt(), vbo ? 0 : decoded);
if (useElements) {
glDrawElements(glprim[prim], vertexCount, GL_UNSIGNED_SHORT, ebo ? 0 : (GLvoid*)decIndex);
if (gstate_c.bezier || gstate_c.spline)
// Instanced rendering for instanced tessellation
glDrawElementsInstanced(glprim[prim], vertexCount, GL_UNSIGNED_SHORT, ebo ? 0 : (GLvoid*)decIndex, numPatches);
else
glDrawElements(glprim[prim], vertexCount, GL_UNSIGNED_SHORT, ebo ? 0 : (GLvoid*)decIndex);
} else {
glDrawArrays(glprim[prim], 0, vertexCount);
}
@ -1105,3 +1113,52 @@ void DrawEngineGLES::DecimateBuffers() {
// Reports whether ptr falls inside the space managed by the vertex decoder
// JIT cache (decJitCache_).
bool DrawEngineGLES::IsCodePtrVertexDecoder(const u8 *ptr) const {
	const bool insideDecoderJit = decJitCache_->IsInSpace(ptr);
	return insideDecoderJit;
}
void DrawEngineGLES::TessellationDataTransferGLES::SendDataToShader(const float *pos, const float *tex, const float *col, int size, bool hasColor, bool hasTexCoords) {
// Position
glActiveTexture(GL_TEXTURE3);
glBindTexture(GL_TEXTURE_1D, data_tex[0]);
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_WRAP_S, GL_CLAMP);
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_WRAP_T, GL_CLAMP);
if (prevSize < size) {
glTexImage1D(GL_TEXTURE_1D, 0, GL_RGB32F, size, 0, GL_RGB, GL_FLOAT, (GLfloat*)pos);
prevSize = size;
} else {
glTexSubImage1D(GL_TEXTURE_1D, 0, 0, size, GL_RGB, GL_FLOAT, (GLfloat*)pos);
}
// Texcoords
if (hasTexCoords) {
glActiveTexture(GL_TEXTURE4);
glBindTexture(GL_TEXTURE_1D, data_tex[1]);
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_WRAP_S, GL_CLAMP);
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_WRAP_T, GL_CLAMP);
if (prevSizeTex < size) {
glTexImage1D(GL_TEXTURE_1D, 0, GL_RGB32F, size, 0, GL_RGB, GL_FLOAT, (GLfloat*)tex);
prevSizeTex = size;
} else {
glTexSubImage1D(GL_TEXTURE_1D, 0, 0, size, GL_RGB, GL_FLOAT, (GLfloat*)tex);
}
}
// Color
glActiveTexture(GL_TEXTURE5);
glBindTexture(GL_TEXTURE_1D, data_tex[2]);
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_WRAP_S, GL_CLAMP);
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_WRAP_T, GL_CLAMP);
int sizeColor = hasColor ? size : 1;
if (prevSizeCol < sizeColor) {
glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA32F, sizeColor, 0, GL_RGBA, GL_FLOAT, (GLfloat*)col);
prevSizeCol = sizeColor;
} else {
glTexSubImage1D(GL_TEXTURE_1D, 0, 0, sizeColor, GL_RGBA, GL_FLOAT, (GLfloat*)col);
}
glActiveTexture(GL_TEXTURE0);
}

View File

@ -272,4 +272,18 @@ private:
bool fboTexNeedBind_;
bool fboTexBound_;
// Hardware tessellation
class TessellationDataTransferGLES : public TessellationDataTransfer {
private:
int data_tex[3];
public:
TessellationDataTransferGLES() : TessellationDataTransfer(), data_tex() {
glGenTextures(3, (GLuint*)data_tex);
}
~TessellationDataTransferGLES() {
glDeleteTextures(3, (GLuint*)data_tex);
}
void SendDataToShader(const float *pos, const float *tex, const float *col, int size, bool hasColor, bool hasTexCoords) override;
};
};

View File

@ -406,7 +406,6 @@ GPU_GLES::GPU_GLES(GraphicsContext *ctx)
framebufferManager_ = framebufferManagerGL_;
textureCacheGL_ = new TextureCache();
textureCache_ = textureCacheGL_;
drawEngineCommon_ = &drawEngine_;
drawEngine_.SetShaderManager(shaderManager_);
drawEngine_.SetTextureCache(textureCacheGL_);
@ -986,6 +985,155 @@ void GPU_GLES::Execute_VertexTypeSkinning(u32 op, u32 diff) {
}
}
// Handles the GE bezier patch command.
// op bits 0-7 hold the control point count along U, bits 8-15 the count
// along V. The control points (and optional indices) are read from
// gstate_c.vertexAddr / gstate_c.indexAddr and handed to the draw engine;
// afterwards the vertex pointers are advanced like SubmitPrim does.
void GPU_GLES::Execute_Bezier(u32 op, u32 diff) {
	// Selecting the render target up front also makes skipped draws cheap.
	framebufferManagerGL_->SetRenderFrameBuffer(gstate_c.framebufChanged, gstate_c.skipDrawReason);

	const bool skipThisDraw = (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) != 0;
	if (skipThisDraw) {
		// TODO: Should this eat some cycles?  Probably yes.  Not sure if important.
		return;
	}

	if (!Memory::IsValidAddress(gstate_c.vertexAddr)) {
		ERROR_LOG_REPORT(G3D, "Bad vertex address %08x!", gstate_c.vertexAddr);
		return;
	}
	void *ctrlPoints = Memory::GetPointerUnchecked(gstate_c.vertexAddr);

	// Indices are only fetched when the vertex type selects an index format.
	void *indexData = NULL;
	const bool isIndexed = (gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE;
	if (isIndexed) {
		if (!Memory::IsValidAddress(gstate_c.indexAddr)) {
			ERROR_LOG_REPORT(G3D, "Bad index address %08x!", gstate_c.indexAddr);
			return;
		}
		indexData = Memory::GetPointerUnchecked(gstate_c.indexAddr);
	}

	// Report combinations with morphing / skinning.
	if (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) {
		DEBUG_LOG_REPORT(G3D, "Bezier + morph: %i", (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) >> GE_VTYPE_MORPHCOUNT_SHIFT);
	}
	if (vertTypeIsSkinningEnabled(gstate.vertType)) {
		DEBUG_LOG_REPORT(G3D, "Bezier + skinning: %i", vertTypeGetNumBoneWeights(gstate.vertType));
	}

	GEPatchPrimType primType = gstate.getPatchPrimitiveType();
	const int countU = op & 0xFF;
	const int countV = (op >> 8) & 0xFF;
	const bool wantNormals = gstate.isLightingEnabled();
	const bool facing = (gstate.patchfacing & 1) != 0;

	const bool hwTessellation = g_Config.bHardwareTessellation && g_Config.bHardwareTransform && !g_Config.bSoftwareRendering;
	if (hwTessellation) {
		// The flag is only raised for the duration of SubmitBezier below.
		gstate_c.bezier = true;
		if (gstate_c.bezier_count_u != countU) {
			shaderManager_->DirtyUniform(DIRTY_BEZIERCOUNTU);
			gstate_c.bezier_count_u = countU;
		}
	}

	int bytesRead = 0;
	drawEngine_.SubmitBezier(ctrlPoints, indexData, gstate.getPatchDivisionU(), gstate.getPatchDivisionV(), countU, countV, primType, wantNormals, facing, gstate.vertType, &bytesRead);

	gstate_c.bezier = false;

	// After drawing, we advance pointers - see SubmitPrim which does the same.
	const int totalPoints = countU * countV;
	AdvanceVerts(gstate.vertType, totalPoints, bytesRead);
}
// Handles the GE spline patch command.
// op bits 0-7 / 8-15 hold the control point counts along U / V and bits
// 16-17 / 18-19 the spline type along U / V. The control points (and
// optional indices) are read from gstate_c.vertexAddr / gstate_c.indexAddr
// and handed to the draw engine; afterwards the vertex pointers are advanced
// like SubmitPrim does.
void GPU_GLES::Execute_Spline(u32 op, u32 diff) {
	// Selecting the render target up front also makes skipped draws cheap.
	framebufferManagerGL_->SetRenderFrameBuffer(gstate_c.framebufChanged, gstate_c.skipDrawReason);

	const bool skipThisDraw = (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) != 0;
	if (skipThisDraw) {
		// TODO: Should this eat some cycles?  Probably yes.  Not sure if important.
		return;
	}

	if (!Memory::IsValidAddress(gstate_c.vertexAddr)) {
		ERROR_LOG_REPORT(G3D, "Bad vertex address %08x!", gstate_c.vertexAddr);
		return;
	}
	void *ctrlPoints = Memory::GetPointerUnchecked(gstate_c.vertexAddr);

	// Indices are only fetched when the vertex type selects an index format.
	void *indexData = NULL;
	const bool isIndexed = (gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE;
	if (isIndexed) {
		if (!Memory::IsValidAddress(gstate_c.indexAddr)) {
			ERROR_LOG_REPORT(G3D, "Bad index address %08x!", gstate_c.indexAddr);
			return;
		}
		indexData = Memory::GetPointerUnchecked(gstate_c.indexAddr);
	}

	// Report combinations with morphing / skinning.
	if (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) {
		DEBUG_LOG_REPORT(G3D, "Spline + morph: %i", (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) >> GE_VTYPE_MORPHCOUNT_SHIFT);
	}
	if (vertTypeIsSkinningEnabled(gstate.vertType)) {
		DEBUG_LOG_REPORT(G3D, "Spline + skinning: %i", vertTypeGetNumBoneWeights(gstate.vertType));
	}

	// Decode the command word.
	const int countU = op & 0xFF;
	const int countV = (op >> 8) & 0xFF;
	const int typeU = (op >> 16) & 0x3;
	const int typeV = (op >> 18) & 0x3;

	GEPatchPrimType primType = gstate.getPatchPrimitiveType();
	const bool wantNormals = gstate.isLightingEnabled();
	const bool facing = (gstate.patchfacing & 1) != 0;
	const u32 submitVertType = gstate.vertType;

	const bool hwTessellation = g_Config.bHardwareTessellation && g_Config.bHardwareTransform && !g_Config.bSoftwareRendering;
	if (hwTessellation) {
		// The flag is only raised for the duration of SubmitSpline below.
		gstate_c.spline = true;

		// Mark a uniform dirty only when its cached value actually changes.
		if (gstate_c.spline_count_u != countU) {
			shaderManager_->DirtyUniform(DIRTY_SPLINECOUNTU);
			gstate_c.spline_count_u = countU;
		}
		if (gstate_c.spline_count_v != countV) {
			shaderManager_->DirtyUniform(DIRTY_SPLINECOUNTV);
			gstate_c.spline_count_v = countV;
		}
		if (gstate_c.spline_type_u != typeU) {
			shaderManager_->DirtyUniform(DIRTY_SPLINETYPEU);
			gstate_c.spline_type_u = typeU;
		}
		if (gstate_c.spline_type_v != typeV) {
			shaderManager_->DirtyUniform(DIRTY_SPLINETYPEV);
			gstate_c.spline_type_v = typeV;
		}
	}

	int bytesRead = 0;
	drawEngine_.SubmitSpline(ctrlPoints, indexData, gstate.getPatchDivisionU(), gstate.getPatchDivisionV(), countU, countV, typeU, typeV, primType, wantNormals, facing, submitVertType, &bytesRead);

	gstate_c.spline = false;

	// After drawing, we advance pointers - see SubmitPrim which does the same.
	const int totalPoints = countU * countV;
	AdvanceVerts(gstate.vertType, totalPoints, bytesRead);
}
// Handles the GE bounding box command and stores the outcome in
// currentList->bboxResult. The low 24 bits of op are passed to
// TestBoundingBox; a value of 0, an implausible value, or indexed vertex data
// are all treated as "test passed".
void GPU_GLES::Execute_BoundingBox(u32 op, u32 diff) {
	const u32 data = op & 0x00FFFFFF;

	// Just resetting, nothing to bound.
	if (data == 0) {
		// TODO: Should this set the bboxResult?  Let's set it true for now.
		currentList->bboxResult = true;
		return;
	}

	// Sanity check: must be a multiple of 8 and no larger than 64.
	const bool plausible = ((data & 7) == 0) && data <= 64;
	if (!plausible) {
		ERROR_LOG_REPORT_ONCE(boundingbox, G3D, "Bad bounding box data: %06x", data);
		// Data seems invalid. Let's assume the box test passed.
		currentList->bboxResult = true;
		return;
	}

	void *boxVerts = Memory::GetPointer(gstate_c.vertexAddr);
	if (gstate.vertType & GE_VTYPE_IDX_MASK) {
		ERROR_LOG_REPORT_ONCE(boundingbox, G3D, "Indexed bounding box data not supported.");
		// Data seems invalid. Let's assume the box test passed.
		currentList->bboxResult = true;
		return;
	}

	// Test if the bounding box is within the drawing region.
	currentList->bboxResult = drawEngine_.TestBoundingBox(boxVerts, data, gstate.vertType);
}
void GPU_GLES::Execute_Region(u32 op, u32 diff) {
gstate_c.framebufChanged = true;
gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY;

View File

@ -229,6 +229,17 @@ LinkedShader::LinkedShader(ShaderID VSID, Shader *vs, ShaderID FSID, Shader *fs,
sprintf(temp, "u_lightspecular%i", i);
u_lightspecular[i] = glGetUniformLocation(program, temp);
}
if (gstate_c.bezier || gstate_c.spline) {
u_tess_pos_tex = glGetUniformLocation(program, "u_tess_pos_tex");
u_tess_tex_tex = glGetUniformLocation(program, "u_tess_tex_tex");
u_tess_col_tex = glGetUniformLocation(program, "u_tess_col_tex");
u_spline_count_u = glGetUniformLocation(program, "u_spline_count_u");
if (gstate_c.spline) {
u_spline_count_v = glGetUniformLocation(program, "u_spline_count_v");
u_spline_type_u = glGetUniformLocation(program, "u_spline_type_u");
u_spline_type_v = glGetUniformLocation(program, "u_spline_type_v");
}
}
attrMask = 0;
if (-1 != glGetAttribLocation(program, "position")) attrMask |= 1 << ATTR_POSITION;
@ -282,6 +293,14 @@ LinkedShader::LinkedShader(ShaderID VSID, Shader *vs, ShaderID FSID, Shader *fs,
u_lightpos[i] != -1)
availableUniforms |= DIRTY_LIGHT0 << i;
}
if (gstate_c.bezier) {
if (u_spline_count_u != -1) availableUniforms |= DIRTY_BEZIERCOUNTU;
} else if (gstate_c.spline) {
if (u_spline_count_u != -1) availableUniforms |= DIRTY_SPLINECOUNTU;
if (u_spline_count_v != -1) availableUniforms |= DIRTY_SPLINECOUNTV;
if (u_spline_type_u != -1) availableUniforms |= DIRTY_SPLINETYPEU;
if (u_spline_type_v != -1) availableUniforms |= DIRTY_SPLINETYPEV;
}
glUseProgram(program);
@ -289,6 +308,11 @@ LinkedShader::LinkedShader(ShaderID VSID, Shader *vs, ShaderID FSID, Shader *fs,
glUniform1i(u_tex, 0);
glUniform1i(u_fbotex, 1);
glUniform1i(u_testtex, 2);
if (gstate_c.bezier || gstate_c.spline) {
glUniform1i(u_tess_pos_tex, 3); // Texture unit 3
glUniform1i(u_tess_tex_tex, 4); // Texture unit 4
glUniform1i(u_tess_col_tex, 5); // Texture unit 5
}
// The rest, use the "dirty" mechanism.
dirtyUniforms = DIRTY_ALL;
}
@ -537,10 +561,22 @@ void LinkedShader::UpdateUniforms(u32 vertType, const ShaderID &vsid) {
const float widthFactor = (float)w * invW;
const float heightFactor = (float)h * invH;
float uvscaleoff[4];
uvscaleoff[0] = widthFactor;
uvscaleoff[1] = heightFactor;
uvscaleoff[2] = 0.0f;
uvscaleoff[3] = 0.0f;
if (gstate_c.bezier || gstate_c.spline) {
// TODO: Move somewhere or fix properly
// Temporary fix to avoid a texture scaling bug with hardware tessellation.
// This issue occurs probably since rev 9d7983e.
static const float rescale[4] = {1.0f, 2*127.5f/128.f, 2*32767.5f/32768.f, 1.0f};
const float factor = rescale[(vertType & GE_VTYPE_TC_MASK) >> GE_VTYPE_TC_SHIFT];
uvscaleoff[0] = gstate_c.uv.uScale * factor * widthFactor;
uvscaleoff[1] = gstate_c.uv.vScale * factor * heightFactor;
uvscaleoff[2] = gstate_c.uv.uOff * widthFactor;
uvscaleoff[3] = gstate_c.uv.vOff * heightFactor;
} else {
uvscaleoff[0] = widthFactor;
uvscaleoff[1] = heightFactor;
uvscaleoff[2] = 0.0f;
uvscaleoff[3] = 0.0f;
}
glUniform4fv(u_uvscaleoffset, 1, uvscaleoff);
}
@ -704,6 +740,20 @@ void LinkedShader::UpdateUniforms(u32 vertType, const ShaderID &vsid) {
if (u_lightspecular[i] != -1) SetColorUniform3(u_lightspecular[i], gstate.lcolor[i * 3 + 2]);
}
}
if (gstate_c.bezier) {
if (dirty & DIRTY_BEZIERCOUNTU)
glUniform1i(u_spline_count_u, gstate_c.bezier_count_u);
} else if (gstate_c.spline) {
if (dirty & DIRTY_SPLINECOUNTU)
glUniform1i(u_spline_count_u, gstate_c.spline_count_u);
if (dirty & DIRTY_SPLINECOUNTV)
glUniform1i(u_spline_count_v, gstate_c.spline_count_v);
if (dirty & DIRTY_SPLINETYPEU)
glUniform1i(u_spline_type_u, gstate_c.spline_type_u);
if (dirty & DIRTY_SPLINETYPEV)
glUniform1i(u_spline_type_v, gstate_c.spline_type_v);
}
}
ShaderManager::ShaderManager()

View File

@ -110,6 +110,14 @@ public:
int u_lightdiffuse[4]; // each light consist of vec4[3]
int u_lightspecular[4]; // attenuation
int u_lightambient[4]; // attenuation
int u_tess_pos_tex;
int u_tess_tex_tex;
int u_tess_col_tex;
int u_spline_count_u;
int u_spline_count_v;
int u_spline_type_u;
int u_spline_type_v;
};
enum {

View File

@ -170,6 +170,12 @@ void GenerateVertexShader(const ShaderID &id, char *buffer) {
bool enableLighting = id.Bit(VS_BIT_LIGHTING_ENABLE);
int matUpdate = id.Bits(VS_BIT_MATERIAL_UPDATE, 3);
bool doBezier = id.Bit(VS_BIT_BEZIER);
bool doSpline = id.Bit(VS_BIT_SPLINE);
bool hasColorTess = id.Bit(VS_BIT_HAS_COLOR_TESS);
bool hasTexcoordTess = id.Bit(VS_BIT_HAS_TEXCOORD_TESS);
bool flipNormalTess = id.Bit(VS_BIT_NORM_REVERSE_TESS);
const char *shading = "";
if (glslES30)
shading = doFlatShading ? "flat " : "";
@ -319,6 +325,87 @@ void GenerateVertexShader(const ShaderID &id, char *buffer) {
WRITE(p, "}\n\n");
}
// Hardware tessellation
if (doBezier || doSpline) {
WRITE(p, "uniform sampler1D u_tess_pos_tex;\n");
WRITE(p, "uniform sampler1D u_tess_tex_tex;\n");
WRITE(p, "uniform sampler1D u_tess_col_tex;\n");
WRITE(p, "uniform int u_spline_count_u;\n");
for (int i = 2; i <= 4; i++) {
// Define 3 types vec2, vec3, vec4
WRITE(p, "vec%d tess_sample(in vec%d points[16], in vec2 weights[4]) {\n", i, i);
WRITE(p, " vec%d pos = vec%d(0);\n", i, i);
WRITE(p, " for (int i = 0; i < 4; ++i) {\n");
WRITE(p, " for (int j = 0; j < 4; ++j) {\n");
WRITE(p, " float f = weights[j].x * weights[i].y;\n");
WRITE(p, " if (f != 0)\n");
WRITE(p, " pos = pos + f * points[i * 4 + j];\n");
WRITE(p, " }\n");
WRITE(p, " }\n");
WRITE(p, " return pos;\n");
WRITE(p, "}\n");
}
if (doSpline) {
WRITE(p, "uniform int u_spline_count_v;\n");
WRITE(p, "uniform int u_spline_type_u;\n");
WRITE(p, "uniform int u_spline_type_v;\n");
WRITE(p, "void spline_knot(ivec2 num_patches, ivec2 type, out vec2 knot[6], ivec2 patch_pos) {\n");
WRITE(p, " for (int i = 0; i < 6; ++i) {\n");
WRITE(p, " knot[i] = vec2(i + patch_pos.x - 2, i + patch_pos.y - 2);\n");
WRITE(p, " }\n");
WRITE(p, " if ((type.x & 1) != 0) {\n");
WRITE(p, " if (patch_pos.x <= 2)\n");
WRITE(p, " knot[0].x = 0;\n");
WRITE(p, " if (patch_pos.x <= 1)\n");
WRITE(p, " knot[1].x = 0;\n");
WRITE(p, " }\n");
WRITE(p, " if ((type.x & 2) != 0) {\n");
WRITE(p, " if (patch_pos.x >= (num_patches.x - 2))\n");
WRITE(p, " knot[5].x = num_patches.x;\n");
WRITE(p, " if (patch_pos.x == (num_patches.x - 1))\n");
WRITE(p, " knot[4].x = num_patches.x;\n");
WRITE(p, " }\n");
WRITE(p, " if ((type.y & 1) != 0) {\n");
WRITE(p, " if (patch_pos.y <= 2)\n");
WRITE(p, " knot[0].y = 0;\n");
WRITE(p, " if (patch_pos.y <= 1)\n");
WRITE(p, " knot[1].y = 0;\n");
WRITE(p, " }\n");
WRITE(p, " if ((type.y & 2) != 0) {\n");
WRITE(p, " if (patch_pos.y >= (num_patches.y - 2))\n");
WRITE(p, " knot[5].y = num_patches.y;\n");
WRITE(p, " if (patch_pos.y == (num_patches.y - 1))\n");
WRITE(p, " knot[4].y = num_patches.y;\n");
WRITE(p, " }\n");
WRITE(p, "}\n");
WRITE(p, "void spline_weight(vec2 t, in vec2 knot[6], out vec2 weights[4]) {\n");
// TODO: Maybe compilers could be coaxed into vectorizing this code without the above explicitly...
WRITE(p, " vec2 t0 = (t - knot[0]);\n");
WRITE(p, " vec2 t1 = (t - knot[1]);\n");
WRITE(p, " vec2 t2 = (t - knot[2]);\n");
// TODO: All our knots are integers so we should be able to get rid of these divisions (How?)
WRITE(p, " vec2 f30 = t0 / (knot[3] - knot[0]);\n");
WRITE(p, " vec2 f41 = t1 / (knot[4] - knot[1]);\n");
WRITE(p, " vec2 f52 = t2 / (knot[5] - knot[2]);\n");
WRITE(p, " vec2 f31 = t1 / (knot[3] - knot[1]);\n");
WRITE(p, " vec2 f42 = t2 / (knot[4] - knot[2]);\n");
WRITE(p, " vec2 f32 = t2 / (knot[3] - knot[2]);\n");
WRITE(p, " vec2 a = (1 - f30)*(1 - f31);\n");
WRITE(p, " vec2 b = (f31*f41);\n");
WRITE(p, " vec2 c = (1 - f41)*(1 - f42);\n");
WRITE(p, " vec2 d = (f42*f52);\n");
WRITE(p, " weights[0] = a - (a*f32);\n");
WRITE(p, " weights[1] = 1 - a - b + ((a + b + c - 1)*f32);\n");
WRITE(p, " weights[2] = b + ((1 - b - c - d)*f32);\n");
WRITE(p, " weights[3] = d*f32;\n");
WRITE(p, "}\n");
}
}
WRITE(p, "void main() {\n");
if (!useHWTransform) {
@ -355,12 +442,116 @@ void GenerateVertexShader(const ShaderID &id, char *buffer) {
} else {
// Step 1: World Transform / Skinning
if (!enableBones) {
// No skinning, just standard T&L.
WRITE(p, " vec3 worldpos = (u_world * vec4(position.xyz, 1.0)).xyz;\n");
if (hasNormal)
WRITE(p, " mediump vec3 worldnormal = normalize((u_world * vec4(%snormal, 0.0)).xyz);\n", flipNormal ? "-" : "");
else
WRITE(p, " mediump vec3 worldnormal = vec3(0.0, 0.0, 1.0);\n");
// Hardware tessellation
if (doBezier || doSpline) {
WRITE(p, " vec3 _pos[16];\n");
WRITE(p, " vec2 _tex[16];\n");
WRITE(p, " vec4 _col[16];\n");
WRITE(p, " int num_patches_u = %s;\n", doBezier ? "(u_spline_count_u - 1) / 3" : "u_spline_count_u - 3");
WRITE(p, " int u = int(mod(gl_InstanceID, num_patches_u));\n");
WRITE(p, " int v = (gl_InstanceID / num_patches_u);\n");
WRITE(p, " ivec2 patch_pos = ivec2(u, v);\n");
WRITE(p, " for (int i = 0; i < 4; i++) {\n");
WRITE(p, " for (int j = 0; j < 4; j++) {\n");
WRITE(p, " int index = (i + v%s) * u_spline_count_u + (j + u%s);\n", doBezier ? " * 3" : "", doBezier ? " * 3" : "");
WRITE(p, " _pos[i * 4 + j] = texelFetch(u_tess_pos_tex, index, 0).xyz;\n");
if (doTexture && hasTexcoord && hasTexcoordTess)
WRITE(p, " _tex[i * 4 + j] = texelFetch(u_tess_tex_tex, index, 0).xy;\n");
if (hasColor && hasColorTess)
WRITE(p, " _col[i * 4 + j] = texelFetch(u_tess_col_tex, index, 0).rgba;\n");
WRITE(p, " }\n");
WRITE(p, " }\n");
WRITE(p, " vec2 tess_pos = position.xy;\n");
WRITE(p, " vec2 weights[4];\n");
if (doBezier) {
// Bernstein 3D
WRITE(p, " weights[0] = (1 - tess_pos) * (1 - tess_pos) * (1 - tess_pos);\n");
WRITE(p, " weights[1] = 3 * tess_pos * (1 - tess_pos) * (1 - tess_pos);\n");
WRITE(p, " weights[2] = 3 * tess_pos * tess_pos * (1 - tess_pos);\n");
WRITE(p, " weights[3] = tess_pos * tess_pos * tess_pos;\n");
} else if (doSpline) {
WRITE(p, " ivec2 spline_num_patches = ivec2(u_spline_count_u - 3, u_spline_count_v - 3);\n");
WRITE(p, " ivec2 spline_type = ivec2(u_spline_type_u, u_spline_type_v);\n");
WRITE(p, " vec2 knots[6];\n");
WRITE(p, " spline_knot(spline_num_patches, spline_type, knots, patch_pos);\n");
WRITE(p, " spline_weight(tess_pos + patch_pos, knots, weights);\n");
}
WRITE(p, " vec3 pos = tess_sample(_pos, weights);\n");
if (doTexture && hasTexcoord) {
if (hasTexcoordTess)
WRITE(p, " vec2 tex = tess_sample(_tex, weights);\n");
else
WRITE(p, " vec2 tex = tess_pos + patch_pos;\n");
}
if (hasColor) {
if (hasColorTess)
WRITE(p, " vec4 col = tess_sample(_col, weights);\n");
else
WRITE(p, " vec4 col = texelFetch(u_tess_col_tex, 0, 0).rgba;\n");
}
if (hasNormal) {
// Curved surfaces probably always need their normals computed (rather than sampled from the control points).
if (doBezier) {
// Bernstein derivative
WRITE(p, " vec2 bernderiv[4];\n");
WRITE(p, " bernderiv[0] = -3 * (tess_pos - 1) * (tess_pos - 1); \n");
WRITE(p, " bernderiv[1] = 9 * tess_pos * tess_pos - 12 * tess_pos + 3; \n");
WRITE(p, " bernderiv[2] = 3 * (2 - 3 * tess_pos) * tess_pos; \n");
WRITE(p, " bernderiv[3] = 3 * tess_pos * tess_pos; \n");
WRITE(p, " vec2 bernderiv_u[4];\n");
WRITE(p, " vec2 bernderiv_v[4];\n");
WRITE(p, " for (int i = 0; i < 4; i++) {\n");
WRITE(p, " bernderiv_u[i] = vec2(bernderiv[i].x, weights[i].y);\n");
WRITE(p, " bernderiv_v[i] = vec2(weights[i].x, bernderiv[i].y);\n");
WRITE(p, " }\n");
WRITE(p, " vec3 du = tess_sample(_pos, bernderiv_u);\n");
WRITE(p, " vec3 dv = tess_sample(_pos, bernderiv_v);\n");
} else if (doSpline) {
WRITE(p, " vec2 tess_next_u = vec2(normal.x, 0);\n");
WRITE(p, " vec2 tess_next_v = vec2(0, normal.y);\n");
// Right
WRITE(p, " vec2 tess_pos_r = tess_pos + tess_next_u;\n");
WRITE(p, " spline_knot(spline_num_patches, spline_type, knots, patch_pos);\n");
WRITE(p, " spline_weight(tess_pos_r + patch_pos, knots, weights);\n");
WRITE(p, " vec3 pos_r = tess_sample(_pos, weights);\n");
// Left
WRITE(p, " vec2 tess_pos_l = tess_pos - tess_next_u;\n");
WRITE(p, " spline_knot(spline_num_patches, spline_type, knots, patch_pos);\n");
WRITE(p, " spline_weight(tess_pos_l + patch_pos, knots, weights);\n");
WRITE(p, " vec3 pos_l = tess_sample(_pos, weights);\n");
// Down
WRITE(p, " vec2 tess_pos_d = tess_pos + tess_next_v;\n");
WRITE(p, " spline_knot(spline_num_patches, spline_type, knots, patch_pos);\n");
WRITE(p, " spline_weight(tess_pos_d + patch_pos, knots, weights);\n");
WRITE(p, " vec3 pos_d = tess_sample(_pos, weights);\n");
// Up
WRITE(p, " vec2 tess_pos_u = tess_pos - tess_next_v;\n");
WRITE(p, " spline_knot(spline_num_patches, spline_type, knots, patch_pos);\n");
WRITE(p, " spline_weight(tess_pos_u + patch_pos, knots, weights);\n");
WRITE(p, " vec3 pos_u = tess_sample(_pos, weights);\n");
WRITE(p, " vec3 du = pos_r - pos_l;\n");
WRITE(p, " vec3 dv = pos_d - pos_u;\n");
}
WRITE(p, " vec3 nrm = cross(du, dv);\n");
WRITE(p, " nrm = normalize(nrm);\n");
}
WRITE(p, " vec3 worldpos = (u_world * vec4(pos.xyz, 1.0)).xyz;\n");
if (hasNormal) {
WRITE(p, " mediump vec3 worldnormal = normalize((u_world * vec4(%snrm, 0.0)).xyz);\n", flipNormalTess ? "-" : "");
} else {
WRITE(p, " mediump vec3 worldnormal = vec3(0.0, 0.0, 1.0);\n");
}
} else {
// No skinning, just standard T&L.
WRITE(p, " vec3 worldpos = (u_world * vec4(position.xyz, 1.0)).xyz;\n");
if (hasNormal)
WRITE(p, " mediump vec3 worldnormal = normalize((u_world * vec4(%snormal, 0.0)).xyz);\n", flipNormal ? "-" : "");
else
WRITE(p, " mediump vec3 worldnormal = vec3(0.0, 0.0, 1.0);\n");
}
} else {
static const char *rescale[4] = {"", " * 1.9921875", " * 1.999969482421875", ""}; // 2*127.5f/128.f, 2*32767.5f/32768.f, 1.0f};
const char *factor = rescale[boneWeightScale];
@ -452,6 +643,11 @@ void GenerateVertexShader(const ShaderID &id, char *buffer) {
const char *ambientStr = (matUpdate & 1) && hasColor ? "color0" : "u_matambientalpha";
const char *diffuseStr = (matUpdate & 2) && hasColor ? "color0.rgb" : "u_matdiffuse";
const char *specularStr = (matUpdate & 4) && hasColor ? "color0.rgb" : "u_matspecular.rgb";
if (doBezier || doSpline) {
ambientStr = (matUpdate & 1) && hasColor ? "col" : "u_matambientalpha";
diffuseStr = (matUpdate & 2) && hasColor ? "col.rgb" : "u_matdiffuse";
specularStr = (matUpdate & 4) && hasColor ? "col.rgb" : "u_matspecular.rgb";
}
bool diffuseIsZero = true;
bool specularIsZero = true;
@ -570,7 +766,10 @@ void GenerateVertexShader(const ShaderID &id, char *buffer) {
} else {
// Lighting doesn't affect color.
if (hasColor) {
WRITE(p, " v_color0 = color0;\n");
if (doBezier || doSpline)
WRITE(p, " v_color0 = col;\n");
else
WRITE(p, " v_color0 = color0;\n");
} else {
WRITE(p, " v_color0 = u_matambientalpha;\n");
}
@ -585,13 +784,19 @@ void GenerateVertexShader(const ShaderID &id, char *buffer) {
case GE_TEXMAP_UNKNOWN: // Not sure what this is, but Riviera uses it. Treating as coords works.
if (scaleUV) {
if (hasTexcoord) {
WRITE(p, " v_texcoord = texcoord * u_uvscaleoffset.xy;\n");
if (doBezier || doSpline)
WRITE(p, " v_texcoord = tex * u_uvscaleoffset.xy;\n");
else
WRITE(p, " v_texcoord = texcoord * u_uvscaleoffset.xy;\n");
} else {
WRITE(p, " v_texcoord = vec2(0.0);\n");
}
} else {
if (hasTexcoord) {
WRITE(p, " v_texcoord = texcoord * u_uvscaleoffset.xy + u_uvscaleoffset.zw;\n");
if (doBezier || doSpline)
WRITE(p, " v_texcoord = tex * u_uvscaleoffset.xy + u_uvscaleoffset.zw;\n");
else
WRITE(p, " v_texcoord = texcoord * u_uvscaleoffset.xy + u_uvscaleoffset.zw;\n");
} else {
WRITE(p, " v_texcoord = u_uvscaleoffset.zw;\n");
}