Run the depal (depalettization) shader path; it seems a bit broken. Add some GL state filtering.

This commit is contained in:
Henrik Rydgård 2017-12-14 17:50:40 +01:00
parent 970458a0c2
commit f99fa02ba7
12 changed files with 105 additions and 112 deletions

View File

@ -830,7 +830,10 @@ if(ANDROID)
set(nativeExtra ${nativeExtra} ${NativeAppSource})
endif()
set(THIN3D_PLATFORMS ext/native/thin3d/thin3d_gl.cpp)
set(THIN3D_PLATFORMS ext/native/thin3d/thin3d_gl.cpp
ext/native/thin3d/GLRenderManager.cpp
ext/native/thin3d/GLQueueRunner.cpp)
set(THIN3D_PLATFORMS ${THIN3D_PLATFORMS}
ext/native/thin3d/thin3d_vulkan.cpp
ext/native/thin3d/VulkanRenderManager.cpp

View File

@ -167,8 +167,8 @@ DepalShader *DepalShaderCacheGLES::GetDepalettizeShader(uint32_t clutMode, GEBuf
queries.push_back({ &depal->u_pal, "pal" });
std::vector<GLRProgram::Initializer> initializer;
initializer.push_back({ &depal->u_tex, 0 });
initializer.push_back({ &depal->u_pal, 3 });
initializer.push_back({ &depal->u_tex, 0, 0 });
initializer.push_back({ &depal->u_pal, 0, 3 });
std::vector<GLRShader *> shaders{ vertexShader_, fragShader };
@ -177,8 +177,6 @@ DepalShader *DepalShaderCacheGLES::GetDepalettizeShader(uint32_t clutMode, GEBuf
depal->program = program;
depal->fragShader = fragShader;
depal->code = buffer;
depal->a_position = 0;
depal->a_texcoord0 = 1;
cache_[id] = depal;
delete[] buffer;

View File

@ -29,8 +29,6 @@ class DepalShader {
public:
GLRProgram *program;
GLRShader *fragShader;
GLint a_position;
GLint a_texcoord0;
GLint u_tex;
GLint u_pal;
std::string code;

View File

@ -401,8 +401,14 @@ void DrawEngineGLES::DoFlush() {
gpuStats.numFlushes++;
gpuStats.numTrackedVertexArrays = (int)vai_.size();
bool textureNeedsApply = false;
if (gstate_c.IsDirty(DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS) && !gstate.isModeClear() && gstate.isTextureMapEnabled()) {
textureCache_->SetTexture();
gstate_c.Clean(DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS);
textureNeedsApply = true;
}
GEPrimitiveType prim = prevPrim_;
ApplyDrawState(prim);
VShaderID vsid;
Shader *vshader = shaderManager_->ApplyVertexShader(prim, lastVType_, &vsid);
@ -592,12 +598,17 @@ rotateVBO:
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && ((hasColor && (gstate.materialupdate & 1)) || gstate.getMaterialAmbientA() == 255) && (!gstate.isLightingEnabled() || gstate.getAmbientA() == 255);
}
if (textureNeedsApply)
textureCache_->ApplyTexture();
// Need to ApplyDrawState after ApplyTexture because depal can launch a render pass and that wrecks the state.
ApplyDrawState(prim);
ApplyDrawStateLate(false, 0);
LinkedShader *program = shaderManager_->ApplyFragmentShader(vsid, vshader, lastVType_, prim);
GLRInputLayout *inputLayout = SetupDecFmtForDraw(program, dec_->GetDecVtxFmt());
render_->BindVertexBuffer(vertexBuffer);
render_->BindInputLayout(inputLayout, (void *)(uintptr_t)vertexBufferOffset);
render_->BindInputLayout(inputLayout, vertexBufferOffset);
if (useElements) {
if (!indexBuffer) {
indexBufferOffset = (uint32_t)frameData.pushIndex->Push(decIndex, sizeof(uint16_t) * indexGen.VertexCount(), &indexBuffer);
@ -653,6 +664,11 @@ rotateVBO:
prim, vertexCount,
dec_->VertexType(), inds, GE_VTYPE_IDX_16BIT, dec_->GetDecVtxFmt(),
maxIndex, drawBuffer, numTrans, drawIndexed, &params, &result);
if (textureNeedsApply)
textureCache_->ApplyTexture();
ApplyDrawState(prim);
ApplyDrawStateLate(result.setStencil, result.stencilValue);
LinkedShader *program = shaderManager_->ApplyFragmentShader(vsid, vshader, lastVType_, prim);
@ -666,13 +682,13 @@ rotateVBO:
vertexBufferOffset = (uint32_t)frameData.pushVertex->Push(drawBuffer, maxIndex * sizeof(TransformedVertex), &vertexBuffer);
indexBufferOffset = (uint32_t)frameData.pushIndex->Push(inds, sizeof(uint16_t) * numTrans, &indexBuffer);
render_->BindVertexBuffer(vertexBuffer);
render_->BindInputLayout(softwareInputLayout_, (void *)(intptr_t)vertexBufferOffset);
render_->BindInputLayout(softwareInputLayout_, vertexBufferOffset);
render_->BindIndexBuffer(indexBuffer);
render_->DrawIndexed(glprim[prim], numTrans, GL_UNSIGNED_SHORT, (void *)(intptr_t)indexBufferOffset);
} else {
vertexBufferOffset = (uint32_t)frameData.pushVertex->Push(drawBuffer, numTrans * sizeof(TransformedVertex), &vertexBuffer);
render_->BindVertexBuffer(vertexBuffer);
render_->BindInputLayout(softwareInputLayout_, (void *)(intptr_t)vertexBufferOffset);
render_->BindInputLayout(softwareInputLayout_, vertexBufferOffset);
render_->Draw(glprim[prim], 0, numTrans);
}
} else if (result.action == SW_CLEAR) {

View File

@ -466,7 +466,7 @@ void FramebufferManagerGLES::DrawActiveTexture(float x, float y, float w, float
void *dest = drawEngineGL_->GetPushVertexBuffer()->Push(sizeof(verts), &bindOffset, &buffer);
memcpy(dest, verts, sizeof(verts));
render_->BindVertexBuffer(buffer);
render_->BindInputLayout(simple2DInputLayout_, (void *)(intptr_t)bindOffset);
render_->BindInputLayout(simple2DInputLayout_, bindOffset);
render_->Draw(GL_TRIANGLE_STRIP, 0, 4);
}

View File

@ -132,11 +132,6 @@ inline void DrawEngineGLES::ResetShaderBlending() {
void DrawEngineGLES::ApplyDrawState(int prim) {
GLRenderManager *renderManager = (GLRenderManager *)draw_->GetNativeObject(Draw::NativeObject::RENDER_MANAGER);
if (gstate_c.IsDirty(DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS) && !gstate.isModeClear() && gstate.isTextureMapEnabled()) {
textureCache_->SetTexture();
gstate_c.Clean(DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS);
}
if (!gstate_c.IsDirty(DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE)) {
// Nothing to do, let's early-out
return;
@ -341,10 +336,6 @@ void DrawEngineGLES::ApplyDrawStateLate(bool setStencil, int stencilValue) {
fboTexNeedBind_ = false;
}
// Apply the texture after the FBO tex, since it might unbind the texture.
// TODO: Could use a separate texture unit to be safer?
textureCache_->ApplyTexture();
// Apply last, once we know the alpha params of the texture.
if (gstate.isAlphaTestEnabled() || gstate.isColorTestEnabled()) {
fragmentTestCache_->BindTestTexture(2);

View File

@ -53,9 +53,14 @@ TextureCacheGLES::TextureCacheGLES(Draw::DrawContext *draw)
SetupTextureDecoder();
nextTexture_ = nullptr;
std::vector<GLRInputLayout::Entry> entries;
entries.push_back({ 0, 3, GL_FLOAT, GL_FALSE, 20, 0 });
entries.push_back({ 1, 2, GL_FLOAT, GL_FALSE, 20, 12 });
shadeInputLayout_ = render_->CreateInputLayout(entries);
}
// Destructor: deletes shadeInputLayout_ (the input layout created in the
// constructor via render_->CreateInputLayout) and then calls Clear(true).
TextureCacheGLES::~TextureCacheGLES() {
render_->DeleteInputLayout(shadeInputLayout_);
Clear(true);
}
@ -65,7 +70,7 @@ void TextureCacheGLES::SetFramebufferManager(FramebufferManagerGLES *fbManager)
}
void TextureCacheGLES::ReleaseTexture(TexCacheEntry *entry, bool delete_them) {
DEBUG_LOG(G3D, "Deleting texture %i", entry->textureName);
DEBUG_LOG(G3D, "Deleting texture %08x", entry->addr);
if (delete_them) {
if (entry->textureName) {
render_->DeleteTexture(entry->textureName);
@ -322,21 +327,11 @@ void TextureCacheGLES::Unbind() {
class TextureShaderApplier {
public:
struct Pos {
Pos(float x_, float y_, float z_) : x(x_), y(y_), z(z_) {
}
Pos() {
}
float x;
float y;
float z;
};
struct UV {
UV(float u_, float v_) : u(u_), v(v_) {
}
UV() {
}
float u;
float v;
};
@ -379,69 +374,45 @@ public:
const float top = v1 * invHalfHeight - 1.0f;
const float bottom = v2 * invHalfHeight - 1.0f;
// Points are: BL, BR, TR, TL.
pos_[0] = Pos(left, bottom, -1.0f);
pos_[1] = Pos(right, bottom, -1.0f);
pos_[2] = Pos(right, top, -1.0f);
pos_[3] = Pos(left, top, -1.0f);
pos_[0] = Pos{ left, bottom, -1.0f };
pos_[1] = Pos{ right, bottom, -1.0f };
pos_[2] = Pos{ right, top, -1.0f };
pos_[3] = Pos{ left, top, -1.0f };
// And also the UVs, same order.
const float uvleft = u1 * invWidth;
const float uvright = u2 * invWidth;
const float uvtop = v1 * invHeight;
const float uvbottom = v2 * invHeight;
uv_[0] = UV(uvleft, uvbottom);
uv_[1] = UV(uvright, uvbottom);
uv_[2] = UV(uvright, uvtop);
uv_[3] = UV(uvleft, uvtop);
uv_[0] = UV{ uvleft, uvbottom };
uv_[1] = UV{ uvright, uvbottom };
uv_[2] = UV{ uvright, uvtop };
uv_[3] = UV{ uvleft, uvtop };
}
}
void Use(GLRenderManager *render, DrawEngineGLES *transformDraw) {
void Use(GLRenderManager *render, DrawEngineGLES *transformDraw, GLRInputLayout *inputLayout) {
render->BindProgram(shader_->program);
/*
// Restore will rebind all of the state below.
if (gstate_c.Supports(GPU_SUPPORTS_VAO)) {
static const GLubyte indices[4] = { 0, 1, 3, 2 };
transformDraw->BindBuffer(pos_, sizeof(pos_), uv_, sizeof(uv_));
transformDraw->BindElementBuffer(indices, sizeof(indices));
} else {
glBindBuffer(GL_ARRAY_BUFFER, 0);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);
struct SimpleVertex {
float pos[3];
float uv[2];
};
uint32_t bindOffset;
GLRBuffer *bindBuffer;
SimpleVertex *verts = (SimpleVertex *)transformDraw->GetPushVertexBuffer()->Push(sizeof(SimpleVertex) * 4, &bindOffset, &bindBuffer);
int order[4] = { 0 ,1, 3, 2 };
for (int i = 0; i < 4; i++) {
memcpy(verts[i].pos, &pos_[order[i]], sizeof(Pos));
memcpy(verts[i].uv, &uv_[order[i]], sizeof(UV));
}
glEnableVertexAttribArray(shader_->a_position);
glEnableVertexAttribArray(shader_->a_texcoord0);
*/
render->BindVertexBuffer(bindBuffer);
render->BindInputLayout(inputLayout, bindOffset);
}
void Shade() {
void Shade(GLRenderManager *render) {
static const GLubyte indices[4] = { 0, 1, 3, 2 };
/*
glstate.blend.force(false);
glstate.colorMask.force(true, true, true, true);
glstate.scissorTest.force(false);
glstate.cullFace.force(false);
glstate.depthTest.force(false);
glstate.stencilTest.force(false);
#if !defined(USING_GLES2)
glstate.colorLogicOp.force(false);
#endif
glViewport(0, 0, renderW_, renderH_);
if (gstate_c.Supports(GPU_SUPPORTS_VAO)) {
glVertexAttribPointer(shader_->a_position, 3, GL_FLOAT, GL_FALSE, 12, 0);
glVertexAttribPointer(shader_->a_texcoord0, 2, GL_FLOAT, GL_FALSE, 8, (void *)sizeof(pos_));
glDrawElements(GL_TRIANGLE_STRIP, 4, GL_UNSIGNED_BYTE, 0);
} else {
glVertexAttribPointer(shader_->a_position, 3, GL_FLOAT, GL_FALSE, 12, pos_);
glVertexAttribPointer(shader_->a_texcoord0, 2, GL_FLOAT, GL_FALSE, 8, uv_);
glDrawElements(GL_TRIANGLE_STRIP, 4, GL_UNSIGNED_BYTE, indices);
}
glDisableVertexAttribArray(shader_->a_position);
glDisableVertexAttribArray(shader_->a_texcoord0);
glstate.Restore();
*/
render->SetViewport(GLRViewport{ 0, 0, (float)renderW_, (float)renderH_, 0.0f, 1.0f });
render->Draw(GL_TRIANGLE_STRIP, 0, 4);
}
protected:
@ -469,14 +440,13 @@ void TextureCacheGLES::ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFram
TextureShaderApplier shaderApply(depal, framebuffer->bufferWidth, framebuffer->bufferHeight, framebuffer->renderWidth, framebuffer->renderHeight);
shaderApply.ApplyBounds(gstate_c.vertBounds, gstate_c.curTextureXOffset, gstate_c.curTextureYOffset);
shaderApply.Use(render_, drawEngine_);
render_->BindTexture(3, clutTexture);
shaderApply.Use(render_, drawEngine_, shadeInputLayout_);
framebufferManagerGL_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_SKIP_COPY);
render_->BindTexture(3, clutTexture);
render_->SetTextureSampler(GL_CLAMP_TO_EDGE, GL_CLAMP_TO_EDGE, GL_NEAREST, GL_NEAREST, 0.0f);
shaderApply.Shade();
shaderApply.Shade(render_);
draw_->BindFramebufferAsTexture(depalFBO, 0, Draw::FB_COLOR_BIT, 0);
@ -497,6 +467,9 @@ void TextureCacheGLES::ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFram
SetFramebufferSamplingParams(framebuffer->bufferWidth, framebuffer->bufferHeight);
InvalidateLastTexture();
// Since we started/ended render passes, the blend, depth/stencil, raster and viewport/scissor state may need to be re-applied, so mark it dirty.
gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE);
}
ReplacedTextureFormat FromGLESFormat(GLenum fmt) {

View File

@ -101,6 +101,8 @@ private:
ShaderManagerGLES *shaderManager_;
DrawEngineGLES *drawEngine_;
GLRInputLayout *shadeInputLayout_;
enum { INVALID_TEX = -1 };
};

View File

@ -82,11 +82,9 @@ void GLQueueRunner::RunInitSteps(const std::vector<GLRInitStep> &steps) {
glBindFragDataLocation(program->program, 0, "fragColor0");
}
#elif !defined(IOS)
if (gl_extensions.GLES3) {
if (gstate_c.featureFlags & GPU_SUPPORTS_DUALSOURCE_BLEND) {
glBindFragDataLocationIndexedEXT(program->program, 0, 0, "fragColor0");
glBindFragDataLocationIndexedEXT(program->program, 0, 1, "fragColor1");
}
if (gl_extensions.GLES3 && step.create_program.support_dual_source) {
glBindFragDataLocationIndexedEXT(program->program, 0, 0, "fragColor0");
glBindFragDataLocationIndexedEXT(program->program, 0, 1, "fragColor1");
}
#endif
glLinkProgram(program->program);
@ -413,7 +411,9 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step) {
glActiveTexture(GL_TEXTURE0 + activeTexture);
int attrMask = 0;
int colorMask = -1;
int depthMask = -1;
int depthFunc = -1;
// State filtering tracking.
GLuint curArrayBuffer = (GLuint)-1;
GLuint curElemArrayBuffer = (GLuint)-1;
@ -424,8 +424,14 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step) {
case GLRRenderCommand::DEPTH:
if (c.depth.enabled) {
glEnable(GL_DEPTH_TEST);
glDepthMask(c.depth.write);
glDepthFunc(c.depth.func);
if (c.depth.write != depthMask) {
glDepthMask(c.depth.write);
depthMask = c.depth.write;
}
if (c.depth.func != depthFunc) {
glDepthFunc(c.depth.func);
depthFunc = c.depth.func;
}
} else {
glDisable(GL_DEPTH_TEST);
}
@ -438,11 +444,15 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step) {
} else {
glDisable(GL_BLEND);
}
glColorMask(c.blend.mask & 1, (c.blend.mask >> 1) & 1, (c.blend.mask >> 2) & 1, (c.blend.mask >> 3) & 1);
if (c.blend.mask != colorMask) {
glColorMask(c.blend.mask & 1, (c.blend.mask >> 1) & 1, (c.blend.mask >> 2) & 1, (c.blend.mask >> 3) & 1);
colorMask = c.blend.mask;
}
break;
case GLRRenderCommand::CLEAR:
glDisable(GL_SCISSOR_TEST);
glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
colorMask = 0xF;
if (c.clear.clearMask & GL_COLOR_BUFFER_BIT) {
float color[4];
Uint8x4ToFloat4(color, c.clear.clearColor);
@ -485,7 +495,11 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step) {
// TODO: Support FP viewports through glViewportArrays
glViewport((GLint)c.viewport.vp.x, (GLint)y, (GLsizei)c.viewport.vp.w, (GLsizei)c.viewport.vp.h);
#if !defined(USING_GLES2)
glDepthRange(c.viewport.vp.minZ, c.viewport.vp.maxZ);
#else
glDepthRangef(c.viewport.vp.minZ, c.viewport.vp.maxZ);
#endif
break;
}
case GLRRenderCommand::SCISSOR:
@ -597,8 +611,10 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step) {
}
case GLRRenderCommand::BINDPROGRAM:
{
glUseProgram(c.program.program->program);
curProgram = c.program.program;
if (curProgram != c.program.program) {
glUseProgram(c.program.program->program);
curProgram = c.program.program;
}
break;
}
case GLRRenderCommand::BIND_INPUT_LAYOUT:

View File

@ -136,7 +136,7 @@ struct GLRRenderData {
} program;
struct {
GLRInputLayout *inputLayout;
intptr_t offset;
size_t offset;
} inputLayout;
struct {
GLenum wrapS;

View File

@ -3,7 +3,9 @@
#include <thread>
#include <map>
#include <vector>
#include <string>
#include <mutex>
#include <condition_variable>
#include <cassert>
#include "gfx/gl_common.h"
@ -83,8 +85,9 @@ public:
};
// Must ONLY be called from GLQueueRunner!
// Also it's pretty slow...
int GetUniformLoc(const char *name) {
auto iter = uniformCache_.find(name);
auto iter = uniformCache_.find(std::string(name));
int loc = -1;
if (iter != uniformCache_.end()) {
loc = iter->second.loc_;
@ -232,6 +235,7 @@ public:
step.create_program.program->semantics_ = semantics;
step.create_program.program->queries_ = queries;
step.create_program.program->initialize_ = initalizers;
step.create_program.support_dual_source = supportDualSource;
_assert_msg_(G3D, shaders.size() > 0, "Can't create a program with zero shaders");
for (int i = 0; i < shaders.size(); i++) {
step.create_program.shaders[i] = shaders[i];
@ -288,15 +292,15 @@ public:
void BlitFramebuffer(GLRFramebuffer *src, GLRect2D srcRect, GLRFramebuffer *dst, GLRect2D dstRect, int aspectMask, bool filter);
// Takes ownership of data if deleteData = true.
void BufferSubdata(GLRBuffer *buffer, int offset, int size, uint8_t *data, bool deleteData = true) {
void BufferSubdata(GLRBuffer *buffer, size_t offset, size_t size, uint8_t *data, bool deleteData = true) {
// TODO: Maybe should be a render command instead of an init command? When possible it's better as
// an init command, that's for sure.
GLRInitStep step{ GLRInitStepType::BUFFER_SUBDATA };
_dbg_assert_(G3D, offset >= 0);
_dbg_assert_(G3D, offset <= buffer->size_ - size);
step.buffer_subdata.buffer = buffer;
step.buffer_subdata.offset = offset;
step.buffer_subdata.size = size;
step.buffer_subdata.offset = (int)offset;
step.buffer_subdata.size = (int)size;
step.buffer_subdata.data = data;
step.buffer_subdata.deleteData = deleteData;
initSteps_.push_back(step);
@ -366,12 +370,12 @@ public:
curRenderStep_->commands.push_back(data);
}
void BindInputLayout(GLRInputLayout *inputLayout, const void *offset) {
void BindInputLayout(GLRInputLayout *inputLayout, size_t offset) {
_dbg_assert_(G3D, curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
assert(inputLayout);
GLRRenderData data{ GLRRenderCommand::BIND_INPUT_LAYOUT };
data.inputLayout.inputLayout = inputLayout;
data.inputLayout.offset = (intptr_t)offset;
data.inputLayout.offset = offset;
curRenderStep_->commands.push_back(data);
}

View File

@ -1115,7 +1115,6 @@ void OpenGLContext::DrawIndexed(int vertexCount, int offset) {
}
void OpenGLContext::DrawUP(const void *vdata, int vertexCount) {
#if 1
int stride = curPipeline_->inputLayout->stride;
size_t dataSize = stride * vertexCount;
@ -1127,16 +1126,9 @@ void OpenGLContext::DrawUP(const void *vdata, int vertexCount) {
ApplySamplers();
renderManager_.BindVertexBuffer(buf);
renderManager_.BindInputLayout(curPipeline_->inputLayout->inputLayout_, (void *)offset);
renderManager_.BindInputLayout(curPipeline_->inputLayout->inputLayout_, offset);
renderManager_.Draw(curPipeline_->prim, 0, vertexCount);
#else
ApplySamplers();
renderManager_.BindInputLayout(curPipeline_->inputLayout->inputLayout_, (void *)vdata);
renderManager_.Draw(curPipeline_->prim, 0, vertexCount);
renderManager_.UnbindInputLayout(curPipeline_->inputLayout->inputLayout_);
#endif
}
void OpenGLContext::Clear(int mask, uint32_t colorval, float depthVal, int stencilVal) {