diff --git a/GPU/Common/FramebufferCommon.cpp b/GPU/Common/FramebufferCommon.cpp index 94072a85b2..a54e58c61e 100644 --- a/GPU/Common/FramebufferCommon.cpp +++ b/GPU/Common/FramebufferCommon.cpp @@ -208,6 +208,7 @@ void FramebufferManagerCommon::SetNumExtraFBOs(int num) { extraFBOs_.push_back(fbo); } currentRenderVfb_ = 0; + // TODO: Should probably not do this bind. if (num != 0) draw_->BindFramebufferAsRenderTarget(nullptr, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::KEEP }); } diff --git a/GPU/GLES/DrawEngineGLES.cpp b/GPU/GLES/DrawEngineGLES.cpp index be77ad5362..d05489dd92 100644 --- a/GPU/GLES/DrawEngineGLES.cpp +++ b/GPU/GLES/DrawEngineGLES.cpp @@ -115,7 +115,8 @@ enum { enum { VAI_KILL_AGE = 120, VAI_UNRELIABLE_KILL_AGE = 240, VAI_UNRELIABLE_KILL_MAX = 4 }; -DrawEngineGLES::DrawEngineGLES(Draw::DrawContext *draw) : vai_(256), draw_(draw) { +DrawEngineGLES::DrawEngineGLES(Draw::DrawContext *draw) : vai_(256), draw_(draw), inputLayoutMap_(16) { + render_ = (GLRenderManager *)draw_->GetNativeObject(Draw::NativeObject::RENDER_MANAGER); decOptions_.expandAllWeightsToFloat = false; decOptions_.expand8BitNormalsToFloat = false; @@ -178,9 +179,29 @@ void DrawEngineGLES::InitDeviceObjects() { } else { ERROR_LOG(G3D, "Device objects already initialized!"); } + + for (int i = 0; i < GLRenderManager::MAX_INFLIGHT_FRAMES; i++) { + frameData_[i].pushVertex = new GLPushBuffer(render_, 1024 * 1024); + frameData_[i].pushIndex = new GLPushBuffer(render_, 512 * 1024); + } + + int vertexSize = sizeof(TransformedVertex); + std::vector<GLRInputLayout::Entry> entries; + entries.push_back({ ATTR_POSITION, 4, GL_FLOAT, GL_FALSE, vertexSize, 0 }); + entries.push_back({ ATTR_TEXCOORD, 3, GL_FLOAT, GL_FALSE, vertexSize, offsetof(TransformedVertex, u) }); + entries.push_back({ ATTR_COLOR0, 4, GL_UNSIGNED_BYTE, GL_TRUE, vertexSize, offsetof(TransformedVertex, color0) }); + entries.push_back({ ATTR_COLOR1, 3, GL_UNSIGNED_BYTE, GL_TRUE, vertexSize, 
offsetof(TransformedVertex, color1) }); + softwareInputLayout_ = render_->CreateInputLayout(entries); } void DrawEngineGLES::DestroyDeviceObjects() { + for (int i = 0; i < GLRenderManager::MAX_INFLIGHT_FRAMES; i++) { + frameData_[i].pushVertex->Destroy(); + frameData_[i].pushIndex->Destroy(); + delete frameData_[i].pushVertex; + delete frameData_[i].pushIndex; + } + ClearTrackedVertexArrays(); if (!bufferNameCache_.empty()) { glstate.arrayBuffer.unbind(); @@ -194,6 +215,27 @@ void DrawEngineGLES::DestroyDeviceObjects() { glDeleteVertexArrays(1, &sharedVao_); } } + + render_->DeleteInputLayout(softwareInputLayout_); +} + +void DrawEngineGLES::ClearInputLayoutMap() { + inputLayoutMap_.Iterate([&](const uint32_t &key, GLRInputLayout *il) { + render_->DeleteInputLayout(il); + }); + inputLayoutMap_.Clear(); +} + +void DrawEngineGLES::BeginFrame() { + FrameData &frameData = frameData_[render_->GetCurFrame()]; + frameData.pushIndex->Begin(); + frameData.pushVertex->Begin(); +} + +void DrawEngineGLES::EndFrame() { + FrameData &frameData = frameData_[render_->GetCurFrame()]; + frameData.pushIndex->End(); + frameData.pushVertex->End(); } struct GlTypeInfo { @@ -220,24 +262,40 @@ static const GlTypeInfo GLComp[] = { {GL_UNSIGNED_SHORT, 4, GL_TRUE},// DEC_U16_4, }; -static inline void VertexAttribSetup(int attrib, int fmt, int stride, u8 *ptr) { +static inline void VertexAttribSetup(int attrib, int fmt, int stride, int offset, std::vector<GLRInputLayout::Entry> &entries) { if (fmt) { const GlTypeInfo &type = GLComp[fmt]; - glVertexAttribPointer(attrib, type.count, type.type, type.normalized, stride, ptr); + GLRInputLayout::Entry entry; + entry.offset = offset; + entry.location = attrib; + entry.normalized = type.normalized; + entry.type = type.type; + entry.stride = stride; + entry.count = type.count; + entries.push_back(entry); } } // TODO: Use VBO and get rid of the vertexData pointers - with that, we will supply only offsets -static void SetupDecFmtForDraw(LinkedShader 
*program, const DecVtxFormat &decFmt, u8 *vertexData) { - CHECK_GL_ERROR_IF_DEBUG(); - VertexAttribSetup(ATTR_W1, decFmt.w0fmt, decFmt.stride, vertexData + decFmt.w0off); - VertexAttribSetup(ATTR_W2, decFmt.w1fmt, decFmt.stride, vertexData + decFmt.w1off); - VertexAttribSetup(ATTR_TEXCOORD, decFmt.uvfmt, decFmt.stride, vertexData + decFmt.uvoff); - VertexAttribSetup(ATTR_COLOR0, decFmt.c0fmt, decFmt.stride, vertexData + decFmt.c0off); - VertexAttribSetup(ATTR_COLOR1, decFmt.c1fmt, decFmt.stride, vertexData + decFmt.c1off); - VertexAttribSetup(ATTR_NORMAL, decFmt.nrmfmt, decFmt.stride, vertexData + decFmt.nrmoff); - VertexAttribSetup(ATTR_POSITION, decFmt.posfmt, decFmt.stride, vertexData + decFmt.posoff); - CHECK_GL_ERROR_IF_DEBUG(); +GLRInputLayout *DrawEngineGLES::SetupDecFmtForDraw(LinkedShader *program, const DecVtxFormat &decFmt) { + uint32_t key = decFmt.id; + GLRInputLayout *inputLayout = inputLayoutMap_.Get(key); + if (inputLayout) { + return inputLayout; + } + + std::vector<GLRInputLayout::Entry> entries; + VertexAttribSetup(ATTR_W1, decFmt.w0fmt, decFmt.stride, decFmt.w0off, entries); + VertexAttribSetup(ATTR_W2, decFmt.w1fmt, decFmt.stride, decFmt.w1off, entries); + VertexAttribSetup(ATTR_TEXCOORD, decFmt.uvfmt, decFmt.stride, decFmt.uvoff, entries); + VertexAttribSetup(ATTR_COLOR0, decFmt.c0fmt, decFmt.stride, decFmt.c0off, entries); + VertexAttribSetup(ATTR_COLOR1, decFmt.c1fmt, decFmt.stride, decFmt.c1off, entries); + VertexAttribSetup(ATTR_NORMAL, decFmt.nrmfmt, decFmt.stride, decFmt.nrmoff, entries); + VertexAttribSetup(ATTR_POSITION, decFmt.posfmt, decFmt.stride, decFmt.posoff, entries); + + inputLayout = render_->CreateInputLayout(entries); + inputLayoutMap_.Insert(key, inputLayout); + return inputLayout; } void DrawEngineGLES::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead) { @@ -304,6 +362,17 @@ void DrawEngineGLES::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, i } } +void 
DrawEngineGLES::DecodeVertsToPushBuffer(GLPushBuffer *push, uint32_t *bindOffset, GLRBuffer **buf) { + u8 *dest = decoded; + + // Figure out how much pushbuffer space we need to allocate. + if (push) { + int vertsToDecode = ComputeNumVertsToDecode(); + dest = (u8 *)push->Push(vertsToDecode * dec_->GetDecVtxFmt().stride, bindOffset, buf); + } + DecodeVerts(dest); +} + void DrawEngineGLES::MarkUnreliable(VertexArrayInfo *vai) { vai->status = VertexArrayInfo::VAI_UNRELIABLE; if (vai->vbo) { @@ -430,18 +499,23 @@ void DrawEngineGLES::FreeVertexArray(VertexArrayInfo *vai) { void DrawEngineGLES::DoFlush() { PROFILE_THIS_SCOPE("flush"); - CHECK_GL_ERROR_IF_DEBUG(); + FrameData &frameData = frameData_[render_->GetCurFrame()]; + gpuStats.numFlushes++; gpuStats.numTrackedVertexArrays = (int)vai_.size(); GEPrimitiveType prim = prevPrim_; ApplyDrawState(prim); - CHECK_GL_ERROR_IF_DEBUG(); VShaderID vsid; Shader *vshader = shaderManager_->ApplyVertexShader(prim, lastVType_, &vsid); + GLRBuffer *vertexBuffer = nullptr; + GLRBuffer *indexBuffer = nullptr; + uint32_t vertexBufferOffset = 0; + uint32_t indexBufferOffset = 0; + if (vshader->UseHWTransform()) { GLuint vbo = 0, ebo = 0; int vertexCount = 0; @@ -453,6 +527,9 @@ void DrawEngineGLES::DoFlush() { if (g_Config.bSoftwareSkinning && (lastVType_ & GE_VTYPE_WEIGHT_MASK)) useCache = false; + // TEMPORARY + useCache = false; + if (useCache) { u32 id = dcid_ ^ gstate.getUVGenMode(); // This can have an effect on which UV decoder we need to use! And hence what the decoded data will look like. 
See #9263 VertexArrayInfo *vai = vai_.Get(id); @@ -597,7 +674,7 @@ void DrawEngineGLES::DoFlush() { vai->lastFrame = gpuStats.numFlips; } else { - DecodeVerts(decoded); + DecodeVertsToPushBuffer(frameData.pushVertex, &vertexBufferOffset, &vertexBuffer); rotateVBO: gpuStats.numUncachedVertsDrawn += indexGen.VertexCount(); @@ -606,9 +683,6 @@ rotateVBO: if (!useElements && indexGen.PureCount()) { vertexCount = indexGen.PureCount(); } - glstate.arrayBuffer.unbind(); - glstate.elementArrayBuffer.unbind(); - prim = indexGen.Prim(); } @@ -630,16 +704,21 @@ rotateVBO: } LinkedShader *program = shaderManager_->ApplyFragmentShader(vsid, vshader, lastVType_, prim); - SetupDecFmtForDraw(program, dec_->GetDecVtxFmt(), vbo ? 0 : decoded); - + GLRInputLayout *inputLayout = SetupDecFmtForDraw(program, dec_->GetDecVtxFmt()); + render_->BindVertexBuffer(vertexBuffer); + render_->BindInputLayout(inputLayout, (void *)(uintptr_t)vertexBufferOffset); if (useElements) { + if (!indexBuffer) { + indexBufferOffset = (uint32_t)frameData.pushIndex->Push(decIndex, sizeof(uint16_t) * indexGen.VertexCount(), &indexBuffer); + render_->BindIndexBuffer(indexBuffer); + } if (gstate_c.bezier || gstate_c.spline) // Instanced rendering for instanced tessellation - glDrawElementsInstanced(glprim[prim], vertexCount, GL_UNSIGNED_SHORT, ebo ? 0 : (GLvoid*)decIndex, numPatches); + ; // glDrawElementsInstanced(glprim[prim], vertexCount, GL_UNSIGNED_SHORT, (GLvoid*)(intptr_t)indexBufferOffset, numPatches); else - glDrawElements(glprim[prim], vertexCount, GL_UNSIGNED_SHORT, ebo ? 
0 : (GLvoid*)decIndex); + render_->DrawIndexed(glprim[prim], vertexCount, GL_UNSIGNED_SHORT, (GLvoid*)(intptr_t)indexBufferOffset); } else { - glDrawArrays(glprim[prim], 0, vertexCount); + render_->Draw(glprim[prim], 0, vertexCount); } } else { DecodeVerts(decoded); @@ -695,28 +774,18 @@ rotateVBO: bool doTextureProjection = gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_MATRIX; - const uint8_t *bufferStart = (const uint8_t *)drawBuffer; - if (gstate_c.Supports(GPU_SUPPORTS_VAO)) { - bufferStart = 0; - BindBuffer(drawBuffer, vertexSize * maxIndex); - if (drawIndexed) { - BindElementBuffer(inds, sizeof(short) * numTrans); - inds = 0; - } - } else { - glstate.arrayBuffer.unbind(); - glstate.elementArrayBuffer.unbind(); - } - - glVertexAttribPointer(ATTR_POSITION, 4, GL_FLOAT, GL_FALSE, vertexSize, bufferStart); - int attrMask = program->attrMask; - if (attrMask & (1 << ATTR_TEXCOORD)) glVertexAttribPointer(ATTR_TEXCOORD, doTextureProjection ? 3 : 2, GL_FLOAT, GL_FALSE, vertexSize, bufferStart + offsetof(TransformedVertex, u)); - if (attrMask & (1 << ATTR_COLOR0)) glVertexAttribPointer(ATTR_COLOR0, 4, GL_UNSIGNED_BYTE, GL_TRUE, vertexSize, bufferStart + offsetof(TransformedVertex, color0)); - if (attrMask & (1 << ATTR_COLOR1)) glVertexAttribPointer(ATTR_COLOR1, 3, GL_UNSIGNED_BYTE, GL_TRUE, vertexSize, bufferStart + offsetof(TransformedVertex, color1)); if (drawIndexed) { - glDrawElements(glprim[prim], numTrans, GL_UNSIGNED_SHORT, inds); + vertexBufferOffset = (uint32_t)frameData.pushVertex->Push(drawBuffer, maxIndex * sizeof(TransformedVertex), &vertexBuffer); + indexBufferOffset = (uint32_t)frameData.pushIndex->Push(inds, sizeof(uint16_t) * numTrans, &indexBuffer); // Upload inds/numTrans, as the removed BindElementBuffer call did. + render_->BindIndexBuffer(indexBuffer); + render_->BindVertexBuffer(vertexBuffer); + render_->BindInputLayout(softwareInputLayout_, (void *)(intptr_t)vertexBufferOffset); + render_->DrawIndexed(glprim[prim], numTrans, GL_UNSIGNED_SHORT, (GLvoid*)(intptr_t)indexBufferOffset); } else { - glDrawArrays(glprim[prim], 0, 
numTrans); + vertexBufferOffset = (uint32_t)frameData.pushVertex->Push(drawBuffer, numTrans * sizeof(TransformedVertex), &vertexBuffer); + render_->BindVertexBuffer(vertexBuffer); + render_->BindInputLayout(softwareInputLayout_, (void *)(intptr_t)vertexBufferOffset); + render_->Draw(glprim[prim], 0, numTrans); } } else if (result.action == SW_CLEAR) { u32 clearColor = result.color; @@ -735,26 +804,12 @@ rotateVBO: framebufferManager_->SetDepthUpdated(); } - // Note that scissor may still apply while clearing. Turn off other tests for the clear. - glstate.stencilTest.disable(); - glstate.stencilMask.set(0xFF); - glstate.depthTest.disable(); - GLbitfield target = 0; if (colorMask || alphaMask) target |= GL_COLOR_BUFFER_BIT; if (alphaMask) target |= GL_STENCIL_BUFFER_BIT; if (depthMask) target |= GL_DEPTH_BUFFER_BIT; - glstate.colorMask.set(colorMask, colorMask, colorMask, alphaMask); - glClearColor(col[0], col[1], col[2], col[3]); -#ifdef USING_GLES2 - glClearDepthf(clearDepth); -#else - glClearDepth(clearDepth); -#endif - // Stencil takes alpha. - glClearStencil(clearColor >> 24); - glClear(target); + render_->Clear(clearColor, clearDepth, clearColor >> 24, target); framebufferManager_->SetColorUpdated(gstate_c.skipDrawReason); int scissorX1 = gstate.getScissorX1(); @@ -766,6 +821,7 @@ rotateVBO: if (g_Config.bBlockTransferGPU && (gstate_c.featureFlags & GPU_USE_CLEAR_RAM_HACK) && colorMask && (alphaMask || gstate.FrameBufFormat() == GE_FORMAT_565)) { framebufferManager_->ApplyClearToMemory(scissorX1, scissorY1, scissorX2, scissorY2, clearColor); } + gstate_c.Dirty(DIRTY_BLEND_STATE); // Make sure the color mask gets re-applied. 
} } diff --git a/GPU/GLES/DrawEngineGLES.h b/GPU/GLES/DrawEngineGLES.h index f50b0ffb35..ce2b51d025 100644 --- a/GPU/GLES/DrawEngineGLES.h +++ b/GPU/GLES/DrawEngineGLES.h @@ -28,6 +28,7 @@ #include "GPU/Common/GPUStateUtils.h" #include "GPU/GLES/FragmentShaderGeneratorGLES.h" #include "gfx/gl_common.h" +#include "thin3d/GLRenderManager.h" class LinkedShader; class ShaderManagerGLES; @@ -126,6 +127,10 @@ public: void ClearTrackedVertexArrays() override; void DecimateTrackedVertexArrays(); + void BeginFrame(); + void EndFrame(); + + // So that this can be inlined void Flush() { if (!numDrawCalls) @@ -151,6 +156,8 @@ public: GLuint BindElementBuffer(const void *p, size_t sz); void DecimateBuffers(); + void ClearInputLayoutMap(); + private: void InitDeviceObjects(); void DestroyDeviceObjects(); @@ -160,14 +167,28 @@ private: void ApplyDrawStateLate(); void ResetShaderBlending(); + GLRInputLayout *SetupDecFmtForDraw(LinkedShader *program, const DecVtxFormat &decFmt); + + void DecodeVertsToPushBuffer(GLPushBuffer *push, uint32_t *bindOffset, GLRBuffer **buf); + GLuint AllocateBuffer(size_t sz); void FreeBuffer(GLuint buf); void FreeVertexArray(VertexArrayInfo *vai); void MarkUnreliable(VertexArrayInfo *vai); + struct FrameData { + GLPushBuffer *pushVertex; + GLPushBuffer *pushIndex; + }; + FrameData frameData_[GLRenderManager::MAX_INFLIGHT_FRAMES]; + PrehashMap<VertexArrayInfo *, nullptr> vai_; + DenseHashMap<uint32_t, GLRInputLayout *, nullptr> inputLayoutMap_; + + GLRInputLayout *softwareInputLayout_ = nullptr; + // Vertex buffer objects // Element buffer objects struct BufferNameInfo { @@ -177,6 +198,7 @@ private: bool used; int lastFrame; }; + GLRenderManager *render_; std::vector<GLuint> bufferNameCache_; std::multimap<size_t, GLuint> freeSizedBuffers_; std::unordered_map<GLuint, BufferNameInfo> bufferNameInfo_; diff --git a/GPU/GLES/FramebufferManagerGLES.cpp b/GPU/GLES/FramebufferManagerGLES.cpp index 20eb4aaf8c..bcda525e8a 100644 --- 
a/GPU/GLES/FramebufferManagerGLES.cpp +++ b/GPU/GLES/FramebufferManagerGLES.cpp @@ -82,6 +82,7 @@ const int MAX_PBO = 2; void ConvertFromRGBA8888(u8 *dst, const u8 *src, u32 dstStride, u32 srcStride, u32 width, u32 height, GEBufferFormat format); void FramebufferManagerGLES::DisableState() { + /* glstate.blend.disable(); glstate.cullFace.disable(); glstate.depthTest.disable(); @@ -92,7 +93,7 @@ void FramebufferManagerGLES::DisableState() { #endif glstate.colorMask.set(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); glstate.stencilMask.set(0xFF); - + */ gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_RASTER_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_VIEWPORTSCISSOR_STATE); } @@ -247,6 +248,7 @@ FramebufferManagerGLES::FramebufferManagerGLES(Draw::DrawContext *draw) : needBackBufferYSwap_ = true; needGLESRebinds_ = true; CreateDeviceObjects(); + render_ = (GLRenderManager *)draw_->GetNativeObject(Draw::NativeObject::RENDER_MANAGER); } void FramebufferManagerGLES::Init() { diff --git a/GPU/GLES/FramebufferManagerGLES.h b/GPU/GLES/FramebufferManagerGLES.h index 94c87ae982..d7686a0c96 100644 --- a/GPU/GLES/FramebufferManagerGLES.h +++ b/GPU/GLES/FramebufferManagerGLES.h @@ -29,6 +29,7 @@ #include "Core/Config.h" #include "GPU/GPUCommon.h" #include "GPU/Common/FramebufferCommon.h" +#include "thin3d/GLRenderManager.h" struct GLSLProgram; class TextureCacheGLES; @@ -110,6 +111,8 @@ private: void PackFramebufferSync_(VirtualFramebuffer *vfb, int x, int y, int w, int h) override; void PackDepthbuffer(VirtualFramebuffer *vfb, int x, int y, int w, int h); + GLRenderManager *render_; + // Used by DrawPixels unsigned int drawPixelsTex_; GEBufferFormat drawPixelsTexFormat_; diff --git a/GPU/GLES/GPU_GLES.cpp b/GPU/GLES/GPU_GLES.cpp index 78528e7a19..beb5fbbfc4 100644 --- a/GPU/GLES/GPU_GLES.cpp +++ b/GPU/GLES/GPU_GLES.cpp @@ -403,6 +403,10 @@ void GPU_GLES::BuildReportingInfo() { void GPU_GLES::DeviceLost() { ILOG("GPU_GLES: DeviceLost"); + // Simply drop all caches and textures. 
+ // FBOs appear to survive? Or no? + // TransformDraw has registered as a GfxResourceHolder. + drawEngine_.ClearInputLayoutMap(); shaderManagerGL_->ClearCache(false); textureCacheGL_->Clear(false); fragmentTestCache_.Clear(false); @@ -451,6 +455,12 @@ void GPU_GLES::BeginHostFrame() { shaderManagerGL_->DirtyShader(); textureCacheGL_->NotifyConfigChanged(); } + + drawEngine_.BeginFrame(); +} + +void GPU_GLES::EndHostFrame() { + drawEngine_.EndFrame(); } inline void GPU_GLES::UpdateVsyncInterval(bool force) { diff --git a/GPU/GLES/GPU_GLES.h b/GPU/GLES/GPU_GLES.h index f99fe6d3e3..4378f166a7 100644 --- a/GPU/GLES/GPU_GLES.h +++ b/GPU/GLES/GPU_GLES.h @@ -80,6 +80,7 @@ public: std::string DebugGetShaderString(std::string id, DebugShaderType shader, DebugShaderStringType stringType) override; void BeginHostFrame() override; + void EndHostFrame() override; protected: void FastRunLoop(DisplayList &list) override; diff --git a/GPU/GLES/ShaderManagerGLES.cpp b/GPU/GLES/ShaderManagerGLES.cpp index b2a54a3d42..08b9072feb 100644 --- a/GPU/GLES/ShaderManagerGLES.cpp +++ b/GPU/GLES/ShaderManagerGLES.cpp @@ -166,21 +166,15 @@ LinkedShader::LinkedShader(GLRenderManager *render, VShaderID VSID, Shader *vs, attrMask = vs->GetAttrMask(); availableUniforms = vs->GetUniformMask() | fs->GetUniformMask(); - program = render->CreateProgram(shaders, semantics, queries, gstate_c.featureFlags & GPU_SUPPORTS_DUALSOURCE_BLEND); + std::vector<GLRProgram::Initializer> initialize; + initialize.push_back({ &u_tex, 0, 0 }); + initialize.push_back({ &u_fbotex, 0, 1 }); + initialize.push_back({ &u_testtex, 0, 2 }); + initialize.push_back({ &u_tess_pos_tex, 0, 4 }); // Texture unit 4 + initialize.push_back({ &u_tess_tex_tex, 0, 5 }); // Texture unit 5 + initialize.push_back({ &u_tess_col_tex, 0, 6 }); // Texture unit 6 - render->BindProgram(program); - - // Default uniform values - render->SetUniformI1(&u_tex, 0); - render->SetUniformI1(&u_fbotex, 1); - render->SetUniformI1(&u_fbotex, 2); - - if 
(u_tess_pos_tex != -1) - render->SetUniformI1(&u_tess_pos_tex, 4); // Texture unit 4 - if (u_tess_tex_tex != -1) - render->SetUniformI1(&u_tess_tex_tex, 5); // Texture unit 5 - if (u_tess_col_tex != -1) - render->SetUniformI1(&u_tess_col_tex, 6); // Texture unit 6 + program = render->CreateProgram(shaders, semantics, queries, initialize, gstate_c.featureFlags & GPU_SUPPORTS_DUALSOURCE_BLEND); // The rest, use the "dirty" mechanism. dirtyUniforms = DIRTY_ALL_UNIFORMS; diff --git a/GPU/GLES/StateMappingGLES.cpp b/GPU/GLES/StateMappingGLES.cpp index f810087770..54acebcb1c 100644 --- a/GPU/GLES/StateMappingGLES.cpp +++ b/GPU/GLES/StateMappingGLES.cpp @@ -230,10 +230,14 @@ void DrawEngineGLES::ApplyDrawState(int prim) { } #endif int mask = (int)rmask | ((int)gmask << 1) | ((int)bmask << 2) | ((int)amask << 3); - renderManager->SetBlendAndMask(mask, blendState.enabled, - glBlendFactorLookup[(size_t)blendState.srcColor], glBlendFactorLookup[(size_t)blendState.dstColor], - glBlendFactorLookup[(size_t)blendState.srcAlpha], glBlendFactorLookup[(size_t)blendState.dstAlpha], - glBlendEqLookup[(size_t)blendState.eqColor], glBlendEqLookup[(size_t)blendState.eqAlpha]); + if (blendState.enabled) { + renderManager->SetBlendAndMask(mask, blendState.enabled, + glBlendFactorLookup[(size_t)blendState.srcColor], glBlendFactorLookup[(size_t)blendState.dstColor], + glBlendFactorLookup[(size_t)blendState.srcAlpha], glBlendFactorLookup[(size_t)blendState.dstAlpha], + glBlendEqLookup[(size_t)blendState.eqColor], glBlendEqLookup[(size_t)blendState.eqAlpha]); + } else { + renderManager->SetNoBlendAndMask(mask); + } #ifndef USING_GLES2 if (gstate_c.Supports(GPU_SUPPORTS_LOGIC_OP)) { @@ -288,9 +292,13 @@ void DrawEngineGLES::ApplyDrawState(int prim) { GenericStencilFuncState stencilState; ConvertStencilFuncState(stencilState); // Stencil Test - renderManager->SetStencil(stencilState.enabled, compareOps[stencilState.testFunc], - stencilOps[stencilState.sFail], stencilOps[stencilState.zFail], 
stencilOps[stencilState.zPass], - stencilState.writeMask, stencilState.testMask, stencilState.testRef); + if (stencilState.enabled) { + renderManager->SetStencil(stencilState.enabled, compareOps[stencilState.testFunc], + stencilOps[stencilState.sFail], stencilOps[stencilState.zFail], stencilOps[stencilState.zPass], + stencilState.writeMask, stencilState.testMask, stencilState.testRef); + } else { + renderManager->SetStencilDisabled(); + } } } diff --git a/ext/native/thin3d/GLQueueRunner.cpp b/ext/native/thin3d/GLQueueRunner.cpp index c186ea065d..0ddc148c0d 100644 --- a/ext/native/thin3d/GLQueueRunner.cpp +++ b/ext/native/thin3d/GLQueueRunner.cpp @@ -70,7 +70,6 @@ void GLQueueRunner::RunInitSteps(const std::vector<GLRInitStep> &steps) { } } #endif - glLinkProgram(program->program); GLint linkStatus = GL_FALSE; @@ -111,8 +110,17 @@ void GLQueueRunner::RunInitSteps(const std::vector<GLRInitStep> &steps) { *x.dest = glGetUniformLocation(program->program, x.name); } - // Here we could (using glGetAttribLocation) save a bitmask about which pieces of vertex data are used in the shader - // and then AND it with the vertex format bitmask later... + // Run initializers. 
+ for (int i = 0; i < program->initialize_.size(); i++) { + auto &init = program->initialize_[i]; + GLint uniform = *init.uniform; + if (uniform != -1) { + switch (init.type) { + case 0: + glUniform1i(uniform, init.value); + } + } + } } break; case GLRInitStepType::CREATE_SHADER: @@ -240,8 +248,11 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step) { } else { glDisable(GL_BLEND); } + glColorMask(c.blend.mask & 1, (c.blend.mask >> 1) & 1, (c.blend.mask >> 2) & 1, (c.blend.mask >> 3) & 1); break; case GLRRenderCommand::CLEAR: + glDisable(GL_SCISSOR_TEST); + glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); if (c.clear.clearMask & GL_COLOR_BUFFER_BIT) { float color[4]; Uint8x4ToFloat4(color, c.clear.clearColor); @@ -258,6 +269,7 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step) { glClearStencil(c.clear.clearStencil); } glClear(c.clear.clearMask); + glEnable(GL_SCISSOR_TEST); break; case GLRRenderCommand::BLENDCOLOR: glBlendColor(c.blendColor.color[0], c.blendColor.color[1], c.blendColor.color[2], c.blendColor.color[3]); @@ -376,6 +388,12 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step) { glBindBuffer(GL_ARRAY_BUFFER, buf); break; } + case GLRRenderCommand::BIND_INDEX_BUFFER: + { + GLuint buf = c.bind_buffer.buffer ? 
c.bind_buffer.buffer->buffer : 0; + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buf); + break; + } case GLRRenderCommand::GENMIPS: glGenerateMipmap(GL_TEXTURE_2D); break; diff --git a/ext/native/thin3d/GLQueueRunner.h b/ext/native/thin3d/GLQueueRunner.h index f3d5c7df17..810027d461 100644 --- a/ext/native/thin3d/GLQueueRunner.h +++ b/ext/native/thin3d/GLQueueRunner.h @@ -44,6 +44,7 @@ enum class GLRRenderCommand : uint8_t { BIND_FB_TEXTURE, BIND_INPUT_LAYOUT, BIND_VERTEX_BUFFER, + BIND_INDEX_BUFFER, GENMIPS, DRAW, DRAW_INDEXED, diff --git a/ext/native/thin3d/GLRenderManager.cpp b/ext/native/thin3d/GLRenderManager.cpp index 226aed1458..1c05cc4577 100644 --- a/ext/native/thin3d/GLRenderManager.cpp +++ b/ext/native/thin3d/GLRenderManager.cpp @@ -352,6 +352,14 @@ GLPushBuffer::~GLPushBuffer() { assert(buffers_.empty()); } +void GLPushBuffer::Map() { + assert(!writePtr_); + // TODO: Even a good old glMapBuffer could actually work well here. + // VkResult res = vkMapMemory(device_, buffers_[buf_].deviceMemory, 0, size_, 0, (void **)(&writePtr_)); + writePtr_ = buffers_[buf_].deviceMemory; + assert(writePtr_); +} + void GLPushBuffer::Unmap() { assert(writePtr_); // Here we should simply upload everything to the buffers. 
diff --git a/ext/native/thin3d/GLRenderManager.h b/ext/native/thin3d/GLRenderManager.h index cd77d12152..a3ee244b9f 100644 --- a/ext/native/thin3d/GLRenderManager.h +++ b/ext/native/thin3d/GLRenderManager.h @@ -89,9 +89,16 @@ public: const char *name; }; + struct Initializer { + GLint *uniform; + int type; // 0 = set with glUniform1i; the queue runner ignores other values. + int value; // Value to upload, e.g. the texture unit for a sampler. + }; + GLuint program = 0; std::vector<Semantic> semantics_; std::vector<UniformLocQuery> queries_; + std::vector<Initializer> initialize_; struct UniformInfo { int loc_; @@ -222,12 +229,15 @@ public: return step.create_shader.shader; } - GLRProgram *CreateProgram(std::vector<GLRShader *> shaders, std::vector<GLRProgram::Semantic> semantics, std::vector<GLRProgram::UniformLocQuery> queries, bool supportDualSource) { + GLRProgram *CreateProgram( + std::vector<GLRShader *> shaders, std::vector<GLRProgram::Semantic> semantics, std::vector<GLRProgram::UniformLocQuery> queries, + std::vector<GLRProgram::Initializer> initializers, bool supportDualSource) { GLRInitStep step{ GLRInitStepType::CREATE_PROGRAM }; assert(shaders.size() <= ARRAY_SIZE(step.create_program.shaders)); step.create_program.program = new GLRProgram(); step.create_program.program->semantics_ = semantics; step.create_program.program->queries_ = queries; + step.create_program.program->initialize_ = initializers; for (int i = 0; i < shaders.size(); i++) { step.create_program.shaders[i] = shaders[i]; } @@ -327,6 +337,13 @@ public: curRenderStep_->commands.push_back(data); } + void BindIndexBuffer(GLRBuffer *buffer) { // Binds the buffer only; the byte offset into it is passed to DrawIndexed instead. 
+ _dbg_assert_(G3D, curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); + GLRRenderData data{ GLRRenderCommand::BIND_INDEX_BUFFER}; + data.bind_buffer.buffer = buffer; + curRenderStep_->commands.push_back(data); + } + void BindInputLayout(GLRInputLayout *inputLayout, const void *offset) { _dbg_assert_(G3D, curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); assert(inputLayout); @@ -403,7 +420,7 @@ public: void SetUniformM4x4(GLint *loc, const float *udata) { _dbg_assert_(G3D, curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); - GLRRenderData data{ GLRRenderCommand::UNIFORM4F }; + GLRRenderData data{ GLRRenderCommand::UNIFORMMATRIX }; data.uniformMatrix4.loc = loc; memcpy(data.uniformMatrix4.m, udata, sizeof(float) * 16); curRenderStep_->commands.push_back(data); @@ -442,6 +459,7 @@ public: void SetStencil(bool enabled, GLenum func, GLenum sFail, GLenum zFail, GLenum pass, uint8_t writeMask, uint8_t compareMask, uint8_t refValue) { _dbg_assert_(G3D, curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); GLRRenderData data{ GLRRenderCommand::STENCIL }; + data.stencil.enabled = enabled; data.stencil.func = func; data.stencil.sFail = sFail; data.stencil.zFail = zFail; @@ -452,6 +470,14 @@ public: curRenderStep_->commands.push_back(data); } + void SetStencilDisabled() { + _dbg_assert_(G3D, curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); + GLRRenderData data; + data.cmd = GLRRenderCommand::STENCIL; + data.stencil.enabled = false; + curRenderStep_->commands.push_back(data); + } + void SetBlendFactor(const float color[4]) { _dbg_assert_(G3D, curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); GLRRenderData data{ GLRRenderCommand::BLENDCOLOR }; @@ -613,13 +639,7 @@ public: Unmap(); } - void Map() { - assert(!writePtr_); - // VkResult res = vkMapMemory(device_, buffers_[buf_].deviceMemory, 0, size_, 0, (void **)(&writePtr_)); - writePtr_ = buffers_[buf_].deviceMemory; - 
assert(writePtr_); - } - + void Map(); void Unmap(); // When using the returned memory, make sure to bind the returned vkbuf. diff --git a/ext/native/thin3d/thin3d_gl.cpp b/ext/native/thin3d/thin3d_gl.cpp index 6ba2d06b38..1331a34f2d 100644 --- a/ext/native/thin3d/thin3d_gl.cpp +++ b/ext/native/thin3d/thin3d_gl.cpp @@ -1099,7 +1099,8 @@ bool OpenGLPipeline::LinkShaders() { semantics.push_back({ SEM_TANGENT, "Tangent" }); semantics.push_back({ SEM_BINORMAL, "Binormal" }); std::vector<GLRProgram::UniformLocQuery> queries; - program_ = render_->CreateProgram(linkShaders, semantics, queries, false); + std::vector<GLRProgram::Initializer> initialize; + program_ = render_->CreateProgram(linkShaders, semantics, queries, initialize, false); return true; }