diff --git a/Core/HLE/sceDisplay.cpp b/Core/HLE/sceDisplay.cpp index 2f31ccd402..94e32b2e7c 100644 --- a/Core/HLE/sceDisplay.cpp +++ b/Core/HLE/sceDisplay.cpp @@ -16,6 +16,7 @@ // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. #include +#include // TODO: Move the relevant parts into common. Don't want the core // to be dependent on "native", I think. Or maybe should get rid of common @@ -144,32 +145,44 @@ void hleEnterVblank(u64 userdata, int cyclesLate) gpuStats.numFrames++; - // This doesn't work very well yet. PPGe is probably not a great choice to do custom overlays - // as we're not really sure which framebuffer it will end up in at this point. - if (false && g_Config.bShowDebugStats) - { - char stats[512]; - sprintf(stats, - "Frames: %i\n" - "Draw calls: %i\n" - "Textures loaded: %i\n", - gpuStats.numFrames, - gpuStats.numDrawCalls, - TextureCache_NumLoadedTextures()); - /* - PPGeBegin(); - PPGeDrawText(stats, 2, 2, 0, 0.3f, 0x90000000); - PPGeDrawText(stats, 0, 0, 0, 0.3f); - PPGeEnd(); - */ - gpuStats.resetFrame(); - } - // Yeah, this has to be the right moment to end the frame. Give the graphics backend opportunity // to blit the framebuffer, in order to support half-framerate games that otherwise wouldn't have // anything to draw here. gpu->CopyDisplayToOutput(); + // Now we can subvert the Ge engine in order to draw custom overlays like stat counters etc. + // Here we will be drawing to the non buffered front surface. 
+ if (g_Config.bShowDebugStats) + { + gpu->UpdateStats(); + char stats[512]; + sprintf(stats, + "Frames: %i\n" + "Draw calls: %i\n" + "Vertices Transformed: %i\n" + "Textures active: %i\n" + "Vertex shaders loaded: %i\n" + "Fragment shaders loaded: %i\n" + "Combined shaders loaded: %i\n", + gpuStats.numFrames, + gpuStats.numDrawCalls, + gpuStats.numVertsTransformed, + gpuStats.numTextures, + gpuStats.numVertexShaders, + gpuStats.numFragmentShaders, + gpuStats.numShaders + ); + + float zoom = 0.7f * sqrtf(g_Config.iWindowZoom); + PPGeBegin(); + PPGeDrawText(stats, 2, 2, 0, zoom, 0x90000000); + PPGeDrawText(stats, 0, 0, 0, zoom); + PPGeEnd(); + + gpuStats.resetFrame(); + } + + host->EndFrame(); #ifdef _WIN32 @@ -187,7 +200,6 @@ void hleEnterVblank(u64 userdata, int cyclesLate) } #endif - host->BeginFrame(); gpu->BeginFrame(); diff --git a/Core/Util/PPGeDraw.cpp b/Core/Util/PPGeDraw.cpp index 90adb48fda..8957d6951f 100644 --- a/Core/Util/PPGeDraw.cpp +++ b/Core/Util/PPGeDraw.cpp @@ -25,6 +25,7 @@ #include "../MemMap.h" #include "image/zim_load.h" #include "gfx/texture_atlas.h" +#include "gfx/gl_common.h" #include "../System.h" static u32 atlasPtr; @@ -213,7 +214,6 @@ void PPGeEnd() gpu->EnableInterrupts(true); sceGeRestoreContext(savedContextPtr); } - } static void PPGeMeasureText(const char *text, float scale, float *w, float *h) { diff --git a/Core/Util/PPGeDraw.h b/Core/Util/PPGeDraw.h index 3ad719fe0e..ef2644fac5 100644 --- a/Core/Util/PPGeDraw.h +++ b/Core/Util/PPGeDraw.h @@ -36,6 +36,11 @@ void __PPGeShutdown(); void PPGeBegin(); void PPGeEnd(); + +// If you want to draw using this texture but not go through the PSP GE emulation, +// just call this. Will bind the texture to unit 0. 
+void PPGeBindTexture(); + enum { PPGE_ALIGN_LEFT = 0, PPGE_ALIGN_RIGHT = 16, diff --git a/GPU/GLES/DisplayListInterpreter.cpp b/GPU/GLES/DisplayListInterpreter.cpp index 64d0ce8d99..1bc83d7a4e 100644 --- a/GPU/GLES/DisplayListInterpreter.cpp +++ b/GPU/GLES/DisplayListInterpreter.cpp @@ -49,8 +49,8 @@ GLES_GPU::GLES_GPU(int renderWidth, int renderHeight) renderHeight_(renderHeight), dlIdGenerator(1) { - widthFactor_ = (float)renderWidth / 480.0f; - heightFactor_ = (float)renderHeight / 272.0f; + renderWidthFactor_ = (float)renderWidth / 480.0f; + renderHeightFactor_ = (float)renderHeight / 272.0f; // Sanity check gstate if ((int *)&gstate.transferstart - (int *)&gstate != 0xEA) { @@ -127,6 +127,7 @@ void GLES_GPU::CopyDisplayToOutput() glstate.blend.disable(); glstate.cullFace.disable(); glstate.depthTest.disable(); + glstate.scissorTest.disable(); fbo_bind_color_as_texture(vfb->fbo, 0); @@ -192,7 +193,7 @@ void GLES_GPU::SetRenderFrameBuffer() vfb->width = drawing_width; vfb->height = drawing_height; vfb->format = fmt; - vfb->fbo = fbo_create(vfb->width * widthFactor_, vfb->height * heightFactor_, 1, true); + vfb->fbo = fbo_create(vfb->width * renderWidthFactor_, vfb->height * renderHeightFactor_, 1, true); vfbs_.push_back(vfb); fbo_bind_as_render_target(vfb->fbo); glViewport(0, 0, renderWidth_, renderHeight_); @@ -275,7 +276,7 @@ void GLES_GPU::DrawSync(int mode) } // Just to get something on the screen, we'll just not subdivide correctly. 
-void drawBezier(int ucount, int vcount) +void GLES_GPU::DrawBezier(int ucount, int vcount) { u16 indices[3 * 3 * 6]; float customUV[32]; @@ -371,7 +372,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) void *inds = 0; if ((gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) inds = Memory::GetPointer(gstate_c.indexAddr); - TransformAndDrawPrim(verts, inds, type, count, linkedShader); + TransformAndDrawPrim(verts, inds, type, count, linkedShader, 0, -1); } break; @@ -380,7 +381,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) { int bz_ucount = data & 0xFF; int bz_vcount = (data >> 8) & 0xFF; - drawBezier(bz_ucount, bz_vcount); + DrawBezier(bz_ucount, bz_vcount); DEBUG_LOG(G3D,"DL DRAW BEZIER: %i x %i", bz_ucount, bz_vcount); } break; @@ -1082,6 +1083,14 @@ bool GLES_GPU::InterpretList() return true; } +void GLES_GPU::UpdateStats() +{ + gpuStats.numVertexShaders = shaderManager.NumVertexShaders(); + gpuStats.numFragmentShaders = shaderManager.NumFragmentShaders(); + gpuStats.numShaders = shaderManager.NumPrograms(); + gpuStats.numTextures = TextureCache_NumLoadedTextures(); +} + void GLES_GPU::DoBlockTransfer() { diff --git a/GPU/GLES/DisplayListInterpreter.h b/GPU/GLES/DisplayListInterpreter.h index 969babf5ec..7bdf1af1d7 100644 --- a/GPU/GLES/DisplayListInterpreter.h +++ b/GPU/GLES/DisplayListInterpreter.h @@ -44,8 +44,12 @@ public: virtual void SetDisplayFramebuffer(u32 framebuf, u32 stride, int format); virtual void CopyDisplayToOutput(); virtual void BeginFrame(); + virtual void UpdateStats(); private: + // TransformPipeline.cpp + void TransformAndDrawPrim(void *verts, void *inds, int prim, int vertexCount, LinkedShader *program, float *customUV, int forceIndexType); + void DrawBezier(int ucount, int vcount); void DoBlockTransfer(); bool ProcessDLQueue(); bool interruptsEnabled_; @@ -57,8 +61,8 @@ private: int renderWidth_; int renderHeight_; - float widthFactor_; - float heightFactor_; + float renderWidthFactor_; + float renderHeightFactor_; struct 
CmdProcessorState { diff --git a/GPU/GLES/FragmentShaderGenerator.cpp b/GPU/GLES/FragmentShaderGenerator.cpp index 7b4bca105a..84fd7fc411 100644 --- a/GPU/GLES/FragmentShaderGenerator.cpp +++ b/GPU/GLES/FragmentShaderGenerator.cpp @@ -162,8 +162,6 @@ char *GenerateFragmentShader() } WRITE(p, " gl_FragColor = v;\n"); - - //WRITE(p, " gl_FragColor = vec4(1,0,1,1);"); WRITE(p, "}\n"); return buffer; diff --git a/GPU/GLES/ShaderManager.h b/GPU/GLES/ShaderManager.h index 674c949377..f65791c665 100644 --- a/GPU/GLES/ShaderManager.h +++ b/GPU/GLES/ShaderManager.h @@ -88,6 +88,10 @@ public: void DirtyShader(); void DirtyUniform(u32 what); + int NumVertexShaders() const { return (int)vsCache.size(); } + int NumFragmentShaders() const { return (int)fsCache.size(); } + int NumPrograms() const { return (int)linkedShaderCache.size(); } + private: void Clear(); diff --git a/GPU/GLES/TextureCache.cpp b/GPU/GLES/TextureCache.cpp index fe6fa45d29..479cd4140b 100644 --- a/GPU/GLES/TextureCache.cpp +++ b/GPU/GLES/TextureCache.cpp @@ -620,7 +620,7 @@ void PSPSetTexture() u32 addr = texaddr; for (i = 0; i < bufw * h; i += 2) { - u32 index = Memory::Read_U32(addr); + u8 index = Memory::Read_U8(addr); tmpTexBuf16[i + 0] = clut[GetClutIndex((index >> 0) & 0xf) + clutSharingOff]; tmpTexBuf16[i + 1] = clut[GetClutIndex((index >> 4) & 0xf) + clutSharingOff]; addr++; @@ -654,7 +654,7 @@ void PSPSetTexture() u32 addr = texaddr; for (i = 0; i < bufw * h; i += 2) { - u32 index = Memory::Read_U32(addr); + u8 index = Memory::Read_U8(addr); tmpTexBuf32[i + 0] = clut[GetClutIndex((index >> 0) & 0xf) + clutSharingOff]; tmpTexBuf32[i + 1] = clut[GetClutIndex((index >> 4) & 0xf) + clutSharingOff]; addr++; diff --git a/GPU/GLES/TransformPipeline.cpp b/GPU/GLES/TransformPipeline.cpp index 173d44ee2a..96a799df1e 100644 --- a/GPU/GLES/TransformPipeline.cpp +++ b/GPU/GLES/TransformPipeline.cpp @@ -29,6 +29,7 @@ #include "TransformPipeline.h" #include "VertexDecoder.h" #include "ShaderManager.h" +#include 
"DisplayListInterpreter.h" GLuint glprim[8] = { @@ -195,7 +196,7 @@ void Lighter::Light(float colorOut0[4], float colorOut1[4], const float colorIn[ // primitives correctly. Other primitives are possible to transform and light in hardware // using vertex shader, which will be way, way faster, especially on mobile. This has // not yet been implemented though. -void TransformAndDrawPrim(void *verts, void *inds, int prim, int vertexCount, LinkedShader *program, float *customUV, int forceIndexType) +void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int vertexCount, LinkedShader *program, float *customUV, int forceIndexType) { int indexLowerBound, indexUpperBound; // First, decode the verts and apply morphing @@ -217,6 +218,7 @@ void TransformAndDrawPrim(void *verts, void *inds, int prim, int vertexCount, Li gpuStats.numDrawCalls++; gpuStats.numVertsTransformed += vertexCount; + bool throughmode = (gstate.vertType & GE_VTYPE_THROUGH_MASK) != 0; // Then, transform and draw in one big swoop (urgh!) // need to move this to the shader. @@ -252,7 +254,7 @@ void TransformAndDrawPrim(void *verts, void *inds, int prim, int vertexCount, Li float c1[4] = {0, 0, 0, 0}; float uv[2] = {0, 0}; - if (gstate.vertType & GE_VTYPE_THROUGH_MASK) + if (throughmode) { // Do not touch the coordinates or the colors. No lighting. for (int j=0; j<3; j++) @@ -616,8 +618,64 @@ void TransformAndDrawPrim(void *verts, void *inds, int prim, int vertexCount, Li glstate.depthFunc.set(ztests[depthTestFunc]); } + bool wantDepthWrite = gstate.isModeClear() || gstate.isDepthWriteEnabled(); + glstate.depthWrite.set(wantDepthWrite ? GL_TRUE : GL_FALSE); + glstate.depthRange.set(gstate_c.zOff - gstate_c.zScale, gstate_c.zOff + gstate_c.zScale); + + // Debugging code to mess around with the viewport +#if 1 + // We can probably use these to simply set scissors? 
Maybe we need to offset by regionX1/Y1 + int regionX1 = gstate.region1 & 0x3FF; + int regionY1 = (gstate.region1 >> 10) & 0x3FF; + int regionX2 = (gstate.region2 & 0x3FF) + 1; + int regionY2 = ((gstate.region2 >> 10) & 0x3FF) + 1; + + float offsetX = (float)(gstate.offsetx & 0xFFFF) / 16.0f; + float offsetY = (float)(gstate.offsety & 0xFFFF) / 16.0f; + + if (throughmode) { + // No viewport transform here. Let's experiment with using region. + glViewport((0 + regionX1) * renderWidthFactor_, (0 - regionY1) * renderHeightFactor_, (regionX2 - regionX1) * renderWidthFactor_, (regionY2 - regionY1) * renderHeightFactor_); + } else { + // These we can turn into a glViewport call, offset by offsetX and offsetY. Math after. + float vpXa = getFloat24(gstate.viewportx1); + float vpXb = getFloat24(gstate.viewportx2); + float vpYa = getFloat24(gstate.viewporty1); + float vpYb = getFloat24(gstate.viewporty2); + float vpZa = getFloat24(gstate.viewportz1); // / 65536.0f should map it to OpenGL's 0.0-1.0 Z range + float vpZb = getFloat24(gstate.viewportz2); // / 65536.0f + + // The viewport transform appears to go like this: + // Xscreen = -offsetX + vpXb + vpXa * Xview + // Yscreen = -offsetY + vpYb + vpYa * Yview + // Zscreen = vpZb + vpZa * Zview + + // This means that to get the analogue glViewport we must: + float vpX0 = vpXb - offsetX - vpXa; + float vpY0 = vpYb - offsetY + vpYa; // Need to account for sign of Y + float vpWidth = vpXa * 2; + float vpHeight = -vpYa * 2; + + // TODO: These two should feed into glDepthRange somehow. + float vpZ0 = (vpZb - vpZa) / 65536.0f; + float vpZ1 = (vpZa * 2) / 65536.0f; + + vpX0 *= renderWidthFactor_; + vpY0 *= renderHeightFactor_; + vpWidth *= renderWidthFactor_; + vpHeight *= renderHeightFactor_; + + // Flip vpY0 to match the OpenGL coordinate system. + vpY0 = renderHeight_ - (vpY0 + vpHeight); + glViewport(vpX0, vpY0, vpWidth, vpHeight); + // Sadly, as glViewport takes integers, we will not be able to support sub pixel offsets this way. 
But meh. + } + +#endif + + // TODO: Make a cache for glEnableVertexAttribArray and glVertexAttribPtr states, these spam the gDebugger log. glEnableVertexAttribArray(program->a_position); if (useTexCoord && program->a_texcoord != -1) glEnableVertexAttribArray(program->a_texcoord); if (program->a_color0 != -1) glEnableVertexAttribArray(program->a_color0); @@ -637,17 +695,4 @@ void TransformAndDrawPrim(void *verts, void *inds, int prim, int vertexCount, Li if (useTexCoord && program->a_texcoord != -1) glDisableVertexAttribArray(program->a_texcoord); if (program->a_color0 != -1) glDisableVertexAttribArray(program->a_color0); if (program->a_color1 != -1) glDisableVertexAttribArray(program->a_color1); - - /* - if (((gstate.vertType ) & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_8BIT) - { - glDrawElements(glprim, vertexCount, GL_UNSIGNED_BYTE, inds); - } - else if (((gstate.vertType ) & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_16BIT) - { - glDrawElements(glprim, vertexCount, GL_UNSIGNED_SHORT, inds); - } - else - {*/ - } diff --git a/GPU/GLES/TransformPipeline.h b/GPU/GLES/TransformPipeline.h index 330921778b..1abecce3ef 100644 --- a/GPU/GLES/TransformPipeline.h +++ b/GPU/GLES/TransformPipeline.h @@ -18,6 +18,3 @@ #pragma once struct LinkedShader; - - -void TransformAndDrawPrim(void *verts, void *inds, int prim, int count, LinkedShader *shader, float *customUV = 0, int forceIndexType = -1); diff --git a/GPU/GPUInterface.h b/GPU/GPUInterface.h index f62584fe5c..f420d0bb99 100644 --- a/GPU/GPUInterface.h +++ b/GPU/GPUInterface.h @@ -39,6 +39,9 @@ public: virtual void BeginFrame() = 0; // Can be a good place to draw the "memory" framebuffer for accelerated plugins virtual void CopyDisplayToOutput() = 0; + // Tells the GPU to update the gpuStats structure. 
+ virtual void UpdateStats() = 0; + // Internal hack to avoid interrupts from "PPGe" drawing (utility UI, etc) virtual void EnableInterrupts(bool enable) = 0; }; diff --git a/GPU/GPUState.h b/GPU/GPUState.h index d7ba9f39f4..cc464f377c 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -250,6 +250,7 @@ struct GPUgstate inline int getBlendFuncB() const { return (blend >> 4) & 0xF; } inline int getBlendEq() const { return (blend >> 8) & 0x7; } inline bool isDepthTestEnabled() const { return zTestEnable & 1; } + inline bool isDepthWriteEnabled() const { return !(zmsk & 1); } inline int getDepthTestFunc() const { return ztestfunc & 0x7; } }; // Real data in the context ends here @@ -299,8 +300,12 @@ struct GPUStatistics int numTextureSwitches; int numShaderSwitches; - // Total statistics + // Total statistics, updated by the GPU core in UpdateStats int numFrames; + int numTextures; + int numVertexShaders; + int numFragmentShaders; + int numShaders; }; void InitGfxState(); diff --git a/GPU/Null/NullGpu.cpp b/GPU/Null/NullGpu.cpp index 23facd8802..12e27e7032 100644 --- a/GPU/Null/NullGpu.cpp +++ b/GPU/Null/NullGpu.cpp @@ -823,3 +823,10 @@ bool NullGPU::InterpretList() return true; } +void NullGPU::UpdateStats() +{ + gpuStats.numVertexShaders = 0; + gpuStats.numFragmentShaders = 0; + gpuStats.numShaders = 0; + gpuStats.numTextures = 0; +} diff --git a/GPU/Null/NullGpu.h b/GPU/Null/NullGpu.h index 4376db79ff..a21f8a38f9 100644 --- a/GPU/Null/NullGpu.h +++ b/GPU/Null/NullGpu.h @@ -38,6 +38,7 @@ public: virtual void BeginFrame() {} virtual void SetDisplayFramebuffer(u32 framebuf, u32 stride, int format) {} virtual void CopyDisplayToOutput() {} + virtual void UpdateStats(); private: bool ProcessDLQueue(); diff --git a/Windows/WndMainWindow.cpp b/Windows/WndMainWindow.cpp index bb9938205a..d836d76e96 100644 --- a/Windows/WndMainWindow.cpp +++ b/Windows/WndMainWindow.cpp @@ -342,6 +342,11 @@ namespace MainWindow UpdateMenus(); break; + case ID_OPTIONS_SHOWDEBUGSTATISTICS: + 
g_Config.bShowDebugStats = !g_Config.bShowDebugStats; + UpdateMenus(); + break; + case ID_FILE_EXIT: DestroyWindow(hWnd); break; @@ -605,6 +610,7 @@ namespace MainWindow CHECKITEM(ID_CPU_FASTINTERPRETER,g_Config.iCpuCore == CPU_FASTINTERPRETER); CHECKITEM(ID_CPU_DYNAREC,g_Config.iCpuCore == CPU_JIT); CHECKITEM(ID_OPTIONS_BUFFEREDRENDERING, g_Config.bBufferedRendering); + CHECKITEM(ID_OPTIONS_SHOWDEBUGSTATISTICS, g_Config.bShowDebugStats); BOOL enable = !Core_IsStepping(); EnableMenuItem(menu,ID_EMULATION_RUN,enable); @@ -612,8 +618,6 @@ namespace MainWindow enable = g_State.bEmuThreadStarted; EnableMenuItem(menu,ID_FILE_LOAD,enable); - //EnableMenuItem(menu,ID_FILE_LOAD_DOL,enable); - //EnableMenuItem(menu,ID_FILE_LOAD_ELF,enable); EnableMenuItem(menu,ID_CPU_DYNAREC,enable); EnableMenuItem(menu,ID_CPU_INTERPRETER,enable); EnableMenuItem(menu,ID_CPU_FASTINTERPRETER,enable); diff --git a/Windows/ppsspp.rc b/Windows/ppsspp.rc index 13f9356c6d..4187c03ae2 100644 --- a/Windows/ppsspp.rc +++ b/Windows/ppsspp.rc @@ -215,6 +215,7 @@ BEGIN MENUITEM "&Toggle Full Screen\tAlt+Enter", ID_OPTIONS_FULLSCREEN, GRAYED MENUITEM "&Display raw framebuffer", ID_OPTIONS_DISPLAYRAWFRAMEBUFFER MENUITEM "&Buffered rendering", ID_OPTIONS_BUFFEREDRENDERING + MENUITEM "&Show debug statistics", ID_OPTIONS_SHOWDEBUGSTATISTICS MENUITEM SEPARATOR MENUITEM "Screen &1x", ID_OPTIONS_SCREEN1X MENUITEM "Screen &2x", ID_OPTIONS_SCREEN2X diff --git a/Windows/resource.h b/Windows/resource.h index 043ffd7430..594a397e12 100644 --- a/Windows/resource.h +++ b/Windows/resource.h @@ -242,13 +242,14 @@ #define ID_EMULATION_FAST 40119 #define ID_EMULATION_FASTINTERPRETER 40120 #define ID_CPU_FASTINTERPRETER 40121 +#define ID_OPTIONS_SHOWDEBUGSTATISTICS 40122 // Next default values for new objects // #ifdef APSTUDIO_INVOKED #ifndef APSTUDIO_READONLY_SYMBOLS #define _APS_NEXT_RESOURCE_VALUE 232 -#define _APS_NEXT_COMMAND_VALUE 40122 +#define _APS_NEXT_COMMAND_VALUE 40123 #define _APS_NEXT_CONTROL_VALUE 1162 
#define _APS_NEXT_SYMED_VALUE 101 #endif diff --git a/native b/native index 5ca7692912..8b6adf4cd6 160000 --- a/native +++ b/native @@ -1 +1 @@ -Subproject commit 5ca7692912c7bb53e1019bffbedbb09fd9d0a525 +Subproject commit 8b6adf4cd698227dfc51cbbed4f26cf2baffe69c