Reasonably correct viewport handling. Optional debug stats overlay (not always 100% working).

This commit is contained in:
Henrik Rydgard 2012-11-26 17:35:08 +01:00
parent 3d4bc24525
commit 870ea6628b
18 changed files with 154 additions and 58 deletions

View File

@ -16,6 +16,7 @@
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include <vector>
#include <cmath>
// TODO: Move the relevant parts into common. Don't want the core
// to be dependent on "native", I think. Or maybe should get rid of common
@ -144,32 +145,44 @@ void hleEnterVblank(u64 userdata, int cyclesLate)
gpuStats.numFrames++;
// This doesn't work very well yet. PPGe is probably not a great choice to do custom overlays
// as we're not really sure which framebuffer it will end up in at this point.
if (false && g_Config.bShowDebugStats)
{
char stats[512];
sprintf(stats,
"Frames: %i\n"
"Draw calls: %i\n"
"Textures loaded: %i\n",
gpuStats.numFrames,
gpuStats.numDrawCalls,
TextureCache_NumLoadedTextures());
/*
PPGeBegin();
PPGeDrawText(stats, 2, 2, 0, 0.3f, 0x90000000);
PPGeDrawText(stats, 0, 0, 0, 0.3f);
PPGeEnd();
*/
gpuStats.resetFrame();
}
// Yeah, this has to be the right moment to end the frame. Give the graphics backend an opportunity
// to blit the framebuffer, in order to support half-framerate games that otherwise wouldn't have
// anything to draw here.
gpu->CopyDisplayToOutput();
// Now we can subvert the Ge engine in order to draw custom overlays like stat counters etc.
// Here we will be drawing to the non buffered front surface.
if (g_Config.bShowDebugStats)
{
gpu->UpdateStats();
char stats[512];
sprintf(stats,
"Frames: %i\n"
"Draw calls: %i\n"
"Vertices Transformed: %i\n"
"Textures active: %i\n"
"Vertex shaders loaded: %i\n"
"Fragment shaders loaded: %i\n"
"Combined shaders loaded: %i\n",
gpuStats.numFrames,
gpuStats.numDrawCalls,
gpuStats.numVertsTransformed,
gpuStats.numTextures,
gpuStats.numVertexShaders,
gpuStats.numFragmentShaders,
gpuStats.numShaders
);
float zoom = 0.7f * sqrtf(g_Config.iWindowZoom);
PPGeBegin();
PPGeDrawText(stats, 2, 2, 0, zoom, 0x90000000);
PPGeDrawText(stats, 0, 0, 0, zoom);
PPGeEnd();
gpuStats.resetFrame();
}
host->EndFrame();
#ifdef _WIN32
@ -187,7 +200,6 @@ void hleEnterVblank(u64 userdata, int cyclesLate)
}
#endif
host->BeginFrame();
gpu->BeginFrame();

View File

@ -25,6 +25,7 @@
#include "../MemMap.h"
#include "image/zim_load.h"
#include "gfx/texture_atlas.h"
#include "gfx/gl_common.h"
#include "../System.h"
static u32 atlasPtr;
@ -213,7 +214,6 @@ void PPGeEnd()
gpu->EnableInterrupts(true);
sceGeRestoreContext(savedContextPtr);
}
}
static void PPGeMeasureText(const char *text, float scale, float *w, float *h) {

View File

@ -36,6 +36,11 @@ void __PPGeShutdown();
void PPGeBegin();
void PPGeEnd();
// If you want to draw using this texture but not go through the PSP GE emulation,
// just call this. Will bind the texture to unit 0.
void PPGeBindTexture();
enum {
PPGE_ALIGN_LEFT = 0,
PPGE_ALIGN_RIGHT = 16,

View File

@ -49,8 +49,8 @@ GLES_GPU::GLES_GPU(int renderWidth, int renderHeight)
renderHeight_(renderHeight),
dlIdGenerator(1)
{
widthFactor_ = (float)renderWidth / 480.0f;
heightFactor_ = (float)renderHeight / 272.0f;
renderWidthFactor_ = (float)renderWidth / 480.0f;
renderHeightFactor_ = (float)renderHeight / 272.0f;
// Sanity check gstate
if ((int *)&gstate.transferstart - (int *)&gstate != 0xEA) {
@ -127,6 +127,7 @@ void GLES_GPU::CopyDisplayToOutput()
glstate.blend.disable();
glstate.cullFace.disable();
glstate.depthTest.disable();
glstate.scissorTest.disable();
fbo_bind_color_as_texture(vfb->fbo, 0);
@ -192,7 +193,7 @@ void GLES_GPU::SetRenderFrameBuffer()
vfb->width = drawing_width;
vfb->height = drawing_height;
vfb->format = fmt;
vfb->fbo = fbo_create(vfb->width * widthFactor_, vfb->height * heightFactor_, 1, true);
vfb->fbo = fbo_create(vfb->width * renderWidthFactor_, vfb->height * renderHeightFactor_, 1, true);
vfbs_.push_back(vfb);
fbo_bind_as_render_target(vfb->fbo);
glViewport(0, 0, renderWidth_, renderHeight_);
@ -275,7 +276,7 @@ void GLES_GPU::DrawSync(int mode)
}
// Just to get something on the screen, we'll just not subdivide correctly.
void drawBezier(int ucount, int vcount)
void GLES_GPU::DrawBezier(int ucount, int vcount)
{
u16 indices[3 * 3 * 6];
float customUV[32];
@ -371,7 +372,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff)
void *inds = 0;
if ((gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE)
inds = Memory::GetPointer(gstate_c.indexAddr);
TransformAndDrawPrim(verts, inds, type, count, linkedShader);
TransformAndDrawPrim(verts, inds, type, count, linkedShader, 0, -1);
}
break;
@ -380,7 +381,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff)
{
int bz_ucount = data & 0xFF;
int bz_vcount = (data >> 8) & 0xFF;
drawBezier(bz_ucount, bz_vcount);
DrawBezier(bz_ucount, bz_vcount);
DEBUG_LOG(G3D,"DL DRAW BEZIER: %i x %i", bz_ucount, bz_vcount);
}
break;
@ -1082,6 +1083,14 @@ bool GLES_GPU::InterpretList()
return true;
}
// Refreshes the "total" counters in gpuStats (shader and texture counts)
// from the shader manager and the texture cache.
void GLES_GPU::UpdateStats()
{
	// Gather the current counts first, in the same query order as before.
	const int vertexShaderCount = shaderManager.NumVertexShaders();
	const int fragmentShaderCount = shaderManager.NumFragmentShaders();
	const int programCount = shaderManager.NumPrograms();
	const int textureCount = TextureCache_NumLoadedTextures();

	// Then publish them to the global statistics structure.
	gpuStats.numVertexShaders = vertexShaderCount;
	gpuStats.numFragmentShaders = fragmentShaderCount;
	gpuStats.numShaders = programCount;
	gpuStats.numTextures = textureCount;
}
void GLES_GPU::DoBlockTransfer()
{

View File

@ -44,8 +44,12 @@ public:
virtual void SetDisplayFramebuffer(u32 framebuf, u32 stride, int format);
virtual void CopyDisplayToOutput();
virtual void BeginFrame();
virtual void UpdateStats();
private:
// TransformPipeline.cpp
void TransformAndDrawPrim(void *verts, void *inds, int prim, int vertexCount, LinkedShader *program, float *customUV, int forceIndexType);
void DrawBezier(int ucount, int vcount);
void DoBlockTransfer();
bool ProcessDLQueue();
bool interruptsEnabled_;
@ -57,8 +61,8 @@ private:
int renderWidth_;
int renderHeight_;
float widthFactor_;
float heightFactor_;
float renderWidthFactor_;
float renderHeightFactor_;
struct CmdProcessorState
{

View File

@ -162,8 +162,6 @@ char *GenerateFragmentShader()
}
WRITE(p, " gl_FragColor = v;\n");
//WRITE(p, " gl_FragColor = vec4(1,0,1,1);");
WRITE(p, "}\n");
return buffer;

View File

@ -88,6 +88,10 @@ public:
void DirtyShader();
void DirtyUniform(u32 what);
// Returns the size of vsCache, narrowed to int (presumably the number of cached vertex shaders).
int NumVertexShaders() const { return static_cast<int>(vsCache.size()); }
// Returns the size of fsCache, narrowed to int (presumably the number of cached fragment shaders).
int NumFragmentShaders() const { return static_cast<int>(fsCache.size()); }
// Returns the size of linkedShaderCache, narrowed to int (presumably the number of linked programs).
int NumPrograms() const { return static_cast<int>(linkedShaderCache.size()); }
private:
void Clear();

View File

@ -620,7 +620,7 @@ void PSPSetTexture()
u32 addr = texaddr;
for (i = 0; i < bufw * h; i += 2)
{
u32 index = Memory::Read_U32(addr);
u8 index = Memory::Read_U8(addr);
tmpTexBuf16[i + 0] = clut[GetClutIndex((index >> 0) & 0xf) + clutSharingOff];
tmpTexBuf16[i + 1] = clut[GetClutIndex((index >> 4) & 0xf) + clutSharingOff];
addr++;
@ -654,7 +654,7 @@ void PSPSetTexture()
u32 addr = texaddr;
for (i = 0; i < bufw * h; i += 2)
{
u32 index = Memory::Read_U32(addr);
u8 index = Memory::Read_U8(addr);
tmpTexBuf32[i + 0] = clut[GetClutIndex((index >> 0) & 0xf) + clutSharingOff];
tmpTexBuf32[i + 1] = clut[GetClutIndex((index >> 4) & 0xf) + clutSharingOff];
addr++;

View File

@ -29,6 +29,7 @@
#include "TransformPipeline.h"
#include "VertexDecoder.h"
#include "ShaderManager.h"
#include "DisplayListInterpreter.h"
GLuint glprim[8] =
{
@ -195,7 +196,7 @@ void Lighter::Light(float colorOut0[4], float colorOut1[4], const float colorIn[
// primitives correctly. Other primitives are possible to transform and light in hardware
// using vertex shader, which will be way, way faster, especially on mobile. This has
// not yet been implemented though.
void TransformAndDrawPrim(void *verts, void *inds, int prim, int vertexCount, LinkedShader *program, float *customUV, int forceIndexType)
void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int vertexCount, LinkedShader *program, float *customUV, int forceIndexType)
{
int indexLowerBound, indexUpperBound;
// First, decode the verts and apply morphing
@ -217,6 +218,7 @@ void TransformAndDrawPrim(void *verts, void *inds, int prim, int vertexCount, Li
gpuStats.numDrawCalls++;
gpuStats.numVertsTransformed += vertexCount;
bool throughmode = (gstate.vertType & GE_VTYPE_THROUGH_MASK) != 0;
// Then, transform and draw in one big swoop (urgh!)
// need to move this to the shader.
@ -252,7 +254,7 @@ void TransformAndDrawPrim(void *verts, void *inds, int prim, int vertexCount, Li
float c1[4] = {0, 0, 0, 0};
float uv[2] = {0, 0};
if (gstate.vertType & GE_VTYPE_THROUGH_MASK)
if (throughmode)
{
// Do not touch the coordinates or the colors. No lighting.
for (int j=0; j<3; j++)
@ -616,8 +618,64 @@ void TransformAndDrawPrim(void *verts, void *inds, int prim, int vertexCount, Li
glstate.depthFunc.set(ztests[depthTestFunc]);
}
bool wantDepthWrite = gstate.isModeClear() || gstate.isDepthWriteEnabled();
glstate.depthWrite.set(wantDepthWrite ? GL_TRUE : GL_FALSE);
glstate.depthRange.set(gstate_c.zOff - gstate_c.zScale, gstate_c.zOff + gstate_c.zScale);
// Debugging code to mess around with the viewport
#if 1
// We can probably use these to simply set scissors? Maybe we need to offset by regionX1/Y1
int regionX1 = gstate.region1 & 0x3FF;
int regionY1 = (gstate.region1 >> 10) & 0x3FF;
int regionX2 = (gstate.region2 & 0x3FF) + 1;
int regionY2 = ((gstate.region2 >> 10) & 0x3FF) + 1;
float offsetX = (float)(gstate.offsetx & 0xFFFF) / 16.0f;
float offsetY = (float)(gstate.offsety & 0xFFFF) / 16.0f;
if (throughmode) {
// No viewport transform here. Let's experiment with using region.
glViewport((0 + regionX1) * renderWidthFactor_, (0 - regionY1) * renderHeightFactor_, (regionX2 - regionX1) * renderWidthFactor_, (regionY2 - regionY1) * renderHeightFactor_);
} else {
// These we can turn into a glViewport call, offset by offsetX and offsetY. Math after.
float vpXa = getFloat24(gstate.viewportx1);
float vpXb = getFloat24(gstate.viewportx2);
float vpYa = getFloat24(gstate.viewporty1);
float vpYb = getFloat24(gstate.viewporty2);
float vpZa = getFloat24(gstate.viewportz1); // / 65536.0f should map it to OpenGL's 0.0-1.0 Z range
float vpZb = getFloat24(gstate.viewportz2); // / 65536.0f
// The viewport transform appears to go like this:
// Xscreen = -offsetX + vpXb + vpXa * Xview
// Yscreen = -offsetY + vpYb + vpYa * Yview
// Zscreen = vpZb + vpZa * Zview
// This means that to get the analogue glViewport we must:
float vpX0 = vpXb - offsetX - vpXa;
float vpY0 = vpYb - offsetY + vpYa; // Need to account for sign of Y
float vpWidth = vpXa * 2;
float vpHeight = -vpYa * 2;
// TODO: These two should feed into glDepthRange somehow.
float vpZ0 = (vpZb - vpZa) / 65536.0f;
float vpZ1 = (vpZa * 2) / 65536.0f;
vpX0 *= renderWidthFactor_;
vpY0 *= renderHeightFactor_;
vpWidth *= renderWidthFactor_;
vpHeight *= renderHeightFactor_;
// Flip vpY0 to match the OpenGL coordinate system.
vpY0 = renderHeight_ - (vpY0 + vpHeight);
glViewport(vpX0, vpY0, vpWidth, vpHeight);
// Sadly, as glViewport takes integers, we will not be able to support sub pixel offsets this way. But meh.
}
#endif
// TODO: Make a cache for glEnableVertexAttribArray and glVertexAttribPtr states, these spam the gDebugger log.
glEnableVertexAttribArray(program->a_position);
if (useTexCoord && program->a_texcoord != -1) glEnableVertexAttribArray(program->a_texcoord);
if (program->a_color0 != -1) glEnableVertexAttribArray(program->a_color0);
@ -637,17 +695,4 @@ void TransformAndDrawPrim(void *verts, void *inds, int prim, int vertexCount, Li
if (useTexCoord && program->a_texcoord != -1) glDisableVertexAttribArray(program->a_texcoord);
if (program->a_color0 != -1) glDisableVertexAttribArray(program->a_color0);
if (program->a_color1 != -1) glDisableVertexAttribArray(program->a_color1);
/*
if (((gstate.vertType ) & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_8BIT)
{
glDrawElements(glprim, vertexCount, GL_UNSIGNED_BYTE, inds);
}
else if (((gstate.vertType ) & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_16BIT)
{
glDrawElements(glprim, vertexCount, GL_UNSIGNED_SHORT, inds);
}
else
{*/
}

View File

@ -18,6 +18,3 @@
#pragma once
struct LinkedShader;
void TransformAndDrawPrim(void *verts, void *inds, int prim, int count, LinkedShader *shader, float *customUV = 0, int forceIndexType = -1);

View File

@ -39,6 +39,9 @@ public:
virtual void BeginFrame() = 0; // Can be a good place to draw the "memory" framebuffer for accelerated plugins
virtual void CopyDisplayToOutput() = 0;
// Tells the GPU to update the gpuStats structure.
virtual void UpdateStats() = 0;
// Internal hack to avoid interrupts from "PPGe" drawing (utility UI, etc)
virtual void EnableInterrupts(bool enable) = 0;
};

View File

@ -250,6 +250,7 @@ struct GPUgstate
// Extracts bits 4..7 of the blend register.
inline int getBlendFuncB() const { return (blend & 0xF0) >> 4; }
// Extracts bits 8..10 of the blend register.
inline int getBlendEq() const { return (blend & 0x700) >> 8; }
// True when bit 0 of zTestEnable is set.
inline bool isDepthTestEnabled() const { return (zTestEnable & 1) != 0; }
// True when bit 0 of zmsk is clear, i.e. the mask bit does not block depth writes.
inline bool isDepthWriteEnabled() const { return (zmsk & 1) == 0; }
// Extracts the low three bits of ztestfunc.
inline int getDepthTestFunc() const { return 0x7 & ztestfunc; }
};
// Real data in the context ends here
@ -299,8 +300,12 @@ struct GPUStatistics
int numTextureSwitches;
int numShaderSwitches;
// Total statistics
// Total statistics, updated by the GPU core in UpdateStats
int numFrames;
int numTextures;
int numVertexShaders;
int numFragmentShaders;
int numShaders;
};
void InitGfxState();

View File

@ -823,3 +823,10 @@ bool NullGPU::InterpretList()
return true;
}
// The null backend has no shaders or textures, so every "total" counter
// written here is reported as zero.
void NullGPU::UpdateStats()
{
	gpuStats.numVertexShaders =
		gpuStats.numFragmentShaders =
		gpuStats.numShaders =
		gpuStats.numTextures = 0;
}

View File

@ -38,6 +38,7 @@ public:
virtual void BeginFrame() {}
virtual void SetDisplayFramebuffer(u32 framebuf, u32 stride, int format) {}
virtual void CopyDisplayToOutput() {}
virtual void UpdateStats();
private:
bool ProcessDLQueue();

View File

@ -342,6 +342,11 @@ namespace MainWindow
UpdateMenus();
break;
case ID_OPTIONS_SHOWDEBUGSTATISTICS:
g_Config.bShowDebugStats = !g_Config.bShowDebugStats;
UpdateMenus();
break;
case ID_FILE_EXIT:
DestroyWindow(hWnd);
break;
@ -605,6 +610,7 @@ namespace MainWindow
CHECKITEM(ID_CPU_FASTINTERPRETER,g_Config.iCpuCore == CPU_FASTINTERPRETER);
CHECKITEM(ID_CPU_DYNAREC,g_Config.iCpuCore == CPU_JIT);
CHECKITEM(ID_OPTIONS_BUFFEREDRENDERING, g_Config.bBufferedRendering);
CHECKITEM(ID_OPTIONS_SHOWDEBUGSTATISTICS, g_Config.bShowDebugStats);
BOOL enable = !Core_IsStepping();
EnableMenuItem(menu,ID_EMULATION_RUN,enable);
@ -612,8 +618,6 @@ namespace MainWindow
enable = g_State.bEmuThreadStarted;
EnableMenuItem(menu,ID_FILE_LOAD,enable);
//EnableMenuItem(menu,ID_FILE_LOAD_DOL,enable);
//EnableMenuItem(menu,ID_FILE_LOAD_ELF,enable);
EnableMenuItem(menu,ID_CPU_DYNAREC,enable);
EnableMenuItem(menu,ID_CPU_INTERPRETER,enable);
EnableMenuItem(menu,ID_CPU_FASTINTERPRETER,enable);

View File

@ -215,6 +215,7 @@ BEGIN
MENUITEM "&Toggle Full Screen\tAlt+Enter", ID_OPTIONS_FULLSCREEN, GRAYED
MENUITEM "&Display raw framebuffer", ID_OPTIONS_DISPLAYRAWFRAMEBUFFER
MENUITEM "&Buffered rendering", ID_OPTIONS_BUFFEREDRENDERING
MENUITEM "&Show debug statistics", ID_OPTIONS_SHOWDEBUGSTATISTICS
MENUITEM SEPARATOR
MENUITEM "Screen &1x", ID_OPTIONS_SCREEN1X
MENUITEM "Screen &2x", ID_OPTIONS_SCREEN2X

View File

@ -242,13 +242,14 @@
#define ID_EMULATION_FAST 40119
#define ID_EMULATION_FASTINTERPRETER 40120
#define ID_CPU_FASTINTERPRETER 40121
#define ID_OPTIONS_SHOWDEBUGSTATISTICS 40122
// Next default values for new objects
//
#ifdef APSTUDIO_INVOKED
#ifndef APSTUDIO_READONLY_SYMBOLS
#define _APS_NEXT_RESOURCE_VALUE 232
#define _APS_NEXT_COMMAND_VALUE 40122
#define _APS_NEXT_COMMAND_VALUE 40123
#define _APS_NEXT_CONTROL_VALUE 1162
#define _APS_NEXT_SYMED_VALUE 101
#endif

2
native

@ -1 +1 @@
Subproject commit 5ca7692912c7bb53e1019bffbedbb09fd9d0a525
Subproject commit 8b6adf4cd698227dfc51cbbed4f26cf2baffe69c