Merge pull request #7741 from hrydgard/frame-profiler

Frame profiler overlay
This commit is contained in:
Henrik Rydgård 2015-05-26 00:42:01 +02:00
commit 43744b0239
21 changed files with 193 additions and 18 deletions

View File

@ -761,6 +761,7 @@ void Config::Load(const char *iniFileName, const char *controllerIniFilename) {
INFO_LOG(LOADER, "Loading config: %s", iniFilename_.c_str());
bSaveSettings = true;
bShowFrameProfiler = true;
IniFile iniFile;
if (!iniFile.Load(iniFilename_)) {

View File

@ -374,6 +374,9 @@ public:
bool bSkipDeadbeefFilling;
bool bFuncHashMap;
// Volatile development settings
bool bShowFrameProfiler;
std::string currentDirectory;
std::string externalDirectory;
std::string memStickDirectory;

View File

@ -22,6 +22,7 @@
#include "base/mutex.h"
#include "base/timeutil.h"
#include "input/input_state.h"
#include "profiler/profiler.h"
#include "Core/Core.h"
#include "Core/Config.h"

View File

@ -20,6 +20,8 @@
#include "native/thread/thread.h"
#include "native/thread/threadutil.h"
#include "profiler/profiler.h"
#include "Core/Core.h"
#include "Core/Config.h"
#include "Core/Debugger/Breakpoints.h"
@ -760,6 +762,7 @@ static u32 npdrmRead(FileNode *f, u8 *data, int size) {
}
static bool __IoRead(int &result, int id, u32 data_addr, int size, int &us) {
PROFILE_THIS_SCOPE("ioread");
// Low estimate, may be improved later from the ReadFile result.
us = size / 100;
if (us < 100) {

View File

@ -16,6 +16,8 @@
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include "base/basictypes.h"
#include "profiler/profiler.h"
#include "Globals.h"
#include "Core/MemMapHelpers.h"
#include "Core/HLE/sceAtrac.h"
@ -525,6 +527,8 @@ void SasInstance::MixVoice(SasVoice &voice) {
}
void SasInstance::Mix(u32 outAddr, u32 inAddr, int leftVol, int rightVol) {
PROFILE_THIS_SCOPE("mixer");
int voicesPlayingCount = 0;
for (int v = 0; v < PSP_SAS_VOICES_MAX; v++) {

View File

@ -134,7 +134,6 @@ VirtualFramebuffer *FramebufferManagerCommon::GetVFBAt(u32 addr) {
return match;
}
DEBUG_LOG(SCEGE, "Finding no FBO matching address %08x", addr);
return 0;
}

View File

@ -25,6 +25,8 @@
#include <string.h>
#include <algorithm>
#include "profiler/profiler.h"
#include "Common/CPUDetect.h"
#include "Core/Config.h"
@ -752,6 +754,7 @@ u32 DrawEngineCommon::NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr,
const GEPrimitiveType primType[] = { GE_PRIM_TRIANGLES, GE_PRIM_LINES, GE_PRIM_POINTS };
void DrawEngineCommon::SubmitSpline(const void *control_points, const void *indices, int count_u, int count_v, int type_u, int type_v, GEPatchPrimType prim_type, u32 vertType) {
PROFILE_THIS_SCOPE("spline");
DispatchFlush();
// TODO: Verify correct functionality with < 4.
@ -830,6 +833,8 @@ void DrawEngineCommon::SubmitSpline(const void *control_points, const void *indi
}
void DrawEngineCommon::SubmitBezier(const void *control_points, const void *indices, int count_u, int count_v, GEPatchPrimType prim_type, u32 vertType) {
PROFILE_THIS_SCOPE("bezier");
DispatchFlush();
// TODO: Verify correct functionality with < 4.

View File

@ -19,6 +19,7 @@
#include "Common/ChunkFile.h"
#include "base/logging.h"
#include "profiler/profiler.h"
#include "Core/Debugger/Breakpoints.h"
#include "Core/MemMapHelpers.h"
#include "Core/MIPS/MIPS.h"
@ -601,6 +602,7 @@ void DIRECTX9_GPU::CopyDisplayToOutputInternal() {
// Maybe should write this in ASM...
void DIRECTX9_GPU::FastRunLoop(DisplayList &list) {
PROFILE_THIS_SCOPE("gpuloop");
const CommandInfo *cmdInfo = cmdInfo_;
for (; downcount > 0; --downcount) {
// We know that display list PCs have the upper nibble == 0 - no need to mask the pointer

View File

@ -17,6 +17,7 @@
#include "base/logging.h"
#include "gfx_es2/gl_state.h"
#include "profiler/profiler.h"
#include "Common/ChunkFile.h"
@ -681,6 +682,7 @@ void GLES_GPU::CopyDisplayToOutputInternal() {
// Maybe should write this in ASM...
void GLES_GPU::FastRunLoop(DisplayList &list) {
PROFILE_THIS_SCOPE("gpuloop");
const CommandInfo *cmdInfo = cmdInfo_;
int dc = downcount;
for (; dc > 0; --dc) {

View File

@ -29,6 +29,7 @@
#include "math/math_util.h"
#include "gfx_es2/gl_state.h"
#include "math/lin/matrix4x4.h"
#include "profiler/profiler.h"
#include "Core/Config.h"
#include "Core/Reporting.h"
@ -42,6 +43,7 @@
#include "i18n/i18n.h"
Shader::Shader(const char *code, uint32_t shaderType, bool useHWTransform, const ShaderID &shaderID) : failed_(false), useHWTransform_(useHWTransform), id_(shaderID) {
PROFILE_THIS_SCOPE("shadercomp");
source_ = code;
#ifdef SHADERLOG
OutputDebugStringUTF8(code);
@ -82,6 +84,8 @@ Shader::~Shader() {
LinkedShader::LinkedShader(Shader *vs, Shader *fs, u32 vertType, bool useHWTransform, LinkedShader *previous)
: useHWTransform_(useHWTransform), program(0), dirtyUniforms(0) {
PROFILE_THIS_SCOPE("shaderlink");
program = glCreateProgram();
vs_ = vs;
glAttachShader(program, vs->shader);

View File

@ -21,7 +21,8 @@
#include "StateMapping.h"
#include "native/gfx_es2/gl_state.h"
#include "gfx_es2/gl_state.h"
#include "profiler/profiler.h"
#include "GPU/Math3D.h"
#include "GPU/GPUState.h"
@ -568,6 +569,7 @@ void TransformDrawEngine::ApplyBlendState() {
}
void TransformDrawEngine::ApplyDrawState(int prim) {
// TODO: All this setup is soon so expensive that we'll need dirty flags, or simply do it in the command writes where we detect dirty by xoring. Silly to do all this work on every drawcall.
if (gstate_c.textureChanged != TEXCHANGE_UNCHANGED && !gstate.isModeClear() && gstate.isTextureMapEnabled()) {
@ -580,6 +582,9 @@ void TransformDrawEngine::ApplyDrawState(int prim) {
}
}
// Start profiling here to skip SetTexture which is already accounted for
PROFILE_THIS_SCOPE("applydrawstate");
// Set blend - unless we need to do it in the shader.
ApplyBlendState();

View File

@ -18,6 +18,8 @@
#include <algorithm>
#include <cstring>
#include "profiler/profiler.h"
#include "Common/ColorConv.h"
#include "Core/Host.h"
#include "Core/MemMap.h"
@ -1847,6 +1849,15 @@ TextureCache::TexCacheEntry::Status TextureCache::CheckAlpha(const u32 *pixelDat
}
void TextureCache::LoadTextureLevel(TexCacheEntry &entry, int level, bool replaceImages, int scaleFactor, GLenum dstFmt) {
int w = gstate.getTextureWidth(level);
int h = gstate.getTextureHeight(level);
bool useUnpack = false;
bool useBGRA;
u32 *pixelData;
{
PROFILE_THIS_SCOPE("decodetex");
// TODO: only do this once
u32 texByteAlign = 1;
@ -1857,13 +1868,9 @@ void TextureCache::LoadTextureLevel(TexCacheEntry &entry, int level, bool replac
return;
}
int w = gstate.getTextureWidth(level);
int h = gstate.getTextureHeight(level);
gpuStats.numTexturesDecoded++;
// Can restore these and remove the fixup at the end of DecodeTextureLevel on desktop GL and GLES 3.
bool useUnpack = false;
if ((g_Config.iTexScalingLevel == 1 && gl_extensions.EXT_unpack_subimage) && w != bufw) {
glPixelStorei(GL_UNPACK_ROW_LENGTH, bufw);
useUnpack = true;
@ -1871,9 +1878,9 @@ void TextureCache::LoadTextureLevel(TexCacheEntry &entry, int level, bool replac
glPixelStorei(GL_UNPACK_ALIGNMENT, texByteAlign);
bool useBGRA = UseBGRA8888() && dstFmt == GL_UNSIGNED_BYTE;
useBGRA = UseBGRA8888() && dstFmt == GL_UNSIGNED_BYTE;
u32 *pixelData = (u32 *)finalBuf;
pixelData = (u32 *)finalBuf;
if (scaleFactor > 1 && (entry.status & TexCacheEntry::STATUS_CHANGE_FREQUENT) == 0)
scaler.Scale(pixelData, dstFmt, w, h, scaleFactor);
@ -1883,6 +1890,7 @@ void TextureCache::LoadTextureLevel(TexCacheEntry &entry, int level, bool replac
} else {
entry.SetAlphaStatus(TexCacheEntry::STATUS_ALPHA_UNKNOWN);
}
}
GLuint components = dstFmt == GL_UNSIGNED_SHORT_5_6_5 ? GL_RGB : GL_RGBA;
@ -1892,8 +1900,10 @@ void TextureCache::LoadTextureLevel(TexCacheEntry &entry, int level, bool replac
}
if (replaceImages) {
PROFILE_THIS_SCOPE("repltex");
glTexSubImage2D(GL_TEXTURE_2D, level, 0, 0, w, h, components2, dstFmt, pixelData);
} else {
PROFILE_THIS_SCOPE("loadtex");
glTexImage2D(GL_TEXTURE_2D, level, components, w, h, 0, components2, dstFmt, pixelData);
if (!lowMemoryMode_) {
GLenum err = glGetError();

View File

@ -72,7 +72,8 @@
#include "Core/Config.h"
#include "Core/CoreTiming.h"
#include "native/gfx_es2/gl_state.h"
#include "gfx_es2/gl_state.h"
#include "profiler/profiler.h"
#include "GPU/Math3D.h"
#include "GPU/GPUState.h"
@ -334,6 +335,8 @@ void TransformDrawEngine::DecodeVerts() {
}
void TransformDrawEngine::DecodeVertsStep() {
PROFILE_THIS_SCOPE("vertdec");
const int i = decodeCounter_;
const DeferredDrawCall &dc = drawCalls[i];

View File

@ -691,7 +691,7 @@ void GPUCommon::ProcessDLQueueInternal() {
for (int listIndex = GetNextListIndex(); listIndex != -1; listIndex = GetNextListIndex()) {
DisplayList &l = dls[listIndex];
DEBUG_LOG(G3D, "Okay, starting DL execution at %08x - stall = %08x", l.pc, l.stall);
DEBUG_LOG(G3D, "Starting DL execution at %08x - stall = %08x", l.pc, l.stall);
if (!InterpretList(l)) {
return;
} else {

View File

@ -24,6 +24,7 @@
#include "ui/view.h"
#include "ui/viewgroup.h"
#include "ui/ui.h"
#include "profiler/profiler.h"
#include "Common/LogManager.h"
#include "Common/CPUDetect.h"
@ -69,6 +70,9 @@ void DevMenu::CreatePopupContents(UI::ViewGroup *parent) {
parent->Add(new Choice(de->T("Toggle Freeze")))->OnClick.Handle(this, &DevMenu::OnFreezeFrame);
parent->Add(new Choice(de->T("Dump Frame GPU Commands")))->OnClick.Handle(this, &DevMenu::OnDumpFrame);
parent->Add(new Choice(de->T("Toggle Audio Debug")))->OnClick.Handle(this, &DevMenu::OnToggleAudioDebug);
#ifdef USE_PROFILER
parent->Add(new CheckBox(&g_Config.bShowFrameProfiler, de->T("Frame Profiler"), ""));
#endif
RingbufferLogListener *ring = LogManager::GetInstance()->GetRingbufferListener();
if (ring) {
@ -776,3 +780,114 @@ UI::EventReturn JitCompareScreen::OnCurrentBlock(UI::EventParams &e) {
UpdateDisasm();
return UI::EVENT_DONE;
}
// Palette for the frame profiler overlay. Entries are 24-bit colour values
// with the top byte left at zero; DrawProfile ORs its own opacity into the
// top byte and wraps the category index around the table size.
static const uint32_t nice_colors[8] = {
	0x00FF8040, 0x0080FF40, 0x008040FF, 0x00FFFF40,
	0x0040FFFF, 0x00FF40FF, 0x00C0C0C0, 0x008040C0,
};
// Draws the frame profiler overlay into the given UI context: a colour legend
// with one row per profiler category, two horizontal reference lines (1/60 s
// and 1 ms), and a per-frame history graph in which the categories are stacked
// on top of each other. Compiles to a no-op unless USE_PROFILER is defined.
//
// The vertical scale follows a smoothed maximum that is kept in a function-local
// static between calls, so the graph rescales gradually rather than jumping.
void DrawProfile(UIContext &ui) {
#ifdef USE_PROFILER
	const int numCategories = Profiler_GetNumCategories();
	const int historyLength = Profiler_GetHistoryLength();

	// Legend column is at least 100 units wide, widened to fit the longest name.
	float legendWidth = 100.0f;
	for (int i = 0; i < numCategories; i++) {
		const char *name = Profiler_GetCategoryName(i);
		float w = 0.0f, h = 0.0f;
		ui.MeasureText(ui.GetFontStyle(), name, &w, &h);
		if (w > legendWidth) {
			legendWidth = w;
		}
	}

	// The legend starts at the vertical centre of the screen and is anchored to
	// the right edge; its offset from that edge is capped at 200 units.
	const float legendStartY = ui.GetBounds().centerY();
	const float legendStartX = ui.GetBounds().x2() - std::min(legendWidth, 200.0f);
	const float rowH = 30;
	// Opacity that gets ORed into the top byte of each palette entry.
	const uint32_t opacity = 140 << 24;

	for (int i = 0; i < numCategories; i++) {
		const char *name = Profiler_GetCategoryName(i);
		const uint32_t color = nice_colors[i % ARRAY_SIZE(nice_colors)];

		const float y = legendStartY + i * rowH;
		ui.FillRect(UI::Drawable(opacity | color), Bounds(legendStartX, y, rowH - 2, rowH - 2));
		ui.DrawTextShadow(name, legendStartX + rowH + 2, y, 0xFFFFFFFF, ALIGN_VBASELINE);
	}

	// Without any history there is nothing to plot. Bail out before computing
	// dx (division by zero) or taking &history[0] on an empty vector (undefined).
	if (historyLength <= 0) {
		return;
	}

	const float graphWidth = ui.GetBounds().x2() - 120;
	const float graphHeight = ui.GetBounds().h * 0.8f;
	// Baseline of the graph: 10 units above the bottom edge of the screen.
	const float graphBottom = ui.GetBounds().y2() - 10;

	std::vector<float> history;
	std::vector<float> total;  // running stacked sum per sample; resize() zero-fills it
	history.resize(historyLength);
	total.resize(historyLength);

	const float dx = graphWidth / historyLength;

	/*
	ui.Flush();

	ui.BeginNoTex();
	*/

	// true: stacked area chart. false: one thin marker per category and sample.
	const bool area = true;
	static float lastMaxVal = 1.0f / 60.0f;
	const float minVal = 0.0f;
	float maxVal = lastMaxVal;  // TODO - adjust to frame length
	// Clamp the top of the visible range to [1 ms, 1/30 s].
	if (maxVal < 0.001f)
		maxVal = 0.001f;
	if (maxVal > 1.0f / 30.0f)
		maxVal = 1.0f / 30.0f;

	const float scale = (graphHeight) / (maxVal - minVal);

	const float y_60th = graphBottom - (1.0f / 60.0f) * scale;
	const float y_1ms = graphBottom - (1.0f / 1000.0f) * scale;

	ui.FillRect(UI::Drawable(0x80FFFF00), Bounds(0, y_60th, graphWidth, 2));
	ui.FillRect(UI::Drawable(0x80FFFF00), Bounds(0, y_1ms, graphWidth, 2));
	ui.DrawTextShadow("1/60s", 5, y_60th, 0x80FFFF00);
	ui.DrawTextShadow("1ms", 5, y_1ms, 0x80FFFF00);

	// From here on maxVal is reused to track the largest sample seen this frame.
	// NOTE(review): this is the largest single-category value, not the largest
	// stacked total - confirm that this is the intended basis for the auto-scale.
	maxVal = 0.0f;
	for (int i = 0; i < numCategories; i++) {
		Profiler_GetHistory(i, &history[0], historyLength);

		float x = 10;
		uint32_t col = nice_colors[i % ARRAY_SIZE(nice_colors)];
		if (area)
			col = opacity | (col & 0xFFFFFF);
		UI::Drawable color(col);

		if (area) {
			for (int n = 0; n < historyLength; n++) {
				const float val = history[n];
				if (val > maxVal)
					maxVal = val;
				// Stack this category's bar on top of those already drawn for sample n.
				const float valY1 = graphBottom - (val + total[n]) * scale;
				const float valY2 = graphBottom - total[n] * scale;
				ui.FillRect(color, Bounds(x, valY1, dx, valY2 - valY1));
				x += dx;
				total[n] += val;
			}
		} else {
			for (int n = 0; n < historyLength; n++) {
				const float valY = graphBottom - history[n] * scale;
				ui.FillRect(color, Bounds(x, valY, dx, 5));
				x += dx;
			}
		}
	}

	// Exponential smoothing of the scale maximum (95% old value, 5% new value).
	lastMaxVal = lastMaxVal * 0.95f + maxVal * 0.05f;
#endif
}

View File

@ -141,3 +141,6 @@ private:
UI::LinearLayout *leftDisasm_;
UI::LinearLayout *rightDisasm_;
};
void DrawProfile(UIContext &ui);

View File

@ -21,6 +21,7 @@
#include "base/display.h"
#include "base/logging.h"
#include "base/timeutil.h"
#include "profiler/profiler.h"
#include "gfx_es2/glsl_program.h"
#include "gfx_es2/gl_state.h"
@ -874,7 +875,7 @@ void EmuScreen::render() {
if (useBufferedRendering && g_Config.iGPUBackend == GPU_BACKEND_OPENGL)
fbo_unbind();
if (!osm.IsEmpty() || g_Config.bShowDebugStats || g_Config.iShowFPSCounter || g_Config.bShowTouchControls || g_Config.bShowDeveloperMenu || g_Config.bShowAudioDebug || saveStatePreview_->GetVisibility() != UI::V_GONE) {
if (!osm.IsEmpty() || g_Config.bShowDebugStats || g_Config.iShowFPSCounter || g_Config.bShowTouchControls || g_Config.bShowDeveloperMenu || g_Config.bShowAudioDebug || saveStatePreview_->GetVisibility() != UI::V_GONE || g_Config.bShowFrameProfiler) {
Thin3DContext *thin3d = screenManager()->getThin3DContext();
// This sets up some important states but not the viewport.
@ -908,6 +909,12 @@ void EmuScreen::render() {
DrawFPS(draw2d, screenManager()->getUIContext()->GetBounds());
}
#ifdef USE_PROFILER
if (g_Config.bShowFrameProfiler) {
DrawProfile(*screenManager()->getUIContext());
}
#endif
screenManager()->getUIContext()->End();
}
@ -951,4 +958,4 @@ void EmuScreen::releaseButtons() {
input.timestamp = time_now_d();
input.id = 0;
touch(input);
}
}

View File

@ -397,9 +397,12 @@ void InitPadLayout(float xres, float yres, float globalScale) {
g_Config.fUnthrottleKeyScale = scale;
}
//L and R------------------------------------------------------------
int l_key_X = 70 * scale;
int l_key_Y = 40 * scale;
// L and R------------------------------------------------------------
// Put them above the analog stick / above the buttons to the right.
// The corners were very hard to reach..
int l_key_X = 60 * scale;
int l_key_Y = yres - 110 * scale;
if (g_Config.fLKeyX == -1.0 || g_Config.fLKeyY == -1.0 ) {
g_Config.fLKeyX = (float)l_key_X / xres;
@ -408,7 +411,7 @@ void InitPadLayout(float xres, float yres, float globalScale) {
}
int r_key_X = xres - 60 * scale;
int r_key_Y = 40 * scale;
int r_key_Y = yres - 110 * scale;
if (g_Config.fRKeyX == -1.0 || g_Config.fRKeyY == -1.0 ) {
g_Config.fRKeyX = (float)r_key_X / xres;

View File

@ -48,7 +48,7 @@
#include "file/zip_read.h"
#include "thread/thread.h"
#include "net/http_client.h"
#include "gfx_es2/gl_state.h" // should've been only for screenshot - but actually not, cleanup?
#include "gfx_es2/gl_state.h" // TODO: Get rid of this from here
#include "gfx_es2/draw_text.h"
#include "gfx/gl_lost_manager.h"
#include "gfx/texture.h"
@ -57,6 +57,7 @@
#include "math/fast/fast_math.h"
#include "math/math_util.h"
#include "math/lin/matrix4x4.h"
#include "profiler/profiler.h"
#include "thin3d/thin3d.h"
#include "ui/ui.h"
#include "ui/screen.h"
@ -762,6 +763,8 @@ void HandleGlobalMessage(const std::string &msg, const std::string &value) {
}
void NativeUpdate(InputState &input) {
PROFILE_END_FRAME();
{
lock_guard lock(pendingMutex);
for (size_t i = 0; i < pendingMessages.size(); i++) {

View File

@ -24,6 +24,7 @@
#include "file/vfs.h"
#include "file/zip_read.h"
#include "base/NativeApp.h"
#include "profiler/profiler.h"
#include "thread/threadutil.h"
#include "util/text/utf8.h"
@ -342,6 +343,7 @@ int WINAPI WinMain(HINSTANCE _hInstance, HINSTANCE hPrevInstance, LPSTR szCmdLin
CoInitializeEx(NULL, COINIT_MULTITHREADED);
PROFILE_INIT();
// FMA3 support in the 2013 CRT is broken on Vista and Windows 7 RTM (fixed in SP1). Just disable it.
#ifdef _M_X64

2
native

@ -1 +1 @@
Subproject commit a24f22d4e77bd2c92fc319e452835531ce76ab2f
Subproject commit 81a78e81a90d6394a22dce786975d04abbc40402