From 3e4840df0c874753672288ffbc9046e134cf2ddf Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Thu, 10 Jan 2013 23:41:26 +0100 Subject: [PATCH 1/3] Don't lock the audio mutex if sound is off --- Core/HLE/__sceAudio.cpp | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/Core/HLE/__sceAudio.cpp b/Core/HLE/__sceAudio.cpp index 48261e457b..89338fb447 100644 --- a/Core/HLE/__sceAudio.cpp +++ b/Core/HLE/__sceAudio.cpp @@ -198,9 +198,8 @@ void __AudioUpdate() } } - section.lock(); - if (g_Config.bEnableSound) { + section.lock(); if (outAudioQueue.room() >= hwBlockSize * 2) { // Push the mixed samples onto the output audio queue. for (int i = 0; i < hwBlockSize; i++) { @@ -210,14 +209,14 @@ void __AudioUpdate() outAudioQueue.push((s16)sampleL); outAudioQueue.push((s16)sampleR); } - } else { - // This happens quite a lot. There's still something slightly off - // about the amount of audio we produce. - DEBUG_LOG(HLE, "Audio outbuffer overrun! room = %i / %i", outAudioQueue.room(), (u32)outAudioQueue.capacity()); } + section.unlock(); + } else { + // This happens quite a lot. There's still something slightly off + // about the amount of audio we produce. + DEBUG_LOG(HLE, "Audio outbuffer overrun! room = %i / %i", outAudioQueue.room(), (u32)outAudioQueue.capacity()); } - section.unlock(); } void __AudioSetOutputFrequency(int freq) From 97f3638212432756fe709b232c29a9e5c0621103 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Thu, 10 Jan 2013 23:42:46 +0100 Subject: [PATCH 2/3] Add kernelstats Useful for finding out if some syscall takes unexpectedly much CPU. 
--- Core/HLE/HLE.cpp | 13 +++++++++++++ Core/HLE/sceKernel.cpp | 1 + Core/HLE/sceKernel.h | 22 +++++++++++++++++++++- 3 files changed, 35 insertions(+), 1 deletion(-) diff --git a/Core/HLE/HLE.cpp b/Core/HLE/HLE.cpp index 4fcb6b93c1..c12e7b1c63 100644 --- a/Core/HLE/HLE.cpp +++ b/Core/HLE/HLE.cpp @@ -15,6 +15,7 @@ // Official git repository and contact information can be found at // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. +#include "base/timeutil.h" #include "HLE.h" #include #include @@ -344,6 +345,8 @@ inline void hleFinishSyscall(int modulenum, int funcnum) void CallSyscall(u32 op) { + time_update(); + double start = time_now_d(); u32 callno = (op >> 6) & 0xFFFFF; //20 bits int funcnum = callno & 0xFFF; int modulenum = (callno & 0xFF000) >> 12; @@ -365,4 +368,14 @@ void CallSyscall(u32 op) { ERROR_LOG(HLE,"Unimplemented HLE function %s", moduleDB[modulenum].funcTable[funcnum].name); } + time_update(); + double total = time_now_d() - start; + if (total > kernelStats.slowestSyscallTime) { + const char *name = moduleDB[modulenum].funcTable[funcnum].name; + if (0 != strcmp(name, "_sceKernelIdle")) { + kernelStats.slowestSyscallTime = total; + kernelStats.slowestSyscallName = name; + } + } + kernelStats.msInSyscalls += total; } diff --git a/Core/HLE/sceKernel.cpp b/Core/HLE/sceKernel.cpp index 26909f5574..1dae0dd5ab 100644 --- a/Core/HLE/sceKernel.cpp +++ b/Core/HLE/sceKernel.cpp @@ -71,6 +71,7 @@ static bool kernelRunning = false; KernelObjectPool kernelObjects; +KernelStats kernelStats; void __KernelInit() { diff --git a/Core/HLE/sceKernel.h b/Core/HLE/sceKernel.h index 0b77682136..52ad5f8ca2 100644 --- a/Core/HLE/sceKernel.h +++ b/Core/HLE/sceKernel.h @@ -377,6 +377,7 @@ public: return t; } } + template T* GetByModuleByEntryAddr(u32 entryAddr) { @@ -410,13 +411,32 @@ public: int GetCount(); private: - enum {maxCount=4096, handleOffset=0x100}; + enum { + maxCount=4096, + handleOffset=0x100 + }; KernelObject *pool[maxCount]; bool 
occupied[maxCount]; }; extern KernelObjectPool kernelObjects; +struct KernelStats { + void Reset() { + memset(this, 0, sizeof(*this)); /* clear the whole object; sizeof(this) is only the pointer size */ + } + void ResetFrame() { + msInSyscalls = 0; + slowestSyscallTime = 0; + slowestSyscallName = 0; + } + + double msInSyscalls; + double slowestSyscallTime; + const char *slowestSyscallName; +}; + +extern KernelStats kernelStats; void Register_ThreadManForUser(); void Register_LoadExecForUser(); From ca0d74762dbcf26ed79722c2bc489bd50c13f32e Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Thu, 10 Jan 2013 23:49:33 +0100 Subject: [PATCH 3/3] Some realtime profiling stuff. Enable FZ (flush-to-zero) mode. --- CMakeLists.txt | 1 + Core/HLE/sceDisplay.cpp | 8 ++++++++ GPU/GLES/DisplayListInterpreter.cpp | 2 +- GPU/GLES/Framebuffer.cpp | 3 ++- GPU/GLES/TextureCache.cpp | 12 +++++++++++- GPU/GLES/TextureCache.h | 1 + GPU/GPUCommon.cpp | 8 ++++++-- GPU/GPUState.h | 2 ++ android/jni/EmuScreen.cpp | 1 + android/jni/NativeApp.cpp | 7 +++++++ native | 2 +- 11 files changed, 41 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9c85ba7a16..fcdda619e8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -112,6 +112,7 @@ if(NOT MSVC) add_definitions(-O2) add_definitions(-Wno-multichar) add_definitions(-fno-strict-aliasing) + add_definitions(-ffast-math) if(NOT APPLE) add_definitions(-Wno-psabi) add_definitions(-D_XOPEN_SOURCE=600 -D_XOPEN_SOURCE_EXTENDED -D__BSD_VISIBLE=1) diff --git a/Core/HLE/sceDisplay.cpp b/Core/HLE/sceDisplay.cpp index e85be05e2c..d047027cdc 100644 --- a/Core/HLE/sceDisplay.cpp +++ b/Core/HLE/sceDisplay.cpp @@ -211,6 +211,9 @@ void hleEnterVblank(u64 userdata, int cyclesLate) { char stats[512]; sprintf(stats, "Frames: %i\n" + "DL processing time: %0.2f ms\n" + "Kernel processing time: %0.2f ms\n" + "Slowest syscall: %s : %0.2f ms\n" "Draw calls: %i\n" "Draw flushes: %i\n" "Vertices Transformed: %i\n" @@ -221,6 +224,10 @@ void hleEnterVblank(u64 userdata, int cyclesLate) { "Fragment shaders 
loaded: %i\n" "Combined shaders loaded: %i\n", gpuStats.numFrames, + gpuStats.msProcessingDisplayLists * 1000.0f, + kernelStats.msInSyscalls * 1000.0f, + kernelStats.slowestSyscallName ? kernelStats.slowestSyscallName : "(none)", + kernelStats.slowestSyscallTime * 1000.0f, gpuStats.numDrawCalls, gpuStats.numFlushes, gpuStats.numVertsTransformed, @@ -238,6 +245,7 @@ void hleEnterVblank(u64 userdata, int cyclesLate) { PPGeEnd(); gpuStats.resetFrame(); + kernelStats.ResetFrame(); } host->EndFrame(); diff --git a/GPU/GLES/DisplayListInterpreter.cpp b/GPU/GLES/DisplayListInterpreter.cpp index f1a9d1030d..2d7aee60e2 100644 --- a/GPU/GLES/DisplayListInterpreter.cpp +++ b/GPU/GLES/DisplayListInterpreter.cpp @@ -199,7 +199,7 @@ void GLES_GPU::DumpNextFrame() { } void GLES_GPU::BeginFrame() { - TextureCache_Decimate(); + TextureCache_StartFrame(); if (dumpNextFrame_) { NOTICE_LOG(G3D, "DUMPING THIS FRAME"); diff --git a/GPU/GLES/Framebuffer.cpp b/GPU/GLES/Framebuffer.cpp index f3a6519fa0..963560db1b 100644 --- a/GPU/GLES/Framebuffer.cpp +++ b/GPU/GLES/Framebuffer.cpp @@ -166,7 +166,8 @@ void FramebufferManager::DrawActiveTexture(float w, float h, bool flip) { Matrix4x4 ortho; ortho.setOrtho(0, 480, 272, 0, -1, 1); glUniformMatrix4fv(draw2dprogram->u_viewproj, 1, GL_FALSE, ortho.getReadPtr()); - + glBindBuffer(GL_ARRAY_BUFFER, 0); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); glEnableVertexAttribArray(draw2dprogram->a_position); glEnableVertexAttribArray(draw2dprogram->a_texcoord0); glVertexAttribPointer(draw2dprogram->a_position, 3, GL_FLOAT, GL_FALSE, 12, pos); diff --git a/GPU/GLES/TextureCache.cpp b/GPU/GLES/TextureCache.cpp index 534564187b..14aec11244 100644 --- a/GPU/GLES/TextureCache.cpp +++ b/GPU/GLES/TextureCache.cpp @@ -598,6 +598,13 @@ void convertColors(u8 *finalBuf, GLuint dstFmt, int numPixels) { } } +int lastBoundTexture = -1; + +void TextureCache_StartFrame() { + lastBoundTexture = -1; + TextureCache_Decimate(); +} + void PSPSetTexture() { u32 texaddr = 
(gstate.texaddr[0] & 0xFFFFF0) | ((gstate.texbufwidth[0]<<8) & 0xFF000000); texaddr &= 0xFFFFFFF; @@ -656,7 +663,10 @@ void PSPSetTexture() { if (match) { //got one! entry.frameCounter = gpuStats.numFrames; - glBindTexture(GL_TEXTURE_2D, entry.texture); + if (entry.texture != lastBoundTexture) { + glBindTexture(GL_TEXTURE_2D, entry.texture); + lastBoundTexture = entry.texture; + } UpdateSamplingParams(entry, false); DEBUG_LOG(G3D, "Texture at %08x Found in Cache, applying", texaddr); return; //Done! diff --git a/GPU/GLES/TextureCache.h b/GPU/GLES/TextureCache.h index 7be01ad2e3..a9e062170f 100644 --- a/GPU/GLES/TextureCache.h +++ b/GPU/GLES/TextureCache.h @@ -24,6 +24,7 @@ void PSPSetTexture(); void TextureCache_Init(); void TextureCache_Shutdown(); void TextureCache_Clear(bool delete_them); +void TextureCache_StartFrame(); void TextureCache_Decimate(); // Run this once per frame to get rid of old textures. void TextureCache_Invalidate(u32 addr, int size, bool force); void TextureCache_InvalidateAll(bool force); diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index bb877840cf..b55554aa9e 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -1,3 +1,4 @@ +#include "base/timeutil.h" #include "../Core/MemMap.h" #include "GeDisasm.h" #include "GPUCommon.h" @@ -41,7 +42,6 @@ u32 GPUCommon::EnqueueList(u32 listpc, u32 stall, int subIntrBase, bool head) void GPUCommon::UpdateStall(int listid, u32 newstall) { - for (auto iter = dlQueue.begin(); iter != dlQueue.end(); ++iter) { DisplayList &cur = *iter; @@ -56,6 +56,8 @@ void GPUCommon::UpdateStall(int listid, u32 newstall) bool GPUCommon::InterpretList(DisplayList &list) { + time_update(); + double start = time_now_d(); currentList = &list; // Reset stackptr for safety stackptr = 0; @@ -91,13 +93,15 @@ bool GPUCommon::InterpretList(DisplayList &list) list.pc += 4; prev = op; } + time_update(); + gpuStats.msProcessingDisplayLists += time_now_d() - start; return true; } bool GPUCommon::ProcessDLQueue() { 
DisplayListQueue::iterator iter = dlQueue.begin(); - while (!(iter == dlQueue.end())) + while (iter != dlQueue.end()) { DisplayList &l = *iter; DEBUG_LOG(G3D,"Okay, starting DL execution at %08x - stall = %08x", l.pc, l.stall); diff --git a/GPU/GPUState.h b/GPU/GPUState.h index 4b3a58a6a9..dfbd97dec8 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -263,6 +263,7 @@ struct GPUStatistics numShaderSwitches = 0; numFlushes = 0; numTexturesDecoded = 0; + msProcessingDisplayLists = 0; } // Per frame statistics @@ -274,6 +275,7 @@ struct GPUStatistics int numTextureSwitches; int numShaderSwitches; int numTexturesDecoded; + double msProcessingDisplayLists; // Total statistics, updated by the GPU core in UpdateStats int numFrames; diff --git a/android/jni/EmuScreen.cpp b/android/jni/EmuScreen.cpp index 0dffb2548f..623bec0621 100644 --- a/android/jni/EmuScreen.cpp +++ b/android/jni/EmuScreen.cpp @@ -41,6 +41,7 @@ EmuScreen::EmuScreen(const std::string &filename) : invalid_(true) { + CheckGLExtensions(); std::string fileToStart = filename; // This is probably where we should start up the emulated PSP. INFO_LOG(BOOT, "Starting up hardware."); diff --git a/android/jni/NativeApp.cpp b/android/jni/NativeApp.cpp index 9faf23f009..4877028375 100644 --- a/android/jni/NativeApp.cpp +++ b/android/jni/NativeApp.cpp @@ -31,6 +31,7 @@ #include "gfx/gl_lost_manager.h" #include "gfx/texture.h" #include "input/input_state.h" +#include "math/math_util.h" #include "math/lin/matrix4x4.h" #include "ui/screen.h" #include "ui/ui.h" @@ -155,6 +156,7 @@ void NativeGetAppInfo(std::string *app_dir_name, std::string *app_nice_name, boo void NativeInit(int argc, const char *argv[], const char *savegame_directory, const char *external_directory, const char *installID) { + EnableFZ(); std::string user_data_path = savegame_directory; // We want this to be FIRST. 
@@ -308,6 +310,11 @@ void NativeInitGraphics() void NativeRender() { + EnableFZ(); + // Clearing the screen at the start of the frame is an optimization for tiled mobile GPUs, as it then doesn't need to keep it around between frames. + glClearColor(0,0,0,1); + glClear(GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); + glstate.Restore(); glViewport(0, 0, pixel_xres, pixel_yres); Matrix4x4 ortho; diff --git a/native b/native index c63061ddfd..7ceecd22b4 160000 --- a/native +++ b/native @@ -1 +1 @@ -Subproject commit c63061ddfd5aec3b9dd51aa4c71150de905d8d1e +Subproject commit 7ceecd22b421c697d4d8c8ad6bda7654226be8ca