ppsspp/Common/Profiler/Profiler.cpp
// Ultra-lightweight category profiler with history.

#include <algorithm>
#include <mutex>
#include <vector>
#include <cstring>

#include "ppsspp_config.h"

#include "Common/Render/DrawBuffer.h"
#include "Common/TimeUtil.h"
#include "Common/Profiler/Profiler.h"
#include "Common/Log.h"

#define MAX_CATEGORIES 64 // Can be any number, represents max profiled names.
#define MAX_DEPTH 16 // Can be any number, represents max nesting depth of profiled names.

#if PPSSPP_PLATFORM(IOS) && defined(__IPHONE_OS_VERSION_MIN_REQUIRED) && __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_9_0
// iOS did not support C++ thread_local before iOS 9
#define MAX_THREADS 1 // Can be any number, represents concurrent threads calling the profiler.
#else
#define MAX_THREADS 4 // Can be any number, represents concurrent threads calling the profiler.
#endif

#define HISTORY_SIZE 128 // Must be power of 2

#ifndef _DEBUG
// If the compiler can collapse identical strings, we don't even need the strcmp.
#define UNIFIED_CONST_STR
#endif
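// In other words: in non-debug builds, category lookup in internal_profiler_find_cat()
// compares string pointers and relies on the toolchain merging identical literals; debug
// builds fall back to comparing the actual characters with strcmp().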

struct Category {
	const char *name;
};

struct CategoryFrame {
	CategoryFrame() {
		memset(time_taken, 0, sizeof(time_taken));
		memset(count, 0, sizeof(count));
	}
	float time_taken[MAX_CATEGORIES];
	int count[MAX_CATEGORIES];
};

struct Profiler {
	int historyPos;
	int depth[MAX_THREADS];
	int parentCategory[MAX_THREADS][MAX_DEPTH];
	double eventStart[MAX_THREADS][MAX_CATEGORIES];
	double curFrameStart;
};

static Profiler profiler;
static Category categories[MAX_CATEGORIES];
static std::mutex categoriesLock;
static int threadIdAfterLast = 0;
static std::mutex threadsLock;
static CategoryFrame *history;
#if MAX_THREADS > 1
thread_local int profilerThreadId = -1;
#else
static int profilerThreadId = 0;
#endif

void internal_profiler_init() {
	memset(&profiler, 0, sizeof(profiler));
#if MAX_THREADS == 1
	threadIdAfterLast = 1;
#endif
	for (int i = 0; i < MAX_THREADS; i++) {
		for (int j = 0; j < MAX_DEPTH; j++) {
			profiler.parentCategory[i][j] = -1;
		}
	}
	history = new CategoryFrame[HISTORY_SIZE * MAX_THREADS];
}

static int internal_profiler_find_thread() {
	int thread_id = profilerThreadId;
	if (thread_id != -1) {
		return thread_id;
	}

	std::lock_guard<std::mutex> guard(threadsLock);
	if (threadIdAfterLast < MAX_THREADS) {
		thread_id = threadIdAfterLast++;
		profilerThreadId = thread_id;
		return thread_id;
	}

	// Just keep reusing the last one. If more threads than MAX_THREADS call into the
	// profiler, the extras all share this slot and their timings get merged.
	return threadIdAfterLast - 1;
}

int internal_profiler_find_cat(const char *category_name, bool create_missing) {
	int i;
	for (i = 0; i < MAX_CATEGORIES; i++) {
		const char *catname = categories[i].name;
		if (!catname)
			break;
#ifdef UNIFIED_CONST_STR
		if (catname == category_name) {
#else
		if (!strcmp(catname, category_name)) {
#endif
			return i;
		}
	}

	if (i < MAX_CATEGORIES && category_name && create_missing) {
		std::lock_guard<std::mutex> guard(categoriesLock);
		// Re-check under the lock: another thread may have registered this name, or taken
		// this slot for a different name, since the unlocked scan above.
		int race_check = internal_profiler_find_cat(category_name, false);
		if (race_check != -1)
			return race_check;
		while (i < MAX_CATEGORIES && categories[i].name)
			i++;
		if (i == MAX_CATEGORIES)
			return -1;
		categories[i].name = category_name;
		return i;
	}

	return -1;
}

// Suspend, also used to prepare for leaving.
static void internal_profiler_suspend(int thread_id, int category, double now) {
	double diff = now - profiler.eventStart[thread_id][category];
	history[MAX_THREADS * profiler.historyPos + thread_id].time_taken[category] += (float)diff;
	profiler.eventStart[thread_id][category] = 0.0;
}

// Resume, also used as part of entering.
static void internal_profiler_resume(int thread_id, int category, double now) {
	profiler.eventStart[thread_id][category] = now;
}

int internal_profiler_enter(const char *category_name, int *out_thread_id) {
	int category = internal_profiler_find_cat(category_name, true);
	int thread_id = internal_profiler_find_thread();
	if (category == -1 || !history) {
		return category;
	}

	int &depth = profiler.depth[thread_id];
	if (profiler.eventStart[thread_id][category] == 0.0f) {
		double now = time_now_d();
		int parent = profiler.parentCategory[thread_id][depth];
		// Temporarily suspend the parent on entering a child.
		if (parent != -1) {
			internal_profiler_suspend(thread_id, parent, now);
		}
		internal_profiler_resume(thread_id, category, now);
	} else {
		DEBUG_LOG(Log::System, "profiler: recursive enter (%i - %s)", category, category_name);
	}

	depth++;
	profiler.parentCategory[thread_id][depth] = category;

	*out_thread_id = thread_id;
	return category;
}

void internal_profiler_leave(int thread_id, int category) {
	if (category == -1 || !history) {
		return;
	}

	int &depth = profiler.depth[thread_id];
	if (category < 0 || category >= MAX_CATEGORIES) {
		ERROR_LOG(Log::System, "Bad category index %d", category);
		depth--;
		return;
	}

	double now = time_now_d();
	depth--;
	_assert_msg_(depth >= 0, "Profiler enter/leave mismatch!");

	int parent = profiler.parentCategory[thread_id][depth];
	// When there's recursion, we don't suspend or resume.
	if (parent != category) {
		internal_profiler_suspend(thread_id, category, now);
		history[MAX_THREADS * profiler.historyPos + thread_id].count[category]++;
		if (parent != -1) {
			// Resume tracking the parent.
			internal_profiler_resume(thread_id, parent, now);
		}
	}
}
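
// Illustrative example: callers are expected to bracket a scope with matching
// internal_profiler_enter()/internal_profiler_leave() calls, typically through a RAII
// helper declared in Common/Profiler/Profiler.h. ExampleProfileScope below is a
// hypothetical stand-in that only demonstrates the intended pairing.
namespace {
class ExampleProfileScope {
public:
	explicit ExampleProfileScope(const char *category) {
		// Records the start time for this category (and suspends the parent category).
		category_ = internal_profiler_enter(category, &threadId_);
	}
	~ExampleProfileScope() {
		// Accumulates the elapsed time and resumes the parent category.
		internal_profiler_leave(threadId_, category_);
	}
private:
	int category_ = -1;
	int threadId_ = 0;
};
}  // namespace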

void internal_profiler_end_frame() {
	int thread_id = internal_profiler_find_thread();
	_assert_msg_(profiler.depth[thread_id] == 0, "Can't be inside a profiler scope at end of frame!");
	profiler.curFrameStart = time_now_d();
	profiler.historyPos++;
	profiler.historyPos &= (HISTORY_SIZE - 1);
	memset(&history[MAX_THREADS * profiler.historyPos], 0, sizeof(CategoryFrame) * MAX_THREADS);
}
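
// Illustrative example: the expected per-frame driver shape, with a hypothetical
// renderOneFrame callback standing in for the code that contains the enter/leave pairs.
// Each frame must end with no profiler scope still open on the calling thread.
[[maybe_unused]] static void ExampleFrameLoop(bool (*renderOneFrame)()) {
	while (renderOneFrame()) {
		// Rotate the history ring buffer once the frame's work is done.
		internal_profiler_end_frame();
	}
}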

const char *Profiler_GetCategoryName(int i) {
	return i >= 0 ? categories[i].name : "N/A";
}

int Profiler_GetHistoryLength() {
	return HISTORY_SIZE;
}

int Profiler_GetNumCategories() {
	for (int i = 0; i < MAX_CATEGORIES; i++) {
		if (!categories[i].name)
			return i;
	}
	return 0;
}

int Profiler_GetNumThreads() {
	return threadIdAfterLast;
}

void Profiler_GetSlowestThreads(int *data, int count) {
	int numCategories = Profiler_GetNumCategories();
	// Fill 'count' entries, oldest to newest, ending at the current historyPos slot.
	for (int i = 0; i < count; i++) {
		int x = i - count + profiler.historyPos + 1;
		while (x < 0)
			x += HISTORY_SIZE;
		while (x >= HISTORY_SIZE)
			x -= HISTORY_SIZE;

		float slowestTime = 0.0f;
		data[i] = 0;
		for (int thread = 0; thread < threadIdAfterLast; ++thread) {
			float sum = 0.0f;
			for (int c = 0; c < numCategories; ++c) {
				sum += history[MAX_THREADS * x + thread].time_taken[c];
			}
			if (sum > slowestTime) {
				slowestTime = sum;
				data[i] = thread;
			}
		}
	}
}

void Profiler_GetSlowestHistory(int category, int *slowestThreads, float *data, int count) {
	for (int i = 0; i < count; i++) {
		int x = i - count + profiler.historyPos + 1;
		while (x < 0)
			x += HISTORY_SIZE;
		while (x >= HISTORY_SIZE)
			x -= HISTORY_SIZE;

		int thread = slowestThreads[i];
		data[i] = history[MAX_THREADS * x + thread].time_taken[category];
	}
}

void Profiler_GetHistory(int category, int thread, float *data, int count) {
	for (int i = 0; i < count; i++) {
		int x = i - count + profiler.historyPos + 1;
		while (x < 0)
			x += HISTORY_SIZE;
		while (x >= HISTORY_SIZE)
			x -= HISTORY_SIZE;

		data[i] = history[MAX_THREADS * x + thread].time_taken[category];
	}
}