Merge pull request #427 from libretro/perf-fixup

Rework performance interface.
This commit is contained in:
Twinaphex 2013-12-18 10:39:45 -08:00
commit 88508e2fbc
12 changed files with 210 additions and 170 deletions

View File

@ -63,11 +63,10 @@ typedef struct video4linux
static void process_image(video4linux_t *v4l, const uint8_t *buffer_yuv)
{
static retro_perf_counter_t yuv_convert_direct = { "yuv_convert_direct", 0, 0, 0, false };
rarch_perf_init(&yuv_convert_direct, g_settings.perfcounter_enable);
rarch_perf_start(&yuv_convert_direct, g_settings.perfcounter_enable);
RARCH_PERFORMANCE_INIT(yuv_convert_direct);
RARCH_PERFORMANCE_START(yuv_convert_direct);
scaler_ctx_scale(&v4l->scaler, v4l->buffer_output, buffer_yuv);
rarch_perf_stop(&yuv_convert_direct, g_settings.perfcounter_enable);
RARCH_PERFORMANCE_STOP(yuv_convert_direct);
}
static int xioctl(int fd, int request, void *args)

View File

@ -18,6 +18,7 @@
#include "compat/strl.h"
#include "compat/posix_string.h"
#include "retroarch_logger.h"
#include "performance.h"
#include "file.h"
#include <string.h>
#include <ctype.h>
@ -417,6 +418,9 @@ void uninit_libretro_sym(void)
// No longer valid.
memset(&g_extern.system, 0, sizeof(g_extern.system));
// Performance counters no longer valid.
retro_perf_clear();
}
#ifdef NEED_DYNAMIC
@ -844,15 +848,13 @@ bool rarch_environment_cb(unsigned cmd, void *data)
{
RARCH_LOG("Environ GET_PERF_INTERFACE.\n");
struct retro_perf_callback *cb = (struct retro_perf_callback*)data;
cb->get_perf_counter = rarch_get_perf_counter;
cb->get_time_usec = rarch_get_time_usec;
cb->get_cpu_features = rarch_get_cpu_features;
cb->perf_init = rarch_perf_init;
cb->perf_start = rarch_perf_start;
cb->perf_stop = rarch_perf_stop;
cb->perf_log = rarch_perf_log;
cb->perf_logs = rarch_perf_logs;
cb->perf_register = rarch_perf_register;
cb->get_time_usec = rarch_get_time_usec;
cb->get_cpu_features = rarch_get_cpu_features;
cb->get_perf_counter = rarch_get_perf_counter;
cb->perf_register = retro_perf_register; // libretro specific path.
cb->perf_start = rarch_perf_start;
cb->perf_stop = rarch_perf_stop;
cb->perf_log = retro_perf_log; // libretro specific path.
break;
}

View File

@ -292,7 +292,6 @@ struct settings
bool rgui_show_start_screen;
#endif
bool fps_show;
bool perfcounter_enable;
};
enum rarch_game_type

View File

@ -1315,14 +1315,13 @@ static void gl_pbo_async_readback(void *data)
glPixelStorei(GL_PACK_ALIGNMENT, get_alignment(gl->vp.width * sizeof(uint32_t)));
// Read asynchronously into PBO buffer.
static retro_perf_counter_t async_readback = { "async_readback", 0, 0, 0, false };
rarch_perf_init(&async_readback, g_settings.perfcounter_enable);
rarch_perf_start(&async_readback, g_settings.perfcounter_enable);
RARCH_PERFORMANCE_INIT(async_readback);
RARCH_PERFORMANCE_START(async_readback);
glReadBuffer(GL_BACK);
glReadPixels(gl->vp.x, gl->vp.y,
gl->vp.width, gl->vp.height,
GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, NULL);
rarch_perf_stop(&async_readback, g_settings.perfcounter_enable);
RARCH_PERFORMANCE_STOP(async_readback);
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
}
@ -1373,9 +1372,8 @@ static inline void gl_draw_texture(void *data)
static bool gl_frame(void *data, const void *frame, unsigned width, unsigned height, unsigned pitch, const char *msg)
{
static retro_perf_counter_t frame_run = { "frame_run", 0, 0, 0, false };
rarch_perf_init(&frame_run, g_settings.perfcounter_enable);
rarch_perf_start(&frame_run, g_settings.perfcounter_enable);
RARCH_PERFORMANCE_INIT(frame_run);
RARCH_PERFORMANCE_START(frame_run);
gl_t *gl = (gl_t*)data;
@ -1420,12 +1418,11 @@ static bool gl_frame(void *data, const void *frame, unsigned width, unsigned hei
if (!gl->hw_render_fbo_init)
#endif
{
static retro_perf_counter_t copy_frame = { "copy_frame", 0, 0, 0, false };
gl_update_input_size(gl, width, height, pitch, true);
rarch_perf_init(&copy_frame, g_settings.perfcounter_enable);
rarch_perf_start(&copy_frame, g_settings.perfcounter_enable);
RARCH_PERFORMANCE_INIT(copy_frame);
RARCH_PERFORMANCE_START(copy_frame);
gl_copy_frame(gl, frame, width, height, pitch);
rarch_perf_stop(&copy_frame, g_settings.perfcounter_enable);
RARCH_PERFORMANCE_STOP(copy_frame);
}
}
else
@ -1504,7 +1501,7 @@ static bool gl_frame(void *data, const void *frame, unsigned width, unsigned hei
context_update_window_title_func();
rarch_perf_stop(&frame_run, g_settings.perfcounter_enable);
RARCH_PERFORMANCE_STOP(frame_run);
#ifdef HAVE_FBO
// Reset state which could easily mess up libretro core.
@ -1530,9 +1527,8 @@ static bool gl_frame(void *data, const void *frame, unsigned width, unsigned hei
#ifdef HAVE_GL_SYNC
if (g_settings.video.hard_sync && gl->have_sync)
{
static retro_perf_counter_t gl_fence = {"gl_fence", 0, 0, 0, false};
rarch_perf_init(&gl_fence, g_settings.perfcounter_enable);
rarch_perf_start(&gl_fence, g_settings.perfcounter_enable);
RARCH_PERFORMANCE_INIT(gl_fence);
RARCH_PERFORMANCE_START(gl_fence);
glClear(GL_COLOR_BUFFER_BIT);
gl->fences[gl->fence_count++] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
@ -1545,7 +1541,7 @@ static bool gl_frame(void *data, const void *frame, unsigned width, unsigned hei
memmove(gl->fences, gl->fences + 1, gl->fence_count * sizeof(GLsync));
}
rarch_perf_stop(&gl_fence, g_settings.perfcounter_enable);
RARCH_PERFORMANCE_STOP(gl_fence);
}
#endif
@ -2341,13 +2337,12 @@ static void gl_viewport_info(void *data, struct rarch_viewport *vp)
static bool gl_read_viewport(void *data, uint8_t *buffer)
{
static retro_perf_counter_t read_viewport = { "read_viewport", 0, 0, 0, false };
unsigned i;
gl_t *gl = (gl_t*)data;
(void)i;
rarch_perf_init(&read_viewport, g_settings.perfcounter_enable);
rarch_perf_start(&read_viewport, g_settings.perfcounter_enable);
RARCH_PERFORMANCE_INIT(read_viewport);
RARCH_PERFORMANCE_START(read_viewport);
#ifdef HAVE_FBO
// Make sure we're reading from backbuffer in case some state has been overridden.
@ -2403,7 +2398,7 @@ static bool gl_read_viewport(void *data, uint8_t *buffer)
}
#endif
rarch_perf_stop(&read_viewport, g_settings.perfcounter_enable);
RARCH_PERFORMANCE_STOP(read_viewport);
return true;
}
#endif

View File

@ -305,11 +305,10 @@ static bool sdl_gfx_frame(void *data, const void *frame, unsigned width, unsigne
if (SDL_MUSTLOCK(vid->screen))
SDL_LockSurface(vid->screen);
static retro_perf_counter_t sdl_scale = { "sdl_scale", 0, 0, 0, false };
rarch_perf_init(&sdl_scale, g_settings.perfcounter_enable);
rarch_perf_start(&sdl_scale, g_settings.perfcounter_enable);
RARCH_PERFORMANCE_INIT(sdl_scale);
RARCH_PERFORMANCE_START(sdl_scale);
scaler_ctx_scale(&vid->scaler, vid->screen->pixels, frame);
rarch_perf_stop(&sdl_scale, g_settings.perfcounter_enable);
RARCH_PERFORMANCE_STOP(sdl_scale);
if (msg)
sdl_render_msg(vid, vid->screen, msg, vid->screen->w, vid->screen->h, vid->screen->format);

View File

@ -394,9 +394,8 @@ static bool thread_focus(void *data)
static bool thread_frame(void *data, const void *frame_,
unsigned width, unsigned height, unsigned pitch, const char *msg)
{
static retro_perf_counter_t thread_frame = { "thread_frame", 0, 0, 0, false};
rarch_perf_init(&thread_frame, g_settings.perfcounter_enable);
rarch_perf_start(&thread_frame, g_settings.perfcounter_enable);
RARCH_PERFORMANCE_INIT(thread_frame);
RARCH_PERFORMANCE_START(thread_frame);
thread_video_t *thr = (thread_video_t*)data;
unsigned copy_stride = width * (thr->info.rgb32 ? sizeof(uint32_t) : sizeof(uint16_t));
@ -462,7 +461,7 @@ static bool thread_frame(void *data, const void *frame_,
slock_unlock(thr->lock);
rarch_perf_stop(&thread_frame, g_settings.perfcounter_enable);
RARCH_PERFORMANCE_STOP(thread_frame);
thr->last_time = rarch_get_time_usec();
return true;

View File

@ -353,9 +353,8 @@ static void vg_copy_frame(void *data, const void *frame, unsigned width, unsigne
static bool vg_frame(void *data, const void *frame, unsigned width, unsigned height, unsigned pitch, const char *msg)
{
static retro_perf_counter_t vg_fr = { "vg_fr", 0, 0, 0, false };
rarch_perf_init(&vg_fr, g_settings.perfcounter_enable);
rarch_perf_start(&vg_fr, g_settings.perfcounter_enable);
RARCH_PERFORMANCE_INIT(vg_fr);
RARCH_PERFORMANCE_START(vg_fr);
vg_t *vg = (vg_t*)data;
if (width != vg->mRenderWidth || height != vg->mRenderHeight || vg->should_resize)
@ -377,11 +376,10 @@ static bool vg_frame(void *data, const void *frame, unsigned width, unsigned hei
vgClear(0, 0, vg->mScreenWidth, vg->mScreenHeight);
vgSeti(VG_SCISSORING, VG_TRUE);
static retro_perf_counter_t vg_image = { "vg_image", 0, 0, 0, false };
rarch_perf_init(&vg_image, g_settings.perfcounter_enable);
rarch_perf_start(&vg_image, g_settings.perfcounter_enable);
RARCH_PERFORMANCE_INIT(vg_image);
RARCH_PERFORMANCE_START(vg_image);
vg_copy_frame(vg, frame, width, height, pitch);
rarch_perf_stop(&vg_image, g_settings.perfcounter_enable);
RARCH_PERFORMANCE_STOP(vg_image);
vgDrawImage(vg->mImage);
@ -390,7 +388,7 @@ static bool vg_frame(void *data, const void *frame, unsigned width, unsigned hei
vg->driver->update_window_title();
rarch_perf_stop(&vg_fr, g_settings.perfcounter_enable);
RARCH_PERFORMANCE_STOP(vg_fr);
vg->driver->swap_buffers();
return true;

View File

@ -587,9 +587,9 @@ struct retro_log_callback
retro_log_printf_t log;
};
// Performance functions
// Performance related functions
//
// Id values for SIMD CPU features
// ID values for SIMD CPU features
#define RETRO_SIMD_SSE (1 << 0)
#define RETRO_SIMD_SSE2 (1 << 1)
#define RETRO_SIMD_VMX (1 << 2)
@ -599,10 +599,10 @@ struct retro_log_callback
#define RETRO_SIMD_SSE3 (1 << 6)
#define RETRO_SIMD_SSSE3 (1 << 7)
typedef unsigned long long retro_perf_tick_t;
typedef uint64_t retro_perf_tick_t;
typedef int64_t retro_time_t;
typedef struct retro_perf_counter
struct retro_perf_counter
{
const char *ident;
retro_perf_tick_t start;
@ -610,30 +610,67 @@ typedef struct retro_perf_counter
retro_perf_tick_t call_cnt;
bool registered;
} retro_perf_counter_t;
};
// Returns current time in microseconds. Tries to use the most accurate timer available.
typedef retro_time_t (*retro_perf_get_time_usec_t)(void);
// A simple counter. Usually nanoseconds, but can also be CPU cycles.
// Can be used directly if desired (when creating a more sophisticated performance counter system).
typedef retro_perf_tick_t (*retro_perf_get_counter_t)(void);
typedef void (*retro_get_cpu_features_t)(unsigned*);
typedef void (*retro_perf_init_t)(void*, bool);
typedef void (*retro_perf_start_t)(void*, bool);
typedef void (*retro_perf_stop_t)(void*, bool);
typedef void (*retro_perf_log_t)(void*, const char*, bool);
typedef void (*retro_perf_logs_t)(void);
typedef void (*retro_perf_register_t)(retro_perf_counter_t*);
// Returns a bit-mask of detected CPU features (RETRO_SIMD_*).
typedef uint64_t (*retro_get_cpu_features_t)(void);
// Asks frontend to log and/or display the state of performance counters.
// Performance counters can always be poked into manually as well.
typedef void (*retro_perf_log_t)(void);
// Register a performance counter.
// ident field must be set with a discrete value and other values in retro_perf_counter must be 0.
// Registering can be called multiple times. To avoid calling to frontend redundantly, you can check registered field first.
typedef void (*retro_perf_register_t)(struct retro_perf_counter *counter);
// Starts and stops a registered counter.
typedef void (*retro_perf_start_t)(struct retro_perf_counter *counter);
typedef void (*retro_perf_stop_t)(struct retro_perf_counter *counter);
// For convenience it can be useful to wrap register, start and stop in macros.
// E.g.:
// #ifdef LOG_PERFORMANCE
// #define RETRO_PERFORMANCE_INIT(perf_cb, name) static struct retro_perf_counter name = {#name}; if (!name.registered) perf_cb.perf_register(&(name))
// #define RETRO_PERFORMANCE_START(perf_cb, name) perf_cb.perf_start(&(name))
// #define RETRO_PERFORMANCE_STOP(perf_cb, name) perf_cb.perf_stop(&(name))
// #else
// ... Blank macros ...
// #endif
// These can then be used mid-functions around code snippets.
//
// extern struct retro_perf_callback perf_cb; // Somewhere in the core.
//
// void do_some_heavy_work(void)
// {
// RETRO_PERFORMANCE_INIT(perf_cb, work_1);
// RETRO_PERFORMANCE_START(perf_cb, work_1);
// heavy_work_1();
// RETRO_PERFORMANCE_STOP(perf_cb, work_1);
//
// RETRO_PERFORMANCE_INIT(perf_cb, work_2);
// RETRO_PERFORMANCE_START(perf_cb, work_2);
// heavy_work_2();
// RETRO_PERFORMANCE_STOP(perf_cb, work_2);
// }
//
// void retro_deinit(void)
// {
// perf_cb.perf_log(); // Log all perf counters here for example.
// }
struct retro_perf_callback
{
retro_perf_get_time_usec_t get_time_usec;
retro_perf_get_counter_t get_perf_counter;
retro_get_cpu_features_t get_cpu_features;
retro_perf_init_t perf_init;
retro_perf_get_counter_t get_perf_counter;
retro_perf_register_t perf_register;
retro_perf_start_t perf_start;
retro_perf_stop_t perf_stop;
retro_perf_log_t perf_log;
retro_perf_logs_t perf_logs;
retro_perf_register_t perf_register;
};
// FIXME: Document the sensor API and work out behavior.

View File

@ -42,11 +42,9 @@
#include <time.h>
#endif
#ifdef __QNX__
#ifndef CLOCK_MONOTONIC
#if defined(__QNX__) && !defined(CLOCK_MONOTONIC)
#define CLOCK_MONOTONIC 2
#endif
#endif
#if defined(__PSL1GHT__)
#include <sys/time.h>
@ -71,25 +69,65 @@
#include <string.h>
#define MAX_COUNTERS 64
static struct retro_perf_counter *perf_counters[MAX_COUNTERS];
static unsigned perf_ptr;
static const struct retro_perf_counter *perf_counters_rarch[MAX_COUNTERS];
static const struct retro_perf_counter *perf_counters_libretro[MAX_COUNTERS];
static unsigned perf_ptr_rarch;
static unsigned perf_ptr_libretro;
// Adds a RetroArch-internal performance counter to the frontend's own table
// (perf_counters_rarch) so that rarch_perf_log() can report it later.
//
// perf: counter to register. Only the pointer is stored, so the object it
//       points to has to stay valid for as long as it is listed.
//
// The call does nothing when perf is NULL, when the counter is already
// flagged as registered, or when the table already holds MAX_COUNTERS entries.
void rarch_perf_register(struct retro_perf_counter *perf)
{
   if (!perf || perf->registered || perf_ptr_rarch >= MAX_COUNTERS)
      return;

   perf_counters_rarch[perf_ptr_rarch++] = perf;
   // Mark the counter so repeated INIT calls do not add it a second time.
   perf->registered = true;
}
void retro_perf_register(struct retro_perf_counter *perf)
{
if (perf->registered || perf_ptr_libretro >= MAX_COUNTERS)
return;
void rarch_perf_logs(void)
perf_counters_libretro[perf_ptr_libretro++] = perf;
perf->registered = true;
}
// Forgets every counter registered through retro_perf_register():
// the pointer table is zeroed and the fill index is rewound to the start.
// The counter objects themselves are not touched.
void retro_perf_clear(void)
{
   memset(perf_counters_libretro, 0, sizeof(perf_counters_libretro));
   perf_ptr_libretro = 0;
}
#ifdef _WIN32
#define PERF_LOG_FMT "[PERF]: Avg (%s): %I64u ticks, %I64u runs.\n"
#else
#define PERF_LOG_FMT "[PERF]: Avg (%s): %llu ticks, %llu runs.\n"
#endif
// Logs one PERF_LOG_FMT line per counter: its ident, the average number of
// ticks per run (total / call_cnt) and the number of runs.
//
// counters: table of registered counter pointers.
// num:      number of valid entries at the start of the table.
//
// Counters that were registered but never started have call_cnt == 0; they
// are skipped, since computing their average would divide by zero.
static void log_counters(const struct retro_perf_counter **counters, unsigned num)
{
   unsigned i;

   for (i = 0; i < num; i++)
   {
      const struct retro_perf_counter *counter = counters[i];
      // Values are widened to unsigned long long to match PERF_LOG_FMT.
      unsigned long long runs = (unsigned long long)counter->call_cnt;

      if (runs == 0)
         continue;

      RARCH_LOG(PERF_LOG_FMT,
            counter->ident,
            (unsigned long long)counter->total / runs,
            runs);
   }
}
// Reports the frontend's own counters (perf_counters_rarch) through the log.
// The body is only compiled in when PERF_TEST is defined or RARCH_INTERNAL is
// not defined; in any other build the function is an empty stub.
void rarch_perf_log(void)
{
#if !defined(PERF_TEST) && defined(RARCH_INTERNAL)
   // Nothing to report in this configuration.
#else
   RARCH_LOG("[PERF]: Performance counters (RetroArch):\n");
   log_counters(perf_counters_rarch, perf_ptr_rarch);
#endif
}
// Reports the counters registered by the libretro core
// (perf_counters_libretro): a header line followed by one line per counter.
void retro_perf_log(void)
{
   const unsigned registered_count = perf_ptr_libretro;

   RARCH_LOG("[PERF]: Performance counters (libretro):\n");
   log_counters(perf_counters_libretro, registered_count);
}
retro_perf_tick_t rarch_get_perf_counter(void)
@ -122,7 +160,7 @@ retro_perf_tick_t rarch_get_perf_counter(void)
#endif
#elif defined(__ARM_ARCH_6__)
asm volatile( "mrc p15, 0, %0, c9, c13, 0" : "=r"(time) );
asm volatile( "mrc p15, 0, %0, c9, c13, 0" : "=r"(time) );
#elif defined(__CELLOS_LV2__) || defined(GEKKO) || defined(_XBOX360)
time = __mftb();
#endif
@ -130,49 +168,6 @@ retro_perf_tick_t rarch_get_perf_counter(void)
return time;
}
void rarch_perf_init(void *data, bool enable)
{
struct retro_perf_counter *perf = (struct retro_perf_counter*)data;
if (!enable || !perf)
return;
if (!perf->registered)
rarch_perf_register(perf);
}
void rarch_perf_start(void *data, bool enable)
{
struct retro_perf_counter *perf = (struct retro_perf_counter*)data;
if (!enable || !perf)
return;
perf->call_cnt++;
perf->start = rarch_get_perf_counter();
}
void rarch_perf_stop(void *data, bool enable)
{
struct retro_perf_counter *perf = (struct retro_perf_counter*)data;
if (!enable || !perf)
return;
perf->total += rarch_get_perf_counter() - perf->start;
}
void rarch_perf_log(void *data, const char *funcname, bool enable)
{
struct retro_perf_counter *perf = (struct retro_perf_counter*)data;
if (!enable || !perf)
return;
#ifdef _WIN32
RARCH_LOG("[PERF]: Avg (%s): %I64u ticks, %I64u runs.\n",
funcname, perf->total / perf->call_cnt, perf->call_cnt);
#else
RARCH_LOG("[PERF]: Avg (%s): %llu ticks, %llu runs.\n",
funcname, perf->total / perf->call_cnt, perf->call_cnt);
#endif
}
retro_time_t rarch_get_time_usec(void)
{
#if defined(_WIN32)
@ -244,9 +239,9 @@ static void x86_cpuid(int func, int flags[4])
}
#endif
void rarch_get_cpu_features(unsigned *cpu)
uint64_t rarch_get_cpu_features(void)
{
*cpu = 0;
uint64_t cpu = 0;
#if defined(CPU_X86)
int flags[4];
@ -258,46 +253,48 @@ void rarch_get_cpu_features(unsigned *cpu)
RARCH_LOG("[CPUID]: Vendor: %s\n", vendor);
if (flags[0] < 1) // Does CPUID not support func = 1? (unlikely ...)
return;
return 0;
x86_cpuid(1, flags);
if (flags[3] & (1 << 25))
*cpu |= RETRO_SIMD_SSE;
cpu |= RETRO_SIMD_SSE;
if (flags[3] & (1 << 26))
*cpu |= RETRO_SIMD_SSE2;
cpu |= RETRO_SIMD_SSE2;
if (flags[2] & (1 << 0))
*cpu |= RETRO_SIMD_SSE3;
cpu |= RETRO_SIMD_SSE3;
if (flags[2] & (1 << 9))
*cpu |= RETRO_SIMD_SSSE3;
cpu |= RETRO_SIMD_SSSE3;
const int avx_flags = (1 << 27) | (1 << 28);
if ((flags[2] & avx_flags) == avx_flags)
*cpu |= RETRO_SIMD_AVX;
cpu |= RETRO_SIMD_AVX;
RARCH_LOG("[CPUID]: SSE: %u\n", !!(*cpu & RETRO_SIMD_SSE));
RARCH_LOG("[CPUID]: SSE2: %u\n", !!(*cpu & RETRO_SIMD_SSE2));
RARCH_LOG("[CPUID]: SSE3: %u\n", !!(*cpu & RETRO_SIMD_SSE3));
RARCH_LOG("[CPUID]: SSSE3: %u\n", !!(*cpu & RETRO_SIMD_SSSE3));
RARCH_LOG("[CPUID]: AVX: %u\n", !!(*cpu & RETRO_SIMD_AVX));
RARCH_LOG("[CPUID]: SSE: %u\n", !!(cpu & RETRO_SIMD_SSE));
RARCH_LOG("[CPUID]: SSE2: %u\n", !!(cpu & RETRO_SIMD_SSE2));
RARCH_LOG("[CPUID]: SSE3: %u\n", !!(cpu & RETRO_SIMD_SSE3));
RARCH_LOG("[CPUID]: SSSE3: %u\n", !!(cpu & RETRO_SIMD_SSSE3));
RARCH_LOG("[CPUID]: AVX: %u\n", !!(cpu & RETRO_SIMD_AVX));
#elif defined(ANDROID) && defined(ANDROID_ARM)
uint64_t cpu_flags = android_getCpuFeatures();
if (cpu_flags & ANDROID_CPU_ARM_FEATURE_NEON)
*cpu |= RETRO_SIMD_NEON;
cpu |= RETRO_SIMD_NEON;
RARCH_LOG("[CPUID]: NEON: %u\n", !!(*cpu & RETRO_SIMD_NEON));
RARCH_LOG("[CPUID]: NEON: %u\n", !!(cpu & RETRO_SIMD_NEON));
#elif defined(HAVE_NEON)
*cpu |= RETRO_SIMD_NEON;
RARCH_LOG("[CPUID]: NEON: %u\n", !!(*cpu & RETRO_SIMD_NEON));
RARCH_LOG("[CPUID]: NEON: %u\n", !!(cpu & RETRO_SIMD_NEON));
#elif defined(__CELLOS_LV2__)
*cpu |= RETRO_SIMD_VMX;
RARCH_LOG("[CPUID]: VMX: %u\n", !!(*cpu & RETRO_SIMD_VMX));
RARCH_LOG("[CPUID]: VMX: %u\n", !!(cpu & RETRO_SIMD_VMX));
#elif defined(XBOX360)
*cpu |= RETRO_SIMD_VMX128;
RARCH_LOG("[CPUID]: VMX128: %u\n", !!(*cpu & RETRO_SIMD_VMX128));
RARCH_LOG("[CPUID]: VMX128: %u\n", !!(cpu & RETRO_SIMD_VMX128));
#endif
return cpu;
}

View File

@ -26,19 +26,45 @@ extern "C" {
#endif
#include "boolean.h"
#include "libretro.h"
#include <stdint.h>
retro_perf_tick_t rarch_get_perf_counter(void);
retro_time_t rarch_get_time_usec(void);
void rarch_perf_register(struct retro_perf_counter *perf);
void rarch_perf_logs(void);
void retro_perf_register(struct retro_perf_counter *perf); // Same as rarch_perf_register, just for libretro cores.
void retro_perf_clear(void);
void rarch_perf_log(void);
void retro_perf_log(void);
void rarch_get_cpu_features(unsigned *cpu);
// Opens a measurement on a counter: bumps its run count, then records the
// current tick value as the start of the run.
static inline void rarch_perf_start(struct retro_perf_counter *perf)
{
   perf->call_cnt += 1;
   // Sampled last so the bookkeeping above is not part of the measured span.
   perf->start = rarch_get_perf_counter();
}
void rarch_perf_init(void *data, bool enable);
void rarch_perf_start(void *data, bool enable);
void rarch_perf_stop(void *data, bool enable);
void rarch_perf_log(void *data, const char *funcname, bool enable);
static inline void rarch_perf_stop(struct retro_perf_counter *perf)
{
perf->total += rarch_get_perf_counter() - perf->start;
}
uint64_t rarch_get_cpu_features(void);
// Used internally by RetroArch.
//
// The real macros are only provided when PERF_TEST is defined or
// RARCH_INTERNAL is not defined; in every other build all three expand to
// nothing.
#if defined(PERF_TEST) || !defined(RARCH_INTERNAL)
// Declares a static counter named X (ident is the stringized name, remaining
// fields zero-initialized) and registers it through rarch_perf_register()
// while its 'registered' flag is still unset.
// The expansion is a declaration followed by a statement, so it cannot serve
// as the body of an unbraced if/else/loop.
#define RARCH_PERFORMANCE_INIT(X) \
static struct retro_perf_counter X = {#X}; \
do { \
if (!(X).registered) \
rarch_perf_register(&(X)); \
} while(0)
// Thin wrappers that pass the address of counter X to rarch_perf_start() /
// rarch_perf_stop().
#define RARCH_PERFORMANCE_START(X) rarch_perf_start(&(X))
#define RARCH_PERFORMANCE_STOP(X) rarch_perf_stop(&(X))
#else
// Disabled build: no counter is declared and no calls are emitted.
#define RARCH_PERFORMANCE_INIT(X)
#define RARCH_PERFORMANCE_START(X)
#define RARCH_PERFORMANCE_STOP(X)
#endif
#ifdef __cplusplus
}

View File

@ -276,9 +276,8 @@ static void video_frame(const void *data, unsigned width, unsigned height, size_
if (g_extern.system.pix_fmt == RETRO_PIXEL_FORMAT_0RGB1555 && data && data != RETRO_HW_FRAME_BUFFER_VALID)
{
static retro_perf_counter_t video_frame_conv = { "video_frame_conv", 0, 0, 0, false };
rarch_perf_init(&video_frame_conv, g_settings.perfcounter_enable);
rarch_perf_start(&video_frame_conv, g_settings.perfcounter_enable);
RARCH_PERFORMANCE_INIT(video_frame_conv);
RARCH_PERFORMANCE_START(video_frame_conv);
driver.scaler.in_width = width;
driver.scaler.in_height = height;
driver.scaler.out_width = width;
@ -289,7 +288,7 @@ static void video_frame(const void *data, unsigned width, unsigned height, size_
scaler_ctx_scale(&driver.scaler, driver.scaler_out, data);
data = driver.scaler_out;
pitch = driver.scaler.out_stride;
rarch_perf_stop(&video_frame_conv, g_settings.perfcounter_enable);
RARCH_PERFORMANCE_STOP(video_frame_conv);
}
// Slightly messy code,
@ -382,13 +381,11 @@ static bool audio_flush(const int16_t *data, size_t samples)
unsigned output_frames = 0;
struct resampler_data src_data = {0};
static retro_perf_counter_t audio_convert_s16 = { "audio_convert_s16", 0, 0, 0, false };
rarch_perf_init(&audio_convert_s16, g_settings.perfcounter_enable);
rarch_perf_start(&audio_convert_s16, g_settings.perfcounter_enable);
RARCH_PERFORMANCE_INIT(audio_convert_s16);
RARCH_PERFORMANCE_START(audio_convert_s16);
audio_convert_s16_to_float(g_extern.audio_data.data, data, samples,
g_extern.audio_data.volume_gain);
rarch_perf_stop(&audio_convert_s16, g_settings.perfcounter_enable);
RARCH_PERFORMANCE_STOP(audio_convert_s16);
#if defined(HAVE_DYLIB)
rarch_dsp_output_t dsp_output = {0};
@ -415,13 +412,11 @@ static bool audio_flush(const int16_t *data, size_t samples)
if (g_extern.is_slowmotion)
src_data.ratio *= g_settings.slowmotion_ratio;
static retro_perf_counter_t resampler_proc = { "resampler_proc", 0, 0, 0, false };
rarch_perf_init(&resampler_proc, g_settings.perfcounter_enable);
rarch_perf_start(&resampler_proc, g_settings.perfcounter_enable);
RARCH_PERFORMANCE_INIT(resampler_proc);
RARCH_PERFORMANCE_START(resampler_proc);
rarch_resampler_process(g_extern.audio_data.resampler,
g_extern.audio_data.resampler_data, &src_data);
rarch_perf_stop(&resampler_proc, g_settings.perfcounter_enable);
RARCH_PERFORMANCE_STOP(resampler_proc);
output_data = g_extern.audio_data.outsamples;
output_frames = src_data.output_frames;
@ -436,12 +431,11 @@ static bool audio_flush(const int16_t *data, size_t samples)
}
else
{
static retro_perf_counter_t audio_convert_float = { "audio_convert_float", 0, 0, 0, false };
rarch_perf_init(&audio_convert_float, g_settings.perfcounter_enable);
rarch_perf_start(&audio_convert_float, g_settings.perfcounter_enable);
RARCH_PERFORMANCE_INIT(audio_convert_float);
RARCH_PERFORMANCE_START(audio_convert_float);
audio_convert_float_to_s16(g_extern.audio_data.conv_outsamples,
output_data, output_frames * 2);
rarch_perf_stop(&audio_convert_float, g_settings.perfcounter_enable);
RARCH_PERFORMANCE_STOP(audio_convert_float);
if (audio_write_func(g_extern.audio_data.conv_outsamples, output_frames * sizeof(int16_t) * 2) < 0)
{
@ -2889,8 +2883,7 @@ static void verify_api_version(void)
// Ideally, code would get swapped out depending on CPU support, but this will do for now.
static void validate_cpu_features(void)
{
unsigned cpu;
rarch_get_cpu_features(&cpu);
uint64_t cpu = rarch_get_cpu_features();
#define FAIL_CPU(simd_type) do { \
RARCH_ERR(simd_type " code is compiled in, but CPU does not support this feature. Cannot continue.\n"); \

View File

@ -350,10 +350,6 @@ void config_set_defaults(void)
*g_settings.rgui_config_directory = '\0';
#endif
#ifdef PERF_TEST
g_settings.perfcounter_enable = true;
#endif
#ifdef RARCH_CONSOLE
g_extern.lifecycle_state |= (1ULL << MODE_MENU);