From 1c9895611b4f091331979fd807dcc63c26070a9b Mon Sep 17 00:00:00 2001 From: Themaister Date: Wed, 18 Dec 2013 19:10:57 +0100 Subject: [PATCH 1/2] Rework performance interface. Avoids super-ugly macros and retroarch-ifdefs. --- camera/video4linux2.c | 7 +-- dynamic.c | 20 +++--- general.h | 1 - gfx/gl.c | 35 +++++------ gfx/sdl_gfx.c | 7 +-- gfx/thread_wrapper.c | 7 +-- gfx/vg.c | 14 ++--- libretro.h | 71 ++++++++++++++++----- performance.c | 143 +++++++++++++++++++++--------------------- performance.h | 38 +++++++++-- retroarch.c | 33 ++++------ settings.c | 4 -- 12 files changed, 210 insertions(+), 170 deletions(-) diff --git a/camera/video4linux2.c b/camera/video4linux2.c index a3bd6c8440..e2c104ab52 100644 --- a/camera/video4linux2.c +++ b/camera/video4linux2.c @@ -63,11 +63,10 @@ typedef struct video4linux static void process_image(video4linux_t *v4l, const uint8_t *buffer_yuv) { - static retro_perf_counter_t yuv_convert_direct = { "yuv_convert_direct", 0, 0, 0, false }; - rarch_perf_init(&yuv_convert_direct, g_settings.perfcounter_enable); - rarch_perf_start(&yuv_convert_direct, g_settings.perfcounter_enable); + RARCH_PERFORMANCE_INIT(yuv_convert_direct); + RARCH_PERFORMANCE_START(yuv_convert_direct); scaler_ctx_scale(&v4l->scaler, v4l->buffer_output, buffer_yuv); - rarch_perf_stop(&yuv_convert_direct, g_settings.perfcounter_enable); + RARCH_PERFORMANCE_STOP(yuv_convert_direct); } static int xioctl(int fd, int request, void *args) diff --git a/dynamic.c b/dynamic.c index a70c721c9c..cffeaf44df 100644 --- a/dynamic.c +++ b/dynamic.c @@ -18,6 +18,7 @@ #include "compat/strl.h" #include "compat/posix_string.h" #include "retroarch_logger.h" +#include "performance.h" #include "file.h" #include #include @@ -417,6 +418,9 @@ void uninit_libretro_sym(void) // No longer valid. memset(&g_extern.system, 0, sizeof(g_extern.system)); + + // Performance counters no longer valid. 
+ retro_perf_clear(); } #ifdef NEED_DYNAMIC @@ -844,15 +848,13 @@ bool rarch_environment_cb(unsigned cmd, void *data) { RARCH_LOG("Environ GET_PERF_INTERFACE.\n"); struct retro_perf_callback *cb = (struct retro_perf_callback*)data; - cb->get_perf_counter = rarch_get_perf_counter; - cb->get_time_usec = rarch_get_time_usec; - cb->get_cpu_features = rarch_get_cpu_features; - cb->perf_init = rarch_perf_init; - cb->perf_start = rarch_perf_start; - cb->perf_stop = rarch_perf_stop; - cb->perf_log = rarch_perf_log; - cb->perf_logs = rarch_perf_logs; - cb->perf_register = rarch_perf_register; + cb->get_time_usec = rarch_get_time_usec; + cb->get_cpu_features = rarch_get_cpu_features; + cb->get_perf_counter = rarch_get_perf_counter; + cb->perf_register = retro_perf_register; // libretro specific path. + cb->perf_start = rarch_perf_start; + cb->perf_stop = rarch_perf_stop; + cb->perf_log = retro_perf_log; // libretro specific path. break; } diff --git a/general.h b/general.h index 572cca6726..9e9bcde0fe 100644 --- a/general.h +++ b/general.h @@ -292,7 +292,6 @@ struct settings bool rgui_show_start_screen; #endif bool fps_show; - bool perfcounter_enable; }; enum rarch_game_type diff --git a/gfx/gl.c b/gfx/gl.c index 1bb273071d..91dec027b3 100644 --- a/gfx/gl.c +++ b/gfx/gl.c @@ -1315,14 +1315,13 @@ static void gl_pbo_async_readback(void *data) glPixelStorei(GL_PACK_ALIGNMENT, get_alignment(gl->vp.width * sizeof(uint32_t))); // Read asynchronously into PBO buffer. 
- static retro_perf_counter_t async_readback = { "async_readback", 0, 0, 0, false }; - rarch_perf_init(&async_readback, g_settings.perfcounter_enable); - rarch_perf_start(&async_readback, g_settings.perfcounter_enable); + RARCH_PERFORMANCE_INIT(async_readback); + RARCH_PERFORMANCE_START(async_readback); glReadBuffer(GL_BACK); glReadPixels(gl->vp.x, gl->vp.y, gl->vp.width, gl->vp.height, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, NULL); - rarch_perf_stop(&async_readback, g_settings.perfcounter_enable); + RARCH_PERFORMANCE_STOP(async_readback); glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); } @@ -1373,9 +1372,8 @@ static inline void gl_draw_texture(void *data) static bool gl_frame(void *data, const void *frame, unsigned width, unsigned height, unsigned pitch, const char *msg) { - static retro_perf_counter_t frame_run = { "frame_run", 0, 0, 0, false }; - rarch_perf_init(&frame_run, g_settings.perfcounter_enable); - rarch_perf_start(&frame_run, g_settings.perfcounter_enable); + RARCH_PERFORMANCE_INIT(frame_run); + RARCH_PERFORMANCE_START(frame_run); gl_t *gl = (gl_t*)data; @@ -1420,12 +1418,11 @@ static bool gl_frame(void *data, const void *frame, unsigned width, unsigned hei if (!gl->hw_render_fbo_init) #endif { - static retro_perf_counter_t copy_frame = { "copy_frame", 0, 0, 0, false }; gl_update_input_size(gl, width, height, pitch, true); - rarch_perf_init(©_frame, g_settings.perfcounter_enable); - rarch_perf_start(©_frame, g_settings.perfcounter_enable); + RARCH_PERFORMANCE_INIT(copy_frame); + RARCH_PERFORMANCE_START(copy_frame); gl_copy_frame(gl, frame, width, height, pitch); - rarch_perf_stop(©_frame, g_settings.perfcounter_enable); + RARCH_PERFORMANCE_STOP(copy_frame); } } else @@ -1504,7 +1501,7 @@ static bool gl_frame(void *data, const void *frame, unsigned width, unsigned hei context_update_window_title_func(); - rarch_perf_stop(&frame_run, g_settings.perfcounter_enable); + RARCH_PERFORMANCE_STOP(frame_run); #ifdef HAVE_FBO // Reset state which could easily mess up 
libretro core. @@ -1530,9 +1527,8 @@ static bool gl_frame(void *data, const void *frame, unsigned width, unsigned hei #ifdef HAVE_GL_SYNC if (g_settings.video.hard_sync && gl->have_sync) { - static retro_perf_counter_t gl_fence = {"gl_fence", 0, 0, 0, false}; - rarch_perf_init(&gl_fence, g_settings.perfcounter_enable); - rarch_perf_start(&gl_fence, g_settings.perfcounter_enable); + RARCH_PERFORMANCE_INIT(gl_fence); + RARCH_PERFORMANCE_START(gl_fence); glClear(GL_COLOR_BUFFER_BIT); gl->fences[gl->fence_count++] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); @@ -1545,7 +1541,7 @@ static bool gl_frame(void *data, const void *frame, unsigned width, unsigned hei memmove(gl->fences, gl->fences + 1, gl->fence_count * sizeof(GLsync)); } - rarch_perf_stop(&gl_fence, g_settings.perfcounter_enable); + RARCH_PERFORMANCE_STOP(gl_fence); } #endif @@ -2341,13 +2337,12 @@ static void gl_viewport_info(void *data, struct rarch_viewport *vp) static bool gl_read_viewport(void *data, uint8_t *buffer) { - static retro_perf_counter_t read_viewport = { "read_viewport", 0, 0, 0, false }; unsigned i; gl_t *gl = (gl_t*)data; (void)i; - rarch_perf_init(&read_viewport, g_settings.perfcounter_enable); - rarch_perf_start(&read_viewport, g_settings.perfcounter_enable); + RARCH_PERFORMANCE_INIT(read_viewport); + RARCH_PERFORMANCE_START(read_viewport); #ifdef HAVE_FBO // Make sure we're reading from backbuffer incase some state has been overridden. 
@@ -2403,7 +2398,7 @@ static bool gl_read_viewport(void *data, uint8_t *buffer) } #endif - rarch_perf_stop(&read_viewport, g_settings.perfcounter_enable); + RARCH_PERFORMANCE_STOP(read_viewport); return true; } #endif diff --git a/gfx/sdl_gfx.c b/gfx/sdl_gfx.c index 76354c87eb..b4522b88fc 100644 --- a/gfx/sdl_gfx.c +++ b/gfx/sdl_gfx.c @@ -305,11 +305,10 @@ static bool sdl_gfx_frame(void *data, const void *frame, unsigned width, unsigne if (SDL_MUSTLOCK(vid->screen)) SDL_LockSurface(vid->screen); - static retro_perf_counter_t sdl_scale = { "sdl_scale", 0, 0, 0, false }; - rarch_perf_init(&sdl_scale, g_settings.perfcounter_enable); - rarch_perf_start(&sdl_scale, g_settings.perfcounter_enable); + RARCH_PERFORMANCE_INIT(sdl_scale); + RARCH_PERFORMANCE_START(sdl_scale); scaler_ctx_scale(&vid->scaler, vid->screen->pixels, frame); - rarch_perf_stop(&sdl_scale, g_settings.perfcounter_enable); + RARCH_PERFORMANCE_STOP(sdl_scale); if (msg) sdl_render_msg(vid, vid->screen, msg, vid->screen->w, vid->screen->h, vid->screen->format); diff --git a/gfx/thread_wrapper.c b/gfx/thread_wrapper.c index 2a1248ef0a..2de5a0c830 100644 --- a/gfx/thread_wrapper.c +++ b/gfx/thread_wrapper.c @@ -394,9 +394,8 @@ static bool thread_focus(void *data) static bool thread_frame(void *data, const void *frame_, unsigned width, unsigned height, unsigned pitch, const char *msg) { - static retro_perf_counter_t thread_frame = { "thread_frame", 0, 0, 0, false}; - rarch_perf_init(&thread_frame, g_settings.perfcounter_enable); - rarch_perf_start(&thread_frame, g_settings.perfcounter_enable); + RARCH_PERFORMANCE_INIT(thread_frame); + RARCH_PERFORMANCE_START(thread_frame); thread_video_t *thr = (thread_video_t*)data; unsigned copy_stride = width * (thr->info.rgb32 ? 
sizeof(uint32_t) : sizeof(uint16_t)); @@ -462,7 +461,7 @@ static bool thread_frame(void *data, const void *frame_, slock_unlock(thr->lock); - rarch_perf_stop(&thread_frame, g_settings.perfcounter_enable); + RARCH_PERFORMANCE_STOP(thread_frame); thr->last_time = rarch_get_time_usec(); return true; diff --git a/gfx/vg.c b/gfx/vg.c index fc0b5a5b68..f6dbaa3697 100644 --- a/gfx/vg.c +++ b/gfx/vg.c @@ -353,9 +353,8 @@ static void vg_copy_frame(void *data, const void *frame, unsigned width, unsigne static bool vg_frame(void *data, const void *frame, unsigned width, unsigned height, unsigned pitch, const char *msg) { - static retro_perf_counter_t vg_fr = { "vg_fr", 0, 0, 0, false }; - rarch_perf_init(&vg_fr, g_settings.perfcounter_enable); - rarch_perf_start(&vg_fr, g_settings.perfcounter_enable); + RARCH_PERFORMANCE_INIT(vg_fr); + RARCH_PERFORMANCE_START(vg_fr); vg_t *vg = (vg_t*)data; if (width != vg->mRenderWidth || height != vg->mRenderHeight || vg->should_resize) @@ -377,11 +376,10 @@ static bool vg_frame(void *data, const void *frame, unsigned width, unsigned hei vgClear(0, 0, vg->mScreenWidth, vg->mScreenHeight); vgSeti(VG_SCISSORING, VG_TRUE); - static retro_perf_counter_t vg_image = { "vg_image", 0, 0, 0, false }; - rarch_perf_init(&vg_image, g_settings.perfcounter_enable); - rarch_perf_start(&vg_image, g_settings.perfcounter_enable); + RARCH_PERFORMANCE_INIT(vg_image); + RARCH_PERFORMANCE_START(vg_image); vg_copy_frame(vg, frame, width, height, pitch); - rarch_perf_stop(&vg_image, g_settings.perfcounter_enable); + RARCH_PERFORMANCE_STOP(vg_image); vgDrawImage(vg->mImage); @@ -390,7 +388,7 @@ static bool vg_frame(void *data, const void *frame, unsigned width, unsigned hei vg->driver->update_window_title(); - rarch_perf_stop(&vg_fr, g_settings.perfcounter_enable); + RARCH_PERFORMANCE_STOP(vg_fr); vg->driver->swap_buffers(); return true; diff --git a/libretro.h b/libretro.h index abf1750c16..c1c1c6cf17 100755 --- a/libretro.h +++ b/libretro.h @@ -587,9 +587,9 @@ 
struct retro_log_callback retro_log_printf_t log; }; -// Performance functions +// Performance related functions // -// Id values for SIMD CPU features +// ID values for SIMD CPU features #define RETRO_SIMD_SSE (1 << 0) #define RETRO_SIMD_SSE2 (1 << 1) #define RETRO_SIMD_VMX (1 << 2) @@ -599,10 +599,10 @@ struct retro_log_callback #define RETRO_SIMD_SSE3 (1 << 6) #define RETRO_SIMD_SSSE3 (1 << 7) -typedef unsigned long long retro_perf_tick_t; +typedef uint64_t retro_perf_tick_t; typedef int64_t retro_time_t; -typedef struct retro_perf_counter +struct retro_perf_counter { const char *ident; retro_perf_tick_t start; @@ -610,30 +610,67 @@ typedef struct retro_perf_counter retro_perf_tick_t call_cnt; bool registered; -} retro_perf_counter_t; - +}; +// Returns current time in microsec sec. Tries to use the most accurate timer available. typedef retro_time_t (*retro_perf_get_time_usec_t)(void); +// A simple counter. Usually nanoseconds, but can also be CPU cycles. +// Can be used directly if desired (when creating a more sophisticated performance counter system). typedef retro_perf_tick_t (*retro_perf_get_counter_t)(void); -typedef void (*retro_get_cpu_features_t)(unsigned*); -typedef void (*retro_perf_init_t)(void*, bool); -typedef void (*retro_perf_start_t)(void*, bool); -typedef void (*retro_perf_stop_t)(void*, bool); -typedef void (*retro_perf_log_t)(void*, const char*, bool); -typedef void (*retro_perf_logs_t)(void); -typedef void (*retro_perf_register_t)(retro_perf_counter_t*); +// Returns a bit-mask of detected CPU features (RETRO_SIMD_*). +typedef uint64_t (*retro_get_cpu_features_t)(void); +// Asks frontend to log and/or display the state of performance counters. +// Performance counters can always be poked into manually as well. +typedef void (*retro_perf_log_t)(void); +// Register a performance counter. +// ident field must be set with a discrete value and other values in retro_perf_counter must be 0. +// Registering can be called multiple times. 
To avoid calling to frontend redundantly, you can check registered field first. +typedef void (*retro_perf_register_t)(struct retro_perf_counter *counter); +// Starts and stops a registered counter. +typedef void (*retro_perf_start_t)(struct retro_perf_counter *counter); +typedef void (*retro_perf_stop_t)(struct retro_perf_counter *counter); + +// For convenience it can be useful to wrap register, start and stop in macros. +// E.g.: +// #ifdef LOG_PERFORMANCE +// #define RETRO_PERFORMANCE_INIT(perf_cb, name) static struct retro_perf_counter name = {#name}; if (!(name).registered) perf_cb.perf_register(&(name)) +// #define RETRO_PERFORMANCE_START(perf_cb, name) perf_cb.perf_start(&(name)) +// #define RETRO_PERFORMANCE_STOP(perf_cb, name) perf_cb.perf_stop(&(name)) +// #else +// ... Blank macros ... +// #endif +// These can then be used mid-function around code snippets. +// +// extern struct retro_perf_callback perf_cb; // Somewhere in the core. +// +// void do_some_heavy_work(void) +// { +// RETRO_PERFORMANCE_INIT(perf_cb, work_1); +// RETRO_PERFORMANCE_START(perf_cb, work_1); +// heavy_work_1(); +// RETRO_PERFORMANCE_STOP(perf_cb, work_1); +// +// RETRO_PERFORMANCE_INIT(perf_cb, work_2); +// RETRO_PERFORMANCE_START(perf_cb, work_2); +// heavy_work_2(); +// RETRO_PERFORMANCE_STOP(perf_cb, work_2); +// } +// +// void retro_deinit(void) +// { +// perf_cb.perf_log(); // Log all perf counters here for example. +// } struct retro_perf_callback { retro_perf_get_time_usec_t get_time_usec; - retro_perf_get_counter_t get_perf_counter; retro_get_cpu_features_t get_cpu_features; - retro_perf_init_t perf_init; + + retro_perf_get_counter_t get_perf_counter; + retro_perf_register_t perf_register; retro_perf_start_t perf_start; retro_perf_stop_t perf_stop; retro_perf_log_t perf_log; - retro_perf_logs_t perf_logs; - retro_perf_register_t perf_register; }; // FIXME: Document the sensor API and work out behavior. 
diff --git a/performance.c b/performance.c index 769bdd1f45..9ce7788256 100644 --- a/performance.c +++ b/performance.c @@ -42,11 +42,9 @@ #include #endif -#ifdef __QNX__ -#ifndef CLOCK_MONOTONIC +#if defined(__QNX__) && !defined(CLOCK_MONOTONIC) #define CLOCK_MONOTONIC 2 #endif -#endif #if defined(__PSL1GHT__) #include @@ -71,25 +69,65 @@ #include #define MAX_COUNTERS 64 -static struct retro_perf_counter *perf_counters[MAX_COUNTERS]; -static unsigned perf_ptr; +static const struct retro_perf_counter *perf_counters_rarch[MAX_COUNTERS]; +static const struct retro_perf_counter *perf_counters_libretro[MAX_COUNTERS]; +static unsigned perf_ptr_rarch; +static unsigned perf_ptr_libretro; void rarch_perf_register(struct retro_perf_counter *perf) { - if (!perf && perf_ptr >= MAX_COUNTERS) + if (perf->registered || perf_ptr_rarch >= MAX_COUNTERS) return; - perf_counters[perf_ptr++] = perf; + perf_counters_rarch[perf_ptr_rarch++] = perf; perf->registered = true; } +void retro_perf_register(struct retro_perf_counter *perf) +{ + if (perf->registered || perf_ptr_libretro >= MAX_COUNTERS) + return; -void rarch_perf_logs(void) + perf_counters_libretro[perf_ptr_libretro++] = perf; + perf->registered = true; +} + +void retro_perf_clear(void) +{ + perf_ptr_libretro = 0; + memset(perf_counters_libretro, 0, sizeof(perf_counters_libretro)); +} + +#ifdef _WIN32 +#define PERF_LOG_FMT "[PERF]: Avg (%s): %I64u ticks, %I64u runs.\n" +#else +#define PERF_LOG_FMT "[PERF]: Avg (%s): %llu ticks, %llu runs.\n" +#endif + +static void log_counters(const struct retro_perf_counter **counters, unsigned num) { unsigned i; - RARCH_LOG("[PERF]: Performance counters:\n"); - for (i = 0; i < perf_ptr; i++) - rarch_perf_log(perf_counters[i], perf_counters[i]->ident, true); + for (i = 0; i < num; i++) + { + RARCH_LOG(PERF_LOG_FMT, + counters[i]->ident, + counters[i]->call_cnt ? (unsigned long long)(counters[i]->total / counters[i]->call_cnt) : 0ULL, /* Registered but never started: avoid dividing by zero. */ + (unsigned long long)counters[i]->call_cnt); + } +} + +void 
rarch_perf_log(void) +{ +#if defined(PERF_TEST) || !defined(RARCH_INTERNAL) + RARCH_LOG("[PERF]: Performance counters (RetroArch):\n"); + log_counters(perf_counters_rarch, perf_ptr_rarch); +#endif +} + +void retro_perf_log(void) +{ + RARCH_LOG("[PERF]: Performance counters (libretro):\n"); + log_counters(perf_counters_libretro, perf_ptr_libretro); } retro_perf_tick_t rarch_get_perf_counter(void) @@ -122,7 +160,7 @@ retro_perf_tick_t rarch_get_perf_counter(void) #endif #elif defined(__ARM_ARCH_6__) - asm volatile( "mrc p15, 0, %0, c9, c13, 0" : "=r"(time) ); + asm volatile( "mrc p15, 0, %0, c9, c13, 0" : "=r"(time) ); #elif defined(__CELLOS_LV2__) || defined(GEKKO) || defined(_XBOX360) time = __mftb(); #endif @@ -130,49 +168,6 @@ retro_perf_tick_t rarch_get_perf_counter(void) return time; } -void rarch_perf_init(void *data, bool enable) -{ - struct retro_perf_counter *perf = (struct retro_perf_counter*)data; - if (!enable || !perf) - return; - - if (!perf->registered) - rarch_perf_register(perf); -} - -void rarch_perf_start(void *data, bool enable) -{ - struct retro_perf_counter *perf = (struct retro_perf_counter*)data; - if (!enable || !perf) - return; - - perf->call_cnt++; - perf->start = rarch_get_perf_counter(); -} - -void rarch_perf_stop(void *data, bool enable) -{ - struct retro_perf_counter *perf = (struct retro_perf_counter*)data; - if (!enable || !perf) - return; - - perf->total += rarch_get_perf_counter() - perf->start; -} - -void rarch_perf_log(void *data, const char *funcname, bool enable) -{ - struct retro_perf_counter *perf = (struct retro_perf_counter*)data; - if (!enable || !perf) - return; -#ifdef _WIN32 - RARCH_LOG("[PERF]: Avg (%s): %I64u ticks, %I64u runs.\n", - funcname, perf->total / perf->call_cnt, perf->call_cnt); -#else - RARCH_LOG("[PERF]: Avg (%s): %llu ticks, %llu runs.\n", - funcname, perf->total / perf->call_cnt, perf->call_cnt); -#endif -} - retro_time_t rarch_get_time_usec(void) { #if defined(_WIN32) @@ -244,9 +239,9 @@ static void 
x86_cpuid(int func, int flags[4]) } #endif -void rarch_get_cpu_features(unsigned *cpu) +uint64_t rarch_get_cpu_features(void) { - *cpu = 0; + uint64_t cpu = 0; #if defined(CPU_X86) int flags[4]; @@ -258,46 +253,48 @@ void rarch_get_cpu_features(unsigned *cpu) RARCH_LOG("[CPUID]: Vendor: %s\n", vendor); if (flags[0] < 1) // Does CPUID not support func = 1? (unlikely ...) - return; + return 0; x86_cpuid(1, flags); if (flags[3] & (1 << 25)) - *cpu |= RETRO_SIMD_SSE; + cpu |= RETRO_SIMD_SSE; if (flags[3] & (1 << 26)) - *cpu |= RETRO_SIMD_SSE2; + cpu |= RETRO_SIMD_SSE2; if (flags[2] & (1 << 0)) - *cpu |= RETRO_SIMD_SSE3; + cpu |= RETRO_SIMD_SSE3; if (flags[2] & (1 << 9)) - *cpu |= RETRO_SIMD_SSSE3; + cpu |= RETRO_SIMD_SSSE3; const int avx_flags = (1 << 27) | (1 << 28); if ((flags[2] & avx_flags) == avx_flags) - *cpu |= RETRO_SIMD_AVX; + cpu |= RETRO_SIMD_AVX; - RARCH_LOG("[CPUID]: SSE: %u\n", !!(*cpu & RETRO_SIMD_SSE)); - RARCH_LOG("[CPUID]: SSE2: %u\n", !!(*cpu & RETRO_SIMD_SSE2)); - RARCH_LOG("[CPUID]: SSE3: %u\n", !!(*cpu & RETRO_SIMD_SSE3)); - RARCH_LOG("[CPUID]: SSSE3: %u\n", !!(*cpu & RETRO_SIMD_SSSE3)); - RARCH_LOG("[CPUID]: AVX: %u\n", !!(*cpu & RETRO_SIMD_AVX)); + RARCH_LOG("[CPUID]: SSE: %u\n", !!(cpu & RETRO_SIMD_SSE)); + RARCH_LOG("[CPUID]: SSE2: %u\n", !!(cpu & RETRO_SIMD_SSE2)); + RARCH_LOG("[CPUID]: SSE3: %u\n", !!(cpu & RETRO_SIMD_SSE3)); + RARCH_LOG("[CPUID]: SSSE3: %u\n", !!(cpu & RETRO_SIMD_SSSE3)); + RARCH_LOG("[CPUID]: AVX: %u\n", !!(cpu & RETRO_SIMD_AVX)); #elif defined(ANDROID) && defined(ANDROID_ARM) uint64_t cpu_flags = android_getCpuFeatures(); if (cpu_flags & ANDROID_CPU_ARM_FEATURE_NEON) - *cpu |= RETRO_SIMD_NEON; + cpu |= RETRO_SIMD_NEON; - RARCH_LOG("[CPUID]: NEON: %u\n", !!(*cpu & RETRO_SIMD_NEON)); + RARCH_LOG("[CPUID]: NEON: %u\n", !!(cpu & RETRO_SIMD_NEON)); #elif defined(HAVE_NEON) - *cpu |= RETRO_SIMD_NEON; - RARCH_LOG("[CPUID]: NEON: %u\n", !!(*cpu & RETRO_SIMD_NEON)); + cpu |= RETRO_SIMD_NEON; + RARCH_LOG("[CPUID]: NEON: %u\n", !!(cpu & RETRO_SIMD_NEON)); 
#elif defined(__CELLOS_LV2__) - *cpu |= RETRO_SIMD_VMX; - RARCH_LOG("[CPUID]: VMX: %u\n", !!(*cpu & RETRO_SIMD_VMX)); + cpu |= RETRO_SIMD_VMX; + RARCH_LOG("[CPUID]: VMX: %u\n", !!(cpu & RETRO_SIMD_VMX)); #elif defined(XBOX360) - *cpu |= RETRO_SIMD_VMX128; - RARCH_LOG("[CPUID]: VMX128: %u\n", !!(*cpu & RETRO_SIMD_VMX128)); + cpu |= RETRO_SIMD_VMX128; + RARCH_LOG("[CPUID]: VMX128: %u\n", !!(cpu & RETRO_SIMD_VMX128)); #endif + + return cpu; } diff --git a/performance.h b/performance.h index 24967c1438..624b6b4b38 100644 --- a/performance.h +++ b/performance.h @@ -26,19 +26,45 @@ extern "C" { #endif #include "boolean.h" +#include "libretro.h" #include retro_perf_tick_t rarch_get_perf_counter(void); retro_time_t rarch_get_time_usec(void); void rarch_perf_register(struct retro_perf_counter *perf); -void rarch_perf_logs(void); +void retro_perf_register(struct retro_perf_counter *perf); // Same as rarch_perf_register, just for libretro cores. +void retro_perf_clear(void); +void rarch_perf_log(void); +void retro_perf_log(void); -void rarch_get_cpu_features(unsigned *cpu); +static inline void rarch_perf_start(struct retro_perf_counter *perf) +{ + perf->call_cnt++; + perf->start = rarch_get_perf_counter(); +} -void rarch_perf_init(void *data, bool enable); -void rarch_perf_start(void *data, bool enable); -void rarch_perf_stop(void *data, bool enable); -void rarch_perf_log(void *data, const char *funcname, bool enable); +static inline void rarch_perf_stop(struct retro_perf_counter *perf) +{ + perf->total += rarch_get_perf_counter() - perf->start; +} + +uint64_t rarch_get_cpu_features(void); + +// Used internally by RetroArch. 
+#if defined(PERF_TEST) || !defined(RARCH_INTERNAL) +#define RARCH_PERFORMANCE_INIT(X) \ + static struct retro_perf_counter X = {#X}; \ + do { \ + if (!(X).registered) \ + rarch_perf_register(&(X)); \ + } while(0) +#define RARCH_PERFORMANCE_START(X) rarch_perf_start(&(X)) +#define RARCH_PERFORMANCE_STOP(X) rarch_perf_stop(&(X)) +#else +#define RARCH_PERFORMANCE_INIT(X) +#define RARCH_PERFORMANCE_START(X) +#define RARCH_PERFORMANCE_STOP(X) +#endif #ifdef __cplusplus } diff --git a/retroarch.c b/retroarch.c index caa2f3bd0f..64cba817f8 100644 --- a/retroarch.c +++ b/retroarch.c @@ -276,9 +276,8 @@ static void video_frame(const void *data, unsigned width, unsigned height, size_ if (g_extern.system.pix_fmt == RETRO_PIXEL_FORMAT_0RGB1555 && data && data != RETRO_HW_FRAME_BUFFER_VALID) { - static retro_perf_counter_t video_frame_conv = { "video_frame_conv", 0, 0, 0, false }; - rarch_perf_init(&video_frame_conv, g_settings.perfcounter_enable); - rarch_perf_start(&video_frame_conv, g_settings.perfcounter_enable); + RARCH_PERFORMANCE_INIT(video_frame_conv); + RARCH_PERFORMANCE_START(video_frame_conv); driver.scaler.in_width = width; driver.scaler.in_height = height; driver.scaler.out_width = width; @@ -289,7 +288,7 @@ static void video_frame(const void *data, unsigned width, unsigned height, size_ scaler_ctx_scale(&driver.scaler, driver.scaler_out, data); data = driver.scaler_out; pitch = driver.scaler.out_stride; - rarch_perf_stop(&video_frame_conv, g_settings.perfcounter_enable); + RARCH_PERFORMANCE_STOP(video_frame_conv); } // Slightly messy code, @@ -382,13 +381,11 @@ static bool audio_flush(const int16_t *data, size_t samples) unsigned output_frames = 0; struct resampler_data src_data = {0}; - static retro_perf_counter_t audio_convert_s16 = { "audio_convert_s16", 0, 0, 0, false }; - - rarch_perf_init(&audio_convert_s16, g_settings.perfcounter_enable); - rarch_perf_start(&audio_convert_s16, g_settings.perfcounter_enable); + RARCH_PERFORMANCE_INIT(audio_convert_s16); + 
RARCH_PERFORMANCE_START(audio_convert_s16); audio_convert_s16_to_float(g_extern.audio_data.data, data, samples, g_extern.audio_data.volume_gain); - rarch_perf_stop(&audio_convert_s16, g_settings.perfcounter_enable); + RARCH_PERFORMANCE_STOP(audio_convert_s16); #if defined(HAVE_DYLIB) rarch_dsp_output_t dsp_output = {0}; @@ -415,13 +412,11 @@ static bool audio_flush(const int16_t *data, size_t samples) if (g_extern.is_slowmotion) src_data.ratio *= g_settings.slowmotion_ratio; - static retro_perf_counter_t resampler_proc = { "resampler_proc", 0, 0, 0, false }; - - rarch_perf_init(&resampler_proc, g_settings.perfcounter_enable); - rarch_perf_start(&resampler_proc, g_settings.perfcounter_enable); + RARCH_PERFORMANCE_INIT(resampler_proc); + RARCH_PERFORMANCE_START(resampler_proc); rarch_resampler_process(g_extern.audio_data.resampler, g_extern.audio_data.resampler_data, &src_data); - rarch_perf_stop(&resampler_proc, g_settings.perfcounter_enable); + RARCH_PERFORMANCE_STOP(resampler_proc); output_data = g_extern.audio_data.outsamples; output_frames = src_data.output_frames; @@ -436,12 +431,11 @@ static bool audio_flush(const int16_t *data, size_t samples) } else { - static retro_perf_counter_t audio_convert_float = { "audio_convert_float", 0, 0, 0, false }; - rarch_perf_init(&audio_convert_float, g_settings.perfcounter_enable); - rarch_perf_start(&audio_convert_float, g_settings.perfcounter_enable); + RARCH_PERFORMANCE_INIT(audio_convert_float); + RARCH_PERFORMANCE_START(audio_convert_float); audio_convert_float_to_s16(g_extern.audio_data.conv_outsamples, output_data, output_frames * 2); - rarch_perf_stop(&audio_convert_float, g_settings.perfcounter_enable); + RARCH_PERFORMANCE_STOP(audio_convert_float); if (audio_write_func(g_extern.audio_data.conv_outsamples, output_frames * sizeof(int16_t) * 2) < 0) { @@ -2889,8 +2883,7 @@ static void verify_api_version(void) // Ideally, code would get swapped out depending on CPU support, but this will do for now. 
static void validate_cpu_features(void) { - unsigned cpu; - rarch_get_cpu_features(&cpu); + uint64_t cpu = rarch_get_cpu_features(); #define FAIL_CPU(simd_type) do { \ RARCH_ERR(simd_type " code is compiled in, but CPU does not support this feature. Cannot continue.\n"); \ diff --git a/settings.c b/settings.c index cbb82595a9..4054faecb1 100644 --- a/settings.c +++ b/settings.c @@ -350,10 +350,6 @@ void config_set_defaults(void) *g_settings.rgui_config_directory = '\0'; #endif -#ifdef PERF_TEST - g_settings.perfcounter_enable = true; -#endif - #ifdef RARCH_CONSOLE g_extern.lifecycle_state |= (1ULL << MODE_MENU); From d1f80a38fcb9ad3dbebbe71a538feffe0515356c Mon Sep 17 00:00:00 2001 From: Themaister Date: Wed, 18 Dec 2013 19:34:51 +0100 Subject: [PATCH 2/2] Fix typo. --- libretro.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libretro.h b/libretro.h index c1c1c6cf17..c584786949 100755 --- a/libretro.h +++ b/libretro.h @@ -612,7 +612,7 @@ struct retro_perf_counter bool registered; }; -// Returns current time in microsec sec. Tries to use the most accurate timer available. +// Returns current time in microseconds. Tries to use the most accurate timer available. typedef retro_time_t (*retro_perf_get_time_usec_t)(void); // A simple counter. Usually nanoseconds, but can also be CPU cycles. // Can be used directly if desired (when creating a more sophisticated performance counter system).