RetroArch/performance.c

455 lines
13 KiB
C
Raw Normal View History

/* RetroArch - A frontend for libretro.
* Copyright (C) 2010-2014 - Hans-Kristian Arntzen
* Copyright (C) 2011-2014 - Daniel De Matteis
*
* RetroArch is free software: you can redistribute it and/or modify it under the terms
* of the GNU General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* RetroArch is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with RetroArch.
* If not, see <http://www.gnu.org/licenses/>.
*/
2013-11-27 22:36:46 +00:00
#include <stdio.h>
#include "libretro.h"
#include "performance.h"
2013-02-05 08:41:10 +00:00
#include "general.h"
#ifdef ANDROID
#include "performance/performance_android.h"
#endif
2013-02-05 08:41:10 +00:00
#if !defined(_WIN32) && !defined(RARCH_CONSOLE)
#include <unistd.h>
#endif
2012-11-01 21:31:24 +00:00
2013-12-13 02:58:58 +00:00
#if defined(_WIN32) && !defined(_XBOX)
#include <windows.h>
2013-12-29 10:48:00 +00:00
#include <intrin.h>
2013-12-13 02:58:58 +00:00
#endif
#if defined(__CELLOS_LV2__) || defined(GEKKO)
#ifndef _PPU_INTRINSICS_H
#include <ppu_intrinsics.h>
#endif
2012-10-01 20:25:13 +00:00
#elif defined(_XBOX360)
#include <PPCIntrinsics.h>
#elif defined(_POSIX_MONOTONIC_CLOCK) || defined(ANDROID) || defined(__QNX__)
2013-02-05 08:41:10 +00:00
// POSIX_MONOTONIC_CLOCK is not being defined in Android headers despite support being present.
#include <time.h>
#endif
#if defined(__QNX__) && !defined(CLOCK_MONOTONIC)
#define CLOCK_MONOTONIC 2
#endif
#if defined(__mips__)
#include <sys/time.h>
#endif
2013-02-05 08:41:10 +00:00
#if defined(__PSL1GHT__)
2012-10-16 17:55:39 +00:00
#include <sys/time.h>
2013-02-05 08:41:10 +00:00
#elif defined(__CELLOS_LV2__)
#include <sys/sys_time.h>
#endif
#ifdef GEKKO
#include <ogc/lwp_watchdog.h>
#endif
2013-02-05 08:41:10 +00:00
// OSX specific. OSX lacks clock_gettime().
#ifdef __MACH__
#include <mach/clock.h>
#include <mach/mach.h>
#include <mach/mach_time.h>
2013-02-05 08:41:10 +00:00
#endif
#ifdef EMSCRIPTEN
#include <emscripten.h>
#endif
#if defined(BSD) || defined(__APPLE__)
#include <sys/sysctl.h>
#endif
#include <string.h>
// Registry of counters added through rarch_perf_register().
const struct retro_perf_counter *perf_counters_rarch[MAX_COUNTERS];
// Registry of counters added through retro_perf_register().
const struct retro_perf_counter *perf_counters_libretro[MAX_COUNTERS];
// Number of entries stored in perf_counters_rarch (also the next free slot).
unsigned perf_ptr_rarch;
// Number of entries stored in perf_counters_libretro (also the next free slot).
unsigned perf_ptr_libretro;
2012-10-30 22:28:54 +00:00
void rarch_perf_register(struct retro_perf_counter *perf)
2012-10-30 22:28:54 +00:00
{
if (!g_extern.perfcnt_enable || perf->registered || perf_ptr_rarch >= MAX_COUNTERS)
2012-10-30 22:28:54 +00:00
return;
perf_counters_rarch[perf_ptr_rarch++] = perf;
2012-10-30 22:28:54 +00:00
perf->registered = true;
}
// Adds `perf` to the libretro counter registry and marks it as registered.
// Already-registered counters and a full registry are silently ignored.
void retro_perf_register(struct retro_perf_counter *perf)
{
   if (!perf->registered && perf_ptr_libretro < MAX_COUNTERS)
   {
      perf_counters_libretro[perf_ptr_libretro] = perf;
      ++perf_ptr_libretro;
      perf->registered = true;
   }
}
// Empties the libretro counter registry: zeroes every slot and resets the count.
void retro_perf_clear(void)
{
   memset(perf_counters_libretro, 0, sizeof perf_counters_libretro);
   perf_ptr_libretro = 0;
}
static void log_counters(const struct retro_perf_counter **counters, unsigned num)
2012-10-30 22:28:54 +00:00
{
unsigned i;
for (i = 0; i < num; i++)
{
2014-06-01 18:24:55 +00:00
if (counters[i]->call_cnt)
{
RARCH_LOG(PERF_LOG_FMT,
counters[i]->ident,
(unsigned long long)counters[i]->total / (unsigned long long)counters[i]->call_cnt,
(unsigned long long)counters[i]->call_cnt);
}
}
}
// Logs every RetroArch-side counter; a no-op while perf counters are disabled.
void rarch_perf_log(void)
{
   if (g_extern.perfcnt_enable)
   {
      RARCH_LOG("[PERF]: Performance counters (RetroArch):\n");
      log_counters(perf_counters_rarch, perf_ptr_rarch);
   }
}
// Logs every counter registered through retro_perf_register().
void retro_perf_log(void)
{
   const struct retro_perf_counter **table = perf_counters_libretro;

   RARCH_LOG("[PERF]: Performance counters (libretro):\n");
   log_counters(table, perf_ptr_libretro);
}
// Returns a raw tick value for performance counters.
// The unit depends on the branch compiled in (nanoseconds on Linux/QNX,
// microseconds on MIPS and MSVC, raw hardware counter values elsewhere), so only
// the difference between two readings taken on the same build is meaningful.
// Returns 0 when no branch applies or when the clock cannot be read.
retro_perf_tick_t rarch_get_perf_counter(void)
{
   retro_perf_tick_t time = 0;

#if defined(__MACH__) && defined(__APPLE__)
   // Raw mach ticks; no timebase conversion is applied.
   time = mach_absolute_time();
#elif defined(__linux__) || defined(__QNX__)
   struct timespec tv;
   if (clock_gettime(CLOCK_MONOTONIC, &tv) == 0)
      time = (retro_perf_tick_t)tv.tv_sec * 1000000000 + (retro_perf_tick_t)tv.tv_nsec;
   else
      time = 0;
#elif defined(__GNUC__) && !defined(RARCH_CONSOLE)

#if defined(__i386__) || defined(__i486__) || defined(__i686__)
   asm volatile ("rdtsc" : "=A" (time));
#elif defined(__x86_64__)
   unsigned a, d;
   asm volatile ("rdtsc" : "=a" (a), "=d" (d));
   time = (retro_perf_tick_t)a | ((retro_perf_tick_t)d << 32);
#endif

#elif defined(__ARM_ARCH_6__)
   // mrc writes a single 32-bit register. Read into a 32-bit local so the upper
   // half of the 64-bit tick value is well defined.
   unsigned cycles = 0;
   asm volatile( "mrc p15, 0, %0, c9, c13, 0" : "=r"(cycles) );
   time = cycles;
#elif defined(__CELLOS_LV2__) || defined(GEKKO) || defined(_XBOX360) || defined(__powerpc__) || defined(__ppc__) || defined(__POWERPC__)
   time = __mftb();
#elif defined(__mips__)
   struct timeval tv;
   gettimeofday(&tv,NULL);
   // Widen before multiplying: tv_sec may be a 32-bit type.
   time = (retro_perf_tick_t)tv.tv_sec * 1000000 + (retro_perf_tick_t)tv.tv_usec;
#elif defined(_WIN32)
   // Number of 100 ns intervals between 1601-01-01 (FILETIME epoch) and 1970-01-01.
   static const unsigned __int64 epoch = 116444736000000000Ui64;
   FILETIME file_time;
   SYSTEMTIME system_time;
   ULARGE_INTEGER ularge;

   GetSystemTime(&system_time);
   SystemTimeToFileTime(&system_time, &file_time);
   ularge.LowPart = file_time.dwLowDateTime;
   ularge.HighPart = file_time.dwHighDateTime;

   // FILETIME counts 100 ns units; convert to microseconds entirely in 64-bit
   // arithmetic (a 32-bit long cannot hold seconds * 1000000).
   time = (retro_perf_tick_t)((ularge.QuadPart - epoch) / 10);
#endif

   return time;
}
2012-11-01 21:31:24 +00:00
// Returns a timestamp in microseconds from the platform's timer.
// Returns 0 when the underlying clock query reports failure (Win32 and
// clock_gettime() branches). Platforms without an implementation fail to compile.
retro_time_t rarch_get_time_usec(void)
{
#if defined(_WIN32)
   static LARGE_INTEGER freq;
   LARGE_INTEGER count;

   if (!freq.QuadPart && !QueryPerformanceFrequency(&freq)) // Frequency is guaranteed to not change.
      return 0;

   if (!QueryPerformanceCounter(&count))
      return 0;

   // Convert whole seconds and the sub-second remainder separately;
   // count * 1000000 would overflow 64 bits after a few days of uptime
   // on high-frequency counters.
   return (count.QuadPart / freq.QuadPart) * 1000000 +
      (count.QuadPart % freq.QuadPart) * 1000000 / freq.QuadPart;
#elif defined(__CELLOS_LV2__)
   return sys_time_get_system_time();
#elif defined(GEKKO)
   return ticks_to_microsecs(gettime());
#elif defined(__MACH__) // OSX doesn't have clock_gettime ...
   clock_serv_t cclock;
   mach_timespec_t mts;
   host_get_clock_service(mach_host_self(), CALENDAR_CLOCK, &cclock);
   clock_get_time(cclock, &mts);
   mach_port_deallocate(mach_task_self(), cclock);

   // Nanoseconds are rounded to the nearest microsecond.
   return mts.tv_sec * INT64_C(1000000) + (mts.tv_nsec + 500) / 1000;
#elif defined(_POSIX_MONOTONIC_CLOCK) || defined(__QNX__) || defined(ANDROID)
   struct timespec tv;
   if (clock_gettime(CLOCK_MONOTONIC, &tv) < 0)
      return 0;

   // Nanoseconds are rounded to the nearest microsecond.
   return tv.tv_sec * INT64_C(1000000) + (tv.tv_nsec + 500) / 1000;
#elif defined(EMSCRIPTEN)
   return emscripten_get_now() * 1000;
#elif defined(__mips__)
   struct timeval tv;
   gettimeofday(&tv,NULL);
   // 64-bit multiply, matching the other branches: tv_sec may be 32-bit.
   return (INT64_C(1000000) * tv.tv_sec + tv.tv_usec);
#else
#error "Your platform does not have a timer function implemented in rarch_get_time_usec(). Cannot continue."
#endif
}
2012-11-01 21:31:24 +00:00
#if defined(__x86_64__) || defined(__i386__) || defined(__i486__) || defined(__i686__)
#define CPU_X86
#endif
2012-11-10 12:46:24 +00:00
#if defined(_MSC_VER) && !defined(_XBOX)
2012-11-01 21:31:24 +00:00
#include <intrin.h>
#endif
#ifdef CPU_X86
// Executes CPUID for leaf `func`, sub-leaf 0, and stores the resulting
// EAX, EBX, ECX and EDX values in flags[0..3] respectively.
// With an unknown compiler a warning is logged and flags[] is zeroed.
static void x86_cpuid(int func, int flags[4])
{
   // On Android, we compile RetroArch with PIC, and we are not allowed to clobber the ebx
   // register.
#ifdef __x86_64__
#define REG_b "rbx"
#define REG_S "rsi"
#else
#define REG_b "ebx"
#define REG_S "esi"
#endif

#if defined(__GNUC__)
   // EBX is parked in ESI around the instruction and swapped back afterwards,
   // so the EBX result is delivered through the "S" output.
   // ECX is explicitly zeroed: leaves with sub-leaves (e.g. leaf 7) would
   // otherwise be queried with whatever happened to be in the register.
   __asm__ volatile (
         "mov %%" REG_b ", %%" REG_S "\n"
         "cpuid\n"
         "xchg %%" REG_b ", %%" REG_S "\n"
         : "=a"(flags[0]), "=S"(flags[1]), "=c"(flags[2]), "=d"(flags[3])
         : "a"(func), "c"(0));
#elif defined(_MSC_VER)
   // NOTE(review): MSVC documents __cpuid as clearing ECX itself - confirm for
   // the toolchain versions in use.
   __cpuid(flags, func);
#else
   RARCH_WARN("Unknown compiler. Cannot check CPUID with inline assembly.\n");
   memset(flags, 0, 4 * sizeof(int));
#endif
}
2013-12-29 10:48:00 +00:00
// Reads the extended control register selected by `index` using XGETBV and
// returns it as a 64-bit value (EDX in the upper half, EAX in the lower half).
// Only runs on i686 and above. Needs to be conditionally run.
// With an unsupported compiler a warning is logged and 0 is returned.
static uint64_t xgetbv_x86(uint32_t index)
{
#if defined(__GNUC__)
   uint32_t lo, hi;

   // Older GCC versions (Apple's GCC for example) do not understand xgetbv instruction.
   // Stamp out the machine code directly.
   __asm__ volatile (
         ".byte 0x0f, 0x01, 0xd0\n"
         : "=a"(lo), "=d"(hi)
         : "c"(index));

   return (uint64_t)lo | ((uint64_t)hi << 32);
#elif _MSC_FULL_VER >= 160040219 // Intrinsic only works on 2010 SP1 and above.
   return _xgetbv(index);
#else
   RARCH_WARN("Unknown compiler. Cannot check xgetbv bits.\n");
   return 0;
#endif
}
2012-11-01 21:31:24 +00:00
#endif
#if defined(__ARM_NEON__)
// RunFast mode. Enables flush-to-zero and some floating point optimizations.
// Reads FPSCR, masks it with 0x04086060, ORs in 0x03000000 and writes it back.
static void arm_enable_runfast_mode(void)
{
   static const unsigned x = 0x04086060;
   static const unsigned y = 0x03000000;
   int r;

   // %0 is written by the first instruction while %1 and %2 are still needed,
   // so it must be early-clobber ("=&r"); otherwise the compiler may assign it
   // the same register as one of the inputs.
   __asm__ volatile(
         "fmrx %0, fpscr \n\t" // r0 = FPSCR
         "and %0, %0, %1 \n\t" // r0 = r0 & 0x04086060
         "orr %0, %0, %2 \n\t" // r0 = r0 | 0x03000000
         "fmxr fpscr, %0 \n\t" // FPSCR = r0
         : "=&r"(r)
         : "r"(x), "r"(y)
         );
}
#endif
// Returns the number of CPU cores reported by the platform.
// Platforms with a fixed count return a constant; the sysconf() and sysctl()
// paths fall back to 1 when the query does not yield a positive value, and
// unknown platforms are assumed to be single core.
unsigned rarch_get_cpu_cores(void)
{
#if defined(_WIN32) && !defined(_XBOX) // Win32
   SYSTEM_INFO info;
   GetSystemInfo(&info);
   return info.dwNumberOfProcessors;
#elif defined(ANDROID)
   return android_getCpuCount();
#elif defined(GEKKO)
   return 1;
#elif defined(_SC_NPROCESSORS_ONLN) // Linux, most unix-likes.
   const long online = sysconf(_SC_NPROCESSORS_ONLN);
   return (online > 0) ? (unsigned)online : (unsigned)1;
#elif defined(BSD) || defined(__APPLE__) // BSD
   // Copypasta from stackoverflow, dunno if it works.
   int mib[4];
   int cpus = 0;
   size_t len = sizeof(cpus);

   // First ask for the available CPUs ...
   mib[0] = CTL_HW;
   mib[1] = HW_AVAILCPU;
   sysctl(mib, 2, &cpus, &len, NULL, 0);
   if (cpus >= 1)
      return cpus;

   // ... then fall back to the total CPU count, and finally to one core.
   mib[1] = HW_NCPU;
   sysctl(mib, 2, &cpus, &len, NULL, 0);
   return (cpus >= 1) ? cpus : 1;
#elif defined(_XBOX360)
   return 3;
#else
   // No idea, assume single core.
   return 1;
#endif
}
// Detects SIMD capabilities and returns them as a mask of RETRO_SIMD_* bits.
// On x86 the mask is built from CPUID (plus XGETBV for AVX) and each result is
// logged; on other targets it is derived from compile-time macros (and the
// Android CPU feature query for NEON). On ARM with NEON this also switches the
// FPU into RunFast mode. Returns 0 when nothing is detected.
uint64_t rarch_get_cpu_features(void)
{
   uint64_t cpu = 0;

#if defined(CPU_X86)
   int flags[4];
   x86_cpuid(0, flags);

   // Leaf 0 returns the vendor string in EBX, EDX, ECX order.
   char vendor[13] = {0};
   const int vendor_shuffle[3] = { flags[1], flags[3], flags[2] };
   memcpy(vendor, vendor_shuffle, sizeof(vendor_shuffle));
   RARCH_LOG("[CPUID]: Vendor: %s\n", vendor);

   unsigned max_flag = flags[0];
   if (max_flag < 1) // Does CPUID not support func = 1? (unlikely ...)
      return 0;

   x86_cpuid(1, flags);

   if (flags[3] & (1 << 23))
      cpu |= RETRO_SIMD_MMX;

   if (flags[3] & (1 << 25))
   {
      // SSE also implies MMXEXT (according to FFmpeg source).
      cpu |= RETRO_SIMD_SSE;
      cpu |= RETRO_SIMD_MMXEXT;
   }

   if (flags[3] & (1 << 26))
      cpu |= RETRO_SIMD_SSE2;

   if (flags[2] & (1 << 0))
      cpu |= RETRO_SIMD_SSE3;

   if (flags[2] & (1 << 9))
      cpu |= RETRO_SIMD_SSSE3;

   if (flags[2] & (1 << 19))
      cpu |= RETRO_SIMD_SSE4;

   if (flags[2] & (1 << 20))
      cpu |= RETRO_SIMD_SSE42;

   const int avx_flags = (1 << 27) | (1 << 28);
   // Must only perform xgetbv check if we have AVX CPU support (guaranteed to have at least i686).
   if (((flags[2] & avx_flags) == avx_flags) && ((xgetbv_x86(0) & 0x6) == 0x6))
      cpu |= RETRO_SIMD_AVX;

   if (max_flag >= 7)
   {
      x86_cpuid(7, flags);
      // AVX2 uses the same register state as AVX, so only report it when the
      // AVX check above (including OS support via xgetbv) has passed.
      if ((cpu & RETRO_SIMD_AVX) && (flags[1] & (1 << 5)))
         cpu |= RETRO_SIMD_AVX2;
   }

   // Extended leaves: a second source for the MMX / MMXEXT bits.
   x86_cpuid(0x80000000, flags);
   max_flag = flags[0];
   if (max_flag >= 0x80000001u)
   {
      x86_cpuid(0x80000001, flags);
      if (flags[3] & (1 << 23))
         cpu |= RETRO_SIMD_MMX;
      if (flags[3] & (1 << 22))
         cpu |= RETRO_SIMD_MMXEXT;
   }

   RARCH_LOG("[CPUID]: MMX: %u\n", !!(cpu & RETRO_SIMD_MMX));
   RARCH_LOG("[CPUID]: MMXEXT: %u\n", !!(cpu & RETRO_SIMD_MMXEXT));
   RARCH_LOG("[CPUID]: SSE: %u\n", !!(cpu & RETRO_SIMD_SSE));
   RARCH_LOG("[CPUID]: SSE2: %u\n", !!(cpu & RETRO_SIMD_SSE2));
   RARCH_LOG("[CPUID]: SSE3: %u\n", !!(cpu & RETRO_SIMD_SSE3));
   RARCH_LOG("[CPUID]: SSSE3: %u\n", !!(cpu & RETRO_SIMD_SSSE3));
   RARCH_LOG("[CPUID]: SSE4: %u\n", !!(cpu & RETRO_SIMD_SSE4));
   RARCH_LOG("[CPUID]: SSE4.2: %u\n", !!(cpu & RETRO_SIMD_SSE42));
   RARCH_LOG("[CPUID]: AVX: %u\n", !!(cpu & RETRO_SIMD_AVX));
   RARCH_LOG("[CPUID]: AVX2: %u\n", !!(cpu & RETRO_SIMD_AVX2));
#elif defined(ANDROID) && defined(ANDROID_ARM)
   uint64_t cpu_flags = android_getCpuFeatures();
   (void)cpu_flags;

#ifdef __ARM_NEON__
   if (cpu_flags & ANDROID_CPU_ARM_FEATURE_NEON)
   {
      cpu |= RETRO_SIMD_NEON;
      arm_enable_runfast_mode();
   }
#endif

   RARCH_LOG("[CPUID]: NEON: %u\n", !!(cpu & RETRO_SIMD_NEON));
#elif defined(__ARM_NEON__)
   cpu |= RETRO_SIMD_NEON;
   arm_enable_runfast_mode();
   RARCH_LOG("[CPUID]: NEON: %u\n", !!(cpu & RETRO_SIMD_NEON));
#elif defined(__ALTIVEC__)
   cpu |= RETRO_SIMD_VMX;
   RARCH_LOG("[CPUID]: VMX: %u\n", !!(cpu & RETRO_SIMD_VMX));
#elif defined(XBOX360) || defined(_XBOX360)
   // The rest of this file keys Xbox 360 code on _XBOX360; accept both spellings.
   cpu |= RETRO_SIMD_VMX128;
   RARCH_LOG("[CPUID]: VMX128: %u\n", !!(cpu & RETRO_SIMD_VMX128));
#elif defined(PSP)
   cpu |= RETRO_SIMD_VFPU;
   RARCH_LOG("[CPUID]: VFPU: %u\n", !!(cpu & RETRO_SIMD_VFPU));
#elif defined(GEKKO)
   cpu |= RETRO_SIMD_PS;
   RARCH_LOG("[CPUID]: PS: %u\n", !!(cpu & RETRO_SIMD_PS));
#endif

   return cpu;
}