Update benchmarks for GCC + x86.

Compile with make PERF_TEST=1 to enable performance logging.
This commit is contained in:
Themaister 2012-10-01 23:43:16 +02:00
parent 8ffd954122
commit 1bac5421ff
5 changed files with 61 additions and 29 deletions

View File

@@ -137,8 +137,9 @@ ifeq ($(SCALER_NO_SIMD), 1)
DEFINES += -DSCALER_NO_SIMD
endif
ifeq ($(SCALER_PERF), 1)
DEFINES += -DSCALER_PERF
ifeq ($(PERF_TEST), 1)
DEFINES += -DPERF_TEST
OBJ += benchmark.o
endif
ifeq ($(HAVE_SDL), 1)

View File

@@ -68,6 +68,11 @@ ifeq ($(SCALER_NO_SIMD), 1)
DEFINES += -DSCALER_NO_SIMD
endif
ifeq ($(PERF_TEST), 1)
DEFINES += -DPERF_TEST
OBJ += benchmark.o
endif
ifeq ($(HAVE_SDL), 1)
OBJ += gfx/scaler/scaler.o gfx/scaler/pixconv.o gfx/scaler/scaler_int.o gfx/scaler/filter.o
else ifeq ($(HAVE_FFMPEG), 1)

View File

@@ -14,7 +14,7 @@
* If not, see <http://www.gnu.org/licenses/>.
*/
#include <stdio.h>
#include "benchmark.h"
#if defined(__CELLOS_LV2__) || defined(GEKKO)
#ifndef _PPU_INTRINSICS_H
@@ -24,23 +24,31 @@
#include <PPCIntrinsics.h>
#endif
unsigned long long rarch_get_performance_counter(void)
rarch_perf_tick_t rarch_get_perf_counter(void)
{
unsigned long long time = 0;
rarch_perf_tick_t time = 0;
#ifdef _XBOX1
#define rdtsc __asm __emit 0fh __asm __emit 031h
LARGE_INTEGER time_tmp;
rdtsc;
__asm mov time_tmp.LowPart, eax;
__asm mov time_tmp.HighPart, edx;
time = time_tmp.QuadPart;
#elif defined(__i386__) || defined(__i486__) || defined(__x86_64__)
uint64_t lo, hi;
__asm__ __volatile__("rdtsc" : "=a" (lo), "=d" (hi));
time = ((((uint64_t)hi) << 32) | ((uint64_t)lo) );
#elif defined(__GNUC__)
#if defined(__i386__) || defined(__i486__)
asm volatile ("rdtsc" : "=A" (time));
#elif defined(__x86_64__)
unsigned a, d;
asm volatile ("rdtsc" : "=a" (a), "=d" (d));
time = (rarch_perf_tick_t)a | ((rarch_perf_tick_t)d << 32);
#endif
#elif defined(__CELLOS_LV2__) || defined(GEKKO) || defined(_XBOX360)
time = __mftb();
#endif
(void)time;
return time;
}

View File

@@ -17,22 +17,44 @@
#ifndef _RARCH_BENCHMARK_H
#define _RARCH_BENCHMARK_H
typedef struct performance_counter_t
#include "general.h"
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <stdint.h>
typedef unsigned long long rarch_perf_tick_t;
typedef struct rarch_perf_counter
{
unsigned long long start;
unsigned long long stop;
} performance_counter_t;
rarch_perf_tick_t start;
rarch_perf_tick_t total;
unsigned call_cnt;
} rarch_perf_counter_t;
unsigned long long rarch_get_performance_counter(void);
rarch_perf_tick_t rarch_get_perf_counter(void);
#define RARCH_PERFORMANCE_INIT(X) performance_counter_t (X)
#define RARCH_PERFORMANCE_START(X) ((X).start = rarch_get_performance_counter())
#define RARCH_PERFORMANCE_STOP(X) ((X).stop = rarch_get_performance_counter() - (X).start)
#ifdef PERF_TEST
#define RARCH_PERFORMANCE_INIT(X) static rarch_perf_counter_t X
#define RARCH_PERFORMANCE_START(X) ((X).start = rarch_get_perf_counter())
#define RARCH_PERFORMANCE_STOP(X) do { (X).total += rarch_get_perf_counter() - (X).start; (X).call_cnt++; } while(0)
#ifdef _WIN32
#define RARCH_PERFORMANCE_LOG(functionname, X) RARCH_LOG("Time taken (%s): %I64u.\n", functionname, (X).stop)
#define RARCH_PERFORMANCE_LOG(functionname, X) RARCH_LOG("[PERF]: Avg (%s): %I64u ticks.\n", functionname, (X).total / (X).call_cnt)
#else
#define RARCH_PERFORMANCE_LOG(functionname, X) RARCH_LOG("Time taken (%s): %llu.\n", functionname, (X).stop)
#define RARCH_PERFORMANCE_LOG(functionname, X) RARCH_LOG("[PERF]: Avg (%s): %llu ticks.\n", functionname, (X).total / (X).call_cnt)
#endif
#else
#define RARCH_PERFORMANCE_INIT(X)
#define RARCH_PERFORMANCE_START(X)
#define RARCH_PERFORMANCE_STOP(X)
#define RARCH_PERFORMANCE_LOG(functionname, X)
#endif
#endif

View File

@@ -21,6 +21,7 @@
#include <string.h>
#include <stdio.h>
#include <math.h>
#include "../../benchmark.h"
#ifdef SCALER_PERF
#include <time.h>
@@ -189,10 +190,8 @@ void scaler_ctx_gen_reset(struct scaler_ctx *ctx)
void scaler_ctx_scale(struct scaler_ctx *ctx,
void *output, const void *input)
{
#ifdef SCALER_PERF
struct timespec start_tv, end_tv;
clock_gettime(CLOCK_MONOTONIC, &start_tv);
#endif
RARCH_PERFORMANCE_INIT(scaler_perf);
RARCH_PERFORMANCE_START(scaler_perf);
if (ctx->unscaled) // Just perform straight pixel conversion.
{
@@ -262,10 +261,7 @@ void scaler_ctx_scale(struct scaler_ctx *ctx,
ctx->scaler_vert(ctx, output, ctx->out_stride);
}
#ifdef SCALER_PERF
clock_gettime(CLOCK_MONOTONIC, &end_tv);
ctx->elapsed_time_ms += (end_tv.tv_sec - start_tv.tv_sec) * 1000.0 + (end_tv.tv_nsec - start_tv.tv_nsec) / 1000000.0;
ctx->elapsed_frames++;
#endif
RARCH_PERFORMANCE_STOP(scaler_perf);
RARCH_PERFORMANCE_LOG("Scaler", scaler_perf);
}