RetroArch/rewind.c

260 lines
7.0 KiB
C
Raw Normal View History

/*  RetroArch - A frontend for libretro.
 *  Copyright (C) 2010-2014 - Hans-Kristian Arntzen
 *
 *  RetroArch is free software: you can redistribute it and/or modify it under the terms
 *  of the GNU General Public License as published by the Free Software Foundation,
 *  either version 3 of the License, or (at your option) any later version.
 *
 *  RetroArch is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 *  without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 *  PURPOSE. See the GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along with RetroArch.
 *  If not, see <http://www.gnu.org/licenses/>.
 */
#include "rewind.h"
#include "performance.h"
2011-01-31 15:06:46 +00:00
#include <stdlib.h>
#include <stdint.h>
2011-12-24 12:46:12 +00:00
#include "boolean.h"
2011-01-31 15:06:46 +00:00
#include <string.h>
#include <limits.h>
#include "general.h"
2011-01-31 15:06:46 +00:00
// Rewind state: the current state snapshot plus a ring buffer of XOR deltas
// that lets older snapshots be reconstructed one step at a time.
struct state_manager
{
   // Ring buffer of delta entries. An entry is either 0 (the sentinel written
   // at the start of every delta) or ((word index) << 32) | (xor value).
   uint64_t *buffer;
   size_t buf_size;      // Number of uint64_t entries in buffer.
   size_t buf_size_mask; // buf_size - 1; ANDed with an index to wrap it around the ring.

   uint32_t *tmp_state;  // Current state, held as state_size 32-bit words.
   size_t top_ptr;       // Ring index where the next entry is written.
   size_t bottom_ptr;    // Ring index marking the oldest end; popping stops when top_ptr reaches it.
   size_t state_size;    // State size in 32-bit words (not bytes).
   bool first_pop;       // Set by a push; the next pop then returns tmp_state without undoing a delta.
};
// Rounds v to the nearest power of two.
//
// A value exactly halfway between two powers of two rounds down (6 -> 4).
// 0 is returned unchanged. If the next power of two above v does not fit in
// size_t, the highest representable power of two is returned instead of
// wrapping around to 0.
static inline size_t nearest_pow2_size(size_t v)
{
   size_t orig = v;
   size_t next;
   size_t prev;
   unsigned shift;

   if (orig == 0)
      return 0;

   // Copy the highest set bit of (v - 1) into every lower bit, so that adding
   // one yields the smallest power of two >= orig. Doubling the shift until it
   // reaches the width of size_t covers every bit regardless of platform.
   v--;
   for (shift = 1; shift < sizeof(v) * CHAR_BIT; shift <<= 1)
      v |= v >> shift;

   next = v + 1;

   // The increment wrapped: orig lies above the top power of two, which is
   // then the only candidate that can be represented.
   if (next == 0)
      return SIZE_MAX - (SIZE_MAX >> 1);

   prev = next >> 1;

   if ((next - orig) < (orig - prev))
      return next;
   return prev;
}
2011-01-31 15:06:46 +00:00
state_manager_t *state_manager_new(size_t state_size, size_t buffer_size, void *init_buffer)
{
if (buffer_size <= state_size * 4) // Need a sufficient buffer size.
2011-01-31 15:06:46 +00:00
return NULL;
2011-12-24 12:46:12 +00:00
state_manager_t *state = (state_manager_t*)calloc(1, sizeof(*state));
2011-01-31 15:06:46 +00:00
if (!state)
return NULL;
// We need 4-byte aligned state_size to avoid having to enforce this with unneeded memcpy's!
2012-04-21 21:25:32 +00:00
rarch_assert(state_size % 4 == 0);
2011-01-31 15:06:46 +00:00
state->top_ptr = 1;
state->state_size = state_size / sizeof(uint32_t); // Works in multiple of 4.
state->buf_size = nearest_pow2_size(buffer_size) / sizeof(uint64_t); // Works in multiple of 8.
state->buf_size_mask = state->buf_size - 1;
2012-04-21 21:25:32 +00:00
RARCH_LOG("Readjusted rewind buffer size to %u MiB\n", (unsigned)(sizeof(uint64_t) * (state->buf_size >> 20)));
2011-12-24 12:46:12 +00:00
if (!(state->buffer = (uint64_t*)calloc(1, state->buf_size * sizeof(uint64_t))))
2011-01-31 15:06:46 +00:00
goto error;
2011-12-24 12:46:12 +00:00
if (!(state->tmp_state = (uint32_t*)calloc(1, state->state_size * sizeof(uint32_t))))
2011-01-31 15:06:46 +00:00
goto error;
memcpy(state->tmp_state, init_buffer, state_size);
return state;
error:
if (state)
{
free(state->buffer);
free(state->tmp_state);
free(state);
}
return NULL;
}
// Releases a state manager together with its delta buffer and state copy.
// Passing NULL is allowed and does nothing, mirroring free().
void state_manager_free(state_manager_t *state)
{
   if (!state)
      return;

   free(state->buffer);
   free(state->tmp_state);
   free(state);
}
// Steps the manager back by one state.
//
// *data is always set to the manager's internal state buffer; the caller
// must not free it.
//
// The first pop after a push returns the state as it currently is, without
// undoing anything. Later pops undo one recorded delta each.
//
// Returns false when there is no older delta left to undo (the state buffer
// is then left as it was), true otherwise.
bool state_manager_pop(state_manager_t *state, void **data)
{
   *data = state->tmp_state;

   if (state->first_pop)
   {
      state->first_pop = false;
      return true;
   }

   state->top_ptr = (state->top_ptr - 1) & state->buf_size_mask;

   if (state->top_ptr == state->bottom_ptr) // Our stack is completely empty... :v
   {
      state->top_ptr = (state->top_ptr + 1) & state->buf_size_mask;
      return false;
   }

   // Walk backwards until the 0 sentinel that starts this delta.
   while (state->buffer[state->top_ptr])
   {
      // Apply the xor patch: the high 32 bits hold the word index, the low 32 bits the xor value.
      uint64_t entry = state->buffer[state->top_ptr];
      uint32_t addr = (uint32_t)(entry >> 32);
      uint32_t xor_ = (uint32_t)(entry & 0xFFFFFFFFU);
      state->tmp_state[addr] ^= xor_;

      state->top_ptr = (state->top_ptr - 1) & state->buf_size_mask;
   }

   // If the walk ended on the bottom marker, step forward again so that
   // top_ptr never rests on bottom_ptr.
   if (state->top_ptr == state->bottom_ptr)
      state->top_ptr = (state->top_ptr + 1) & state->buf_size_mask;

   return true;
}
// Moves bottom_ptr forward after the write position has run into it.
// The search starts one entry past top_ptr and advances (wrapping around the
// ring) to the first 0 entry, i.e. the boundary of a state delta.
static void reassign_bottom(state_manager_t *state)
{
   size_t bottom = (state->top_ptr + 1) & state->buf_size_mask;

   while (state->buffer[bottom]) // Skip ahead until we find the first 0 (boundary for state delta).
      bottom = (bottom + 1) & state->buf_size_mask;

   state->bottom_ptr = bottom;
}
#if __SSE2__
#if defined(__GNUC__)
// Number of trailing zero bits in v, taken from the compiler builtin.
// NOTE(review): GCC documents __builtin_ctz(0) as undefined, so callers
// should pass a non-zero value.
static inline unsigned compat_ctz(uint32_t v)
{
   const int trailing_zeros = __builtin_ctz(v);
   return trailing_zeros;
}
#else
// Coarse trailing-zero count for compilers without __builtin_ctz.
// Returns the bit offset (0, 4, 8 or 12) of the lowest non-zero nibble in the
// low 16 bits of v, or 16 when those bits are all zero. Nibble granularity is
// sufficient for the byte mask this is used on.
static inline unsigned compat_ctz(uint32_t v)
{
   unsigned shift;

   for (shift = 0; shift < 16; shift += 4)
   {
      if ((v >> shift) & 0xf)
         return shift;
   }

   return 16;
}
#endif
#include <emmintrin.h>
// Returns the index of the first 32-bit word at which a and b differ, or
// samples if the first `samples` words are equal.
// libc has no such primitive, so this compares four words per step with SSE2
// and finishes any remainder one word at a time.
static unsigned find_mismatch(const uint32_t *a, const uint32_t *b, unsigned samples)
{
   const unsigned vec_end = samples & ~3u; // Largest multiple of 4 not above samples.
   unsigned idx = 0;

   while (idx < vec_end)
   {
      const __m128i lhs = _mm_loadu_si128((const __m128i*)(a + idx));
      const __m128i rhs = _mm_loadu_si128((const __m128i*)(b + idx));
      // One mask bit per byte; a 32-bit lane that compares equal contributes four set bits.
      const uint32_t eq_bits = _mm_movemask_epi8(_mm_cmpeq_epi32(lhs, rhs));

      if (eq_bits != 0xffff) // Something has changed, figure out where.
         return idx + (compat_ctz(~eq_bits) >> 2); // Byte position / 4 = lane index.

      idx += 4;
   }

   while (idx < samples)
   {
      if (a[idx] != b[idx])
         return idx;
      idx++;
   }

   return samples;
}
#else
// Portable version: returns the index of the first 32-bit word at which a and
// b differ, or samples if the first `samples` words are equal.
static unsigned find_mismatch(const uint32_t *a, const uint32_t *b, unsigned samples)
{
   unsigned idx = 0;

   while (idx < samples && a[idx] == b[idx])
      idx++;

   return idx;
}
#endif
// Records the difference between the manager's current state and `data` in
// the ring buffer, and updates the current state to match `data`.
//
// data is read as state->state_size 32-bit words.
//
// Each delta starts with a 0 sentinel entry, followed by one entry per
// changed word: ((word index) << 32) | (old word ^ new word). If writing
// runs into bottom_ptr, the oldest data is discarded by moving bottom_ptr
// forward once the delta is complete.
static void generate_delta(state_manager_t *state, const void *data)
{
   size_t i;
   bool crossed = false;
   uint32_t *old_state = state->tmp_state;
   const uint32_t *new_state = (const uint32_t*)data;

   state->buffer[state->top_ptr++] = 0; // For each separate delta, we have a 0 value sentinel in between.
   state->top_ptr &= state->buf_size_mask;

   // Check if top_ptr and bottom_ptr crossed each other, which means we need to delete old cruft.
   if (state->top_ptr == state->bottom_ptr)
      crossed = true;

   for (i = 0; i < state->state_size; i++)
   {
      uint32_t xor_;
      unsigned avail = state->state_size - i;
      unsigned pos = find_mismatch(old_state + i, new_state + i, avail);

      // No further differences in the rest of the state.
      if (pos == avail)
         break;

      i += pos;

      // If the data differs (xor != 0), we push that xor on the stack with index and xor.
      // This can be reversed by reapplying the xor.
      // Thus, if states don't really differ much, we'll save lots of space :)
      // Hopefully this will work really well with save states.
      xor_ = old_state[i] ^ new_state[i];
      old_state[i] = new_state[i];

      state->buffer[state->top_ptr] = ((uint64_t)i << 32) | xor_;
      state->top_ptr = (state->top_ptr + 1) & state->buf_size_mask;

      if (state->top_ptr == state->bottom_ptr)
         crossed = true;
   }

   if (crossed)
      reassign_bottom(state);
}
// Records `data` as the new current state.
//
// The delta against the previous state is generated (and timed with the
// RARCH_PERFORMANCE_* counters), and first_pop is set so that the next
// state_manager_pop() returns this state as-is before stepping further back.
//
// Always returns true.
bool state_manager_push(state_manager_t *state, const void *data)
{
   RARCH_PERFORMANCE_INIT(gen_delta);
   RARCH_PERFORMANCE_START(gen_delta);
   generate_delta(state, data);
   RARCH_PERFORMANCE_STOP(gen_delta);

   state->first_pop = true;

   return true;
}