Let CPU filters work with any format.

This commit is contained in:
Themaister 2012-10-21 22:58:33 +02:00
parent 84abbe110b
commit 668ff7687e
6 changed files with 116 additions and 32 deletions

View File

@ -423,19 +423,32 @@ void uninit_audio(void)
}
#ifdef HAVE_DYLIB
static void init_filter(void)
// Tears down the CPU filter state held in g_extern.filter.
//
// Marks the filter inactive, closes the filter library if one is loaded,
// frees the work buffers and wipes the scaler context. Every step tolerates
// state that was never set up (the library handle is checked against NULL
// and free() accepts NULL), and all pointers are left as NULL afterwards.
static void deinit_filter(void)
{
   g_extern.filter.active = false;

   // Only hand a real handle to dylib_close().
   if (g_extern.filter.lib != NULL)
   {
      dylib_close(g_extern.filter.lib);
   }
   g_extern.filter.lib = NULL;

   // Release each buffer and drop the stale pointer right away.
   free(g_extern.filter.buffer);
   g_extern.filter.buffer = NULL;

   free(g_extern.filter.colormap);
   g_extern.filter.colormap = NULL;

   free(g_extern.filter.scaler_out);
   g_extern.filter.scaler_out = NULL;

   // Reset the scaler context, then zero the whole struct.
   scaler_ctx_gen_reset(&g_extern.filter.scaler);
   memset(&g_extern.filter.scaler, 0, sizeof(g_extern.filter.scaler));
}
static void init_filter(bool rgb32)
{
if (g_extern.filter.active)
return;
if (*g_settings.video.filter_path == '\0')
if (!*g_settings.video.filter_path)
return;
if (g_extern.system.pix_fmt != RETRO_PIXEL_FORMAT_0RGB1555)
{
RARCH_WARN("CPU filters only support 0RGB1555.\n");
return;
}
RARCH_LOG("Loading bSNES filter from \"%s\"\n", g_settings.video.filter_path);
g_extern.filter.lib = dylib_load(g_settings.video.filter_path);
if (!g_extern.filter.lib)
@ -454,9 +467,7 @@ static void init_filter(void)
if (!g_extern.filter.psize || !g_extern.filter.prender)
{
RARCH_ERR("Failed to find functions in filter...\n");
dylib_close(g_extern.filter.lib);
g_extern.filter.lib = NULL;
return;
goto error;
}
g_extern.filter.active = true;
@ -473,12 +484,14 @@ static void init_filter(void)
g_extern.filter.buffer = (uint32_t*)malloc(RARCH_SCALE_BASE * RARCH_SCALE_BASE *
g_extern.filter.scale * g_extern.filter.scale * sizeof(uint32_t));
rarch_assert(g_extern.filter.buffer);
if (!g_extern.filter.buffer)
goto error;
g_extern.filter.pitch = RARCH_SCALE_BASE * g_extern.filter.scale * sizeof(uint32_t);
g_extern.filter.colormap = (uint32_t*)malloc(0x10000 * sizeof(uint32_t));
rarch_assert(g_extern.filter.colormap);
if (!g_extern.filter.colormap)
goto error;
// Set up conversion map from 16-bit XRGB1555 to 32-bit ARGB.
for (unsigned i = 0; i < 0x10000; i++)
@ -492,18 +505,23 @@ static void init_filter(void)
b = (b << 3) | (b >> 2);
g_extern.filter.colormap[i] = (r << 16) | (g << 8) | (b << 0);
}
}
static void deinit_filter(void)
{
if (!g_extern.filter.active)
return;
g_extern.filter.scaler_out = (uint16_t*)malloc(sizeof(uint16_t) * geom->max_width * geom->max_height);
if (!g_extern.filter.scaler_out)
goto error;
g_extern.filter.active = false;
dylib_close(g_extern.filter.lib);
g_extern.filter.lib = NULL;
free(g_extern.filter.buffer);
free(g_extern.filter.colormap);
g_extern.filter.scaler.scaler_type = SCALER_TYPE_POINT;
g_extern.filter.scaler.in_fmt = rgb32 ? SCALER_FMT_ARGB8888 : SCALER_FMT_RGB565;
g_extern.filter.scaler.out_fmt = SCALER_FMT_0RGB1555;
if (!scaler_ctx_gen_filter(&g_extern.filter.scaler))
goto error;
return;
error:
RARCH_ERR("CPU filter init failed.\n");
deinit_filter();
}
#endif
@ -542,13 +560,6 @@ static bool init_video_pixel_converter(unsigned size)
{
RARCH_WARN("0RGB1555 pixel format is deprecated, and will be slower. For 15/16-bit, RGB565 format is preferred.\n");
// We'll tweak these values later,
// just set most of them to something sane to begin with.
driver.scaler.in_width =
driver.scaler.in_height =
driver.scaler.out_width =
driver.scaler.out_height = size;
driver.scaler.scaler_type = SCALER_TYPE_POINT;
driver.scaler.in_fmt = SCALER_FMT_0RGB1555;
@ -567,7 +578,7 @@ static bool init_video_pixel_converter(unsigned size)
void init_video_input(void)
{
#ifdef HAVE_DYLIB
init_filter();
init_filter(g_extern.system.pix_fmt == RETRO_PIXEL_FORMAT_XRGB8888);
#endif
#ifdef HAVE_XML

View File

@ -355,6 +355,10 @@ struct global
void (*psize)(unsigned *width, unsigned *height);
void (*prender)(uint32_t *colormap, uint32_t *output, unsigned outpitch,
const uint16_t *input, unsigned pitch, unsigned width, unsigned height);
// CPU filters only work on *XRGB1555*. We have to convert to XRGB1555 first.
struct scaler_ctx scaler;
void *scaler_out;
} filter;
msg_queue_t *msg_queue;

View File

@ -27,6 +27,61 @@
#include <emmintrin.h>
#endif
// Converts a frame of RGB565 pixels into 0RGB1555 pixels.
//
// output_    : destination buffer, 16 bits per pixel.
// input_     : source buffer, 16 bits per pixel.
// width      : pixels per row.
// height     : number of rows.
// out_stride : distance between destination rows, in bytes.
// in_stride  : distance between source rows, in bytes.
//
// Per pixel: bits 15..6 (red and the upper five green bits) move down by one
// position into bits 14..5, the lowest green bit is dropped, and the five
// blue bits are copied unchanged. Bit 15 of every output pixel is zero.
//
// When SSE2 is available, eight pixels are converted per iteration and the
// scalar loop only handles the leftover columns. Both paths produce the same
// result.
void conv_rgb565_0rgb1555(void *output_, const void *input_,
      int width, int height,
      int out_stride, int in_stride)
{
   const uint16_t *input = (const uint16_t*)input_;
   uint16_t *output      = (uint16_t*)output_;

#if defined(__SSE2__)
   // Columns below max_width still have a full 8-pixel vector inside the row.
   int max_width         = width - 7;
   const __m128i hi_mask = _mm_set1_epi16(0x7fe0);
   const __m128i lo_mask = _mm_set1_epi16(0x1f);
#endif

   // Strides are in bytes, the pointers are 16-bit: advance by stride / 2.
   for (int h = 0; h < height; h++, output += out_stride >> 1, input += in_stride >> 1)
   {
      int w = 0;

#if defined(__SSE2__)
      for (; w < max_width; w += 8)
      {
         const __m128i in = _mm_loadu_si128((const __m128i*)(input + w));
         // Logical right shift by one, matching the scalar path below.
         __m128i hi = _mm_and_si128(_mm_srli_epi16(in, 1), hi_mask);
         __m128i lo = _mm_and_si128(in, lo_mask);
         _mm_storeu_si128((__m128i*)(output + w), _mm_or_si128(hi, lo));
      }
#endif

      // Whole row without SSE2, otherwise just the tail columns.
      for (; w < width; w++)
      {
         uint16_t col = input[w];
         uint16_t hi  = (col >> 1) & 0x7fe0;
         uint16_t lo  = col & 0x1f;
         output[w]    = hi | lo;
      }
   }
}
#if defined(__SSE2__)
void conv_0rgb1555_rgb565(void *output_, const void *input_,
int width, int height,

View File

@ -24,6 +24,10 @@ void conv_0rgb1555_rgb565(void *output, const void *input,
int width, int height,
int out_stride, int in_stride);
void conv_rgb565_0rgb1555(void *output, const void *input,
int width, int height,
int out_stride, int in_stride);
void conv_rgb565_argb8888(void *output, const void *input,
int width, int height,
int out_stride, int in_stride);

View File

@ -74,6 +74,8 @@ static bool set_direct_pix_conv(struct scaler_ctx *ctx)
ctx->direct_pixconv = conv_rgb565_bgr24;
else if (ctx->in_fmt == SCALER_FMT_0RGB1555 && ctx->out_fmt == SCALER_FMT_RGB565)
ctx->direct_pixconv = conv_0rgb1555_rgb565;
else if (ctx->in_fmt == SCALER_FMT_RGB565 && ctx->out_fmt == SCALER_FMT_0RGB1555)
ctx->direct_pixconv = conv_rgb565_0rgb1555;
else if (ctx->in_fmt == SCALER_FMT_BGR24 && ctx->out_fmt == SCALER_FMT_ARGB8888)
ctx->direct_pixconv = conv_bgr24_argb8888;
else if (ctx->in_fmt == SCALER_FMT_ARGB8888 && ctx->out_fmt == SCALER_FMT_0RGB1555)

View File

@ -283,11 +283,19 @@ static void video_frame(const void *data, unsigned width, unsigned height, size_
#ifdef HAVE_DYLIB
if (g_extern.filter.active && data)
{
struct scaler_ctx *scaler = &g_extern.filter.scaler;
scaler->in_width = scaler->out_width = width;
scaler->in_height = scaler->out_height = height;
scaler->in_stride = pitch;
scaler->out_stride = width * sizeof(uint16_t);
scaler_ctx_scale(scaler, g_extern.filter.scaler_out, data);
unsigned owidth = width;
unsigned oheight = height;
g_extern.filter.psize(&owidth, &oheight);
g_extern.filter.prender(g_extern.filter.colormap, g_extern.filter.buffer,
g_extern.filter.pitch, (const uint16_t*)data, pitch, width, height);
g_extern.filter.pitch, g_extern.filter.scaler_out, scaler->out_stride, width, height);
#ifdef HAVE_FFMPEG
if (g_extern.recording && g_settings.video.post_filter_record)