Begin reworking RGB565 as default pixel format.

This commit is contained in:
Themaister 2012-10-20 01:12:02 +02:00
parent 301848e037
commit 93078889cc
19 changed files with 467 additions and 101 deletions

View File

@ -19,7 +19,11 @@ OBJ = retroarch.o \
audio/null.o \
gfx/null.o \
input/null.o \
screenshot.o
screenshot.o \
gfx/scaler/scaler.o \
gfx/scaler/pixconv.o \
gfx/scaler/scaler_int.o \
gfx/scaler/filter.o
JOYCONFIG_OBJ = tools/retroarch-joyconfig.o \
conf/config_file.o \
@ -142,14 +146,6 @@ ifeq ($(PERF_TEST), 1)
OBJ += benchmark.o
endif
ifeq ($(HAVE_SDL), 1)
OBJ += gfx/scaler/scaler.o gfx/scaler/pixconv.o gfx/scaler/scaler_int.o gfx/scaler/filter.o
else ifeq ($(HAVE_OPENGL), 1)
OBJ += gfx/scaler/scaler.o gfx/scaler/pixconv.o gfx/scaler/scaler_int.o gfx/scaler/filter.o
else ifeq ($(HAVE_FFMPEG), 1)
OBJ += gfx/scaler/scaler.o gfx/scaler/pixconv.o gfx/scaler/scaler_int.o gfx/scaler/filter.o
endif
ifeq ($(HAVE_SDL), 1)
OBJ += gfx/sdl_gfx.o input/sdl_input.o input/sdl_joypad.o audio/sdl_audio.o
JOYCONFIG_OBJ += input/sdl_joypad.o

View File

@ -22,7 +22,11 @@ OBJ = retroarch.o \
input/null.o \
fifo_buffer.o \
gfx/null.o \
media/rarch.o
media/rarch.o \
gfx/scaler/scaler.o \
gfx/scaler/pixconv.o \
gfx/scaler/scaler_int.o \
gfx/scaler/filter.o
JOBJ := conf/config_file.o \
tools/retroarch-joyconfig.o \
@ -75,14 +79,6 @@ ifeq ($(PERF_TEST), 1)
OBJ += benchmark.o
endif
ifeq ($(HAVE_SDL), 1)
OBJ += gfx/scaler/scaler.o gfx/scaler/pixconv.o gfx/scaler/scaler_int.o gfx/scaler/filter.o
else ifeq ($(HAVE_OPENGL), 1)
OBJ += gfx/scaler/scaler.o gfx/scaler/pixconv.o gfx/scaler/scaler_int.o gfx/scaler/filter.o
else ifeq ($(HAVE_FFMPEG), 1)
OBJ += gfx/scaler/scaler.o gfx/scaler/pixconv.o gfx/scaler/scaler_int.o gfx/scaler/filter.o
endif
JLIBS =
ifeq ($(HAVE_SDL), 1)

View File

@ -430,9 +430,9 @@ static void init_filter(void)
if (*g_settings.video.filter_path == '\0')
return;
if (g_extern.system.rgb32)
if (g_extern.system.pix_fmt != RETRO_PIXEL_FORMAT_0RGB1555)
{
RARCH_WARN("libretro implementation uses XRGB8888 format. CPU filters only support 0RGB1555.\n");
RARCH_WARN("CPU filters only support 0RGB1555.\n");
return;
}
@ -536,6 +536,34 @@ static void init_shader_dir(void)
}
#endif
// Sets up driver.scaler and driver.scaler_out so that 0RGB1555 frames
// can be converted to RGB565 before being passed on.
//
// size: edge length, in pixels, of the square conversion buffer to allocate.
//
// Returns true when no converter is needed (pixel format is not 0RGB1555)
// or when the converter was set up; false if the scaler filter could not be
// generated or the output buffer could not be allocated.
static bool init_video_pixel_converter(unsigned size)
{
   // Every other pixel format is passed through untouched.
   if (g_extern.system.pix_fmt != RETRO_PIXEL_FORMAT_0RGB1555)
      return true;

   RARCH_WARN("0RGB1555 pixel format is deprecated, and will be slower. For 15/16-bit, RGB565 format is preferred.\n");

   // We'll tweak these values later,
   // just set most of them to something sane to begin with.
   driver.scaler.in_width    = size;
   driver.scaler.in_height   = size;
   driver.scaler.out_width   = size;
   driver.scaler.out_height  = size;
   driver.scaler.scaler_type = SCALER_TYPE_POINT;
   driver.scaler.in_fmt      = SCALER_FMT_0RGB1555;

   // TODO: Pick either ARGB8888 or RGB565 depending on driver ...
   driver.scaler.out_fmt     = SCALER_FMT_RGB565;

   if (!scaler_ctx_gen_filter(&driver.scaler))
      return false;

   // One 16-bit pixel per element. The element count is computed in size_t so
   // it cannot wrap in unsigned arithmetic, and calloc() itself rejects a
   // count * size overflow.
   driver.scaler_out = calloc((size_t)size * size, sizeof(uint16_t));

   // Report allocation failure instead of leaving a NULL output buffer
   // behind for the conversion step to write into.
   if (!driver.scaler_out)
      return false;

   return true;
}
void init_video_input(void)
{
#ifdef HAVE_DYLIB
@ -585,12 +613,21 @@ void init_video_input(void)
}
}
RARCH_LOG("Video @ %ux%u\n", width, height);
if (width && height)
RARCH_LOG("Video @ %ux%u\n", width, height);
else
RARCH_LOG("Video @ fullscreen\n");
driver.display_type = RARCH_DISPLAY_NONE;
driver.video_display = 0;
driver.video_window = 0;
if (!init_video_pixel_converter(RARCH_SCALE_BASE * scale))
{
RARCH_ERR("Failed to init pixel converter.\n");
rarch_fail(1, "init_video_input()");
}
video_info_t video = {0};
video.width = width;
video.height = height;
@ -599,7 +636,7 @@ void init_video_input(void)
video.force_aspect = g_settings.video.force_aspect;
video.smooth = g_settings.video.smooth;
video.input_scale = scale;
video.rgb32 = g_extern.filter.active || g_extern.system.rgb32;
video.rgb32 = g_extern.filter.active || (g_extern.system.pix_fmt == RETRO_PIXEL_FORMAT_XRGB8888);
const input_driver_t *tmp = driver.input;
driver.video_data = video_init_func(&video, &driver.input, &driver.input_data);
@ -643,6 +680,14 @@ void init_video_input(void)
}
}
// Releases everything the pixel converter keeps in the driver state:
// the conversion output buffer and the scaler context.
static void deinit_pixel_converter(void)
{
   // free(NULL) is a no-op, so this is safe when no buffer was allocated.
   free(driver.scaler_out);
   driver.scaler_out = NULL;

   // Let the scaler release what it owns before the context is wiped.
   scaler_ctx_gen_reset(&driver.scaler);
   memset(&driver.scaler, 0, sizeof(driver.scaler));
}
void uninit_video_input(void)
{
if (driver.input_data != driver.video_data && driver.input)
@ -651,6 +696,8 @@ void uninit_video_input(void)
if (driver.video_data && driver.video)
video_free_func();
deinit_pixel_converter();
#ifdef HAVE_DYLIB
deinit_filter();
#endif

View File

@ -23,6 +23,7 @@
#include <stdlib.h>
#include <stdint.h>
#include "msvc/msvc_compat.h"
#include "gfx/scaler/scaler.h"
#ifdef HAVE_CONFIG_H
#include "config.h"
@ -240,6 +241,9 @@ typedef struct driver
uintptr_t video_display;
uintptr_t video_window;
enum rarch_display_type display_type;
struct scaler_ctx scaler;
void *scaler_out;
} driver_t;
void init_drivers(void);

View File

@ -420,17 +420,18 @@ static bool environment_cb(unsigned cmd, void *data)
case RETRO_ENVIRONMENT_SET_PIXEL_FORMAT:
{
enum retro_pixel_format pix_fmt = *(const enum retro_pixel_format*)data;
bool rgb32 = false;
switch (pix_fmt)
{
case RETRO_PIXEL_FORMAT_0RGB1555:
rgb32 = false;
RARCH_LOG("Environ SET_PIXEL_FORMAT: 0RGB1555.\n");
break;
case RETRO_PIXEL_FORMAT_RGB565:
RARCH_LOG("Environ SET_PIXEL_FORMAT: RGB565.\n");
break;
#ifndef RARCH_CONSOLE
case RETRO_PIXEL_FORMAT_XRGB8888:
rgb32 = true;
RARCH_LOG("Environ SET_PIXEL_FORMAT: XRGB8888.\n");
break;
#endif
@ -438,7 +439,7 @@ static bool environment_cb(unsigned cmd, void *data)
return false;
}
g_extern.system.rgb32 = rgb32;
g_extern.system.pix_fmt = pix_fmt;
break;
}

View File

@ -302,7 +302,7 @@ struct global
unsigned rotation;
bool shutdown;
unsigned performance_level;
bool rgb32;
enum retro_pixel_format pix_fmt;
bool force_nonblock;

View File

@ -460,8 +460,8 @@ static void gl_create_fbo_textures(gl_t *gl)
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, filter_type);
glTexImage2D(GL_TEXTURE_2D,
0, RARCH_GL_INTERNAL_FORMAT, gl->fbo_rect[i].width, gl->fbo_rect[i].height,
0, RARCH_GL_TEXTURE_TYPE,
0, RARCH_GL_INTERNAL_FORMAT32, gl->fbo_rect[i].width, gl->fbo_rect[i].height,
0, RARCH_GL_TEXTURE_TYPE32,
RARCH_GL_FORMAT32, NULL);
}
@ -710,8 +710,8 @@ static void gl_check_fbo_dimensions(gl_t *gl)
glBindTexture(GL_TEXTURE_2D, gl->fbo_texture[i]);
glTexImage2D(GL_TEXTURE_2D,
0, RARCH_GL_INTERNAL_FORMAT, gl->fbo_rect[i].width, gl->fbo_rect[i].height,
0, RARCH_GL_TEXTURE_TYPE,
0, RARCH_GL_INTERNAL_FORMAT32, gl->fbo_rect[i].width, gl->fbo_rect[i].height,
0, RARCH_GL_TEXTURE_TYPE32,
RARCH_GL_FORMAT32, NULL);
pglFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, gl->fbo_texture[i], 0);
@ -862,8 +862,8 @@ static void gl_update_input_size(gl_t *gl, unsigned width, unsigned height, unsi
}
// It is *much* faster (order of magnitude on my setup) to use a custom SIMD-optimized conversion routine than letting GL do it :(
#if !defined(HAVE_PSGL)
static inline void gl_convert_frame_rgb15_32(gl_t *gl, void *output, const void *input, int width, int height, int in_pitch)
#if !defined(HAVE_PSGL) && !defined(HAVE_OPENGLES2)
static inline void gl_convert_frame_rgb16_32(gl_t *gl, void *output, const void *input, int width, int height, int in_pitch)
{
if (width != gl->scaler.in_width || height != gl->scaler.in_height)
{
@ -871,7 +871,7 @@ static inline void gl_convert_frame_rgb15_32(gl_t *gl, void *output, const void
gl->scaler.in_height = height;
gl->scaler.out_width = width;
gl->scaler.out_height = height;
gl->scaler.in_fmt = SCALER_FMT_0RGB1555;
gl->scaler.in_fmt = SCALER_FMT_RGB565;
gl->scaler.out_fmt = SCALER_FMT_ARGB8888;
gl->scaler.scaler_type = SCALER_TYPE_POINT;
scaler_ctx_gen_filter(&gl->scaler);
@ -925,37 +925,37 @@ static void gl_init_textures(gl_t *gl)
#else
static inline void gl_copy_frame(gl_t *gl, const void *frame, unsigned width, unsigned height, unsigned pitch)
{
if (gl->base_size == 2) // ARGB1555 => ARGB8888, SIMD-style :D
#ifdef HAVE_OPENGLES2
// No GL_UNPACK_ROW_LENGTH ;(
unsigned pitch_width = pitch / gl->base_size;
if (width == pitch_width) // Happy path :D
{
glPixelStorei(GL_UNPACK_ALIGNMENT, get_alignment(width * sizeof(uint32_t))); // Always use 32-bit textures.
gl_convert_frame_rgb15_32(gl, gl->conv_buffer, frame, width, height, pitch);
glTexSubImage2D(GL_TEXTURE_2D,
0, 0, 0, width, height, gl->texture_type,
gl->texture_fmt, frame);
}
else // Slower path.
{
const uint8_t *src = (const uint8_t*)frame;
for (unsigned h = 0; h < height; h++, src += pitch)
{
glTexSubImage2D(GL_TEXTURE_2D,
0, 0, h, width, 1, gl->texture_type,
gl->texture_fmt, src);
}
}
#else
glPixelStorei(GL_UNPACK_ALIGNMENT, get_alignment(pitch));
if (gl->base_size == 2)
{
// Always use 32-bit textures on desktop GL.
gl_convert_frame_rgb16_32(gl, gl->conv_buffer, frame, width, height, pitch);
glTexSubImage2D(GL_TEXTURE_2D,
0, 0, 0, width, height, gl->texture_type,
gl->texture_fmt, gl->conv_buffer);
}
else
{
#ifdef HAVE_OPENGLES2
// No GL_UNPACK_ROW_LENGTH ;(
unsigned pitch_width = pitch / gl->base_size;
if (width == pitch_width) // Happy path :D
{
glTexSubImage2D(GL_TEXTURE_2D,
0, 0, 0, width, height, gl->texture_type,
gl->texture_fmt, frame);
}
else // Probably slower path.
{
const uint32_t *src = (const uint32_t*)frame;
for (unsigned h = 0; h < height; h++, src += pitch_width)
{
glTexSubImage2D(GL_TEXTURE_2D,
0, 0, h, width, 1, gl->texture_type,
gl->texture_fmt, src);
}
}
#else
glPixelStorei(GL_UNPACK_ALIGNMENT, get_alignment(pitch));
glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / gl->base_size);
glTexSubImage2D(GL_TEXTURE_2D,
@ -963,8 +963,8 @@ static inline void gl_copy_frame(gl_t *gl, const void *frame, unsigned width, un
gl->texture_fmt, frame);
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
#endif
}
#endif
}
static void gl_init_textures(gl_t *gl)
@ -980,7 +980,7 @@ static void gl_init_textures(gl_t *gl)
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, gl->tex_filter);
glTexImage2D(GL_TEXTURE_2D,
0, RARCH_GL_INTERNAL_FORMAT, gl->tex_w, gl->tex_h, 0, gl->texture_type,
0, gl->internal_fmt, gl->tex_w, gl->tex_h, 0, gl->texture_type,
gl->texture_fmt, gl->empty_buf ? gl->empty_buf : NULL);
}
glBindTexture(GL_TEXTURE_2D, gl->texture[gl->tex_index]);
@ -1305,7 +1305,8 @@ static void *gl_init(const video_info_t *video, const input_driver_t **input, vo
else
gl->tex_filter = video->smooth ? GL_LINEAR : GL_NEAREST;
gl->texture_type = RARCH_GL_TEXTURE_TYPE;
gl->internal_fmt = video->rgb32 ? RARCH_GL_INTERNAL_FORMAT32 : RARCH_GL_INTERNAL_FORMAT16;
gl->texture_type = video->rgb32 ? RARCH_GL_TEXTURE_TYPE32 : RARCH_GL_TEXTURE_TYPE16;
gl->texture_fmt = video->rgb32 ? RARCH_GL_FORMAT32 : RARCH_GL_FORMAT16;
gl->base_size = video->rgb32 ? sizeof(uint32_t) : sizeof(uint16_t);

View File

@ -210,7 +210,8 @@ typedef struct gl
struct gl_coords coords;
GLuint pbo;
GLenum texture_type; // XBGR1555 or ARGB
GLenum internal_fmt;
GLenum texture_type; // RGB565 or ARGB
GLenum texture_fmt;
GLenum border_type;
unsigned base_size; // 2 or 4
@ -245,19 +246,25 @@ extern PFNGLACTIVETEXTUREPROC pglActiveTexture;
#endif
#if defined(HAVE_PSGL)
#define RARCH_GL_INTERNAL_FORMAT GL_ARGB_SCE
#define RARCH_GL_TEXTURE_TYPE GL_BGRA
#define RARCH_GL_INTERNAL_FORMAT32 GL_ARGB_SCE
#define RARCH_GL_INTERNAL_FORMAT16 GL_ARGB_SCE
#define RARCH_GL_TEXTURE_TYPE32 GL_BGRA
#define RARCH_GL_TEXTURE_TYPE16 GL_BGRA
#define RARCH_GL_FORMAT32 GL_UNSIGNED_INT_8_8_8_8_REV
#define RARCH_GL_FORMAT16 GL_RGB5_A1
#elif defined(HAVE_OPENGLES)
#define RARCH_GL_INTERNAL_FORMAT GL_BGRA_EXT
#define RARCH_GL_TEXTURE_TYPE GL_BGRA_EXT
#define RARCH_GL_INTERNAL_FORMAT32 GL_BGRA_EXT
#define RARCH_GL_INTERNAL_FORMAT16 GL_RGB
#define RARCH_GL_TEXTURE_TYPE32 GL_BGRA_EXT
#define RARCH_GL_TEXTURE_TYPE16 GL_RGB
#define RARCH_GL_FORMAT32 GL_UNSIGNED_BYTE
// 15-bit is converted to 32-bit directly as we have to convert anyways.
#define RARCH_GL_FORMAT16 GL_UNSIGNED_BYTE
#define RARCH_GL_FORMAT16 GL_UNSIGNED_SHORT_5_6_5
#else
#define RARCH_GL_INTERNAL_FORMAT GL_RGBA
#define RARCH_GL_TEXTURE_TYPE GL_BGRA
// On desktop, we always use 32-bit.
#define RARCH_GL_INTERNAL_FORMAT32 GL_RGBA
#define RARCH_GL_INTERNAL_FORMAT16 GL_RGBA
#define RARCH_GL_TEXTURE_TYPE32 GL_BGRA
#define RARCH_GL_TEXTURE_TYPE16 GL_BGRA
#define RARCH_GL_FORMAT32 GL_UNSIGNED_INT_8_8_8_8_REV
#define RARCH_GL_FORMAT16 GL_UNSIGNED_INT_8_8_8_8_REV
#endif

View File

@ -27,6 +27,64 @@
#include <emmintrin.h>
#endif
#if defined(__SSE2__)
// Converts an image from 0RGB1555 to RGB565.
//
// width/height are in pixels, out_stride/in_stride are in bytes.
// Red and green are moved up by one bit, blue stays in place, and the
// most significant green bit is copied into the new lowest green bit.
// Eight pixels are handled per SSE2 iteration; leftovers go through the
// scalar tail loop.
void conv_0rgb1555_rgb565(void *output_, const void *input_,
      int width, int height,
      int out_stride, int in_stride)
{
   const uint16_t *src_row = (const uint16_t*)input_;
   uint16_t *dst_row       = (uint16_t*)output_;

   const __m128i mask_red_green = _mm_set1_epi16((int16_t)((0x1f << 11) | (0x1f << 6)));
   const __m128i mask_blue      = _mm_set1_epi16(0x1f);
   const __m128i mask_green_lsb = _mm_set1_epi16(1 << 5);

   // Last x (exclusive) at which a full 8-pixel load still fits in the row.
   const int simd_limit = width - 7;

   for (int y = 0; y < height; y++)
   {
      int x = 0;

      while (x < simd_limit)
      {
         const __m128i px = _mm_loadu_si128((const __m128i*)(src_row + x));

         __m128i packed = _mm_and_si128(_mm_slli_epi16(px, 1), mask_red_green);
         packed = _mm_or_si128(packed, _mm_and_si128(px, mask_blue));
         packed = _mm_or_si128(packed,
               _mm_and_si128(_mm_srli_epi16(px, 4), mask_green_lsb));

         _mm_storeu_si128((__m128i*)(dst_row + x), packed);
         x += 8;
      }

      // Scalar tail for the remaining (width % 8) pixels.
      while (x < width)
      {
         const uint16_t px        = src_row[x];
         const uint16_t red_green = (px << 1) & ((0x1f << 11) | (0x1f << 6));
         const uint16_t blue      = px & 0x1f;
         const uint16_t green_lsb = (px >> 4) & (1 << 5);
         dst_row[x] = red_green | blue | green_lsb;
         x++;
      }

      dst_row += out_stride >> 1;
      src_row += in_stride >> 1;
   }
}
#else
// Converts an image from 0RGB1555 to RGB565 (portable scalar version).
//
// width/height are in pixels, out_stride/in_stride are in bytes.
// Red and green are moved up by one bit, blue stays in place, and the
// most significant green bit is copied into the new lowest green bit.
void conv_0rgb1555_rgb565(void *output_, const void *input_,
      int width, int height,
      int out_stride, int in_stride)
{
   const uint16_t *src_row = (const uint16_t*)input_;
   uint16_t *dst_row       = (uint16_t*)output_;

   for (int y = 0; y < height; y++)
   {
      int x = 0;
      while (x < width)
      {
         const uint16_t px        = src_row[x];
         const uint16_t red_green = (px << 1) & ((0x1f << 11) | (0x1f << 6));
         const uint16_t blue      = px & 0x1f;
         const uint16_t green_lsb = (px >> 4) & (1 << 5);
         dst_row[x] = red_green | blue | green_lsb;
         x++;
      }

      dst_row += out_stride >> 1;
      src_row += in_stride >> 1;
   }
}
#endif
#if defined(__SSE2__)
void conv_0rgb1555_argb8888(void *output_, const void *input_,
int width, int height,
@ -109,6 +167,90 @@ void conv_0rgb1555_argb8888(void *output_, const void *input_,
}
#endif
#if defined(__SSE2__)
// Converts an image from RGB565 to ARGB8888 with alpha forced to 0xff.
//
// width/height are in pixels, out_stride/in_stride are in bytes.
// Each channel is expanded to 8 bits by replicating its top bits into the
// low bits: (c << 3) | (c >> 2) for the 5-bit channels and
// (c << 2) | (c >> 4) for 6-bit green. Eight pixels are handled per SSE2
// iteration; leftovers go through the scalar tail loop.
void conv_rgb565_argb8888(void *output_, const void *input_,
      int width, int height,
      int out_stride, int in_stride)
{
   const uint16_t *input = (const uint16_t*)input_;
   uint32_t *output      = (uint32_t*)output_;

   const __m128i pix_mask_r = _mm_set1_epi16(0x1f << 10);
   const __m128i pix_mask_g = _mm_set1_epi16(0x3f << 5);
   const __m128i pix_mask_b = _mm_set1_epi16(0x1f << 5);

   // Taking the high 16 bits of (channel * mul16_x) performs the bit
   // replication described above for a channel positioned by pix_mask_x.
   const __m128i mul16_r = _mm_set1_epi16(0x0210);
   const __m128i mul16_g = _mm_set1_epi16(0x2080);
   const __m128i mul16_b = _mm_set1_epi16(0x4200);
   const __m128i a       = _mm_set1_epi16(0x00ff);

   int max_width = width - 7;

   for (int h = 0; h < height; h++, output += out_stride >> 2, input += in_stride >> 1)
   {
      int w;
      for (w = 0; w < max_width; w += 8)
      {
         const __m128i in = _mm_loadu_si128((const __m128i*)(input + w));
         __m128i r = _mm_and_si128(_mm_srli_epi16(in, 1), pix_mask_r);
         __m128i g = _mm_and_si128(in, pix_mask_g);
         __m128i b = _mm_and_si128(_mm_slli_epi16(in, 5), pix_mask_b);

         r = _mm_mulhi_epi16(r, mul16_r);
         g = _mm_mulhi_epi16(g, mul16_g);
         b = _mm_mulhi_epi16(b, mul16_b);

         // Byte-interleave into {B, G, 0, 0} and {R, A, 0, 0} per pixel,
         // then move the R/A pair up two bytes and merge: {B, G, R, A}.
         __m128i res_lo_bg = _mm_unpacklo_epi8(b, g);
         __m128i res_hi_bg = _mm_unpackhi_epi8(b, g);
         __m128i res_lo_ra = _mm_unpacklo_epi8(r, a);
         __m128i res_hi_ra = _mm_unpackhi_epi8(r, a);

         __m128i res_lo = _mm_or_si128(res_lo_bg, _mm_slli_si128(res_lo_ra, 2));
         __m128i res_hi = _mm_or_si128(res_hi_bg, _mm_slli_si128(res_hi_ra, 2));

         _mm_storeu_si128((__m128i*)(output + w + 0), res_lo);
         _mm_storeu_si128((__m128i*)(output + w + 4), res_hi);
      }

      // Scalar tail for the remaining (width % 8) pixels.
      for (; w < width; w++)
      {
         uint32_t col = input[w];
         uint32_t r   = (col >> 11) & 0x1f;
         uint32_t g   = (col >>  5) & 0x3f;
         uint32_t b   = (col >>  0) & 0x1f;
         r = (r << 3) | (r >> 2);
         g = (g << 2) | (g >> 4);
         b = (b << 3) | (b >> 2);

         // Unsigned constant: shifting a signed 0xff left by 24 would
         // overflow int, which is undefined behaviour.
         output[w] = (UINT32_C(0xff) << 24) | (r << 16) | (g << 8) | (b << 0);
      }
   }
}
#else
// Converts an image from RGB565 to ARGB8888 with alpha forced to 0xff
// (portable scalar version).
//
// width/height are in pixels, out_stride/in_stride are in bytes.
// Each channel is expanded to 8 bits by replicating its top bits into the
// low bits: (c << 3) | (c >> 2) for the 5-bit channels and
// (c << 2) | (c >> 4) for 6-bit green.
void conv_rgb565_argb8888(void *output_, const void *input_,
      int width, int height,
      int out_stride, int in_stride)
{
   const uint16_t *input = (const uint16_t*)input_;
   uint32_t *output      = (uint32_t*)output_;

   for (int h = 0; h < height; h++, output += out_stride >> 2, input += in_stride >> 1)
   {
      for (int w = 0; w < width; w++)
      {
         uint32_t col = input[w];
         uint32_t r   = (col >> 11) & 0x1f;
         uint32_t g   = (col >>  5) & 0x3f;
         uint32_t b   = (col >>  0) & 0x1f;
         r = (r << 3) | (r >> 2);
         g = (g << 2) | (g >> 4);
         b = (b << 3) | (b >> 2);

         // Unsigned constant: shifting a signed 0xff left by 24 would
         // overflow int, which is undefined behaviour.
         output[w] = (UINT32_C(0xff) << 24) | (r << 16) | (g << 8) | (b << 0);
      }
   }
}
#endif
#if defined(__SSE2__)
// :( TODO: Make this saner.
static inline void store_bgr24_sse2(void *output, __m128i a, __m128i b, __m128i c, __m128i d)
@ -223,6 +365,80 @@ void conv_0rgb1555_bgr24(void *output_, const void *input_,
}
}
}
// Converts an image from RGB565 to packed 24-bit BGR (SSE2 version).
//
// width/height are in pixels; out_stride/in_stride are in bytes
// (the input pointer, a uint16_t*, advances by in_stride >> 1 per row).
// Sixteen pixels are processed per SIMD iteration, producing 48 output
// bytes; the remaining pixels are handled by the scalar tail loop.
void conv_rgb565_bgr24(void *output_, const void *input_,
int width, int height,
int out_stride, int in_stride)
{
const uint16_t *input = (const uint16_t*)input_;
uint8_t *output = (uint8_t*)output_;
// Masks for each channel after it has been shifted into position:
// red at bits 14..10, green at bits 10..5, blue at bits 9..5.
const __m128i pix_mask_r = _mm_set1_epi16(0x1f << 10);
const __m128i pix_mask_g = _mm_set1_epi16(0x3f << 5);
const __m128i pix_mask_b = _mm_set1_epi16(0x1f << 5);
// Multipliers for _mm_mulhi_epi16: the high 16 bits of the product equal
// (c << 3) | (c >> 2) for the 5-bit channels and (c << 2) | (c >> 4) for
// 6-bit green, i.e. the same expansion the scalar tail below performs.
const __m128i mul16_r = _mm_set1_epi16(0x0210);
const __m128i mul16_g = _mm_set1_epi16(0x2080);
const __m128i mul16_b = _mm_set1_epi16(0x4200);
// Filler byte interleaved next to red so each pixel becomes a 32-bit lane.
const __m128i a = _mm_set1_epi16(0x00ff);
// Upper bound (exclusive) for w so that a full 16-pixel load stays in the row.
int max_width = width - 15;
for (int h = 0; h < height; h++, output += out_stride, input += in_stride >> 1)
{
uint8_t *out = output;
int w;
for (w = 0; w < max_width; w += 16, out += 48)
{
// Two unaligned loads of eight 16-bit pixels each.
const __m128i in0 = _mm_loadu_si128((const __m128i*)(input + w));
const __m128i in1 = _mm_loadu_si128((const __m128i*)(input + w + 8));
// Isolate each channel at the bit position its multiplier expects.
__m128i r0 = _mm_and_si128(_mm_srli_epi16(in0, 1), pix_mask_r);
__m128i g0 = _mm_and_si128(in0, pix_mask_g);
__m128i b0 = _mm_and_si128(_mm_slli_epi16(in0, 5), pix_mask_b);
__m128i r1 = _mm_and_si128(_mm_srli_epi16(in1, 1), pix_mask_r);
__m128i g1 = _mm_and_si128(in1, pix_mask_g);
__m128i b1 = _mm_and_si128(_mm_slli_epi16(in1, 5), pix_mask_b);
// Expand every channel to 8 bits (value ends up in the low byte of each lane).
r0 = _mm_mulhi_epi16(r0, mul16_r);
g0 = _mm_mulhi_epi16(g0, mul16_g);
b0 = _mm_mulhi_epi16(b0, mul16_b);
r1 = _mm_mulhi_epi16(r1, mul16_r);
g1 = _mm_mulhi_epi16(g1, mul16_g);
b1 = _mm_mulhi_epi16(b1, mul16_b);
// Byte-interleave into {B, G, 0, 0} and {R, 0xff, 0, 0} per pixel.
__m128i res_lo_bg0 = _mm_unpacklo_epi8(b0, g0);
__m128i res_hi_bg0 = _mm_unpackhi_epi8(b0, g0);
__m128i res_lo_ra0 = _mm_unpacklo_epi8(r0, a);
__m128i res_hi_ra0 = _mm_unpackhi_epi8(r0, a);
__m128i res_lo_bg1 = _mm_unpacklo_epi8(b1, g1);
__m128i res_hi_bg1 = _mm_unpackhi_epi8(b1, g1);
__m128i res_lo_ra1 = _mm_unpacklo_epi8(r1, a);
__m128i res_hi_ra1 = _mm_unpackhi_epi8(r1, a);
// Move the R/0xff pair up two bytes and merge: {B, G, R, 0xff} per pixel.
__m128i res_lo0 = _mm_or_si128(res_lo_bg0, _mm_slli_si128(res_lo_ra0, 2));
__m128i res_hi0 = _mm_or_si128(res_hi_bg0, _mm_slli_si128(res_hi_ra0, 2));
__m128i res_lo1 = _mm_or_si128(res_lo_bg1, _mm_slli_si128(res_lo_ra1, 2));
__m128i res_hi1 = _mm_or_si128(res_hi_bg1, _mm_slli_si128(res_hi_ra1, 2));
// NOTE(review): store_bgr24_sse2 is defined outside this block; it presumably
// drops the fourth byte of every pixel and writes the 48 bytes that `out`
// is advanced by per iteration -- confirm against its definition.
store_bgr24_sse2(out, res_lo0, res_hi0, res_lo1, res_hi1);
}
// Scalar tail: continues at `out`, right after the SIMD-written bytes.
for (; w < width; w++)
{
uint32_t col = input[w];
uint32_t r = (col >> 11) & 0x1f;
uint32_t g = (col >> 5) & 0x3f;
uint32_t b = (col >> 0) & 0x1f;
r = (r << 3) | (r >> 2);
g = (g << 2) | (g >> 4);
b = (b << 3) | (b >> 2);
*out++ = b;
*out++ = g;
*out++ = r;
}
}
}
#else
void conv_0rgb1555_bgr24(void *output_, const void *input_,
int width, int height,
@ -250,6 +466,33 @@ void conv_0rgb1555_bgr24(void *output_, const void *input_,
}
}
}
// Converts an image from RGB565 to packed 24-bit BGR (portable scalar version).
//
// width/height are in pixels; out_stride/in_stride are in bytes.
// Each pixel becomes three bytes written in B, G, R order, with every
// channel widened to 8 bits by replicating its top bits into the low bits.
void conv_rgb565_bgr24(void *output_, const void *input_,
      int width, int height,
      int out_stride, int in_stride)
{
   const uint16_t *src_row = (const uint16_t*)input_;
   uint8_t *dst_row        = (uint8_t*)output_;

   for (int y = 0; y < height; y++)
   {
      uint8_t *dst = dst_row;

      for (int x = 0; x < width; x++, dst += 3)
      {
         const uint32_t px = src_row[x];
         const uint32_t r5 = (px >> 11) & 0x1f;
         const uint32_t g6 = (px >>  5) & 0x3f;
         const uint32_t b5 = (px >>  0) & 0x1f;

         dst[0] = (b5 << 3) | (b5 >> 2);
         dst[1] = (g6 << 2) | (g6 >> 4);
         dst[2] = (r5 << 3) | (r5 >> 2);
      }

      // Output stride is in bytes; the input pointer is 16-bit, hence the halving.
      dst_row += out_stride;
      src_row += in_stride >> 1;
   }
}
#endif
void conv_bgr24_argb8888(void *output_, const void *input_,

View File

@ -20,6 +20,14 @@ void conv_0rgb1555_argb8888(void *output, const void *input,
int width, int height,
int out_stride, int in_stride);
void conv_0rgb1555_rgb565(void *output, const void *input,
int width, int height,
int out_stride, int in_stride);
void conv_rgb565_argb8888(void *output, const void *input,
int width, int height,
int out_stride, int in_stride);
void conv_bgr24_argb8888(void *output, const void *input,
int width, int height,
int out_stride, int in_stride);
@ -28,6 +36,10 @@ void conv_argb8888_0rgb1555(void *output, const void *input,
int width, int height,
int out_stride, int in_stride);
void conv_argb8888_rgb565(void *output, const void *input,
int width, int height,
int out_stride, int in_stride);
void conv_argb8888_bgr24(void *output, const void *input,
int width, int height,
int out_stride, int in_stride);
@ -36,6 +48,10 @@ void conv_0rgb1555_bgr24(void *output, const void *input,
int width, int height,
int out_stride, int in_stride);
void conv_rgb565_bgr24(void *output, const void *input,
int width, int height,
int out_stride, int in_stride);
void conv_copy(void *output, const void *input,
int width, int height,
int out_stride, int in_stride);

View File

@ -68,6 +68,12 @@ static bool set_direct_pix_conv(struct scaler_ctx *ctx)
ctx->direct_pixconv = conv_copy;
else if (ctx->in_fmt == SCALER_FMT_0RGB1555 && ctx->out_fmt == SCALER_FMT_ARGB8888)
ctx->direct_pixconv = conv_0rgb1555_argb8888;
else if (ctx->in_fmt == SCALER_FMT_RGB565 && ctx->out_fmt == SCALER_FMT_ARGB8888)
ctx->direct_pixconv = conv_rgb565_argb8888;
else if (ctx->in_fmt == SCALER_FMT_RGB565 && ctx->out_fmt == SCALER_FMT_BGR24)
ctx->direct_pixconv = conv_rgb565_bgr24;
else if (ctx->in_fmt == SCALER_FMT_0RGB1555 && ctx->out_fmt == SCALER_FMT_RGB565)
ctx->direct_pixconv = conv_0rgb1555_rgb565;
else if (ctx->in_fmt == SCALER_FMT_BGR24 && ctx->out_fmt == SCALER_FMT_ARGB8888)
ctx->direct_pixconv = conv_bgr24_argb8888;
else if (ctx->in_fmt == SCALER_FMT_ARGB8888 && ctx->out_fmt == SCALER_FMT_0RGB1555)
@ -76,6 +82,8 @@ static bool set_direct_pix_conv(struct scaler_ctx *ctx)
ctx->direct_pixconv = conv_argb8888_bgr24;
else if (ctx->in_fmt == SCALER_FMT_0RGB1555 && ctx->out_fmt == SCALER_FMT_BGR24)
ctx->direct_pixconv = conv_0rgb1555_bgr24;
else if (ctx->in_fmt == SCALER_FMT_RGB565 && ctx->out_fmt == SCALER_FMT_BGR24)
ctx->direct_pixconv = conv_rgb565_bgr24;
else
return false;
@ -94,6 +102,10 @@ static bool set_pix_conv(struct scaler_ctx *ctx)
ctx->in_pixconv = conv_0rgb1555_argb8888;
break;
case SCALER_FMT_RGB565:
ctx->in_pixconv = conv_rgb565_argb8888;
break;
case SCALER_FMT_BGR24:
ctx->in_pixconv = conv_bgr24_argb8888;
break;
@ -160,14 +172,6 @@ bool scaler_ctx_gen_filter(struct scaler_ctx *ctx)
void scaler_ctx_gen_reset(struct scaler_ctx *ctx)
{
#ifdef SCALER_PERF
if (ctx->elapsed_frames)
fprintf(stderr, "[Scaler]: ms / frame: %.3f\n", ctx->elapsed_time_ms / ctx->elapsed_frames);
ctx->elapsed_time_ms = 0.0;
ctx->elapsed_frames = 0;
#endif
scaler_free(ctx->horiz.filter);
scaler_free(ctx->horiz.filter_pos);
scaler_free(ctx->vert.filter);

View File

@ -26,6 +26,7 @@ enum scaler_pix_fmt
{
SCALER_FMT_ARGB8888 = 0,
SCALER_FMT_0RGB1555,
SCALER_FMT_RGB565,
SCALER_FMT_BGR24
};

View File

@ -521,8 +521,8 @@ static void load_texture_data(GLuint *obj, const struct texture_image *img, bool
glPixelStorei(GL_UNPACK_ALIGNMENT, 4);
#endif
glTexImage2D(GL_TEXTURE_2D,
0, RARCH_GL_INTERNAL_FORMAT, img->width, img->height,
0, RARCH_GL_TEXTURE_TYPE, RARCH_GL_FORMAT32, img->pixels);
0, RARCH_GL_INTERNAL_FORMAT32, img->width, img->height,
0, RARCH_GL_TEXTURE_TYPE32, RARCH_GL_FORMAT32, img->pixels);
free(img->pixels);
}

View File

@ -433,8 +433,8 @@ static bool get_texture_image(const char *shader_path, xmlNodePtr ptr)
glPixelStorei(GL_UNPACK_ALIGNMENT, 4);
glTexImage2D(GL_TEXTURE_2D,
0, RARCH_GL_INTERNAL_FORMAT,
img.width, img.height, 0, RARCH_GL_TEXTURE_TYPE, RARCH_GL_FORMAT32, img.pixels);
0, RARCH_GL_INTERNAL_FORMAT32,
img.width, img.height, 0, RARCH_GL_TEXTURE_TYPE32, RARCH_GL_FORMAT32, img.pixels);
pglActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, 0);

View File

@ -355,6 +355,7 @@ enum retro_key
// const enum retro_pixel_format * --
// Sets the internal pixel format used by the implementation.
// The default pixel format is RETRO_PIXEL_FORMAT_0RGB1555.
// This pixel format however, is deprecated (see enum retro_pixel_format).
// If the call returns false, the frontend does not support this pixel format.
// This function should be called inside retro_load_game() or retro_get_system_av_info().
//
@ -368,8 +369,18 @@ enum retro_key
enum retro_pixel_format
{
RETRO_PIXEL_FORMAT_0RGB1555 = 0, // 0RGB1555, native endian. 0 bit must be set to 0.
RETRO_PIXEL_FORMAT_XRGB8888 // XRGB8888, native endian. X bits are ignored.
// 0RGB1555, native endian. 0 bit must be set to 0.
// This pixel format is default for compatibility concerns only.
// If a 15/16-bit pixel format is desired, consider using RGB565.
RETRO_PIXEL_FORMAT_0RGB1555 = 0,
// XRGB8888, native endian. X bits are ignored.
RETRO_PIXEL_FORMAT_XRGB8888 = 1,
// RGB565, native endian. This pixel format is the recommended format to use if a 15/16-bit format is desired
// as it is the pixel format that is typically available on a wide range of low-power devices.
// It is also natively supported in APIs like OpenGL ES.
RETRO_PIXEL_FORMAT_RGB565 = 2
};
struct retro_message
@ -465,6 +476,8 @@ typedef bool (*retro_environment_t)(unsigned cmd, void *data);
// Render a frame. Pixel format is 15-bit 0RGB1555 native endian unless changed (see RETRO_ENVIRONMENT_SET_PIXEL_FORMAT).
// Width and height specify dimensions of buffer.
// Pitch specifies length in bytes between two lines in buffer.
// For performance reasons, it is highly recommended to have a frame that is packed in memory, i.e. pitch == width * byte_per_pixel.
// Certain graphic APIs, such as OpenGL ES, do not like textures that are not packed in memory.
typedef void (*retro_video_refresh_t)(const void *data, unsigned width, unsigned height, size_t pitch);
// Renders a single audio frame. Should only be used if implementation generates a single sample at a time.

View File

@ -175,8 +175,8 @@ static bool ffemu_init_video(struct ff_video_info *video, const struct ffemu_par
switch (param->pix_fmt)
{
case FFEMU_PIX_XRGB1555:
video->scaler.in_fmt = SCALER_FMT_0RGB1555;
case FFEMU_PIX_RGB565:
video->scaler.in_fmt = SCALER_FMT_RGB565;
video->pix_size = 2;
break;

View File

@ -25,7 +25,7 @@ extern "C" {
enum ffemu_pix_format
{
FFEMU_PIX_XRGB1555 = 0,
FFEMU_PIX_RGB565 = 0,
FFEMU_PIX_BGR24,
FFEMU_PIX_ARGB8888
};

View File

@ -24,6 +24,7 @@
#include "file.h"
#include "general.h"
#include "dynamic.h"
#include "benchmark.h"
#include "audio/utils.h"
#include "record/ffemu.h"
#include "rewind.h"
@ -252,6 +253,24 @@ static void video_frame(const void *data, unsigned width, unsigned height, size_
return;
#endif
if (g_extern.system.pix_fmt == RETRO_PIXEL_FORMAT_0RGB1555)
{
RARCH_PERFORMANCE_INIT(video_frame_conv);
RARCH_PERFORMANCE_START(video_frame_conv);
driver.scaler.in_width = width;
driver.scaler.in_height = height;
driver.scaler.out_width = width;
driver.scaler.out_height = height;
driver.scaler.in_stride = pitch;
driver.scaler.out_stride = width * sizeof(uint16_t);
scaler_ctx_scale(&driver.scaler, driver.scaler_out, data);
data = driver.scaler_out;
pitch = driver.scaler.out_stride;
RARCH_PERFORMANCE_STOP(video_frame_conv);
RARCH_PERFORMANCE_LOG("video_frame_conv()", video_frame_conv);
}
// Slightly messy code,
// but we really need to do processing before blocking on VSync for best possible scheduling.
#ifdef HAVE_FFMPEG
@ -1228,7 +1247,7 @@ static void init_recording(void)
params.filename = g_extern.record_path;
params.fps = fps;
params.samplerate = samplerate;
params.pix_fmt = g_extern.system.rgb32 ? FFEMU_PIX_ARGB8888 : FFEMU_PIX_XRGB1555;
params.pix_fmt = g_extern.system.pix_fmt == RETRO_PIXEL_FORMAT_XRGB8888 ? FFEMU_PIX_ARGB8888 : FFEMU_PIX_RGB565;
if (g_settings.video.gpu_record && driver.video->read_viewport)
{

View File

@ -142,22 +142,35 @@ static void dump_line_16(uint8_t *line, const uint16_t *src, unsigned width)
{
uint16_t pixel = *src++;
uint8_t b = (pixel >> 0) & 0x1f;
uint8_t g = (pixel >> 5) & 0x1f;
uint8_t r = (pixel >> 10) & 0x1f;
uint8_t g = (pixel >> 5) & 0x3f;
uint8_t r = (pixel >> 11) & 0x1f;
*line++ = (b << 3) | (b >> 2);
*line++ = (g << 3) | (g >> 2);
*line++ = (g << 2) | (g >> 4);
*line++ = (r << 3) | (r >> 2);
}
}
// Writes `width` 32-bit pixels from `src` to `line` as three bytes each:
// bits 0-7 first, then bits 8-15, then bits 16-23. The top byte of every
// pixel is discarded.
static void dump_line_32(uint8_t *line, const uint32_t *src, unsigned width)
{
   for (unsigned x = 0; x < width; x++)
   {
      const uint32_t px = src[x];
      uint8_t *dst      = line + 3 * (size_t)x;

      dst[0] = (px >>  0) & 0xff;
      dst[1] = (px >>  8) & 0xff;
      dst[2] = (px >> 16) & 0xff;
   }
}
static void dump_content(FILE *file, const void *frame,
int width, int height, int pitch, bool bgr24)
{
const uint8_t *frame_bgr = (const uint8_t*)frame;
const uint16_t *frame16 = (const uint16_t*)frame;
if (!bgr24)
pitch /= sizeof(uint16_t);
union
{
const uint8_t *u8;
const uint16_t *u16;
const uint32_t *u32;
} u;
u.u8 = (const uint8_t*)frame;
uint8_t **lines = (uint8_t**)calloc(height, sizeof(uint8_t*));
if (!lines)
@ -174,13 +187,18 @@ static void dump_content(FILE *file, const void *frame,
if (bgr24) // BGR24 byte order. Can directly copy.
{
for (int j = 0; j < height; j++, frame_bgr += pitch)
dump_line_bgr(lines[j], frame_bgr, width);
for (int j = 0; j < height; j++, u.u8 += pitch)
dump_line_bgr(lines[j], u.u8, width);
}
else // ARGB1555
else if (g_extern.system.pix_fmt == RETRO_PIXEL_FORMAT_XRGB8888)
{
for (int j = 0; j < height; j++, frame16 += pitch)
dump_line_16(lines[j], frame16, width);
for (int j = 0; j < height; j++, u.u8 += pitch)
dump_line_32(lines[j], u.u32, width);
}
else // RGB565
{
for (int j = 0; j < height; j++, u.u8 += pitch)
dump_line_16(lines[j], u.u16, width);
}
#ifdef HAVE_LIBPNG