mirror of
https://github.com/libretro/ppsspp.git
synced 2025-02-02 06:44:45 +00:00
Add a NEON 4444 flip color conversion func.
This commit is contained in:
parent
df53bc2e7b
commit
4900bc9082
@ -469,10 +469,9 @@ void ConvertBGR565ToRGBA8888(u32 *dst, const u16 *src, const u32 numPixels) {
|
||||
}
|
||||
}
|
||||
|
||||
void ConvertRGBA4444ToABGR4444(u16 *dst, const u16 *src, const u32 numPixels) {
|
||||
void ConvertRGBA4444ToABGR4444Basic(u16 *dst, const u16 *src, const u32 numPixels) {
|
||||
#ifdef _M_SSE
|
||||
const __m128i maskB = _mm_set1_epi16(0x00F0);
|
||||
const __m128i maskG = _mm_set1_epi16(0x0F00);
|
||||
const __m128i mask0040 = _mm_set1_epi16(0x00F0);
|
||||
|
||||
const __m128i *srcp = (const __m128i *)src;
|
||||
__m128i *dstp = (__m128i *)dst;
|
||||
@ -483,8 +482,8 @@ void ConvertRGBA4444ToABGR4444(u16 *dst, const u16 *src, const u32 numPixels) {
|
||||
for (u32 i = 0; i < sseChunks; ++i) {
|
||||
const __m128i c = _mm_load_si128(&srcp[i]);
|
||||
__m128i v = _mm_srli_epi16(c, 12);
|
||||
v = _mm_or_si128(v, _mm_and_si128(_mm_srli_epi16(c, 4), maskB));
|
||||
v = _mm_or_si128(v, _mm_and_si128(_mm_slli_epi16(c, 4), maskG));
|
||||
v = _mm_or_si128(v, _mm_and_si128(_mm_srli_epi16(c, 4), mask0040));
|
||||
v = _mm_or_si128(v, _mm_slli_epi16(_mm_and_si128(c, mask0040), 4));
|
||||
v = _mm_or_si128(v, _mm_slli_epi16(c, 12));
|
||||
_mm_store_si128(&dstp[i], v);
|
||||
}
|
||||
@ -600,7 +599,9 @@ void ConvertRGB565ToBGR565Basic(u16 *dst, const u16 *src, const u32 numPixels) {
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef ConvertRGBA5551ToABGR1555
|
||||
// Reuse the logic from the header - if these aren't defined, we need externs.
|
||||
#ifndef ConvertRGBA4444ToABGR4444
|
||||
Convert16bppTo16bppFunc ConvertRGBA4444ToABGR4444 = &ConvertRGBA4444ToABGR4444Basic;
|
||||
Convert16bppTo16bppFunc ConvertRGBA5551ToABGR1555 = &ConvertRGBA5551ToABGR1555Basic;
|
||||
Convert16bppTo16bppFunc ConvertRGB565ToBGR565 = &ConvertRGB565ToBGR565Basic;
|
||||
#endif
|
||||
@ -608,6 +609,7 @@ Convert16bppTo16bppFunc ConvertRGB565ToBGR565 = &ConvertRGB565ToBGR565Basic;
|
||||
void SetupColorConv() {
|
||||
#if defined(HAVE_ARMV7) && !defined(ARM64)
|
||||
if (cpu_info.bNEON) {
|
||||
ConvertRGBA4444ToABGR4444 = &ConvertRGBA4444ToABGR4444NEON;
|
||||
ConvertRGBA5551ToABGR1555 = &ConvertRGBA5551ToABGR1555NEON;
|
||||
ConvertRGB565ToBGR565 = &ConvertRGB565ToBGR565NEON;
|
||||
}
|
||||
|
@ -130,10 +130,18 @@ void ConvertBGRA4444ToRGBA8888(u32 *dst, const u16 *src, const u32 numPixels);
|
||||
void ConvertBGRA5551ToRGBA8888(u32 *dst, const u16 *src, const u32 numPixels);
|
||||
void ConvertBGR565ToRGBA8888(u32 *dst, const u16 *src, const u32 numPixels);
|
||||
|
||||
void ConvertRGBA4444ToABGR4444(u16 *dst, const u16 *src, const u32 numPixels);
|
||||
void ConvertRGBA4444ToABGR4444Basic(u16 *dst, const u16 *src, const u32 numPixels);
|
||||
void ConvertRGBA5551ToABGR1555Basic(u16 *dst, const u16 *src, const u32 numPixels);
|
||||
void ConvertRGB565ToBGR565Basic(u16 *dst, const u16 *src, const u32 numPixels);
|
||||
|
||||
// Compile-time selection of what the name ConvertRGBA4444ToABGR4444 refers to.
// ARM64 defined: the name is a macro alias for the NEON implementation.
#if defined(ARM64)
|
||||
#define ConvertRGBA4444ToABGR4444 ConvertRGBA4444ToABGR4444NEON
|
||||
// ARM not defined: the name is a macro alias for the Basic implementation.
#elif !defined(ARM)
|
||||
#define ConvertRGBA4444ToABGR4444 ConvertRGBA4444ToABGR4444Basic
|
||||
// Remaining case (ARM defined, ARM64 not defined): the name is a function
// pointer variable that is only declared here; its definition lives elsewhere.
#else
|
||||
extern Convert16bppTo16bppFunc ConvertRGBA4444ToABGR4444;
|
||||
#endif
|
||||
|
||||
#if defined(ARM64)
|
||||
#define ConvertRGBA5551ToABGR1555 ConvertRGBA5551ToABGR1555NEON
|
||||
#elif !defined(ARM)
|
||||
|
@ -26,6 +26,34 @@
|
||||
|
||||
// TODO: More NEON color conversion funcs.
|
||||
|
||||
// Reverses the order of the four 4-bit channels in each 16-bit pixel
// (nibbles 3,2,1,0 become 0,1,2,3) using NEON, eight pixels per iteration.
//
// dst:       destination buffer, receives numPixels converted pixels.
// src:       source buffer, numPixels pixels are read from it.
// numPixels: number of 16-bit pixels to convert; any count is accepted, the
//            leftover (numPixels % 8) pixels are handed to the Basic version.
void ConvertRGBA4444ToABGR4444NEON(u16 *dst, const u16 *src, const u32 numPixels) {
	// Selects bits 4-7 of every 16-bit lane.
	const uint16x8_t mask0040 = vdupq_n_u16(0x00F0);

	// Largest multiple of 8 not exceeding numPixels.
	u32 simdable = (numPixels / 8) * 8;
	const u16 *srcp = src;
	u16 *dstp = dst;
	for (u32 i = 0; i < simdable; i += 8) {
		uint16x8_t c = vld1q_u16(srcp);

		// Top nibble (bits 12-15) moves down to bits 0-3.
		const uint16x8_t a = vshrq_n_u16(c, 12);
		// Bits 8-11 move down to bits 4-7.
		const uint16x8_t b = vandq_u16(vshrq_n_u16(c, 4), mask0040);
		// Bits 4-7 move up to bits 8-11.
		const uint16x8_t g = vshlq_n_u16(vandq_u16(c, mask0040), 4);
		// Bottom nibble (bits 0-3) moves up to bits 12-15.
		const uint16x8_t r = vshlq_n_u16(c, 12);

		uint16x8_t res = vorrq_u16(vorrq_u16(r, g), vorrq_u16(b, a));
		vst1q_u16(dstp, res);

		srcp += 8;
		dstp += 8;
	}

	// Finish off the rest, if there were any outside the simdable range.
	if (numPixels > simdable) {
		// srcp/dstp already point just past the last SIMD-processed pixel, so
		// the tail must continue from them. Passing the original dst/src here
		// would re-convert the first pixels and leave the real tail unwritten.
		ConvertRGBA4444ToABGR4444Basic(dstp, srcp, numPixels - simdable);
	}
}
|
||||
|
||||
void ConvertRGBA5551ToABGR1555NEON(u16 *dst, const u16 *src, const u32 numPixels) {
|
||||
const uint16x8_t maskB = vdupq_n_u16(0x003E);
|
||||
const uint16x8_t maskG = vdupq_n_u16(0x07C0);
|
||||
|
@ -19,5 +19,6 @@
|
||||
|
||||
#include "ColorConv.h"
|
||||
|
||||
void ConvertRGBA4444ToABGR4444NEON(u16 *dst, const u16 *src, const u32 numPixels);
|
||||
void ConvertRGBA5551ToABGR1555NEON(u16 *dst, const u16 *src, const u32 numPixels);
|
||||
void ConvertRGB565ToBGR565NEON(u16 *dst, const u16 *src, const u32 numPixels);
|
||||
|
Loading…
x
Reference in New Issue
Block a user